diff --git a/build/doctrees/ammico.doctree b/build/doctrees/ammico.doctree index 22784e7..324baac 100644 Binary files a/build/doctrees/ammico.doctree and b/build/doctrees/ammico.doctree differ diff --git a/build/doctrees/create_API_key_link.doctree b/build/doctrees/create_API_key_link.doctree index 1ad24bb..3d1e72c 100644 Binary files a/build/doctrees/create_API_key_link.doctree and b/build/doctrees/create_API_key_link.doctree differ diff --git a/build/doctrees/environment.pickle b/build/doctrees/environment.pickle index 10feda5..e513cc8 100644 Binary files a/build/doctrees/environment.pickle and b/build/doctrees/environment.pickle differ diff --git a/build/doctrees/faq_link.doctree b/build/doctrees/faq_link.doctree index eb96cf9..b728852 100644 Binary files a/build/doctrees/faq_link.doctree and b/build/doctrees/faq_link.doctree differ diff --git a/build/doctrees/index.doctree b/build/doctrees/index.doctree index 4a63957..b1a5349 100644 Binary files a/build/doctrees/index.doctree and b/build/doctrees/index.doctree differ diff --git a/build/doctrees/license_link.doctree b/build/doctrees/license_link.doctree index 7b9361d..85f4c8e 100644 Binary files a/build/doctrees/license_link.doctree and b/build/doctrees/license_link.doctree differ diff --git a/build/doctrees/modules.doctree b/build/doctrees/modules.doctree index b3f9a29..7b3ddef 100644 Binary files a/build/doctrees/modules.doctree and b/build/doctrees/modules.doctree differ diff --git a/build/doctrees/nbsphinx/notebooks/DemoNotebook_ammico.ipynb b/build/doctrees/nbsphinx/notebooks/DemoNotebook_ammico.ipynb index a2bbe92..b5fba7a 100644 --- a/build/doctrees/nbsphinx/notebooks/DemoNotebook_ammico.ipynb +++ b/build/doctrees/nbsphinx/notebooks/DemoNotebook_ammico.ipynb @@ -22,7 +22,7 @@ "source": [ "# if running on google colab\\\n", "# PLEASE RUN THIS ONLY AS CPU RUNTIME\n", - "# for a GPU runtime, there are conflicts with pre-installed packages - \n", + "# for a GPU runtime, there are conflicts with pre-installed packages -\n", "# you first need to uninstall them (prepare a clean environment with no pre-installs) and then install ammico\n", "# flake8-noqa-cell\n", "\n", @@ -103,11 +103,11 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", "# jax also sometimes leads to problems on google colab\n", - "# if this is the case, try restarting the kernel and executing this \n", + "# if this is the case, try restarting the kernel and executing this\n", "# and the above two code cells again\n", "import ammico\n", + "\n", "# for displaying a progress bar\n", "from tqdm import tqdm" ] @@ -276,7 +276,7 @@ "source": [ "# dump file name\n", "dump_file = \"dump_file.csv\"\n", - "# dump every N images \n", + "# dump every N images\n", "dump_every = 10" ] }, @@ -299,12 +299,18 @@ "# the highest possible value is 100\n", "race_threshold = 50\n", "gender_threshold = 50\n", - "for num, key in tqdm(enumerate(image_dict.keys()), total=len(image_dict)): # loop through all images\n", - " image_dict[key] = ammico.EmotionDetector(image_dict[key],\n", - " emotion_threshold=emotion_threshold,\n", - " race_threshold=race_threshold,\n", - " gender_threshold=gender_threshold).analyse_image() # analyse image with EmotionDetector and update dict\n", - " if num % dump_every == 0 or num == len(image_dict) - 1: # save results every dump_every to dump_file\n", + "for num, key in tqdm(\n", + " enumerate(image_dict.keys()), total=len(image_dict)\n", + "): # loop through all images\n", + " image_dict[key] = ammico.EmotionDetector(\n", + " image_dict[key],\n", + " 
emotion_threshold=emotion_threshold,\n", + " race_threshold=race_threshold,\n", + " gender_threshold=gender_threshold,\n", + " ).analyse_image() # analyse image with EmotionDetector and update dict\n", + " if (\n", + " num % dump_every == 0 or num == len(image_dict) - 1\n", + " ): # save results every dump_every to dump_file\n", " image_df = ammico.get_dataframe(image_dict)\n", " image_df.to_csv(dump_file)" ] @@ -322,10 +328,16 @@ "metadata": {}, "outputs": [], "source": [ - "for num, key in tqdm(enumerate(image_dict.keys()), total=len(image_dict)): # loop through all images\n", - " image_dict[key] = ammico.TextDetector(image_dict[key], analyse_text=True).analyse_image() # analyse image with EmotionDetector and update dict\n", - " \n", - " if num % dump_every == 0 | num == len(image_dict) - 1: # save results every dump_every to dump_file\n", + "for num, key in tqdm(\n", + " enumerate(image_dict.keys()), total=len(image_dict)\n", + "): # loop through all images\n", + " image_dict[key] = ammico.TextDetector(\n", + " image_dict[key], analyse_text=True\n", + " ).analyse_image() # analyse image with EmotionDetector and update dict\n", + "\n", + " if (\n", + " num % dump_every == 0 | num == len(image_dict) - 1\n", + " ): # save results every dump_every to dump_file\n", " image_df = ammico.get_dataframe(image_dict)\n", " image_df.to_csv(dump_file)" ] @@ -344,13 +356,21 @@ "outputs": [], "source": [ "# initialize the models\n", - "image_summary_detector = ammico.SummaryDetector(subdict = image_dict, analysis_type=\"summary\", model_type=\"base\")\n", + "image_summary_detector = ammico.SummaryDetector(\n", + " subdict=image_dict, analysis_type=\"summary\", model_type=\"base\"\n", + ")\n", "\n", "# run the analysis without having to re-iniatialize the model\n", - "for num, key in tqdm(enumerate(image_dict.keys()),total=len(image_dict)): # loop through all images\n", - " image_dict[key] = image_summary_detector.analyse_image(subdict = image_dict[key], analysis_type=\"summary\") # analyse image with SummaryDetector and update dict\n", - " \n", - " if num % dump_every == 0 | num == len(image_dict) - 1: # save results every dump_every to dump_file\n", + "for num, key in tqdm(\n", + " enumerate(image_dict.keys()), total=len(image_dict)\n", + "): # loop through all images\n", + " image_dict[key] = image_summary_detector.analyse_image(\n", + " subdict=image_dict[key], analysis_type=\"summary\"\n", + " ) # analyse image with SummaryDetector and update dict\n", + "\n", + " if (\n", + " num % dump_every == 0 | num == len(image_dict) - 1\n", + " ): # save results every dump_every to dump_file\n", " image_df = ammico.get_dataframe(image_dict)\n", " image_df.to_csv(dump_file)" ] @@ -369,14 +389,26 @@ "outputs": [], "source": [ "# initialize the models\n", - "image_summary_detector = ammico.SummaryDetector(subdict = image_dict, analysis_type=\"summary\", model_type=\"base\")\n", + "image_summary_detector = ammico.SummaryDetector(\n", + " subdict=image_dict, analysis_type=\"summary\", model_type=\"base\"\n", + ")\n", "\n", - "for num, key in tqdm(enumerate(image_dict.keys()),total=len(image_dict)): # loop through all images\n", - " image_dict[key] = ammico.EmotionDetector(image_dict[key]).analyse_image() # analyse image with EmotionDetector and update dict\n", - " image_dict[key] = ammico.TextDetector(image_dict[key], analyse_text=True).analyse_image() # analyse image with TextDetector and update dict\n", - " image_dict[key] = image_summary_detector.analyse_image(subdict = image_dict[key], 
analysis_type=\"summary\") # analyse image with SummaryDetector and update dict\n", - " \n", - " if num % dump_every == 0 | num == len(image_dict) - 1: # save results every dump_every to dump_file \n", + "for num, key in tqdm(\n", + " enumerate(image_dict.keys()), total=len(image_dict)\n", + "): # loop through all images\n", + " image_dict[key] = ammico.EmotionDetector(\n", + " image_dict[key]\n", + " ).analyse_image() # analyse image with EmotionDetector and update dict\n", + " image_dict[key] = ammico.TextDetector(\n", + " image_dict[key], analyse_text=True\n", + " ).analyse_image() # analyse image with TextDetector and update dict\n", + " image_dict[key] = image_summary_detector.analyse_image(\n", + " subdict=image_dict[key], analysis_type=\"summary\"\n", + " ) # analyse image with SummaryDetector and update dict\n", + "\n", + " if (\n", + " num % dump_every == 0 | num == len(image_dict) - 1\n", + " ): # save results every dump_every to dump_file\n", " image_df = ammico.get_dataframe(image_dict)\n", " image_df.to_csv(dump_file)" ] @@ -477,7 +509,7 @@ "# set the dump file\n", "# dump file name\n", "dump_file = \"dump_file.csv\"\n", - "# dump every N images \n", + "# dump every N images\n", "dump_every = 10" ] }, @@ -488,9 +520,15 @@ "outputs": [], "source": [ "# analyze the csv file\n", - "for num, key in tqdm(enumerate(text_dict.keys()), total=len(text_dict)): # loop through all text entries\n", - " ammico.TextDetector(text_dict[key], analyse_text=True, skip_extraction=True).analyse_image() # analyse text with TextDetector and update dict\n", - " if num % dump_every == 0 | num == len(text_dict) - 1: # save results every dump_every to dump_file\n", + "for num, key in tqdm(\n", + " enumerate(text_dict.keys()), total=len(text_dict)\n", + "): # loop through all text entries\n", + " ammico.TextDetector(\n", + " text_dict[key], analyse_text=True, skip_extraction=True\n", + " ).analyse_image() # analyse text with TextDetector and update dict\n", + " if (\n", + " num % dump_every == 0 | num == len(text_dict) - 1\n", + " ): # save results every dump_every to dump_file\n", " image_df = ammico.get_dataframe(text_dict)\n", " image_df.to_csv(dump_file)" ] @@ -565,14 +603,23 @@ "metadata": {}, "outputs": [], "source": [ - "for num, key in tqdm(enumerate(image_dict.keys()), total=len(image_dict)): # loop through all images\n", - " image_dict[key] = ammico.TextDetector(image_dict[key], # analyse image with TextDetector and update dict\n", - " analyse_text=True, model_names=[\"sshleifer/distilbart-cnn-12-6\", \n", - " \"distilbert-base-uncased-finetuned-sst-2-english\", \n", - " \"dbmdz/bert-large-cased-finetuned-conll03-english\"], \n", - " revision_numbers=[\"a4f8f3e\", \"af0f99b\", \"f2482bf\"]).analyse_image()\n", - " \n", - " if num % dump_every == 0 | num == len(image_dict) - 1: # save results every dump_every to dump_file\n", + "for num, key in tqdm(\n", + " enumerate(image_dict.keys()), total=len(image_dict)\n", + "): # loop through all images\n", + " image_dict[key] = ammico.TextDetector(\n", + " image_dict[key], # analyse image with TextDetector and update dict\n", + " analyse_text=True,\n", + " model_names=[\n", + " \"sshleifer/distilbart-cnn-12-6\",\n", + " \"distilbert-base-uncased-finetuned-sst-2-english\",\n", + " \"dbmdz/bert-large-cased-finetuned-conll03-english\",\n", + " ],\n", + " revision_numbers=[\"a4f8f3e\", \"af0f99b\", \"f2482bf\"],\n", + " ).analyse_image()\n", + "\n", + " if (\n", + " num % dump_every == 0 | num == len(image_dict) - 1\n", + " ): # save results every dump_every 
to dump_file\n", " image_df = ammico.get_dataframe(image_dict)\n", " image_df.to_csv(dump_file)" ] @@ -654,7 +701,9 @@ "metadata": {}, "outputs": [], "source": [ - "image_summary_detector = ammico.SummaryDetector(image_dict, analysis_type=\"summary\", model_type=\"base\")" + "image_summary_detector = ammico.SummaryDetector(\n", + " image_dict, analysis_type=\"summary\", model_type=\"base\"\n", + ")" ] }, { @@ -663,10 +712,12 @@ "metadata": {}, "outputs": [], "source": [ - "for num, key in tqdm(enumerate(image_dict.keys()),total=len(image_dict)):\n", - " image_dict[key] = image_summary_detector.analyse_image(subdict = image_dict[key], analysis_type=\"summary\")\n", - " \n", - " if num % dump_every == 0 | num == len(image_dict) - 1: \n", + "for num, key in tqdm(enumerate(image_dict.keys()), total=len(image_dict)):\n", + " image_dict[key] = image_summary_detector.analyse_image(\n", + " subdict=image_dict[key], analysis_type=\"summary\"\n", + " )\n", + "\n", + " if num % dump_every == 0 | num == len(image_dict) - 1:\n", " image_df = ammico.get_dataframe(image_dict)\n", " image_df.to_csv(dump_file)" ] @@ -704,14 +755,17 @@ "metadata": {}, "outputs": [], "source": [ - "image_summary_vqa_detector = ammico.SummaryDetector(image_dict, analysis_type=\"questions\", \n", - " model_type=\"vqa\")\n", + "image_summary_vqa_detector = ammico.SummaryDetector(\n", + " image_dict, analysis_type=\"questions\", model_type=\"vqa\"\n", + ")\n", "\n", - "for num, key in tqdm(enumerate(image_dict.keys()),total=len(image_dict)):\n", - " image_dict[key] = image_summary_vqa_detector.analyse_image(subdict=image_dict[key], \n", - " analysis_type=\"questions\", \n", - " list_of_questions = list_of_questions)\n", - " if num % dump_every == 0 | num == len(image_dict) - 1: \n", + "for num, key in tqdm(enumerate(image_dict.keys()), total=len(image_dict)):\n", + " image_dict[key] = image_summary_vqa_detector.analyse_image(\n", + " subdict=image_dict[key],\n", + " analysis_type=\"questions\",\n", + " list_of_questions=list_of_questions,\n", + " )\n", + " if num % dump_every == 0 | num == len(image_dict) - 1:\n", " image_df = ammico.get_dataframe(image_dict)\n", " image_df.to_csv(dump_file)" ] @@ -729,13 +783,16 @@ "metadata": {}, "outputs": [], "source": [ - "image_summary_vqa_detector = ammico.SummaryDetector(image_dict, analysis_type=\"summary_and_questions\", \n", - " model_type=\"base\")\n", - "for num, key in tqdm(enumerate(image_dict.keys()),total=len(image_dict)):\n", - " image_dict[key] = image_summary_vqa_detector.analyse_image(subdict=image_dict[key], \n", - " analysis_type=\"summary_and_questions\", \n", - " list_of_questions = list_of_questions)\n", - " if num % dump_every == 0 | num == len(image_dict) - 1: \n", + "image_summary_vqa_detector = ammico.SummaryDetector(\n", + " image_dict, analysis_type=\"summary_and_questions\", model_type=\"base\"\n", + ")\n", + "for num, key in tqdm(enumerate(image_dict.keys()), total=len(image_dict)):\n", + " image_dict[key] = image_summary_vqa_detector.analyse_image(\n", + " subdict=image_dict[key],\n", + " analysis_type=\"summary_and_questions\",\n", + " list_of_questions=list_of_questions,\n", + " )\n", + " if num % dump_every == 0 | num == len(image_dict) - 1:\n", " image_df = ammico.get_dataframe(image_dict)\n", " image_df.to_csv(dump_file)" ] @@ -767,7 +824,11 @@ "metadata": {}, "outputs": [], "source": [ - "obj = ammico.SummaryDetector(subdict=image_dict, analysis_type = \"summary_and_questions\", model_type = \"blip2_t5_caption_coco_flant5xl\")\n", + "obj = 
ammico.SummaryDetector(\n", + " subdict=image_dict,\n", + " analysis_type=\"summary_and_questions\",\n", + " model_type=\"blip2_t5_caption_coco_flant5xl\",\n", + ")\n", "# list of the new models that can be used:\n", "# \"blip2_t5_pretrain_flant5xxl\",\n", "# \"blip2_t5_pretrain_flant5xl\",\n", @@ -781,7 +842,7 @@ "# Or you can use `caption_coco_`` model types to generate coco-style captions.\n", "# `flant5` and `opt` means that the model equipped with FlanT5 and OPT LLMs respectively.\n", "\n", - "#also you can perform all calculation on cpu if you set device_type= \"cpu\" or gpu if you set device_type= \"cuda\"" + "# also you can perform all calculation on cpu if you set device_type= \"cpu\" or gpu if you set device_type= \"cuda\"" ] }, { @@ -812,9 +873,13 @@ "outputs": [], "source": [ "for key in image_dict:\n", - " image_dict[key] = obj.analyse_image(subdict = image_dict[key], analysis_type=\"questions\", list_of_questions=list_of_questions)\n", + " image_dict[key] = obj.analyse_image(\n", + " subdict=image_dict[key],\n", + " analysis_type=\"questions\",\n", + " list_of_questions=list_of_questions,\n", + " )\n", "\n", - "# analysis_type can be \n", + "# analysis_type can be\n", "# \"summary\",\n", "# \"questions\",\n", "# \"summary_and_questions\"." @@ -848,7 +913,11 @@ "outputs": [], "source": [ "for key in image_dict:\n", - " image_dict[key] = obj.analyse_image(subdict = image_dict[key], analysis_type=\"questions\", list_of_questions=list_of_questions)" + " image_dict[key] = obj.analyse_image(\n", + " subdict=image_dict[key],\n", + " analysis_type=\"questions\",\n", + " list_of_questions=list_of_questions,\n", + " )" ] }, { @@ -886,7 +955,12 @@ "outputs": [], "source": [ "for key in image_dict:\n", - " image_dict[key] = obj.analyse_image(subdict = image_dict[key], analysis_type=\"questions\", list_of_questions=list_of_questions, consequential_questions=True)" + " image_dict[key] = obj.analyse_image(\n", + " subdict=image_dict[key],\n", + " analysis_type=\"questions\",\n", + " list_of_questions=list_of_questions,\n", + " consequential_questions=True,\n", + " )" ] }, { @@ -940,9 +1014,13 @@ "outputs": [], "source": [ "for key in image_dict.keys():\n", - " image_dict[key] = ammico.EmotionDetector(image_dict[key], emotion_threshold=50, race_threshold=50,\n", - " gender_threshold=50,\n", - " accept_disclosure=\"DISCLOSURE_AMMICO\").analyse_image()" + " image_dict[key] = ammico.EmotionDetector(\n", + " image_dict[key],\n", + " emotion_threshold=50,\n", + " race_threshold=50,\n", + " gender_threshold=50,\n", + " accept_disclosure=\"DISCLOSURE_AMMICO\",\n", + " ).analyse_image()" ] }, { @@ -1055,9 +1133,9 @@ " image_names,\n", " features_image_stacked,\n", ") = my_obj.parsing_images(\n", - " model_type, \n", + " model_type,\n", " path_to_save_tensors=\"/content/drive/MyDrive/misinformation-data/\",\n", - " )" + ")" ] }, { @@ -1112,14 +1190,19 @@ "metadata": {}, "outputs": [], "source": [ - "import importlib_resources # only require for image query example\n", - "image_example_query = str(importlib_resources.files(\"ammico\") / \"data\" / \"test-crop-image.png\") # creating the path to the image for the image query example\n", + "import importlib_resources # only require for image query example\n", + "\n", + "image_example_query = str(\n", + " importlib_resources.files(\"ammico\") / \"data\" / \"test-crop-image.png\"\n", + ") # creating the path to the image for the image query example\n", "\n", "search_query = [\n", - " {\"text_input\": \"politician press conference\"}, \n", + " 
{\"text_input\": \"politician press conference\"},\n", " {\"text_input\": \"a world map\"},\n", - " {\"text_input\": \"a dog\"}, # This is how looks text query\n", - " {\"image\": image_example_query}, # This is how looks image query, here `image_example_path` is the path to query image like \"data/test-crop-image.png\"\n", + " {\"text_input\": \"a dog\"}, # This is how looks text query\n", + " {\n", + " \"image\": image_example_query\n", + " }, # This is how looks image query, here `image_example_path` is the path to query image like \"data/test-crop-image.png\"\n", "]" ] }, @@ -1199,7 +1282,7 @@ "outputs": [], "source": [ "my_obj.show_results(\n", - " search_query[0], # you can change the index to see the results for other queries\n", + " search_query[0], # you can change the index to see the results for other queries\n", ")" ] }, @@ -1210,7 +1293,7 @@ "outputs": [], "source": [ "my_obj.show_results(\n", - " search_query[3], # you can change the index to see the results for other queries\n", + " search_query[3], # you can change the index to see the results for other queries\n", ")" ] }, @@ -1349,7 +1432,7 @@ "outputs": [], "source": [ "analysis_explorer = ammico.AnalysisExplorer(image_dict)\n", - "analysis_explorer.run_server(port = 8057)" + "analysis_explorer.run_server(port=8057)" ] }, { @@ -1447,7 +1530,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/build/doctrees/nbsphinx/notebooks/Example cropposts.ipynb b/build/doctrees/nbsphinx/notebooks/Example cropposts.ipynb deleted file mode 100644 index d7c2bcb..0000000 --- a/build/doctrees/nbsphinx/notebooks/Example cropposts.ipynb +++ /dev/null @@ -1,183 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "0", - "metadata": {}, - "source": [ - "# Crop posts module" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "1", - "metadata": {}, - "source": [ - "Crop posts from social media posts images, to keep import text informations from social media posts images.\n", - "We can set some manually cropped views from social media posts as reference for cropping the same type social media posts images." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "# Please ignore this cell: extra install steps that are only executed when running the notebook on Google Colab\n", - "# flake8-noqa-cell\n", - "import os\n", - "if 'google.colab' in str(get_ipython()):\n", - " # we're running on colab\n", - " # first install pinned version of setuptools (latest version doesn't seem to work with this package on colab)\n", - " %pip install setuptools==61 -qqq\n", - " # install the moralization package\n", - " %pip install git+https://github.com/ssciwr/AMMICO.git -qqq\n", - "\n", - " # prevent loading of the wrong opencv library\n", - " %pip uninstall -y opencv-contrib-python\n", - " %pip install opencv-contrib-python\n", - "\n", - " from google.colab import drive\n", - " drive.mount('/content/drive')\n", - "\n", - " if not os.path.isdir('/content/ref'):\n", - " !wget https://github.com/ssciwr/AMMICO/archive/refs/heads/ref-data.zip -q\n", - " !unzip -qq ref-data.zip -d . && mv -f AMMICO-ref-data/data/ref . 
&& rm -rf AMMICO-ref-data ref-data.zip" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "import ammico.cropposts as crpo\n", - "import ammico.utils as utils\n", - "import matplotlib.pyplot as plt\n", - "import cv2\n", - "import importlib_resources\n", - "pkg = importlib_resources.files(\"ammico\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "4", - "metadata": {}, - "source": [ - "The cropping is carried out by finding reference images on the image to be cropped. If a reference matches a region on the image, then everything below the matched region is removed. Manually look at a reference and an example post with the code below." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "# load ref view for cropping the same type social media posts images.\n", - "# substitute the below paths for your samples\n", - "path_ref = pkg / \"data\" / \"ref\" / \"ref-00.png\"\n", - "ref_view = cv2.imread(path_ref.as_posix())\n", - "RGB_ref_view = cv2.cvtColor(ref_view, cv2.COLOR_BGR2RGB)\n", - "plt.figure(figsize=(10, 15))\n", - "plt.imshow(RGB_ref_view)\n", - "plt.show()\n", - "\n", - "path_post = pkg / \"data\" / \"test-crop-image.png\"\n", - "view = cv2.imread(path_post.as_posix())\n", - "RGB_view = cv2.cvtColor(view, cv2.COLOR_BGR2RGB)\n", - "plt.figure(figsize=(10, 15))\n", - "plt.imshow(RGB_view)\n", - "plt.show()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "6", - "metadata": {}, - "source": [ - "You can now crop the image and check on the way that everything looks fine. `plt_match` will plot the matches on the image and below which line content will be cropped; `plt_crop` will plot the cropped text part of the social media post with the comments removed; `plt_image` will plot the image part of the social media post if applicable." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [ - "# crop a posts from reference view, check the cropping \n", - "# this will only plot something if the reference is found on the image\n", - "crop_view = crpo.crop_posts_from_refs(\n", - " [ref_view], view, \n", - " plt_match=True, plt_crop=True, plt_image=True,\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "8", - "metadata": {}, - "source": [ - "Batch crop images from the image folder given in `crop_dir`. The cropped images will save in `save_crop_dir` folder with the same file name as the original file. The reference images with the items to match are provided in `ref_dir`.\n", - "\n", - "Sometimes the cropping will be imperfect, due to improper matches on the image. It is sometimes easier to first categorize the social media posts and then set different references in the reference folder `ref_dir`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "crop_dir = \"data/\"\n", - "ref_dir = pkg / \"data\" / \"ref\" \n", - "save_crop_dir = \"data/crop/\"\n", - "\n", - "files = utils.find_files(path=crop_dir,limit=10,)\n", - "ref_files = utils.find_files(path=ref_dir.as_posix(), limit=100)\n", - "\n", - "crpo.crop_media_posts(files, ref_files, save_crop_dir, plt_match=True, plt_crop=False, plt_image=False)\n", - "print(\"Batch cropping images done\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/build/doctrees/notebooks/DemoNotebook_ammico.doctree b/build/doctrees/notebooks/DemoNotebook_ammico.doctree index 484e23e..5ce5935 100644 Binary files a/build/doctrees/notebooks/DemoNotebook_ammico.doctree and b/build/doctrees/notebooks/DemoNotebook_ammico.doctree differ diff --git a/build/doctrees/notebooks/Example cropposts.doctree b/build/doctrees/notebooks/Example cropposts.doctree deleted file mode 100644 index 83eae56..0000000 Binary files a/build/doctrees/notebooks/Example cropposts.doctree and /dev/null differ diff --git a/build/doctrees/readme_link.doctree b/build/doctrees/readme_link.doctree index bb10054..cf2a52b 100644 Binary files a/build/doctrees/readme_link.doctree and b/build/doctrees/readme_link.doctree differ diff --git a/build/doctrees/set_up_credentials.doctree b/build/doctrees/set_up_credentials.doctree index ad4ddab..f0e9527 100644 Binary files a/build/doctrees/set_up_credentials.doctree and b/build/doctrees/set_up_credentials.doctree differ diff --git a/build/html/.buildinfo b/build/html/.buildinfo index ebf1f6b..47be452 100644 --- a/build/html/.buildinfo +++ b/build/html/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 -# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 9487e2edbbf95a60cd8fdb622afe617f +# This file records the configuration used when building these files. When it is not found, a full rebuild will be done. 
+config: 2d7d574ccf2e81bd903f57e595d58511 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/build/html/_sources/notebooks/DemoNotebook_ammico.ipynb.txt b/build/html/_sources/notebooks/DemoNotebook_ammico.ipynb.txt index a2bbe92..b5fba7a 100644 --- a/build/html/_sources/notebooks/DemoNotebook_ammico.ipynb.txt +++ b/build/html/_sources/notebooks/DemoNotebook_ammico.ipynb.txt @@ -22,7 +22,7 @@ "source": [ "# if running on google colab\\\n", "# PLEASE RUN THIS ONLY AS CPU RUNTIME\n", - "# for a GPU runtime, there are conflicts with pre-installed packages - \n", + "# for a GPU runtime, there are conflicts with pre-installed packages -\n", "# you first need to uninstall them (prepare a clean environment with no pre-installs) and then install ammico\n", "# flake8-noqa-cell\n", "\n", @@ -103,11 +103,11 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", "# jax also sometimes leads to problems on google colab\n", - "# if this is the case, try restarting the kernel and executing this \n", + "# if this is the case, try restarting the kernel and executing this\n", "# and the above two code cells again\n", "import ammico\n", + "\n", "# for displaying a progress bar\n", "from tqdm import tqdm" ] @@ -276,7 +276,7 @@ "source": [ "# dump file name\n", "dump_file = \"dump_file.csv\"\n", - "# dump every N images \n", + "# dump every N images\n", "dump_every = 10" ] }, @@ -299,12 +299,18 @@ "# the highest possible value is 100\n", "race_threshold = 50\n", "gender_threshold = 50\n", - "for num, key in tqdm(enumerate(image_dict.keys()), total=len(image_dict)): # loop through all images\n", - " image_dict[key] = ammico.EmotionDetector(image_dict[key],\n", - " emotion_threshold=emotion_threshold,\n", - " race_threshold=race_threshold,\n", - " gender_threshold=gender_threshold).analyse_image() # analyse image with EmotionDetector and update dict\n", - " if num % dump_every == 0 or num == len(image_dict) - 1: # save results every dump_every to dump_file\n", + "for num, key in tqdm(\n", + " enumerate(image_dict.keys()), total=len(image_dict)\n", + "): # loop through all images\n", + " image_dict[key] = ammico.EmotionDetector(\n", + " image_dict[key],\n", + " emotion_threshold=emotion_threshold,\n", + " race_threshold=race_threshold,\n", + " gender_threshold=gender_threshold,\n", + " ).analyse_image() # analyse image with EmotionDetector and update dict\n", + " if (\n", + " num % dump_every == 0 or num == len(image_dict) - 1\n", + " ): # save results every dump_every to dump_file\n", " image_df = ammico.get_dataframe(image_dict)\n", " image_df.to_csv(dump_file)" ] @@ -322,10 +328,16 @@ "metadata": {}, "outputs": [], "source": [ - "for num, key in tqdm(enumerate(image_dict.keys()), total=len(image_dict)): # loop through all images\n", - " image_dict[key] = ammico.TextDetector(image_dict[key], analyse_text=True).analyse_image() # analyse image with EmotionDetector and update dict\n", - " \n", - " if num % dump_every == 0 | num == len(image_dict) - 1: # save results every dump_every to dump_file\n", + "for num, key in tqdm(\n", + " enumerate(image_dict.keys()), total=len(image_dict)\n", + "): # loop through all images\n", + " image_dict[key] = ammico.TextDetector(\n", + " image_dict[key], analyse_text=True\n", + " ).analyse_image() # analyse image with EmotionDetector and update dict\n", + "\n", + " if (\n", + " num % dump_every == 0 | num == len(image_dict) - 1\n", + " ): # save results every dump_every to dump_file\n", " image_df = ammico.get_dataframe(image_dict)\n", " image_df.to_csv(dump_file)" ] @@ 
-344,13 +356,21 @@ "outputs": [], "source": [ "# initialize the models\n", - "image_summary_detector = ammico.SummaryDetector(subdict = image_dict, analysis_type=\"summary\", model_type=\"base\")\n", + "image_summary_detector = ammico.SummaryDetector(\n", + " subdict=image_dict, analysis_type=\"summary\", model_type=\"base\"\n", + ")\n", "\n", "# run the analysis without having to re-iniatialize the model\n", - "for num, key in tqdm(enumerate(image_dict.keys()),total=len(image_dict)): # loop through all images\n", - " image_dict[key] = image_summary_detector.analyse_image(subdict = image_dict[key], analysis_type=\"summary\") # analyse image with SummaryDetector and update dict\n", - " \n", - " if num % dump_every == 0 | num == len(image_dict) - 1: # save results every dump_every to dump_file\n", + "for num, key in tqdm(\n", + " enumerate(image_dict.keys()), total=len(image_dict)\n", + "): # loop through all images\n", + " image_dict[key] = image_summary_detector.analyse_image(\n", + " subdict=image_dict[key], analysis_type=\"summary\"\n", + " ) # analyse image with SummaryDetector and update dict\n", + "\n", + " if (\n", + " num % dump_every == 0 | num == len(image_dict) - 1\n", + " ): # save results every dump_every to dump_file\n", " image_df = ammico.get_dataframe(image_dict)\n", " image_df.to_csv(dump_file)" ] @@ -369,14 +389,26 @@ "outputs": [], "source": [ "# initialize the models\n", - "image_summary_detector = ammico.SummaryDetector(subdict = image_dict, analysis_type=\"summary\", model_type=\"base\")\n", + "image_summary_detector = ammico.SummaryDetector(\n", + " subdict=image_dict, analysis_type=\"summary\", model_type=\"base\"\n", + ")\n", "\n", - "for num, key in tqdm(enumerate(image_dict.keys()),total=len(image_dict)): # loop through all images\n", - " image_dict[key] = ammico.EmotionDetector(image_dict[key]).analyse_image() # analyse image with EmotionDetector and update dict\n", - " image_dict[key] = ammico.TextDetector(image_dict[key], analyse_text=True).analyse_image() # analyse image with TextDetector and update dict\n", - " image_dict[key] = image_summary_detector.analyse_image(subdict = image_dict[key], analysis_type=\"summary\") # analyse image with SummaryDetector and update dict\n", - " \n", - " if num % dump_every == 0 | num == len(image_dict) - 1: # save results every dump_every to dump_file \n", + "for num, key in tqdm(\n", + " enumerate(image_dict.keys()), total=len(image_dict)\n", + "): # loop through all images\n", + " image_dict[key] = ammico.EmotionDetector(\n", + " image_dict[key]\n", + " ).analyse_image() # analyse image with EmotionDetector and update dict\n", + " image_dict[key] = ammico.TextDetector(\n", + " image_dict[key], analyse_text=True\n", + " ).analyse_image() # analyse image with TextDetector and update dict\n", + " image_dict[key] = image_summary_detector.analyse_image(\n", + " subdict=image_dict[key], analysis_type=\"summary\"\n", + " ) # analyse image with SummaryDetector and update dict\n", + "\n", + " if (\n", + " num % dump_every == 0 | num == len(image_dict) - 1\n", + " ): # save results every dump_every to dump_file\n", " image_df = ammico.get_dataframe(image_dict)\n", " image_df.to_csv(dump_file)" ] @@ -477,7 +509,7 @@ "# set the dump file\n", "# dump file name\n", "dump_file = \"dump_file.csv\"\n", - "# dump every N images \n", + "# dump every N images\n", "dump_every = 10" ] }, @@ -488,9 +520,15 @@ "outputs": [], "source": [ "# analyze the csv file\n", - "for num, key in tqdm(enumerate(text_dict.keys()), total=len(text_dict)): # loop 
through all text entries\n", - " ammico.TextDetector(text_dict[key], analyse_text=True, skip_extraction=True).analyse_image() # analyse text with TextDetector and update dict\n", - " if num % dump_every == 0 | num == len(text_dict) - 1: # save results every dump_every to dump_file\n", + "for num, key in tqdm(\n", + " enumerate(text_dict.keys()), total=len(text_dict)\n", + "): # loop through all text entries\n", + " ammico.TextDetector(\n", + " text_dict[key], analyse_text=True, skip_extraction=True\n", + " ).analyse_image() # analyse text with TextDetector and update dict\n", + " if (\n", + " num % dump_every == 0 | num == len(text_dict) - 1\n", + " ): # save results every dump_every to dump_file\n", " image_df = ammico.get_dataframe(text_dict)\n", " image_df.to_csv(dump_file)" ] @@ -565,14 +603,23 @@ "metadata": {}, "outputs": [], "source": [ - "for num, key in tqdm(enumerate(image_dict.keys()), total=len(image_dict)): # loop through all images\n", - " image_dict[key] = ammico.TextDetector(image_dict[key], # analyse image with TextDetector and update dict\n", - " analyse_text=True, model_names=[\"sshleifer/distilbart-cnn-12-6\", \n", - " \"distilbert-base-uncased-finetuned-sst-2-english\", \n", - " \"dbmdz/bert-large-cased-finetuned-conll03-english\"], \n", - " revision_numbers=[\"a4f8f3e\", \"af0f99b\", \"f2482bf\"]).analyse_image()\n", - " \n", - " if num % dump_every == 0 | num == len(image_dict) - 1: # save results every dump_every to dump_file\n", + "for num, key in tqdm(\n", + " enumerate(image_dict.keys()), total=len(image_dict)\n", + "): # loop through all images\n", + " image_dict[key] = ammico.TextDetector(\n", + " image_dict[key], # analyse image with TextDetector and update dict\n", + " analyse_text=True,\n", + " model_names=[\n", + " \"sshleifer/distilbart-cnn-12-6\",\n", + " \"distilbert-base-uncased-finetuned-sst-2-english\",\n", + " \"dbmdz/bert-large-cased-finetuned-conll03-english\",\n", + " ],\n", + " revision_numbers=[\"a4f8f3e\", \"af0f99b\", \"f2482bf\"],\n", + " ).analyse_image()\n", + "\n", + " if (\n", + " num % dump_every == 0 | num == len(image_dict) - 1\n", + " ): # save results every dump_every to dump_file\n", " image_df = ammico.get_dataframe(image_dict)\n", " image_df.to_csv(dump_file)" ] @@ -654,7 +701,9 @@ "metadata": {}, "outputs": [], "source": [ - "image_summary_detector = ammico.SummaryDetector(image_dict, analysis_type=\"summary\", model_type=\"base\")" + "image_summary_detector = ammico.SummaryDetector(\n", + " image_dict, analysis_type=\"summary\", model_type=\"base\"\n", + ")" ] }, { @@ -663,10 +712,12 @@ "metadata": {}, "outputs": [], "source": [ - "for num, key in tqdm(enumerate(image_dict.keys()),total=len(image_dict)):\n", - " image_dict[key] = image_summary_detector.analyse_image(subdict = image_dict[key], analysis_type=\"summary\")\n", - " \n", - " if num % dump_every == 0 | num == len(image_dict) - 1: \n", + "for num, key in tqdm(enumerate(image_dict.keys()), total=len(image_dict)):\n", + " image_dict[key] = image_summary_detector.analyse_image(\n", + " subdict=image_dict[key], analysis_type=\"summary\"\n", + " )\n", + "\n", + " if num % dump_every == 0 | num == len(image_dict) - 1:\n", " image_df = ammico.get_dataframe(image_dict)\n", " image_df.to_csv(dump_file)" ] @@ -704,14 +755,17 @@ "metadata": {}, "outputs": [], "source": [ - "image_summary_vqa_detector = ammico.SummaryDetector(image_dict, analysis_type=\"questions\", \n", - " model_type=\"vqa\")\n", + "image_summary_vqa_detector = ammico.SummaryDetector(\n", + " image_dict, 
analysis_type=\"questions\", model_type=\"vqa\"\n", + ")\n", "\n", - "for num, key in tqdm(enumerate(image_dict.keys()),total=len(image_dict)):\n", - " image_dict[key] = image_summary_vqa_detector.analyse_image(subdict=image_dict[key], \n", - " analysis_type=\"questions\", \n", - " list_of_questions = list_of_questions)\n", - " if num % dump_every == 0 | num == len(image_dict) - 1: \n", + "for num, key in tqdm(enumerate(image_dict.keys()), total=len(image_dict)):\n", + " image_dict[key] = image_summary_vqa_detector.analyse_image(\n", + " subdict=image_dict[key],\n", + " analysis_type=\"questions\",\n", + " list_of_questions=list_of_questions,\n", + " )\n", + " if num % dump_every == 0 | num == len(image_dict) - 1:\n", " image_df = ammico.get_dataframe(image_dict)\n", " image_df.to_csv(dump_file)" ] @@ -729,13 +783,16 @@ "metadata": {}, "outputs": [], "source": [ - "image_summary_vqa_detector = ammico.SummaryDetector(image_dict, analysis_type=\"summary_and_questions\", \n", - " model_type=\"base\")\n", - "for num, key in tqdm(enumerate(image_dict.keys()),total=len(image_dict)):\n", - " image_dict[key] = image_summary_vqa_detector.analyse_image(subdict=image_dict[key], \n", - " analysis_type=\"summary_and_questions\", \n", - " list_of_questions = list_of_questions)\n", - " if num % dump_every == 0 | num == len(image_dict) - 1: \n", + "image_summary_vqa_detector = ammico.SummaryDetector(\n", + " image_dict, analysis_type=\"summary_and_questions\", model_type=\"base\"\n", + ")\n", + "for num, key in tqdm(enumerate(image_dict.keys()), total=len(image_dict)):\n", + " image_dict[key] = image_summary_vqa_detector.analyse_image(\n", + " subdict=image_dict[key],\n", + " analysis_type=\"summary_and_questions\",\n", + " list_of_questions=list_of_questions,\n", + " )\n", + " if num % dump_every == 0 | num == len(image_dict) - 1:\n", " image_df = ammico.get_dataframe(image_dict)\n", " image_df.to_csv(dump_file)" ] @@ -767,7 +824,11 @@ "metadata": {}, "outputs": [], "source": [ - "obj = ammico.SummaryDetector(subdict=image_dict, analysis_type = \"summary_and_questions\", model_type = \"blip2_t5_caption_coco_flant5xl\")\n", + "obj = ammico.SummaryDetector(\n", + " subdict=image_dict,\n", + " analysis_type=\"summary_and_questions\",\n", + " model_type=\"blip2_t5_caption_coco_flant5xl\",\n", + ")\n", "# list of the new models that can be used:\n", "# \"blip2_t5_pretrain_flant5xxl\",\n", "# \"blip2_t5_pretrain_flant5xl\",\n", @@ -781,7 +842,7 @@ "# Or you can use `caption_coco_`` model types to generate coco-style captions.\n", "# `flant5` and `opt` means that the model equipped with FlanT5 and OPT LLMs respectively.\n", "\n", - "#also you can perform all calculation on cpu if you set device_type= \"cpu\" or gpu if you set device_type= \"cuda\"" + "# also you can perform all calculation on cpu if you set device_type= \"cpu\" or gpu if you set device_type= \"cuda\"" ] }, { @@ -812,9 +873,13 @@ "outputs": [], "source": [ "for key in image_dict:\n", - " image_dict[key] = obj.analyse_image(subdict = image_dict[key], analysis_type=\"questions\", list_of_questions=list_of_questions)\n", + " image_dict[key] = obj.analyse_image(\n", + " subdict=image_dict[key],\n", + " analysis_type=\"questions\",\n", + " list_of_questions=list_of_questions,\n", + " )\n", "\n", - "# analysis_type can be \n", + "# analysis_type can be\n", "# \"summary\",\n", "# \"questions\",\n", "# \"summary_and_questions\"." 
@@ -848,7 +913,11 @@ "outputs": [], "source": [ "for key in image_dict:\n", - " image_dict[key] = obj.analyse_image(subdict = image_dict[key], analysis_type=\"questions\", list_of_questions=list_of_questions)" + " image_dict[key] = obj.analyse_image(\n", + " subdict=image_dict[key],\n", + " analysis_type=\"questions\",\n", + " list_of_questions=list_of_questions,\n", + " )" ] }, { @@ -886,7 +955,12 @@ "outputs": [], "source": [ "for key in image_dict:\n", - " image_dict[key] = obj.analyse_image(subdict = image_dict[key], analysis_type=\"questions\", list_of_questions=list_of_questions, consequential_questions=True)" + " image_dict[key] = obj.analyse_image(\n", + " subdict=image_dict[key],\n", + " analysis_type=\"questions\",\n", + " list_of_questions=list_of_questions,\n", + " consequential_questions=True,\n", + " )" ] }, { @@ -940,9 +1014,13 @@ "outputs": [], "source": [ "for key in image_dict.keys():\n", - " image_dict[key] = ammico.EmotionDetector(image_dict[key], emotion_threshold=50, race_threshold=50,\n", - " gender_threshold=50,\n", - " accept_disclosure=\"DISCLOSURE_AMMICO\").analyse_image()" + " image_dict[key] = ammico.EmotionDetector(\n", + " image_dict[key],\n", + " emotion_threshold=50,\n", + " race_threshold=50,\n", + " gender_threshold=50,\n", + " accept_disclosure=\"DISCLOSURE_AMMICO\",\n", + " ).analyse_image()" ] }, { @@ -1055,9 +1133,9 @@ " image_names,\n", " features_image_stacked,\n", ") = my_obj.parsing_images(\n", - " model_type, \n", + " model_type,\n", " path_to_save_tensors=\"/content/drive/MyDrive/misinformation-data/\",\n", - " )" + ")" ] }, { @@ -1112,14 +1190,19 @@ "metadata": {}, "outputs": [], "source": [ - "import importlib_resources # only require for image query example\n", - "image_example_query = str(importlib_resources.files(\"ammico\") / \"data\" / \"test-crop-image.png\") # creating the path to the image for the image query example\n", + "import importlib_resources # only require for image query example\n", + "\n", + "image_example_query = str(\n", + " importlib_resources.files(\"ammico\") / \"data\" / \"test-crop-image.png\"\n", + ") # creating the path to the image for the image query example\n", "\n", "search_query = [\n", - " {\"text_input\": \"politician press conference\"}, \n", + " {\"text_input\": \"politician press conference\"},\n", " {\"text_input\": \"a world map\"},\n", - " {\"text_input\": \"a dog\"}, # This is how looks text query\n", - " {\"image\": image_example_query}, # This is how looks image query, here `image_example_path` is the path to query image like \"data/test-crop-image.png\"\n", + " {\"text_input\": \"a dog\"}, # This is how looks text query\n", + " {\n", + " \"image\": image_example_query\n", + " }, # This is how looks image query, here `image_example_path` is the path to query image like \"data/test-crop-image.png\"\n", "]" ] }, @@ -1199,7 +1282,7 @@ "outputs": [], "source": [ "my_obj.show_results(\n", - " search_query[0], # you can change the index to see the results for other queries\n", + " search_query[0], # you can change the index to see the results for other queries\n", ")" ] }, @@ -1210,7 +1293,7 @@ "outputs": [], "source": [ "my_obj.show_results(\n", - " search_query[3], # you can change the index to see the results for other queries\n", + " search_query[3], # you can change the index to see the results for other queries\n", ")" ] }, @@ -1349,7 +1432,7 @@ "outputs": [], "source": [ "analysis_explorer = ammico.AnalysisExplorer(image_dict)\n", - "analysis_explorer.run_server(port = 8057)" + 
"analysis_explorer.run_server(port=8057)" ] }, { @@ -1447,7 +1530,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.13" } }, "nbformat": 4, diff --git a/build/html/_sources/notebooks/Example cropposts.ipynb.txt b/build/html/_sources/notebooks/Example cropposts.ipynb.txt deleted file mode 100644 index d7c2bcb..0000000 --- a/build/html/_sources/notebooks/Example cropposts.ipynb.txt +++ /dev/null @@ -1,183 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "id": "0", - "metadata": {}, - "source": [ - "# Crop posts module" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "1", - "metadata": {}, - "source": [ - "Crop posts from social media posts images, to keep import text informations from social media posts images.\n", - "We can set some manually cropped views from social media posts as reference for cropping the same type social media posts images." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "# Please ignore this cell: extra install steps that are only executed when running the notebook on Google Colab\n", - "# flake8-noqa-cell\n", - "import os\n", - "if 'google.colab' in str(get_ipython()):\n", - " # we're running on colab\n", - " # first install pinned version of setuptools (latest version doesn't seem to work with this package on colab)\n", - " %pip install setuptools==61 -qqq\n", - " # install the moralization package\n", - " %pip install git+https://github.com/ssciwr/AMMICO.git -qqq\n", - "\n", - " # prevent loading of the wrong opencv library\n", - " %pip uninstall -y opencv-contrib-python\n", - " %pip install opencv-contrib-python\n", - "\n", - " from google.colab import drive\n", - " drive.mount('/content/drive')\n", - "\n", - " if not os.path.isdir('/content/ref'):\n", - " !wget https://github.com/ssciwr/AMMICO/archive/refs/heads/ref-data.zip -q\n", - " !unzip -qq ref-data.zip -d . && mv -f AMMICO-ref-data/data/ref . && rm -rf AMMICO-ref-data ref-data.zip" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "import ammico.cropposts as crpo\n", - "import ammico.utils as utils\n", - "import matplotlib.pyplot as plt\n", - "import cv2\n", - "import importlib_resources\n", - "pkg = importlib_resources.files(\"ammico\")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "4", - "metadata": {}, - "source": [ - "The cropping is carried out by finding reference images on the image to be cropped. If a reference matches a region on the image, then everything below the matched region is removed. Manually look at a reference and an example post with the code below." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "# load ref view for cropping the same type social media posts images.\n", - "# substitute the below paths for your samples\n", - "path_ref = pkg / \"data\" / \"ref\" / \"ref-00.png\"\n", - "ref_view = cv2.imread(path_ref.as_posix())\n", - "RGB_ref_view = cv2.cvtColor(ref_view, cv2.COLOR_BGR2RGB)\n", - "plt.figure(figsize=(10, 15))\n", - "plt.imshow(RGB_ref_view)\n", - "plt.show()\n", - "\n", - "path_post = pkg / \"data\" / \"test-crop-image.png\"\n", - "view = cv2.imread(path_post.as_posix())\n", - "RGB_view = cv2.cvtColor(view, cv2.COLOR_BGR2RGB)\n", - "plt.figure(figsize=(10, 15))\n", - "plt.imshow(RGB_view)\n", - "plt.show()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "6", - "metadata": {}, - "source": [ - "You can now crop the image and check on the way that everything looks fine. `plt_match` will plot the matches on the image and below which line content will be cropped; `plt_crop` will plot the cropped text part of the social media post with the comments removed; `plt_image` will plot the image part of the social media post if applicable." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [ - "# crop a posts from reference view, check the cropping \n", - "# this will only plot something if the reference is found on the image\n", - "crop_view = crpo.crop_posts_from_refs(\n", - " [ref_view], view, \n", - " plt_match=True, plt_crop=True, plt_image=True,\n", - ")" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "8", - "metadata": {}, - "source": [ - "Batch crop images from the image folder given in `crop_dir`. The cropped images will save in `save_crop_dir` folder with the same file name as the original file. The reference images with the items to match are provided in `ref_dir`.\n", - "\n", - "Sometimes the cropping will be imperfect, due to improper matches on the image. It is sometimes easier to first categorize the social media posts and then set different references in the reference folder `ref_dir`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "crop_dir = \"data/\"\n", - "ref_dir = pkg / \"data\" / \"ref\" \n", - "save_crop_dir = \"data/crop/\"\n", - "\n", - "files = utils.find_files(path=crop_dir,limit=10,)\n", - "ref_files = utils.find_files(path=ref_dir.as_posix(), limit=100)\n", - "\n", - "crpo.crop_media_posts(files, ref_files, save_crop_dir, plt_match=True, plt_crop=False, plt_image=False)\n", - "print(\"Batch cropping images done\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.16" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/build/html/_static/basic.css b/build/html/_static/basic.css index f316efc..4738b2e 100644 --- a/build/html/_static/basic.css +++ b/build/html/_static/basic.css @@ -1,12 +1,5 @@ /* - * basic.css - * ~~~~~~~~~ - * * Sphinx stylesheet -- basic theme. 
- * - * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. - * :license: BSD, see LICENSE for details. - * */ /* -- main layout ----------------------------------------------------------- */ @@ -115,15 +108,11 @@ img { /* -- search page ----------------------------------------------------------- */ ul.search { - margin: 10px 0 0 20px; - padding: 0; + margin-top: 10px; } ul.search li { - padding: 5px 0 5px 20px; - background-image: url(file.png); - background-repeat: no-repeat; - background-position: 0 7px; + padding: 5px 0; } ul.search li a { @@ -752,14 +741,6 @@ abbr, acronym { cursor: help; } -.translated { - background-color: rgba(207, 255, 207, 0.2) -} - -.untranslated { - background-color: rgba(255, 207, 207, 0.2) -} - /* -- code displays --------------------------------------------------------- */ pre { diff --git a/build/html/_static/doctools.js b/build/html/_static/doctools.js index 4d67807..0398ebb 100644 --- a/build/html/_static/doctools.js +++ b/build/html/_static/doctools.js @@ -1,12 +1,5 @@ /* - * doctools.js - * ~~~~~~~~~~~ - * * Base JavaScript utilities for all Sphinx HTML documentation. - * - * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. - * :license: BSD, see LICENSE for details. - * */ "use strict"; diff --git a/build/html/_static/language_data.js b/build/html/_static/language_data.js index 367b8ed..c7fe6c6 100644 --- a/build/html/_static/language_data.js +++ b/build/html/_static/language_data.js @@ -1,13 +1,6 @@ /* - * language_data.js - * ~~~~~~~~~~~~~~~~ - * * This script contains the language-specific data used by searchtools.js, * namely the list of stopwords, stemmer, scorer and splitter. - * - * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. - * :license: BSD, see LICENSE for details. - * */ var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"]; diff --git a/build/html/_static/searchtools.js b/build/html/_static/searchtools.js index b08d58c..91f4be5 100644 --- a/build/html/_static/searchtools.js +++ b/build/html/_static/searchtools.js @@ -1,12 +1,5 @@ /* - * searchtools.js - * ~~~~~~~~~~~~~~~~ - * * Sphinx JavaScript utilities for the full-text search. - * - * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. - * :license: BSD, see LICENSE for details. - * */ "use strict"; @@ -20,7 +13,7 @@ if (typeof Scorer === "undefined") { // and returns the new score. /* score: result => { - const [docname, title, anchor, descr, score, filename] = result + const [docname, title, anchor, descr, score, filename, kind] = result return score }, */ @@ -47,6 +40,14 @@ if (typeof Scorer === "undefined") { }; } +// Global search result kind enum, used by themes to style search results. 
+class SearchResultKind { + static get index() { return "index"; } + static get object() { return "object"; } + static get text() { return "text"; } + static get title() { return "title"; } +} + const _removeChildren = (element) => { while (element && element.lastChild) element.removeChild(element.lastChild); }; @@ -64,9 +65,13 @@ const _displayItem = (item, searchTerms, highlightTerms) => { const showSearchSummary = DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY; const contentRoot = document.documentElement.dataset.content_root; - const [docName, title, anchor, descr, score, _filename] = item; + const [docName, title, anchor, descr, score, _filename, kind] = item; let listItem = document.createElement("li"); + // Add a class representing the item's type: + // can be used by a theme's CSS selector for styling + // See SearchResultKind for the class names. + listItem.classList.add(`kind-${kind}`); let requestUrl; let linkUrl; if (docBuilder === "dirhtml") { @@ -115,8 +120,10 @@ const _finishSearch = (resultCount) => { "Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories." ); else - Search.status.innerText = _( - "Search finished, found ${resultCount} page(s) matching the search query." + Search.status.innerText = Documentation.ngettext( + "Search finished, found one page matching the search query.", + "Search finished, found ${resultCount} pages matching the search query.", + resultCount, ).replace('${resultCount}', resultCount); }; const _displayNextItem = ( @@ -138,7 +145,7 @@ const _displayNextItem = ( else _finishSearch(resultCount); }; // Helper function used by query() to order search results. -// Each input is an array of [docname, title, anchor, descr, score, filename]. +// Each input is an array of [docname, title, anchor, descr, score, filename, kind]. // Order the results by score (in opposite order of appearance, since the // `_displayNextItem` function uses pop() to retrieve items) and then alphabetically. const _orderResultsByScoreThenName = (a, b) => { @@ -248,6 +255,7 @@ const Search = { searchSummary.classList.add("search-summary"); searchSummary.innerText = ""; const searchList = document.createElement("ul"); + searchList.setAttribute("role", "list"); searchList.classList.add("search"); const out = document.getElementById("search-results"); @@ -318,7 +326,7 @@ const Search = { const indexEntries = Search._index.indexentries; // Collect multiple result groups to be sorted separately and then ordered. - // Each is an array of [docname, title, anchor, descr, score, filename]. + // Each is an array of [docname, title, anchor, descr, score, filename, kind]. 
const normalResults = []; const nonMainIndexResults = []; @@ -337,6 +345,7 @@ const Search = { null, score + boost, filenames[file], + SearchResultKind.title, ]); } } @@ -354,6 +363,7 @@ const Search = { null, score, filenames[file], + SearchResultKind.index, ]; if (isMain) { normalResults.push(result); @@ -475,6 +485,7 @@ const Search = { descr, score, filenames[match[0]], + SearchResultKind.object, ]); }; Object.keys(objects).forEach((prefix) => @@ -502,9 +513,11 @@ const Search = { // perform the search on the required terms searchTerms.forEach((word) => { const files = []; + // find documents, if any, containing the query word in their text/title term indices + // use Object.hasOwnProperty to avoid mismatching against prototype properties const arr = [ - { files: terms[word], score: Scorer.term }, - { files: titleTerms[word], score: Scorer.title }, + { files: terms.hasOwnProperty(word) ? terms[word] : undefined, score: Scorer.term }, + { files: titleTerms.hasOwnProperty(word) ? titleTerms[word] : undefined, score: Scorer.title }, ]; // add support for partial matches if (word.length > 2) { @@ -536,8 +549,9 @@ const Search = { // set score for the word in each file recordFiles.forEach((file) => { - if (!scoreMap.has(file)) scoreMap.set(file, {}); - scoreMap.get(file)[word] = record.score; + if (!scoreMap.has(file)) scoreMap.set(file, new Map()); + const fileScores = scoreMap.get(file); + fileScores.set(word, record.score); }); }); @@ -576,7 +590,7 @@ const Search = { break; // select one (max) score for the file. - const score = Math.max(...wordList.map((w) => scoreMap.get(file)[w])); + const score = Math.max(...wordList.map((w) => scoreMap.get(file).get(w))); // add result to the result list results.push([ docNames[file], @@ -585,6 +599,7 @@ const Search = { null, score, filenames[file], + SearchResultKind.text, ]); } return results; diff --git a/build/html/ammico.html b/build/html/ammico.html index a0a4376..80e3d04 100644 --- a/build/html/ammico.html +++ b/build/html/ammico.html @@ -14,7 +14,7 @@ - + @@ -52,35 +52,24 @@
[rendered navigation sidebar; the "Crop posts module" entry is removed]
diff --git a/build/html/index.html b/build/html/index.html
index 9fa606f..b756025 100644
--- a/build/html/index.html
+++ b/build/html/index.html
[rendered navigation sidebar and toctree; the "Crop posts module" entry is removed]
diff --git a/build/html/license_link.html b/build/html/license_link.html
index 2bf7dab..d82ea83 100644
--- a/build/html/license_link.html
+++ b/build/html/license_link.html
[rendered navigation sidebar; the "Crop posts module" entry is removed]
diff --git a/build/html/modules.html b/build/html/modules.html
index e16e986..a801604 100644
--- a/build/html/modules.html
+++ b/build/html/modules.html
[rendered navigation sidebar and module listing; the "Crop posts module" entry is removed]
@@ -168,7 +174,7 @@