A remote resource that needs on demand downloading.
+
We use this as a wrapper around the pooch library. The wrapper registers
+each data file and allows prefetching through the CLI entry point
+ammico_prefetch_models.
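At the command line, the registered files can be prefetched with the ammico_prefetch_models entry point named above. As a minimal sketch of the underlying idea (not ammico's actual registration code; the URL below is a placeholder), a remote file can be fetched on demand with pooch like this:

import pooch

# Hypothetical example: download a remote file on demand and cache it locally.
# The URL is a placeholder; real registry entries would carry checksums.
weights_path = pooch.retrieve(
    url="https://example.org/models/weights.pt",  # placeholder URL
    known_hash=None,  # supply a checksum in real use so the download is verified
)
print(weights_path)  # local path of the cached file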
matches (cv2.DMatch) – The matched objects on the image.
-
kp1 (np.ndarray) – Key points of the matches for the reference image.
-
kp2 (np.ndarray) – Key points of the matches for the social media posts.
-
region (int, optional) – Area to consider around the keypoints.
-Defaults to 30.
-
h_margin (int, optional) – Horizontal margin to subtract from the minimum
-horizontal position. Defaults to 0.
-
v_margin (int, optional) – Vertical margin to subtract from the minimum
-vertical position. Defaults to 5.
-
min_match – Minimum number of matches required. Defaults to 6.
+
path (str, optional) – The base directory where we are looking for the images. Defaults
+to None, which uses the ammico data directory if set or the current
+working directory otherwise.
+
pattern (str|list, optional) – The naming pattern that the filename should match.
+Use either ".ext" or just "ext".
+Defaults to ["png", "jpg", "jpeg", "gif", "webp", "avif", "tiff"]. Can be used to allow other patterns or to only include
+specific prefixes or suffixes.
+
recursive (bool, optional) – Whether to recurse into subdirectories. Default is set to True.
+
limit (int/list, optional) – The maximum number of images to be found.
+Provide a list or tuple of length 2 to batch the images.
+Defaults to 20. To return all images, set to None or -1.
+
random_seed (int, optional) – The random seed to use for shuffling the images.
+If None is provided, the data will not be shuffled. Defaults to None.
Returns:
-
tuple, optional – Tuple of vertical and horizontal crop corner coordinates.
+
dict – A nested dictionary with file ids and all filenames including the path.
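For illustration, a call using the parameters documented above might look like the sketch below (in recent ammico versions this helper is exposed as ammico.find_files; treat the exact name and values as assumptions):

import ammico

# Sketch only: collect images from a folder using the documented parameters.
image_dict = ammico.find_files(
    path="./data-test",        # base directory to search
    pattern=["png", "jpg"],    # restrict to these file extensions
    recursive=True,            # also look into subdirectories
    limit=20,                  # or [0, 20] to pick a batch; None or -1 returns all
    random_seed=42,            # shuffle reproducibly; None keeps the original order
)
# The result is a nested dictionary with file ids and full file paths.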
@@ -128,25 +125,25 @@ google colab comes with pre-installed libraries that can lead to dependency conf
[ ]:
-
# if running on google colab\
-# PLEASE RUN THIS ONLY AS CPU RUNTIME
-# for a GPU runtime, there are conflicts with pre-installed packages -
-# you first need to uninstall them (prepare a clean environment with no pre-installs) and then install ammico
-# flake8-noqa-cell
+
# if running on google colab\
+# PLEASE RUN THIS ONLY AS CPU RUNTIME
+# for a GPU runtime, there are conflicts with pre-installed packages -
+# you first need to uninstall them (prepare a clean environment with no pre-installs) and then install ammico
+# flake8-noqa-cell
-if"google.colab"instr(get_ipython()):
- # update python version
- # install setuptools
- # %pip install setuptools==61 -qqq
- # uninstall some pre-installed packages due to incompatibility
- %pip uninstall --yes tensorflow-probability dopamine-rl lida pandas-gbq torchaudio torchdata torchtext orbax-checkpoint flex-y jax jaxlib -qqq
- # install ammico
- %pip install git+https://github.com/ssciwr/ammico.git -qqq
- # install older version of jax to support transformers use of diffusers
- # mount google drive for data and API key
- fromgoogle.colabimportdrive
+if "google.colab" in str(get_ipython()):
+ # update python version
+ # install setuptools
+ # %pip install setuptools==61 -qqq
+ # uninstall some pre-installed packages due to incompatibility
+ %pip uninstall --yes tensorflow-probability dopamine-rl lida pandas-gbq torchaudio torchdata torchtext orbax-checkpoint flex-y jax jaxlib -qqq
+ # install ammico
+ %pip install git+https://github.com/ssciwr/ammico.git -qqq
+ # install older version of jax to support transformers use of diffusers
+ # mount google drive for data and API key
+ from google.colab import drive
- drive.mount("/content/drive")
+ drive.mount("/content/drive")
@@ -157,11 +154,11 @@ google colab comes with pre-installed libraries that can lead to dependency conf
from datasets import load_dataset
+from pathlib import Path
-# If the dataset is gated/private, make sure you have run huggingface-cli login
-dataset=load_dataset("iulusoy/test-images")
+# If the dataset is gated/private, make sure you have run huggingface-cli login
+dataset = load_dataset("iulusoy/test-images")
@@ -170,13 +167,13 @@ google colab comes with pre-installed libraries that can lead to dependency conf
[ ]:
-
data_path="./data-test"
-data_path=Path(data_path)
-data_path.mkdir(parents=True,exist_ok=True)
-# now save the files from the Huggingface dataset as images into the data_path folder
-fori,imageinenumerate(dataset["train"]["image"]):
- filename="img"+str(i)+".png"
- image.save(data_path/filename)
+
data_path = "./data-test"
+data_path = Path(data_path)
+data_path.mkdir(parents=True, exist_ok=True)
+# now save the files from the Huggingface dataset as images into the data_path folder
+for i, image in enumerate(dataset["train"]["image"]):
+ filename = "img" + str(i) + ".png"
+ image.save(data_path / filename)
@@ -187,13 +184,13 @@ google colab comes with pre-installed libraries that can lead to dependency conf
[ ]:
-
importos
-# jax also sometimes leads to problems on google colab
-# if this is the case, try restarting the kernel and executing this
-# and the above two code cells again
-importammico
-# for displaying a progress bar
-fromtqdmimporttqdm
+
# jax also sometimes leads to problems on google colab
+# if this is the case, try restarting the kernel and executing this
+# and the above two code cells again
+import ammico
+
+# for displaying a progress bar
+from tqdm import tqdm
@@ -285,13 +282,13 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
# respond to the disclosure statement
-# this will set an environment variable for you
-# if you do not want to re-accept the disclosure every time, you can set this environment variable in your shell
-# to re-set the environment variable, uncomment the below line
-accept_disclosure="DISCLOSURE_AMMICO"
-# os.environ.pop(accept_disclosure, None)
-_=ammico.ethical_disclosure(accept_disclosure=accept_disclosure)
+
# respond to the disclosure statement
+# this will set an environment variable for you
+# if you do not want to re-accept the disclosure every time, you can set this environment variable in your shell
+# to re-set the environment variable, uncomment the below line
+accept_disclosure = "DISCLOSURE_AMMICO"
+# os.environ.pop(accept_disclosure, None)
+_ = ammico.ethical_disclosure(accept_disclosure=accept_disclosure)
@@ -305,13 +302,13 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
# respond to the privacy disclosure statement
-# this will set an environment variable for you
-# if you do not want to re-accept the privacy disclosure every time, you can set this environment variable in your shell
-# to re-set the environment variable, uncomment the below line
-accept_privacy="PRIVACY_AMMICO"
-# os.environ.pop(accept_privacy, None)
-_=ammico.privacy_disclosure(accept_privacy=accept_privacy)
+
# respond to the privacy disclosure statement
+# this will set an environment variable for you
+# if you do not want to re-accept the privacy disclosure every time, you can set this environment variable in your shell
+# to re-set the environment variable, uncomment the below line
+accept_privacy = "PRIVACY_AMMICO"
+# os.environ.pop(accept_privacy, None)
+_ = ammico.privacy_disclosure(accept_privacy=accept_privacy)
@@ -319,8 +316,8 @@ directly on the right next to the image. This way, the user can directly inspect
@@ -334,10 +331,10 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
# dump file name
-dump_file="dump_file.csv"
-# dump every N images
-dump_every=10
+
# dump file name
+dump_file = "dump_file.csv"
+# dump every N images
+dump_every = 10
@@ -346,20 +343,26 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
# set the thresholds for the emotion detection
-emotion_threshold=50# this is the default value for the detection confidence
-# the lowest possible value is 0
-# the highest possible value is 100
-race_threshold=50
-gender_threshold=50
-fornum,keyintqdm(enumerate(image_dict.keys()),total=len(image_dict)):# loop through all images
- image_dict[key]=ammico.EmotionDetector(image_dict[key],
- emotion_threshold=emotion_threshold,
- race_threshold=race_threshold,
- gender_threshold=gender_threshold).analyse_image()# analyse image with EmotionDetector and update dict
- ifnum%dump_every==0ornum==len(image_dict)-1:# save results every dump_every to dump_file
- image_df=ammico.get_dataframe(image_dict)
- image_df.to_csv(dump_file)
+
# set the thresholds for the emotion detection
+emotion_threshold = 50 # this is the default value for the detection confidence
+# the lowest possible value is 0
+# the highest possible value is 100
+race_threshold = 50
+gender_threshold = 50
+for num, key in tqdm(
+ enumerate(image_dict.keys()), total=len(image_dict)
+): # loop through all images
+ image_dict[key] = ammico.EmotionDetector(
+ image_dict[key],
+ emotion_threshold=emotion_threshold,
+ race_threshold=race_threshold,
+ gender_threshold=gender_threshold,
+ ).analyse_image() # analyse image with EmotionDetector and update dict
+ if (
+ num % dump_every == 0 or num == len(image_dict) - 1
+ ): # save results every dump_every to dump_file
+ image_df = ammico.get_dataframe(image_dict)
+ image_df.to_csv(dump_file)
@@ -368,12 +371,18 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
fornum,keyintqdm(enumerate(image_dict.keys()),total=len(image_dict)):# loop through all images
- image_dict[key]=ammico.TextDetector(image_dict[key],analyse_text=True).analyse_image()# analyse image with EmotionDetector and update dict
+
for num, key in tqdm(
+ enumerate(image_dict.keys()), total=len(image_dict)
+): # loop through all images
+ image_dict[key] = ammico.TextDetector(
+ image_dict[key], analyse_text=True
+ ).analyse_image() # analyse image with TextDetector and update dict
- ifnum%dump_every==0|num==len(image_dict)-1:# save results every dump_every to dump_file
- image_df=ammico.get_dataframe(image_dict)
- image_df.to_csv(dump_file)
+ if (
+ num % dump_every == 0 or num == len(image_dict) - 1
+ ): # save results every dump_every to dump_file
+ image_df = ammico.get_dataframe(image_dict)
+ image_df.to_csv(dump_file)
@@ -382,16 +391,24 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
# initialize the models
-image_summary_detector=ammico.SummaryDetector(subdict=image_dict,analysis_type="summary",model_type="base")
+
# initialize the models
+image_summary_detector = ammico.SummaryDetector(
+ subdict=image_dict, analysis_type="summary", model_type="base"
+)
-# run the analysis without having to re-iniatialize the model
-fornum,keyintqdm(enumerate(image_dict.keys()),total=len(image_dict)):# loop through all images
- image_dict[key]=image_summary_detector.analyse_image(subdict=image_dict[key],analysis_type="summary")# analyse image with SummaryDetector and update dict
+# run the analysis without having to re-initialize the model
+for num, key in tqdm(
+ enumerate(image_dict.keys()), total=len(image_dict)
+): # loop through all images
+ image_dict[key] = image_summary_detector.analyse_image(
+ subdict=image_dict[key], analysis_type="summary"
+ ) # analyse image with SummaryDetector and update dict
- ifnum%dump_every==0|num==len(image_dict)-1:# save results every dump_every to dump_file
- image_df=ammico.get_dataframe(image_dict)
- image_df.to_csv(dump_file)
+ if (
+ num % dump_every == 0 or num == len(image_dict) - 1
+ ): # save results every dump_every to dump_file
+ image_df = ammico.get_dataframe(image_dict)
+ image_df.to_csv(dump_file)
@@ -400,17 +417,29 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
# initialize the models
-image_summary_detector=ammico.SummaryDetector(subdict=image_dict,analysis_type="summary",model_type="base")
+
# initialize the models
+image_summary_detector = ammico.SummaryDetector(
+ subdict=image_dict, analysis_type="summary", model_type="base"
+)
-fornum,keyintqdm(enumerate(image_dict.keys()),total=len(image_dict)):# loop through all images
- image_dict[key]=ammico.EmotionDetector(image_dict[key]).analyse_image()# analyse image with EmotionDetector and update dict
- image_dict[key]=ammico.TextDetector(image_dict[key],analyse_text=True).analyse_image()# analyse image with TextDetector and update dict
- image_dict[key]=image_summary_detector.analyse_image(subdict=image_dict[key],analysis_type="summary")# analyse image with SummaryDetector and update dict
+for num, key in tqdm(
+ enumerate(image_dict.keys()), total=len(image_dict)
+): # loop through all images
+ image_dict[key] = ammico.EmotionDetector(
+ image_dict[key]
+ ).analyse_image() # analyse image with EmotionDetector and update dict
+ image_dict[key] = ammico.TextDetector(
+ image_dict[key], analyse_text=True
+ ).analyse_image() # analyse image with TextDetector and update dict
+ image_dict[key] = image_summary_detector.analyse_image(
+ subdict=image_dict[key], analysis_type="summary"
+ ) # analyse image with SummaryDetector and update dict
- ifnum%dump_every==0|num==len(image_dict)-1:# save results every dump_every to dump_file
- image_df=ammico.get_dataframe(image_dict)
- image_df.to_csv(dump_file)
+ if (
+ num % dump_every == 0 or num == len(image_dict) - 1
+ ): # save results every dump_every to dump_file
+ image_df = ammico.get_dataframe(image_dict)
+ image_df.to_csv(dump_file)
@@ -423,7 +452,7 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
image_df=ammico.get_dataframe(image_dict)
+
image_df = ammico.get_dataframe(image_dict)
@@ -432,7 +461,7 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
image_df.head(3)
+
image_df.head(3)
@@ -441,7 +470,7 @@ directly on the right next to the image. This way, the user can directly inspect
ta = ammico.TextAnalyzer(csv_path="../data/ref/test.csv", column_key="text")
@@ -461,10 +490,10 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
# read the csv file
-ta.read_csv()
-# set up the dict containing all text entries
-text_dict=ta.mydict
+
# read the csv file
+ta.read_csv()
+# set up the dict containing all text entries
+text_dict = ta.mydict
@@ -472,11 +501,11 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
# set the dump file
-# dump file name
-dump_file="dump_file.csv"
-# dump every N images
-dump_every=10
+
# set the dump file
+# dump file name
+dump_file = "dump_file.csv"
+# dump every N images
+dump_every = 10
@@ -484,12 +513,18 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
# analyze the csv file
-fornum,keyintqdm(enumerate(text_dict.keys()),total=len(text_dict)):# loop through all text entries
- ammico.TextDetector(text_dict[key],analyse_text=True,skip_extraction=True).analyse_image()# analyse text with TextDetector and update dict
- ifnum%dump_every==0|num==len(text_dict)-1:# save results every dump_every to dump_file
- image_df=ammico.get_dataframe(text_dict)
- image_df.to_csv(dump_file)
+
# analyze the csv file
+for num, key in tqdm(
+ enumerate(text_dict.keys()), total=len(text_dict)
+): # loop through all text entries
+ ammico.TextDetector(
+ text_dict[key], analyse_text=True, skip_extraction=True
+ ).analyse_image() # analyse text with TextDetector and update dict
+ if (
+ num % dump_every == 0 or num == len(text_dict) - 1
+ ): # save results every dump_every to dump_file
+ image_df = ammico.get_dataframe(text_dict)
+ image_df.to_csv(dump_file)
@@ -497,8 +532,8 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
# save the results to a csv file
-text_df=ammico.get_dataframe(text_dict)
+
# save the results to a csv file
+text_df = ammico.get_dataframe(text_dict)
@@ -506,8 +541,8 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
# inspect
-text_df.head(3)
+
# inspect
+text_df.head(3)
@@ -515,8 +550,8 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
# write to csv
-text_df.to_csv("data_out.csv")
+
# write to csv
+text_df.to_csv("data_out.csv")
@@ -525,7 +560,7 @@ directly on the right next to the image. This way, the user can directly inspect
The different detector modules with their options are explained in more detail in this section.
Text detector
Text on the images can be extracted using the TextDetector class (text module). The text is initially extracted using the Google Cloud Vision API and then translated into English with googletrans. The translated text is cleaned of whitespace, linebreaks, and numbers using Python syntax and spaCy.
-
+
The user can set whether the text should be further summarized and analyzed for sentiment and named entity recognition by setting the keyword analyse_text to True (the default is False). If set, the transformers pipeline is used for each of these tasks, with the default models as of 03/2023. Other models can be selected by setting the optional keyword model_names to a list of selected models, one for each task:
model_names=["sshleifer/distilbart-cnn-12-6","distilbert-base-uncased-finetuned-sst-2-english","dbmdz/bert-large-cased-finetuned-conll03-english"] for summary, sentiment, and ner. To be even more specific, revision numbers can also be selected by specifying the optional keyword revision_numbers to a list of revision numbers for each model, for example revision_numbers=["a4f8f3e","af0f99b","f2482bf"].
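As a sketch of how these keywords could be combined (the model names and revision numbers are simply the example values quoted above, not recommendations):

# Sketch: run the text analysis with explicitly chosen models and revisions.
image_dict[key] = ammico.TextDetector(
    image_dict[key],
    analyse_text=True,
    model_names=[
        "sshleifer/distilbart-cnn-12-6",                    # summary
        "distilbert-base-uncased-finetuned-sst-2-english",  # sentiment
        "dbmdz/bert-large-cased-finetuned-conll03-english", # named entity recognition
    ],
    revision_numbers=["a4f8f3e", "af0f99b", "f2482bf"],
).analyse_image()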
Please note that for the Google Cloud Vision API (the TextDetector class) you need to set a key in order to process the images. This key is ideally set as an environment variable using for example
@@ -533,7 +568,7 @@ directly on the right next to the image. This way, the user can directly inspect
The SummaryDetector can be used to generate image captions (summary) as well as to perform visual question answering (VQA).
-
+
This module is based on the LAVIS library. Since the models can be quite large, an initial object is created which will load the necessary models into RAM/VRAM and then use them in the analysis. The user can specify the type of analysis to be performed using the analysis_type keyword. Setting it to summary will generate a caption (summary), questions will prepare answers (VQA) to a list of questions as set by the user,
summary_and_questions will do both. Note that the desired analysis type needs to be set here in the initialization of the detector object, and not when running the analysis for each image; the same holds true for the selected model.
The implemented models are listed below.
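A short sketch of the point above: the analysis type and model are chosen once at initialization and then reused for every image (the values here mirror those used later in this notebook):

# Sketch: the analysis type and model are fixed when the detector is created.
image_summary_detector = ammico.SummaryDetector(
    subdict=image_dict,
    analysis_type="summary_and_questions",  # or "summary" / "questions"
    model_type="base",
)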
@@ -666,7 +710,9 @@ directly on the right next to the image. This way, the user can directly inspect
for num, key in tqdm(enumerate(image_dict.keys()), total=len(image_dict)):
+ image_dict[key] = image_summary_detector.analyse_image(
+ subdict=image_dict[key], analysis_type="summary"
+ )
- ifnum%dump_every==0|num==len(image_dict)-1:
- image_df=ammico.get_dataframe(image_dict)
- image_df.to_csv(dump_file)
+ if num % dump_every == 0 or num == len(image_dict) - 1:
+ image_df = ammico.get_dataframe(image_dict)
+ image_df.to_csv(dump_file)
@@ -688,11 +736,11 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
list_of_questions=[
- "How many persons on the picture?",
- "Are there any politicians in the picture?",
- "Does the picture show something from medicine?",
-]
+
list_of_questions = [
+ "How many persons on the picture?",
+ "Are there any politicians in the picture?",
+ "Does the picture show something from medicine?",
+]
@@ -701,16 +749,19 @@ directly on the right next to the image. This way, the user can directly inspect
@@ -761,21 +815,25 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
obj=ammico.SummaryDetector(subdict=image_dict,analysis_type="summary_and_questions",model_type="blip2_t5_caption_coco_flant5xl")
-# list of the new models that can be used:
-# "blip2_t5_pretrain_flant5xxl",
-# "blip2_t5_pretrain_flant5xl",
-# "blip2_t5_caption_coco_flant5xl",
-# "blip2_opt_pretrain_opt2.7b",
-# "blip2_opt_pretrain_opt6.7b",
-# "blip2_opt_caption_coco_opt2.7b",
-# "blip2_opt_caption_coco_opt6.7b",
+
obj = ammico.SummaryDetector(
+ subdict=image_dict,
+ analysis_type="summary_and_questions",
+ model_type="blip2_t5_caption_coco_flant5xl",
+)
+# list of the new models that can be used:
+# "blip2_t5_pretrain_flant5xxl",
+# "blip2_t5_pretrain_flant5xl",
+# "blip2_t5_caption_coco_flant5xl",
+# "blip2_opt_pretrain_opt2.7b",
+# "blip2_opt_pretrain_opt6.7b",
+# "blip2_opt_caption_coco_opt2.7b",
+# "blip2_opt_caption_coco_opt6.7b",
-# You can use `pretrain_` model types for zero-shot image-to-text generation with prompts.
-# Or you can use `caption_coco_`` model types to generate coco-style captions.
-# `flant5` and `opt` means that the model equipped with FlanT5 and OPT LLMs respectively.
+# You can use `pretrain_` model types for zero-shot image-to-text generation with prompts.
+# Or you can use `caption_coco_` model types to generate coco-style captions.
+# `flant5` and `opt` mean that the model is equipped with a FlanT5 or OPT LLM, respectively.
-#also you can perform all calculation on cpu if you set device_type= "cpu" or gpu if you set device_type= "cuda"
+# You can also perform all calculations on CPU by setting device_type="cpu", or on GPU by setting device_type="cuda"
@@ -785,10 +843,10 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
list_of_questions=[
- "Question: Are there people in the image? Answer:",
- "Question: What is this picture about? Answer:",
-]
+
list_of_questions = [
+ "Question: Are there people in the image? Answer:",
+ "Question: What is this picture about? Answer:",
+]
@@ -796,13 +854,17 @@ directly on the right next to the image. This way, the user can directly inspect
for key in image_dict:
+ image_dict[key] = obj.analyse_image(
+ subdict=image_dict[key],
+ analysis_type="questions",
+ list_of_questions=list_of_questions,
+ )
-# analysis_type can be
-# "summary",
-# "questions",
-# "summary_and_questions".
+# analysis_type can be
+# "summary",
+# "questions",
+# "summary_and_questions".
@@ -812,10 +874,10 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
list_of_questions=[
- "Question: What country is in the picture? Answer: USA. Question: Why? Answer: Because there is an American flag in the background . Question: Where it comes from? Answer:",
- "Question: Which city is this? Answer: Frankfurt. Question: Why?",
-]
+
list_of_questions = [
+ "Question: What country is in the picture? Answer: USA. Question: Why? Answer: Because there is an American flag in the background . Question: Where it comes from? Answer:",
+ "Question: Which city is this? Answer: Frankfurt. Question: Why?",
+]
@@ -823,8 +885,12 @@ directly on the right next to the image. This way, the user can directly inspect
for key in image_dict:
+ image_dict[key] = obj.analyse_image(
+ subdict=image_dict[key],
+ analysis_type="questions",
+ list_of_questions=list_of_questions,
+ consequential_questions=True,
+ )
@@ -861,7 +932,7 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
image_dict
+
image_dict
@@ -869,9 +940,9 @@ directly on the right next to the image. This way, the user can directly inspect
[ ]:
-
# write output to csv
-image_df=ammico.get_dataframe(image_dict)
-image_df.to_csv("/content/drive/MyDrive/misinformation-data/data_out.csv")
+
# write output to csv
+image_df = ammico.get_dataframe(image_dict)
+image_df.to_csv("/content/drive/MyDrive/misinformation-data/data_out.csv")
@@ -880,7 +951,7 @@ directly on the right next to the image. This way, the user can directly inspect
Detection of faces and facial expression analysis
Faces and facial expressions are detected and analyzed using the EmotionDetector class from the faces module. Initially, RetinaFace is used to detect whether faces are present in the image, followed by an analysis of whether face masks are worn (Face-Mask-Detection). The probabilistic detection of age, gender, race, and emotions is carried out with deepface, but only if the disclosure statement has been accepted (see above).
-
+
Depending on the features found in the image, the face detection module returns different analysis content: If no faces are found in the image, all further steps are skipped and the result "face":"No","multiple_faces":"No","no_faces":0,"wears_mask":["No"],"age":[None],"gender":[None],"race":[None],"emotion":[None],"emotion(category)":[None] is returned. If one or several faces are found, up to three faces are analyzed; for each, it is checked whether the face is partially concealed by a face mask. If yes, only age and gender are detected; if no, race, emotion, and dominant emotion are also detected. In case of the latter, the output could look like this: "face":"Yes","multiple_faces":"Yes","no_faces":2,"wears_mask":["No","No"],"age":[27,28],"gender":["Man","Man"],"race":["asian",None],"emotion":["angry","neutral"],"emotion(category)":["Negative","Neutral"], where for the two faces that are detected (given by no_faces), some of the values are returned as a list
with the first item for the first (largest) face and the second item for the second (smaller) face (for example, "emotion" returns a list ["angry","neutral"] signifying the first face expressing anger, and the second face having a neutral expression).
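As a small, hedged illustration of how the fields described above could be read back from the result dictionary (the image key "img0" is a placeholder):

# Sketch: inspect the face-related output for one analysed image.
result = image_dict["img0"]  # placeholder key
if result["face"] == "Yes":
    # one entry per detected face, largest face first
    for age, gender, emotion in zip(result["age"], result["gender"], result["emotion"]):
        print(age, gender, emotion)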
@@ -895,10 +966,14 @@ default is set to 50%, so that a confidence above 0.5 results in an emotion bein
@@ -1021,19 +1096,19 @@ default is set to 50%, so that a confidence above 0.5 results in an emotion bein
[ ]:
-
# uncomment the code below if you want to load the tensors from the drive
-# and just want to ask different questions for the same set of images
-# (
-# model,
-# vis_processors,
-# txt_processors,
-# image_keys,
-# image_names,
-# features_image_stacked,
-# ) = my_obj.parsing_images(
-# model_type,
-# path_to_load_tensors="/content/drive/MyDrive/misinformation-data/5_clip_base_saved_features_image.pt",
-# )
+
# uncomment the code below if you want to load the tensors from the drive
+# and just want to ask different questions for the same set of images
+# (
+# model,
+# vis_processors,
+# txt_processors,
+# image_keys,
+# image_names,
+# features_image_stacked,
+# ) = my_obj.parsing_images(
+# model_type,
+# path_to_load_tensors="/content/drive/MyDrive/misinformation-data/5_clip_base_saved_features_image.pt",
+# )
@@ -1046,15 +1121,20 @@ default is set to 50%, so that a confidence above 0.5 results in an emotion bein
[ ]:
-
importimportlib_resources# only require for image query example
-image_example_query=str(importlib_resources.files("ammico")/"data"/"test-crop-image.png")# creating the path to the image for the image query example
+
import importlib_resources  # only required for the image query example
-search_query=[
- {"text_input":"politician press conference"},
- {"text_input":"a world map"},
- {"text_input":"a dog"},# This is how looks text query
- {"image":image_example_query},# This is how looks image query, here `image_example_path` is the path to query image like "data/test-crop-image.png"
-]
+image_example_query = str(
+ importlib_resources.files("ammico") / "data" / "test-crop-image.png"
+) # creating the path to the image for the image query example
+
+search_query = [
+ {"text_input": "politician press conference"},
+ {"text_input": "a world map"},
+ {"text_input": "a dog"}, # This is how looks text query
+ {
+ "image": image_example_query
+ }, # This is what an image query looks like; image_example_query is the path to the query image, e.g. "data/test-crop-image.png"
+]
@@ -1068,16 +1148,16 @@ default is set to 50%, so that a confidence above 0.5 results in an emotion bein