diff --git a/misinformation/summary.py b/misinformation/summary.py
index e7972fc..4448a0c 100644
--- a/misinformation/summary.py
+++ b/misinformation/summary.py
@@ -5,9 +5,7 @@ from lavis.models import load_model_and_preprocess
class SummaryDetector(AnalysisMethod):
- def __init__(
- self, subdict: dict
- ) -> None:
+ def __init__(self, subdict: dict) -> None:
super().__init__(subdict)
self.subdict.update(self.set_keys())
self.image_summary = {
@@ -19,8 +17,8 @@ class SummaryDetector(AnalysisMethod):
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption", model_type="base_coco", is_eval=True, device=summary_device
- )
-
+ )
+
def set_keys(self) -> dict:
params = {
"const_image_summary": None,
@@ -32,33 +30,54 @@ class SummaryDetector(AnalysisMethod):
path = self.subdict["filename"]
raw_image = Image.open(path).convert("RGB")
- image = self.summary_vis_processors["eval"](raw_image).unsqueeze(0).to(self.summary_device)
- self.image_summary["const_image_summary"] = self.summary_model.generate({"image": image})[0]
- self.image_summary["3_non-deterministic summary"] = self.summary_model.generate({"image": image}, use_nucleus_sampling=True, num_captions=3)
+ image = (
+ self.summary_vis_processors["eval"](raw_image)
+ .unsqueeze(0)
+ .to(self.summary_device)
+ )
+ self.image_summary["const_image_summary"] = self.summary_model.generate(
+ {"image": image}
+ )[0]
+ self.image_summary["3_non-deterministic summary"] = self.summary_model.generate(
+ {"image": image}, use_nucleus_sampling=True, num_captions=3
+ )
for key in self.image_summary:
self.subdict[key] = self.image_summary[key]
return self.subdict
-
- summary_VQA_model, summary_VQA_vis_processors, summary_VQA_txt_processors = load_model_and_preprocess(name="blip_vqa", model_type="vqav2", is_eval=True, device=summary_device)
+
+ (
+ summary_VQA_model,
+ summary_VQA_vis_processors,
+ summary_VQA_txt_processors,
+ ) = load_model_and_preprocess(
+ name="blip_vqa", model_type="vqav2", is_eval=True, device=summary_device
+ )
def analyse_questions(self, list_of_questions):
- if (len(list_of_questions)>0):
+ if len(list_of_questions) > 0:
path = self.subdict["filename"]
raw_image = Image.open(path).convert("RGB")
- image = self.summary_VQA_vis_processors["eval"](raw_image).unsqueeze(0).to(self.summary_device)
- question_batch =[]
+ image = (
+ self.summary_VQA_vis_processors["eval"](raw_image)
+ .unsqueeze(0)
+ .to(self.summary_device)
+ )
+ question_batch = []
for quest in list_of_questions:
question_batch.append(self.summary_VQA_txt_processors["eval"](quest))
batch_size = len(list_of_questions)
image_batch = image.repeat(batch_size, 1, 1, 1)
- answers_batch = self.summary_VQA_model.predict_answers(samples={"image": image_batch, "text_input": question_batch}, inference_method="generate")
-
- for q,a in zip(question_batch,answers_batch):
+ answers_batch = self.summary_VQA_model.predict_answers(
+ samples={"image": image_batch, "text_input": question_batch},
+ inference_method="generate",
+ )
+
+ for q, a in zip(question_batch, answers_batch):
self.image_summary[q] = a
-
+
for key in self.image_summary:
self.subdict[key] = self.image_summary[key]
- else:
+ else:
print("Please, enter list of questions")
- return self.subdict
\ No newline at end of file
+ return self.subdict
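
For reference while reviewing: a minimal usage sketch of the reformatted detector, mirroring the notebook below. Assumptions are flagged inline; in particular, the captioning method's name is not visible in this diff (only analyse_questions is), so analyse_image is a guess, and the shape of mydict ({image_id: {"filename": path}}) is taken from the notebook output.

    # Hedged sketch, not part of the patch.
    # Assumption: the captioning method is named analyse_image; only
    # analyse_questions appears in the hunks above.
    from misinformation.summary import SummaryDetector

    # Importing the module loads the BLIP caption and BLIP VQA weights once,
    # since load_model_and_preprocess runs in the class body.
    mydict = {"100132S_ara": {"filename": "../data/images/100132S_ara.png"}}
    list_of_questions = [
        "How many persons on the picture?",
        "Are there any politicians in the picture?",
    ]

    for key in mydict:
        detector = SummaryDetector(mydict[key])
        # Fills const_image_summary plus three nucleus-sampled captions.
        mydict[key] = detector.analyse_image()  # assumed method name
        # Adds one key per question, answered in a single batched forward pass.
        mydict[key] = detector.analyse_questions(list_of_questions)
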
diff --git a/notebooks/image_summary.ipynb b/notebooks/image_summary.ipynb
index 8c084cf..edb83ad 100644
--- a/notebooks/image_summary.ipynb
+++ b/notebooks/image_summary.ipynb
@@ -16,21 +16,9 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2023-01-27 13:43:45.543761: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
- "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
- "2023-01-27 13:43:45.940025: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/pandriushchenko/anaconda3/envs/misinfo/lib/python3.10/site-packages/cv2/../../lib64:\n",
- "2023-01-27 13:43:45.940060: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/pandriushchenko/anaconda3/envs/misinfo/lib/python3.10/site-packages/cv2/../../lib64:\n",
- "2023-01-27 13:43:45.940063: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"import misinformation\n",
"import misinformation.summary as sm"
@@ -45,7 +33,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -57,7 +45,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -66,36 +54,9 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{'100132S_ara': {'filename': '../data/images/100132S_ara.png'},\n",
- " '100447_ind': {'filename': '../data/images/100447_ind.png'},\n",
- " '100127S_ara': {'filename': '../data/images/100127S_ara.png'},\n",
- " '100134S_ara': {'filename': '../data/images/100134S_ara.png'},\n",
- " '109257_1_spa': {'filename': '../data/images/109257_1_spa.png'},\n",
- " '100130S_ara': {'filename': '../data/images/100130S_ara.png'},\n",
- " '100131S_ara': {'filename': '../data/images/100131S_ara.png'},\n",
- " '102135S_eng': {'filename': '../data/images/102135S_eng.png'},\n",
- " '102435S_2_eng': {'filename': '../data/images/102435S_2_eng.png'},\n",
- " '100368_asm': {'filename': '../data/images/100368_asm.png'},\n",
- " '100361_asm': {'filename': '../data/images/100361_asm.png'},\n",
- " '102141_1_eng': {'filename': '../data/images/102141_1_eng.png'},\n",
- " '106958S_por': {'filename': '../data/images/106958S_por.png'},\n",
- " '102134S_eng': {'filename': '../data/images/102134S_eng.png'},\n",
- " '102133S_eng': {'filename': '../data/images/102133S_eng.png'},\n",
- " '100450_ind': {'filename': '../data/images/100450_ind.png'},\n",
- " '100451S_ind': {'filename': '../data/images/100451S_ind.png'}}"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"mydict"
]
@@ -109,7 +70,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -128,7 +89,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -147,143 +108,9 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
-          "[HTML rendering of the DataFrame; same content as the text/plain output below]"
- ],
- "text/plain": [
- " filename \\\n",
- "0 ../data/images/100132S_ara.png \n",
- "1 ../data/images/100447_ind.png \n",
- "2 ../data/images/100127S_ara.png \n",
- "3 ../data/images/100134S_ara.png \n",
- "4 ../data/images/109257_1_spa.png \n",
- "5 ../data/images/100130S_ara.png \n",
- "6 ../data/images/100131S_ara.png \n",
- "7 ../data/images/102135S_eng.png \n",
- "8 ../data/images/102435S_2_eng.png \n",
- "9 ../data/images/100368_asm.png \n",
- "\n",
- " const_image_summary \\\n",
- "0 a white car parked in front of a building cove... \n",
- "1 a woman drinking from a bottle while standing ... \n",
- "2 a map of the world with arabic writing \n",
- "3 a woman is standing in front of a sign \n",
- "4 a man in a suit and tie making a face \n",
- "5 a group of people walking down a street next t... \n",
- "6 a group of people standing in front of a tv \n",
- "7 a woman standing in front of a store filled wi... \n",
- "8 a man in a suit and glasses is talking \n",
- "9 a group of people standing next to each other \n",
- "\n",
- " 3_non-deterministic summary \n",
- "0 [someone has wrapped up a large plastic bag ov... \n",
- "1 [a woman drinks out of a bottle and stands nex... \n",
- "2 [a map of the world with a message in arabic, ... \n",
- "3 [two women walking and talking to each other, ... \n",
- "4 [a man is smiling and making a funny face, man... \n",
- "5 [two people on the street in front of a big tr... \n",
- "6 [the president is addressing his nation of the... \n",
- "7 [people in a supermarket standing in front of ... \n",
- "8 [the man is speaking about his favorite tv sho... \n",
- "9 [people doing a job next to a line of men, men... "
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df.head(10)"
]
@@ -297,7 +124,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -319,24 +146,9 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "0324b10be268470ab4e550cb0153b9e8",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "HBox(children=(Select(layout=Layout(width='20%'), options=('100132S_ara', '100447_ind', '100127S_ara', '100134…"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"misinformation.explore_analysis(mydict, identify=\"summary\")"
]
@@ -357,20 +169,20 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"list_of_questions = [\n",
- "\"How many persons on the picture?\",\n",
- "\"Are there any politicians in the picture?\",\n",
- "\"Does the picture show something from medicine?\", \n",
+ " \"How many persons on the picture?\",\n",
+ " \"Are there any politicians in the picture?\",\n",
+ " \"Does the picture show something from medicine?\",\n",
"]"
]
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -380,24 +192,9 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "e446aa565f6345ab8256771f578fbf92",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "HBox(children=(Select(layout=Layout(width='20%'), options=('100132S_ara', '100447_ind', '100127S_ara', '100134…"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"misinformation.explore_analysis(mydict, identify=\"summary\")"
]
@@ -411,7 +208,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -421,207 +218,16 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
-          "[HTML rendering of the DataFrame with the question columns; same content as the text/plain output below]"
- ],
- "text/plain": [
- " filename \\\n",
- "0 ../data/images/100132S_ara.png \n",
- "1 ../data/images/100447_ind.png \n",
- "2 ../data/images/100127S_ara.png \n",
- "3 ../data/images/100134S_ara.png \n",
- "4 ../data/images/109257_1_spa.png \n",
- "5 ../data/images/100130S_ara.png \n",
- "6 ../data/images/100131S_ara.png \n",
- "7 ../data/images/102135S_eng.png \n",
- "8 ../data/images/102435S_2_eng.png \n",
- "9 ../data/images/100368_asm.png \n",
- "\n",
- " const_image_summary \\\n",
- "0 a white car parked in front of a building cove... \n",
- "1 None \n",
- "2 None \n",
- "3 None \n",
- "4 None \n",
- "5 None \n",
- "6 None \n",
- "7 None \n",
- "8 None \n",
- "9 None \n",
- "\n",
- " 3_non-deterministic summary \\\n",
- "0 [the man is sitting on a car near a large bann... \n",
- "1 None \n",
- "2 None \n",
- "3 None \n",
- "4 None \n",
- "5 None \n",
- "6 None \n",
- "7 None \n",
- "8 None \n",
- "9 None \n",
- "\n",
- " how many persons on the picture? are there any politicians in the picture? \\\n",
- "0 1 no \n",
- "1 2 no \n",
- "2 0 no \n",
- "3 2 no \n",
- "4 1 yes \n",
- "5 3 no \n",
- "6 many yes \n",
- "7 6 no \n",
- "8 1 yes \n",
- "9 15 yes \n",
- "\n",
- " does the picture show something from medicine? \n",
- "0 no \n",
- "1 yes \n",
- "2 no \n",
- "3 yes \n",
- "4 no \n",
- "5 no \n",
- "6 no \n",
- "7 no \n",
- "8 no \n",
- "9 no "
- ]
- },
- "execution_count": 14,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"df2.head(10)"
]
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [