diff --git a/misinformation/summary.py b/misinformation/summary.py index e7972fc..4448a0c 100644 --- a/misinformation/summary.py +++ b/misinformation/summary.py @@ -5,9 +5,7 @@ from lavis.models import load_model_and_preprocess class SummaryDetector(AnalysisMethod): - def __init__( - self, subdict: dict - ) -> None: + def __init__(self, subdict: dict) -> None: super().__init__(subdict) self.subdict.update(self.set_keys()) self.image_summary = { @@ -19,8 +17,8 @@ class SummaryDetector(AnalysisMethod): summary_model, summary_vis_processors, _ = load_model_and_preprocess( name="blip_caption", model_type="base_coco", is_eval=True, device=summary_device - ) - + ) + def set_keys(self) -> dict: params = { "const_image_summary": None, @@ -32,33 +30,54 @@ class SummaryDetector(AnalysisMethod): path = self.subdict["filename"] raw_image = Image.open(path).convert("RGB") - image = self.summary_vis_processors["eval"](raw_image).unsqueeze(0).to(self.summary_device) - self.image_summary["const_image_summary"] = self.summary_model.generate({"image": image})[0] - self.image_summary["3_non-deterministic summary"] = self.summary_model.generate({"image": image}, use_nucleus_sampling=True, num_captions=3) + image = ( + self.summary_vis_processors["eval"](raw_image) + .unsqueeze(0) + .to(self.summary_device) + ) + self.image_summary["const_image_summary"] = self.summary_model.generate( + {"image": image} + )[0] + self.image_summary["3_non-deterministic summary"] = self.summary_model.generate( + {"image": image}, use_nucleus_sampling=True, num_captions=3 + ) for key in self.image_summary: self.subdict[key] = self.image_summary[key] return self.subdict - - summary_VQA_model, summary_VQA_vis_processors, summary_VQA_txt_processors = load_model_and_preprocess(name="blip_vqa", model_type="vqav2", is_eval=True, device=summary_device) + + ( + summary_VQA_model, + summary_VQA_vis_processors, + summary_VQA_txt_processors, + ) = load_model_and_preprocess( + name="blip_vqa", model_type="vqav2", is_eval=True, device=summary_device + ) def analyse_questions(self, list_of_questions): - if (len(list_of_questions)>0): + if len(list_of_questions) > 0: path = self.subdict["filename"] raw_image = Image.open(path).convert("RGB") - image = self.summary_VQA_vis_processors["eval"](raw_image).unsqueeze(0).to(self.summary_device) - question_batch =[] + image = ( + self.summary_VQA_vis_processors["eval"](raw_image) + .unsqueeze(0) + .to(self.summary_device) + ) + question_batch = [] for quest in list_of_questions: question_batch.append(self.summary_VQA_txt_processors["eval"](quest)) batch_size = len(list_of_questions) image_batch = image.repeat(batch_size, 1, 1, 1) - answers_batch = self.summary_VQA_model.predict_answers(samples={"image": image_batch, "text_input": question_batch}, inference_method="generate") - - for q,a in zip(question_batch,answers_batch): + answers_batch = self.summary_VQA_model.predict_answers( + samples={"image": image_batch, "text_input": question_batch}, + inference_method="generate", + ) + + for q, a in zip(question_batch, answers_batch): self.image_summary[q] = a - + for key in self.image_summary: self.subdict[key] = self.image_summary[key] - else: + else: print("Please, enter list of questions") - return self.subdict \ No newline at end of file + return self.subdict diff --git a/notebooks/image_summary.ipynb b/notebooks/image_summary.ipynb index 8c084cf..edb83ad 100644 --- a/notebooks/image_summary.ipynb +++ b/notebooks/image_summary.ipynb @@ -16,21 +16,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-01-27 13:43:45.543761: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2023-01-27 13:43:45.940025: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/pandriushchenko/anaconda3/envs/misinfo/lib/python3.10/site-packages/cv2/../../lib64:\n", - "2023-01-27 13:43:45.940060: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/pandriushchenko/anaconda3/envs/misinfo/lib/python3.10/site-packages/cv2/../../lib64:\n", - "2023-01-27 13:43:45.940063: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n" - ] - } - ], + "outputs": [], "source": [ "import misinformation\n", "import misinformation.summary as sm" @@ -45,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -57,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -66,36 +54,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'100132S_ara': {'filename': '../data/images/100132S_ara.png'},\n", - " '100447_ind': {'filename': '../data/images/100447_ind.png'},\n", - " '100127S_ara': {'filename': '../data/images/100127S_ara.png'},\n", - " '100134S_ara': {'filename': '../data/images/100134S_ara.png'},\n", - " '109257_1_spa': {'filename': '../data/images/109257_1_spa.png'},\n", - " '100130S_ara': {'filename': '../data/images/100130S_ara.png'},\n", - " '100131S_ara': {'filename': '../data/images/100131S_ara.png'},\n", - " '102135S_eng': {'filename': '../data/images/102135S_eng.png'},\n", - " '102435S_2_eng': {'filename': '../data/images/102435S_2_eng.png'},\n", - " '100368_asm': {'filename': '../data/images/100368_asm.png'},\n", - " '100361_asm': {'filename': '../data/images/100361_asm.png'},\n", - " '102141_1_eng': {'filename': '../data/images/102141_1_eng.png'},\n", - " '106958S_por': {'filename': '../data/images/106958S_por.png'},\n", - " '102134S_eng': {'filename': '../data/images/102134S_eng.png'},\n", - " '102133S_eng': {'filename': '../data/images/102133S_eng.png'},\n", - " '100450_ind': {'filename': '../data/images/100450_ind.png'},\n", - " '100451S_ind': {'filename': '../data/images/100451S_ind.png'}}" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "mydict" ] @@ -109,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -128,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "tags": [] }, @@ -147,143 +108,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
filenameconst_image_summary3_non-deterministic summary
0../data/images/100132S_ara.pnga white car parked in front of a building cove...[someone has wrapped up a large plastic bag ov...
1../data/images/100447_ind.pnga woman drinking from a bottle while standing ...[a woman drinks out of a bottle and stands nex...
2../data/images/100127S_ara.pnga map of the world with arabic writing[a map of the world with a message in arabic, ...
3../data/images/100134S_ara.pnga woman is standing in front of a sign[two women walking and talking to each other, ...
4../data/images/109257_1_spa.pnga man in a suit and tie making a face[a man is smiling and making a funny face, man...
5../data/images/100130S_ara.pnga group of people walking down a street next t...[two people on the street in front of a big tr...
6../data/images/100131S_ara.pnga group of people standing in front of a tv[the president is addressing his nation of the...
7../data/images/102135S_eng.pnga woman standing in front of a store filled wi...[people in a supermarket standing in front of ...
8../data/images/102435S_2_eng.pnga man in a suit and glasses is talking[the man is speaking about his favorite tv sho...
9../data/images/100368_asm.pnga group of people standing next to each other[people doing a job next to a line of men, men...
\n", - "
" - ], - "text/plain": [ - " filename \\\n", - "0 ../data/images/100132S_ara.png \n", - "1 ../data/images/100447_ind.png \n", - "2 ../data/images/100127S_ara.png \n", - "3 ../data/images/100134S_ara.png \n", - "4 ../data/images/109257_1_spa.png \n", - "5 ../data/images/100130S_ara.png \n", - "6 ../data/images/100131S_ara.png \n", - "7 ../data/images/102135S_eng.png \n", - "8 ../data/images/102435S_2_eng.png \n", - "9 ../data/images/100368_asm.png \n", - "\n", - " const_image_summary \\\n", - "0 a white car parked in front of a building cove... \n", - "1 a woman drinking from a bottle while standing ... \n", - "2 a map of the world with arabic writing \n", - "3 a woman is standing in front of a sign \n", - "4 a man in a suit and tie making a face \n", - "5 a group of people walking down a street next t... \n", - "6 a group of people standing in front of a tv \n", - "7 a woman standing in front of a store filled wi... \n", - "8 a man in a suit and glasses is talking \n", - "9 a group of people standing next to each other \n", - "\n", - " 3_non-deterministic summary \n", - "0 [someone has wrapped up a large plastic bag ov... \n", - "1 [a woman drinks out of a bottle and stands nex... \n", - "2 [a map of the world with a message in arabic, ... \n", - "3 [two women walking and talking to each other, ... \n", - "4 [a man is smiling and making a funny face, man... \n", - "5 [two people on the street in front of a big tr... \n", - "6 [the president is addressing his nation of the... \n", - "7 [people in a supermarket standing in front of ... \n", - "8 [the man is speaking about his favorite tv sho... \n", - "9 [people doing a job next to a line of men, men... " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df.head(10)" ] @@ -297,7 +124,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -319,24 +146,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "0324b10be268470ab4e550cb0153b9e8", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(Select(layout=Layout(width='20%'), options=('100132S_ara', '100447_ind', '100127S_ara', '100134…" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "misinformation.explore_analysis(mydict, identify=\"summary\")" ] @@ -357,20 +169,20 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "list_of_questions = [\n", - "\"How many persons on the picture?\",\n", - "\"Are there any politicians in the picture?\",\n", - "\"Does the picture show something from medicine?\", \n", + " \"How many persons on the picture?\",\n", + " \"Are there any politicians in the picture?\",\n", + " \"Does the picture show something from medicine?\",\n", "]" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -380,24 +192,9 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "e446aa565f6345ab8256771f578fbf92", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "HBox(children=(Select(layout=Layout(width='20%'), options=('100132S_ara', '100447_ind', '100127S_ara', '100134…" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "misinformation.explore_analysis(mydict, identify=\"summary\")" ] @@ -411,7 +208,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -421,207 +218,16 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
filenameconst_image_summary3_non-deterministic summaryhow many persons on the picture?are there any politicians in the picture?does the picture show something from medicine?
0../data/images/100132S_ara.pnga white car parked in front of a building cove...[the man is sitting on a car near a large bann...1nono
1../data/images/100447_ind.pngNoneNone2noyes
2../data/images/100127S_ara.pngNoneNone0nono
3../data/images/100134S_ara.pngNoneNone2noyes
4../data/images/109257_1_spa.pngNoneNone1yesno
5../data/images/100130S_ara.pngNoneNone3nono
6../data/images/100131S_ara.pngNoneNonemanyyesno
7../data/images/102135S_eng.pngNoneNone6nono
8../data/images/102435S_2_eng.pngNoneNone1yesno
9../data/images/100368_asm.pngNoneNone15yesno
\n", - "
" - ], - "text/plain": [ - " filename \\\n", - "0 ../data/images/100132S_ara.png \n", - "1 ../data/images/100447_ind.png \n", - "2 ../data/images/100127S_ara.png \n", - "3 ../data/images/100134S_ara.png \n", - "4 ../data/images/109257_1_spa.png \n", - "5 ../data/images/100130S_ara.png \n", - "6 ../data/images/100131S_ara.png \n", - "7 ../data/images/102135S_eng.png \n", - "8 ../data/images/102435S_2_eng.png \n", - "9 ../data/images/100368_asm.png \n", - "\n", - " const_image_summary \\\n", - "0 a white car parked in front of a building cove... \n", - "1 None \n", - "2 None \n", - "3 None \n", - "4 None \n", - "5 None \n", - "6 None \n", - "7 None \n", - "8 None \n", - "9 None \n", - "\n", - " 3_non-deterministic summary \\\n", - "0 [the man is sitting on a car near a large bann... \n", - "1 None \n", - "2 None \n", - "3 None \n", - "4 None \n", - "5 None \n", - "6 None \n", - "7 None \n", - "8 None \n", - "9 None \n", - "\n", - " how many persons on the picture? are there any politicians in the picture? \\\n", - "0 1 no \n", - "1 2 no \n", - "2 0 no \n", - "3 2 no \n", - "4 1 yes \n", - "5 3 no \n", - "6 many yes \n", - "7 6 no \n", - "8 1 yes \n", - "9 15 yes \n", - "\n", - " does the picture show something from medicine? \n", - "0 no \n", - "1 yes \n", - "2 no \n", - "3 yes \n", - "4 no \n", - "5 no \n", - "6 no \n", - "7 no \n", - "8 no \n", - "9 no " - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df2.head(10)" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [