Changed multimodal_search and the corresponding notebook

2025-10-29 21:16:06 +02:00 · 2023-05-24 13:30:02 +02:00 · 2023-05-24 13:30:02 +02:00 · 0452d6607f
--- a/ammico/multimodal_search.py
+++ b/ammico/multimodal_search.py
@ -334,8 +334,8 @@ class MultimodalSearch(AnalysisMethod):
            r = requests.get(url, allow_redirects=False)
            open(path_to_lib + "bpe_simple_vocab_16e6.txt.gz", "wb").write(r.content)

-        image_keys = sorted(self.keys())
-        image_names = [self[k]["filename"] for k in image_keys]
+        image_keys = sorted(self.subdict.keys())
+        image_names = [self.subdict[k]["filename"] for k in image_keys]

        select_model = {
            "blip2": MultimodalSearch.load_feature_extractor_model_blip2,
@ -505,7 +505,7 @@ class MultimodalSearch(AnalysisMethod):
            sorted_lists (list): sorted list of similarity.
        """
        if filter_number_of_images is None:
-            filter_number_of_images = len(self)
+            filter_number_of_images = len(self.subdict)
        if filter_val_limit is None:
            filter_val_limit = 0
        if filter_rel_error is None:
@ -531,17 +531,17 @@ class MultimodalSearch(AnalysisMethod):
                    and 100 * abs(max_val - similarity[key][q].item()) / max_val
                    < filter_rel_error
                ):
-                    self[image_keys[key]][
+                    self.subdict[image_keys[key]][
                        "rank " + list(search_query[q].values())[0]
                    ] = places[q][key]
-                    self[image_keys[key]][
+                    self.subdict[image_keys[key]][
                        list(search_query[q].values())[0]
                    ] = similarity[key][q].item()
                else:
-                    self[image_keys[key]][
+                    self.subdict[image_keys[key]][
                        "rank " + list(search_query[q].values())[0]
                    ] = None
-                    self[image_keys[key]][list(search_query[q].values())[0]] = 0
+                    self.subdict[image_keys[key]][list(search_query[q].values())[0]] = 0
        return similarity, sorted_lists

    def itm_text_precessing(self, search_query: list[dict[str, str]]) -> list:
@ -580,7 +580,9 @@ class MultimodalSearch(AnalysisMethod):
        paths = []
        image_names = []
        for s in sorted(
-            self.items(), key=lambda t: t[1][list(query.values())[0]], reverse=True
+            self.subdict.items(),
+            key=lambda t: t[1][list(query.values())[0]],
+            reverse=True,
        ):
            if s[1]["rank " + list(query.values())[0]] is None:
                break
@ -896,17 +898,17 @@ class MultimodalSearch(AnalysisMethod):
            }
            for i, key in zip(range(len(image_keys)), sorted_lists[index_text_query]):
                if image_keys[key] in image_names:
-                    self[image_keys[key]][
+                    self.subdict[image_keys[key]][
                        "itm " + list(search_query[index_text_query].values())[0]
                    ] = image_names_with_itm[image_keys[key]]
-                    self[image_keys[key]][
+                    self.subdict[image_keys[key]][
                        "itm_rank " + list(search_query[index_text_query].values())[0]
                    ] = image_names_with_new_rank[image_keys[key]]
                else:
-                    self[image_keys[key]][
+                    self.subdict[image_keys[key]][
                        "itm " + list(search_query[index_text_query].values())[0]
                    ] = 0
-                    self[image_keys[key]][
+                    self.subdict[image_keys[key]][
                        "itm_rank " + list(search_query[index_text_query].values())[0]
                    ] = None

@ -966,7 +968,7 @@ class MultimodalSearch(AnalysisMethod):
            current_querry_rank = "rank " + list(query.values())[0]

        for s in sorted(
-            self.items(), key=lambda t: t[1][current_querry_val], reverse=True
+            self.subdict.items(), key=lambda t: t[1][current_querry_val], reverse=True
        ):
            if s[1][current_querry_rank] is None:
                break
--- a/ammico/summary.py
+++ b/ammico/summary.py
@ -7,7 +7,7 @@ from lavis.models import load_model_and_preprocess
 class SummaryDetector(AnalysisMethod):
    def __init__(self, subdict: dict) -> None:
        super().__init__(subdict)
-        self.summary_device = device("cuda" if cuda.is_available() else "cpu")
+        self.summary_device = "cuda" if cuda.is_available() else "cpu"

    def load_model_base(self):
        """
--- a/notebooks/multimodal_search.ipynb
+++ b/notebooks/multimodal_search.ipynb
@ -71,6 +71,16 @@
    ")"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a08bd3a9-e954-4a0e-ad64-6817abd3a25a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "images"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@ -83,6 +93,16 @@
    "mydict = mutils.initialize_dict(images)"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4c091f95-07cf-42c3-82c8-5f3a3c5929f8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mydict"
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "987540a8-d800-4c70-a76b-7bfabaf123fa",
@ -130,6 +150,26 @@
    "To process the loaded images using the selected model, use the below code:"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f6f2c9b1-4a91-47cb-86b5-2c9c67e4837b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "my_obj = ms.MultimodalSearch(mydict)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "16603ded-078e-4362-847b-57ad76829327",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "my_obj.subdict"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
@ -146,19 +186,28 @@
    "    image_keys,\n",
    "    image_names,\n",
    "    features_image_stacked,\n",
-    ") = ms.MultimodalSearch.parsing_images(\n",
-    "    mydict, \n",
+    ") = my_obj.parsing_images(\n",
    "    model_type, \n",
-    "    path_to_saved_tensors=\"/content/drive/MyDrive/misinformation-data/\"\n",
+    "    path_to_save_tensors=\"/content/drive/MyDrive/misinformation-data/\",\n",
    "    )"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f236c3b1-c3a6-471a-9fc5-ef831b675286",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "features_image_stacked"
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "9ff8a894-566b-4c4f-acca-21c50b5b1f52",
   "metadata": {},
   "source": [
-    "The images are then processed and stored in a numerical representation, a tensor. These tensors do not change for the same image and same model - so if you run this analysis once, and save the tensors giving a path with the keyword `path_to_saved_tensors`, a file with filename `.<Number_of_images>_<model_name>_saved_features_image.pt` will be placed there.\n",
+    "The images are then processed and stored in a numerical representation, a tensor. These tensors do not change for the same image and the same model - so if you run this analysis once and save the tensors by giving a path with the keyword `path_to_save_tensors`, a file with filename `.<Number_of_images>_<model_name>_saved_features_image.pt` will be placed there.\n",
    "\n",
    "This will save you a lot of time if you want to analyse same images with the same model but different questions. To run using the saved tensors, execute the below code giving the path and name of the tensor file."
   ]
@ -179,10 +228,9 @@
    "#     image_keys,\n",
    "#     image_names,\n",
    "#     features_image_stacked,\n",
-    "# ) = ms.MultimodalSearch.parsing_images(\n",
-    "#     mydict,\n",
+    "# ) = my_obj.parsing_images(\n",
    "#     model_type,\n",
-    "#     path_to_load_tensors=\".5_blip_saved_features_image.pt\",\n",
+    "#     path_to_load_tensors=\"/content/drive/MyDrive/misinformation-data/5_clip_base_saved_features_image.pt\",\n",
    "# )"
   ]
  },
@ -240,8 +288,7 @@
   },
   "outputs": [],
   "source": [
-    "similarity, sorted_lists = ms.MultimodalSearch.multimodal_search(\n",
-    "    mydict,\n",
+    "similarity, sorted_lists = my_obj.multimodal_search(\n",
    "    model,\n",
    "    vis_processors,\n",
    "    txt_processors,\n",
@ -253,6 +300,36 @@
    ")"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "65210ca2-b674-44bd-807a-4165e14bad74",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "similarity"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "557473df-e2b9-4ef0-9439-3daadf6741ac",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sorted_lists"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c93d7e88-594d-4095-b5f2-7bf01210dc61",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mydict"
+   ]
+  },
  {
   "cell_type": "markdown",
   "id": "e1cf7e46-0c2c-4fb2-b89a-ef585ccb9339",
@ -290,8 +367,7 @@
   },
   "outputs": [],
   "source": [
-    "ms.MultimodalSearch.show_results(\n",
-    "    mydict,\n",
+    "my_obj.show_results(\n",
    "    search_query3[0],\n",
    ")"
   ]
@ -329,8 +405,7 @@
   },
   "outputs": [],
   "source": [
-    "itm_scores, image_gradcam_with_itm = ms.MultimodalSearch.image_text_match_reordering(\n",
-    "    mydict,\n",
+    "itm_scores, image_gradcam_with_itm = my_obj.image_text_match_reordering(\n",
    "    search_query3,\n",
    "    itm_model,\n",
    "    image_keys,\n",
@ -357,8 +432,8 @@
   },
   "outputs": [],
   "source": [
-    "ms.MultimodalSearch.show_results(\n",
-    "    mydict, search_query3[0], itm=True, image_gradcam_with_itm=image_gradcam_with_itm\n",
+    "my_obj.show_results(\n",
+    "    search_query3[0], itm=True, image_gradcam_with_itm=image_gradcam_with_itm\n",
    ")"
   ]
  },
@ -448,7 +523,7 @@
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },