From 0452d6607fde40cb7c37a373f6af30791848f92c Mon Sep 17 00:00:00 2001 From: Petr Andriushchenko Date: Wed, 24 May 2023 13:30:02 +0200 Subject: [PATCH] changed multimodal_search and corresponded notebook --- ammico/multimodal_search.py | 28 ++++---- ammico/summary.py | 2 +- notebooks/multimodal_search.ipynb | 107 +++++++++++++++++++++++++----- 3 files changed, 107 insertions(+), 30 deletions(-) diff --git a/ammico/multimodal_search.py b/ammico/multimodal_search.py index bec8f50..3344afd 100644 --- a/ammico/multimodal_search.py +++ b/ammico/multimodal_search.py @@ -334,8 +334,8 @@ class MultimodalSearch(AnalysisMethod): r = requests.get(url, allow_redirects=False) open(path_to_lib + "bpe_simple_vocab_16e6.txt.gz", "wb").write(r.content) - image_keys = sorted(self.keys()) - image_names = [self[k]["filename"] for k in image_keys] + image_keys = sorted(self.subdict.keys()) + image_names = [self.subdict[k]["filename"] for k in image_keys] select_model = { "blip2": MultimodalSearch.load_feature_extractor_model_blip2, @@ -505,7 +505,7 @@ class MultimodalSearch(AnalysisMethod): sorted_lists (list): sorted list of similarity. """ if filter_number_of_images is None: - filter_number_of_images = len(self) + filter_number_of_images = len(self.subdict) if filter_val_limit is None: filter_val_limit = 0 if filter_rel_error is None: @@ -531,17 +531,17 @@ class MultimodalSearch(AnalysisMethod): and 100 * abs(max_val - similarity[key][q].item()) / max_val < filter_rel_error ): - self[image_keys[key]][ + self.subdict[image_keys[key]][ "rank " + list(search_query[q].values())[0] ] = places[q][key] - self[image_keys[key]][ + self.subdict[image_keys[key]][ list(search_query[q].values())[0] ] = similarity[key][q].item() else: - self[image_keys[key]][ + self.subdict[image_keys[key]][ "rank " + list(search_query[q].values())[0] ] = None - self[image_keys[key]][list(search_query[q].values())[0]] = 0 + self.subdict[image_keys[key]][list(search_query[q].values())[0]] = 0 return similarity, sorted_lists def itm_text_precessing(self, search_query: list[dict[str, str]]) -> list: @@ -580,7 +580,9 @@ class MultimodalSearch(AnalysisMethod): paths = [] image_names = [] for s in sorted( - self.items(), key=lambda t: t[1][list(query.values())[0]], reverse=True + self.subdict.items(), + key=lambda t: t[1][list(query.values())[0]], + reverse=True, ): if s[1]["rank " + list(query.values())[0]] is None: break @@ -896,17 +898,17 @@ class MultimodalSearch(AnalysisMethod): } for i, key in zip(range(len(image_keys)), sorted_lists[index_text_query]): if image_keys[key] in image_names: - self[image_keys[key]][ + self.subdict[image_keys[key]][ "itm " + list(search_query[index_text_query].values())[0] ] = image_names_with_itm[image_keys[key]] - self[image_keys[key]][ + self.subdict[image_keys[key]][ "itm_rank " + list(search_query[index_text_query].values())[0] ] = image_names_with_new_rank[image_keys[key]] else: - self[image_keys[key]][ + self.subdict[image_keys[key]][ "itm " + list(search_query[index_text_query].values())[0] ] = 0 - self[image_keys[key]][ + self.subdict[image_keys[key]][ "itm_rank " + list(search_query[index_text_query].values())[0] ] = None @@ -966,7 +968,7 @@ class MultimodalSearch(AnalysisMethod): current_querry_rank = "rank " + list(query.values())[0] for s in sorted( - self.items(), key=lambda t: t[1][current_querry_val], reverse=True + self.subdict.items(), key=lambda t: t[1][current_querry_val], reverse=True ): if s[1][current_querry_rank] is None: break diff --git a/ammico/summary.py b/ammico/summary.py index 64cc502..750b6ed 100644 --- a/ammico/summary.py +++ b/ammico/summary.py @@ -7,7 +7,7 @@ from lavis.models import load_model_and_preprocess class SummaryDetector(AnalysisMethod): def __init__(self, subdict: dict) -> None: super().__init__(subdict) - self.summary_device = device("cuda" if cuda.is_available() else "cpu") + self.summary_device = "cuda" if cuda.is_available() else "cpu" def load_model_base(self): """ diff --git a/notebooks/multimodal_search.ipynb b/notebooks/multimodal_search.ipynb index ebfa26d..5fe62e7 100644 --- a/notebooks/multimodal_search.ipynb +++ b/notebooks/multimodal_search.ipynb @@ -71,6 +71,16 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "a08bd3a9-e954-4a0e-ad64-6817abd3a25a", + "metadata": {}, + "outputs": [], + "source": [ + "images" + ] + }, { "cell_type": "code", "execution_count": null, @@ -83,6 +93,16 @@ "mydict = mutils.initialize_dict(images)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c091f95-07cf-42c3-82c8-5f3a3c5929f8", + "metadata": {}, + "outputs": [], + "source": [ + "mydict" + ] + }, { "cell_type": "markdown", "id": "987540a8-d800-4c70-a76b-7bfabaf123fa", @@ -130,6 +150,26 @@ "To process the loaded images using the selected model, use the below code:" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6f2c9b1-4a91-47cb-86b5-2c9c67e4837b", + "metadata": {}, + "outputs": [], + "source": [ + "my_obj = ms.MultimodalSearch(mydict)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16603ded-078e-4362-847b-57ad76829327", + "metadata": {}, + "outputs": [], + "source": [ + "my_obj.subdict" + ] + }, { "cell_type": "code", "execution_count": null, @@ -146,19 +186,28 @@ " image_keys,\n", " image_names,\n", " features_image_stacked,\n", - ") = ms.MultimodalSearch.parsing_images(\n", - " mydict, \n", + ") = my_obj.parsing_images(\n", " model_type, \n", - " path_to_saved_tensors=\"/content/drive/MyDrive/misinformation-data/\"\n", + " path_to_save_tensors=\"/content/drive/MyDrive/misinformation-data/\",\n", " )" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f236c3b1-c3a6-471a-9fc5-ef831b675286", + "metadata": {}, + "outputs": [], + "source": [ + "features_image_stacked" + ] + }, { "cell_type": "markdown", "id": "9ff8a894-566b-4c4f-acca-21c50b5b1f52", "metadata": {}, "source": [ - "The images are then processed and stored in a numerical representation, a tensor. These tensors do not change for the same image and same model - so if you run this analysis once, and save the tensors giving a path with the keyword `path_to_saved_tensors`, a file with filename `.__saved_features_image.pt` will be placed there.\n", + "The images are then processed and stored in a numerical representation, a tensor. These tensors do not change for the same image and same model - so if you run this analysis once, and save the tensors giving a path with the keyword `path_to_save_tensors`, a file with filename `.__saved_features_image.pt` will be placed there.\n", "\n", "This will save you a lot of time if you want to analyse same images with the same model but different questions. To run using the saved tensors, execute the below code giving the path and name of the tensor file." ] @@ -179,10 +228,9 @@ "# image_keys,\n", "# image_names,\n", "# features_image_stacked,\n", - "# ) = ms.MultimodalSearch.parsing_images(\n", - "# mydict,\n", + "# ) = my_obj.parsing_images(\n", "# model_type,\n", - "# path_to_load_tensors=\".5_blip_saved_features_image.pt\",\n", + "# path_to_load_tensors=\"/content/drive/MyDrive/misinformation-data/5_clip_base_saved_features_image.pt\",\n", "# )" ] }, @@ -240,8 +288,7 @@ }, "outputs": [], "source": [ - "similarity, sorted_lists = ms.MultimodalSearch.multimodal_search(\n", - " mydict,\n", + "similarity, sorted_lists = my_obj.multimodal_search(\n", " model,\n", " vis_processors,\n", " txt_processors,\n", @@ -253,6 +300,36 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "65210ca2-b674-44bd-807a-4165e14bad74", + "metadata": {}, + "outputs": [], + "source": [ + "similarity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "557473df-e2b9-4ef0-9439-3daadf6741ac", + "metadata": {}, + "outputs": [], + "source": [ + "sorted_lists" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c93d7e88-594d-4095-b5f2-7bf01210dc61", + "metadata": {}, + "outputs": [], + "source": [ + "mydict" + ] + }, { "cell_type": "markdown", "id": "e1cf7e46-0c2c-4fb2-b89a-ef585ccb9339", @@ -290,8 +367,7 @@ }, "outputs": [], "source": [ - "ms.MultimodalSearch.show_results(\n", - " mydict,\n", + "my_obj.show_results(\n", " search_query3[0],\n", ")" ] @@ -329,8 +405,7 @@ }, "outputs": [], "source": [ - "itm_scores, image_gradcam_with_itm = ms.MultimodalSearch.image_text_match_reordering(\n", - " mydict,\n", + "itm_scores, image_gradcam_with_itm = my_obj.image_text_match_reordering(\n", " search_query3,\n", " itm_model,\n", " image_keys,\n", @@ -357,8 +432,8 @@ }, "outputs": [], "source": [ - "ms.MultimodalSearch.show_results(\n", - " mydict, search_query3[0], itm=True, image_gradcam_with_itm=image_gradcam_with_itm\n", + "my_obj.show_results(\n", + " search_query3[0], itm=True, image_gradcam_with_itm=image_gradcam_with_itm\n", ")" ] }, @@ -448,7 +523,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" },