changed multimodal_search and corresponding notebook

This commit is contained in:
Petr Andriushchenko 2023-05-24 13:30:02 +02:00
parent f524ecaad8
commit 0452d6607f
No key found matching this signature
GPG key ID: 4C4A5DCF634115B6
3 changed files with 107 additions and 30 deletions

View file

@@ -334,8 +334,8 @@ class MultimodalSearch(AnalysisMethod):
            r = requests.get(url, allow_redirects=False)
            open(path_to_lib + "bpe_simple_vocab_16e6.txt.gz", "wb").write(r.content)
-        image_keys = sorted(self.keys())
-        image_names = [self[k]["filename"] for k in image_keys]
+        image_keys = sorted(self.subdict.keys())
+        image_names = [self.subdict[k]["filename"] for k in image_keys]
        select_model = {
            "blip2": MultimodalSearch.load_feature_extractor_model_blip2,
@@ -505,7 +505,7 @@ class MultimodalSearch(AnalysisMethod):
            sorted_lists (list): sorted list of similarity.
        """
        if filter_number_of_images is None:
-            filter_number_of_images = len(self)
+            filter_number_of_images = len(self.subdict)
        if filter_val_limit is None:
            filter_val_limit = 0
        if filter_rel_error is None:
@@ -531,17 +531,17 @@ class MultimodalSearch(AnalysisMethod):
                    and 100 * abs(max_val - similarity[key][q].item()) / max_val
                    < filter_rel_error
                ):
-                    self[image_keys[key]][
+                    self.subdict[image_keys[key]][
                        "rank " + list(search_query[q].values())[0]
                    ] = places[q][key]
-                    self[image_keys[key]][
+                    self.subdict[image_keys[key]][
                        list(search_query[q].values())[0]
                    ] = similarity[key][q].item()
                else:
-                    self[image_keys[key]][
+                    self.subdict[image_keys[key]][
                        "rank " + list(search_query[q].values())[0]
                    ] = None
-                    self[image_keys[key]][list(search_query[q].values())[0]] = 0
+                    self.subdict[image_keys[key]][list(search_query[q].values())[0]] = 0
        return similarity, sorted_lists

    def itm_text_precessing(self, search_query: list[dict[str, str]]) -> list:
@@ -580,7 +580,9 @@ class MultimodalSearch(AnalysisMethod):
        paths = []
        image_names = []
        for s in sorted(
-            self.items(), key=lambda t: t[1][list(query.values())[0]], reverse=True
+            self.subdict.items(),
+            key=lambda t: t[1][list(query.values())[0]],
+            reverse=True,
        ):
            if s[1]["rank " + list(query.values())[0]] is None:
                break
@@ -896,17 +898,17 @@ class MultimodalSearch(AnalysisMethod):
            }
            for i, key in zip(range(len(image_keys)), sorted_lists[index_text_query]):
                if image_keys[key] in image_names:
-                    self[image_keys[key]][
+                    self.subdict[image_keys[key]][
                        "itm " + list(search_query[index_text_query].values())[0]
                    ] = image_names_with_itm[image_keys[key]]
-                    self[image_keys[key]][
+                    self.subdict[image_keys[key]][
                        "itm_rank " + list(search_query[index_text_query].values())[0]
                    ] = image_names_with_new_rank[image_keys[key]]
                else:
-                    self[image_keys[key]][
+                    self.subdict[image_keys[key]][
                        "itm " + list(search_query[index_text_query].values())[0]
                    ] = 0
-                    self[image_keys[key]][
+                    self.subdict[image_keys[key]][
                        "itm_rank " + list(search_query[index_text_query].values())[0]
                    ] = None
@@ -966,7 +968,7 @@ class MultimodalSearch(AnalysisMethod):
        current_querry_rank = "rank " + list(query.values())[0]
        for s in sorted(
-            self.items(), key=lambda t: t[1][current_querry_val], reverse=True
+            self.subdict.items(), key=lambda t: t[1][current_querry_val], reverse=True
        ):
            if s[1][current_querry_rank] is None:
                break
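Every hunk above makes the same change: dict-style access on `self` (`self.keys()`, `len(self)`, `self.items()`, `self[...]`) is replaced by the explicit `self.subdict` attribute provided by the `AnalysisMethod` base class. A minimal, self-contained sketch of that pattern follows; the real `AnalysisMethod` is not part of this diff, so everything except the `subdict` attribute name is illustrative:

# Illustrative sketch only: "subdict" is taken from the diff, all other names are invented.
class AnalysisMethod:
    def __init__(self, subdict: dict) -> None:
        self.subdict = subdict  # per-image result entries keyed by image name


class RankingSketch(AnalysisMethod):
    def rank(self, scores: dict) -> list:
        # before this commit: sorted(self.keys()), self[key][...], self.items()
        # after this commit: every access goes through self.subdict
        for key in sorted(self.subdict.keys()):
            self.subdict[key]["rank"] = scores.get(key, 0)
        return sorted(self.subdict.items(), key=lambda t: t[1]["rank"], reverse=True)


ranked = RankingSketch({"img1": {"filename": "img1.png"}}).rank({"img1": 0.9})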

View file

@@ -7,7 +7,7 @@ from lavis.models import load_model_and_preprocess
class SummaryDetector(AnalysisMethod):
    def __init__(self, subdict: dict) -> None:
        super().__init__(subdict)
-        self.summary_device = device("cuda" if cuda.is_available() else "cpu")
+        self.summary_device = "cuda" if cuda.is_available() else "cpu"

    def load_model_base(self):
        """

notebooks/multimodal_search.ipynb (generated), 107 changed lines

View file

@@ -71,6 +71,16 @@
    ")"
   ]
  },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "a08bd3a9-e954-4a0e-ad64-6817abd3a25a",
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "images"
+  ]
+ },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -83,6 +93,16 @@
    "mydict = mutils.initialize_dict(images)"
   ]
  },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "4c091f95-07cf-42c3-82c8-5f3a3c5929f8",
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "mydict"
+  ]
+ },
  {
   "cell_type": "markdown",
   "id": "987540a8-d800-4c70-a76b-7bfabaf123fa",
@@ -130,6 +150,26 @@
    "To process the loaded images using the selected model, use the below code:"
   ]
  },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "f6f2c9b1-4a91-47cb-86b5-2c9c67e4837b",
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "my_obj = ms.MultimodalSearch(mydict)"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "16603ded-078e-4362-847b-57ad76829327",
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "my_obj.subdict"
+  ]
+ },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -146,19 +186,28 @@
    " image_keys,\n",
    " image_names,\n",
    " features_image_stacked,\n",
-   ") = ms.MultimodalSearch.parsing_images(\n",
-   " mydict, \n",
+   ") = my_obj.parsing_images(\n",
    " model_type, \n",
-   " path_to_saved_tensors=\"/content/drive/MyDrive/misinformation-data/\"\n",
+   " path_to_save_tensors=\"/content/drive/MyDrive/misinformation-data/\",\n",
    " )"
   ]
  },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "f236c3b1-c3a6-471a-9fc5-ef831b675286",
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "features_image_stacked"
+  ]
+ },
  {
   "cell_type": "markdown",
   "id": "9ff8a894-566b-4c4f-acca-21c50b5b1f52",
   "metadata": {},
   "source": [
-   "The images are then processed and stored in a numerical representation, a tensor. These tensors do not change for the same image and same model - so if you run this analysis once, and save the tensors giving a path with the keyword `path_to_saved_tensors`, a file with filename `.<Number_of_images>_<model_name>_saved_features_image.pt` will be placed there.\n",
+   "The images are then processed and stored in a numerical representation, a tensor. These tensors do not change for the same image and same model - so if you run this analysis once, and save the tensors giving a path with the keyword `path_to_save_tensors`, a file with filename `.<Number_of_images>_<model_name>_saved_features_image.pt` will be placed there.\n",
    "\n",
    "This will save you a lot of time if you want to analyse same images with the same model but different questions. To run using the saved tensors, execute the below code giving the path and name of the tensor file."
   ]
@@ -179,10 +228,9 @@
    "# image_keys,\n",
    "# image_names,\n",
    "# features_image_stacked,\n",
-   "# ) = ms.MultimodalSearch.parsing_images(\n",
-   "# mydict,\n",
+   "# ) = my_obj.parsing_images(\n",
    "# model_type,\n",
-   "# path_to_load_tensors=\".5_blip_saved_features_image.pt\",\n",
+   "# path_to_load_tensors=\"/content/drive/MyDrive/misinformation-data/5_clip_base_saved_features_image.pt\",\n",
    "# )"
   ]
  },
@@ -240,8 +288,7 @@
   },
   "outputs": [],
   "source": [
-   "similarity, sorted_lists = ms.MultimodalSearch.multimodal_search(\n",
-   " mydict,\n",
+   "similarity, sorted_lists = my_obj.multimodal_search(\n",
    " model,\n",
    " vis_processors,\n",
    " txt_processors,\n",
@@ -253,6 +300,36 @@
    ")"
   ]
  },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "65210ca2-b674-44bd-807a-4165e14bad74",
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "similarity"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "557473df-e2b9-4ef0-9439-3daadf6741ac",
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "sorted_lists"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "c93d7e88-594d-4095-b5f2-7bf01210dc61",
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "mydict"
+  ]
+ },
  {
   "cell_type": "markdown",
   "id": "e1cf7e46-0c2c-4fb2-b89a-ef585ccb9339",
@@ -290,8 +367,7 @@
   },
   "outputs": [],
   "source": [
-   "ms.MultimodalSearch.show_results(\n",
-   " mydict,\n",
+   "my_obj.show_results(\n",
    " search_query3[0],\n",
    ")"
   ]
@@ -329,8 +405,7 @@
   },
   "outputs": [],
   "source": [
-   "itm_scores, image_gradcam_with_itm = ms.MultimodalSearch.image_text_match_reordering(\n",
-   " mydict,\n",
+   "itm_scores, image_gradcam_with_itm = my_obj.image_text_match_reordering(\n",
    " search_query3,\n",
    " itm_model,\n",
    " image_keys,\n",
@@ -357,8 +432,8 @@
   },
   "outputs": [],
   "source": [
-   "ms.MultimodalSearch.show_results(\n",
-   " mydict, search_query3[0], itm=True, image_gradcam_with_itm=image_gradcam_with_itm\n",
+   "my_obj.show_results(\n",
+   " search_query3[0], itm=True, image_gradcam_with_itm=image_gradcam_with_itm\n",
    ")"
   ]
  },
@@ -448,7 +523,7 @@
 ],
 "metadata": {
  "kernelspec": {
-  "display_name": "Python 3 (ipykernel)",
+  "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
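Taken together, the notebook now drives the analysis through a single MultimodalSearch instance (`my_obj`) instead of passing `mydict` into class-level calls. Below is a condensed sketch of the updated flow pieced together from the hunks above; the import line is an assumption based on the `ms.` prefix, the trailing arguments of `multimodal_search` and `image_text_match_reordering` are not shown in this diff, and `mydict`, `model_type`, `search_query3` and `itm_model` come from earlier notebook cells:

# Sketch assembled from the diff; lines marked "assumed" or "not shown" are not confirmed by it.
import misinformation.multimodal_search as ms  # assumed import behind the "ms." prefix

my_obj = ms.MultimodalSearch(mydict)  # wraps the image dict; inspect it via my_obj.subdict

# extract image features once and cache the tensor next to the data
(
    model,
    vis_processors,
    txt_processors,
    image_keys,
    image_names,
    features_image_stacked,
) = my_obj.parsing_images(
    model_type,
    path_to_save_tensors="/content/drive/MyDrive/misinformation-data/",
)

# rank all images against the text/image queries
similarity, sorted_lists = my_obj.multimodal_search(
    model,
    vis_processors,
    txt_processors,
    # ... remaining arguments as in the notebook cell (not shown in this diff)
)

my_obj.show_results(search_query3[0])  # similarity-based ranking

# re-rank with the image-text matching model and show the GradCAM overlays
itm_scores, image_gradcam_with_itm = my_obj.image_text_match_reordering(
    search_query3,
    itm_model,
    image_keys,
    # ... remaining arguments as in the notebook cell (not shown in this diff)
)
my_obj.show_results(
    search_query3[0], itm=True, image_gradcam_with_itm=image_gradcam_with_itm
)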