changed multimodal_search and corresponding notebook

Этот коммит содержится в:
Petr Andriushchenko 2023-05-24 13:30:02 +02:00
родитель f524ecaad8
Коммит 0452d6607f
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4C4A5DCF634115B6
3 изменённых файлов: 107 добавлений и 30 удалений

Просмотреть файл

@ -334,8 +334,8 @@ class MultimodalSearch(AnalysisMethod):
r = requests.get(url, allow_redirects=False)
open(path_to_lib + "bpe_simple_vocab_16e6.txt.gz", "wb").write(r.content)
image_keys = sorted(self.keys())
image_names = [self[k]["filename"] for k in image_keys]
image_keys = sorted(self.subdict.keys())
image_names = [self.subdict[k]["filename"] for k in image_keys]
select_model = {
"blip2": MultimodalSearch.load_feature_extractor_model_blip2,
@ -505,7 +505,7 @@ class MultimodalSearch(AnalysisMethod):
sorted_lists (list): sorted list of similarity.
"""
if filter_number_of_images is None:
filter_number_of_images = len(self)
filter_number_of_images = len(self.subdict)
if filter_val_limit is None:
filter_val_limit = 0
if filter_rel_error is None:
@ -531,17 +531,17 @@ class MultimodalSearch(AnalysisMethod):
and 100 * abs(max_val - similarity[key][q].item()) / max_val
< filter_rel_error
):
self[image_keys[key]][
self.subdict[image_keys[key]][
"rank " + list(search_query[q].values())[0]
] = places[q][key]
self[image_keys[key]][
self.subdict[image_keys[key]][
list(search_query[q].values())[0]
] = similarity[key][q].item()
else:
self[image_keys[key]][
self.subdict[image_keys[key]][
"rank " + list(search_query[q].values())[0]
] = None
self[image_keys[key]][list(search_query[q].values())[0]] = 0
self.subdict[image_keys[key]][list(search_query[q].values())[0]] = 0
return similarity, sorted_lists
def itm_text_precessing(self, search_query: list[dict[str, str]]) -> list:
@ -580,7 +580,9 @@ class MultimodalSearch(AnalysisMethod):
paths = []
image_names = []
for s in sorted(
self.items(), key=lambda t: t[1][list(query.values())[0]], reverse=True
self.subdict.items(),
key=lambda t: t[1][list(query.values())[0]],
reverse=True,
):
if s[1]["rank " + list(query.values())[0]] is None:
break
@ -896,17 +898,17 @@ class MultimodalSearch(AnalysisMethod):
}
for i, key in zip(range(len(image_keys)), sorted_lists[index_text_query]):
if image_keys[key] in image_names:
self[image_keys[key]][
self.subdict[image_keys[key]][
"itm " + list(search_query[index_text_query].values())[0]
] = image_names_with_itm[image_keys[key]]
self[image_keys[key]][
self.subdict[image_keys[key]][
"itm_rank " + list(search_query[index_text_query].values())[0]
] = image_names_with_new_rank[image_keys[key]]
else:
self[image_keys[key]][
self.subdict[image_keys[key]][
"itm " + list(search_query[index_text_query].values())[0]
] = 0
self[image_keys[key]][
self.subdict[image_keys[key]][
"itm_rank " + list(search_query[index_text_query].values())[0]
] = None
@ -966,7 +968,7 @@ class MultimodalSearch(AnalysisMethod):
current_querry_rank = "rank " + list(query.values())[0]
for s in sorted(
self.items(), key=lambda t: t[1][current_querry_val], reverse=True
self.subdict.items(), key=lambda t: t[1][current_querry_val], reverse=True
):
if s[1][current_querry_rank] is None:
break

Просмотреть файл

@ -7,7 +7,7 @@ from lavis.models import load_model_and_preprocess
class SummaryDetector(AnalysisMethod):
def __init__(self, subdict: dict) -> None:
super().__init__(subdict)
self.summary_device = device("cuda" if cuda.is_available() else "cpu")
self.summary_device = "cuda" if cuda.is_available() else "cpu"
def load_model_base(self):
"""

107
notebooks/multimodal_search.ipynb сгенерированный
Просмотреть файл

@ -71,6 +71,16 @@
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a08bd3a9-e954-4a0e-ad64-6817abd3a25a",
"metadata": {},
"outputs": [],
"source": [
"images"
]
},
{
"cell_type": "code",
"execution_count": null,
@ -83,6 +93,16 @@
"mydict = mutils.initialize_dict(images)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4c091f95-07cf-42c3-82c8-5f3a3c5929f8",
"metadata": {},
"outputs": [],
"source": [
"mydict"
]
},
{
"cell_type": "markdown",
"id": "987540a8-d800-4c70-a76b-7bfabaf123fa",
@ -130,6 +150,26 @@
"To process the loaded images using the selected model, use the below code:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f6f2c9b1-4a91-47cb-86b5-2c9c67e4837b",
"metadata": {},
"outputs": [],
"source": [
"my_obj = ms.MultimodalSearch(mydict)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "16603ded-078e-4362-847b-57ad76829327",
"metadata": {},
"outputs": [],
"source": [
"my_obj.subdict"
]
},
{
"cell_type": "code",
"execution_count": null,
@ -146,19 +186,28 @@
" image_keys,\n",
" image_names,\n",
" features_image_stacked,\n",
") = ms.MultimodalSearch.parsing_images(\n",
" mydict, \n",
") = my_obj.parsing_images(\n",
" model_type, \n",
" path_to_saved_tensors=\"/content/drive/MyDrive/misinformation-data/\"\n",
" path_to_save_tensors=\"/content/drive/MyDrive/misinformation-data/\",\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f236c3b1-c3a6-471a-9fc5-ef831b675286",
"metadata": {},
"outputs": [],
"source": [
"features_image_stacked"
]
},
{
"cell_type": "markdown",
"id": "9ff8a894-566b-4c4f-acca-21c50b5b1f52",
"metadata": {},
"source": [
"The images are then processed and stored in a numerical representation, a tensor. These tensors do not change for the same image and same model - so if you run this analysis once, and save the tensors giving a path with the keyword `path_to_saved_tensors`, a file with filename `.<Number_of_images>_<model_name>_saved_features_image.pt` will be placed there.\n",
"The images are then processed and stored in a numerical representation, a tensor. These tensors do not change for the same image and same model - so if you run this analysis once, and save the tensors giving a path with the keyword `path_to_save_tensors`, a file with filename `.<Number_of_images>_<model_name>_saved_features_image.pt` will be placed there.\n",
"\n",
"This will save you a lot of time if you want to analyse the same images with the same model but different questions. To run using the saved tensors, execute the below code giving the path and name of the tensor file."
]
@ -179,10 +228,9 @@
"# image_keys,\n",
"# image_names,\n",
"# features_image_stacked,\n",
"# ) = ms.MultimodalSearch.parsing_images(\n",
"# mydict,\n",
"# ) = my_obj.parsing_images(\n",
"# model_type,\n",
"# path_to_load_tensors=\".5_blip_saved_features_image.pt\",\n",
"# path_to_load_tensors=\"/content/drive/MyDrive/misinformation-data/5_clip_base_saved_features_image.pt\",\n",
"# )"
]
},
@ -240,8 +288,7 @@
},
"outputs": [],
"source": [
"similarity, sorted_lists = ms.MultimodalSearch.multimodal_search(\n",
" mydict,\n",
"similarity, sorted_lists = my_obj.multimodal_search(\n",
" model,\n",
" vis_processors,\n",
" txt_processors,\n",
@ -253,6 +300,36 @@
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "65210ca2-b674-44bd-807a-4165e14bad74",
"metadata": {},
"outputs": [],
"source": [
"similarity"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "557473df-e2b9-4ef0-9439-3daadf6741ac",
"metadata": {},
"outputs": [],
"source": [
"sorted_lists"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c93d7e88-594d-4095-b5f2-7bf01210dc61",
"metadata": {},
"outputs": [],
"source": [
"mydict"
]
},
{
"cell_type": "markdown",
"id": "e1cf7e46-0c2c-4fb2-b89a-ef585ccb9339",
@ -290,8 +367,7 @@
},
"outputs": [],
"source": [
"ms.MultimodalSearch.show_results(\n",
" mydict,\n",
"my_obj.show_results(\n",
" search_query3[0],\n",
")"
]
@ -329,8 +405,7 @@
},
"outputs": [],
"source": [
"itm_scores, image_gradcam_with_itm = ms.MultimodalSearch.image_text_match_reordering(\n",
" mydict,\n",
"itm_scores, image_gradcam_with_itm = my_obj.image_text_match_reordering(\n",
" search_query3,\n",
" itm_model,\n",
" image_keys,\n",
@ -357,8 +432,8 @@
},
"outputs": [],
"source": [
"ms.MultimodalSearch.show_results(\n",
" mydict, search_query3[0], itm=True, image_gradcam_with_itm=image_gradcam_with_itm\n",
"my_obj.show_results(\n",
" search_query3[0], itm=True, image_gradcam_with_itm=image_gradcam_with_itm\n",
")"
]
},
@ -448,7 +523,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},