зеркало из
https://github.com/ssciwr/AMMICO.git
synced 2025-10-29 21:16:06 +02:00
Changed multimodal_search and the corresponding notebook
Этот коммит содержится в:
родитель
f524ecaad8
Коммит
0452d6607f
@ -334,8 +334,8 @@ class MultimodalSearch(AnalysisMethod):
|
||||
r = requests.get(url, allow_redirects=False)
|
||||
open(path_to_lib + "bpe_simple_vocab_16e6.txt.gz", "wb").write(r.content)
|
||||
|
||||
image_keys = sorted(self.keys())
|
||||
image_names = [self[k]["filename"] for k in image_keys]
|
||||
image_keys = sorted(self.subdict.keys())
|
||||
image_names = [self.subdict[k]["filename"] for k in image_keys]
|
||||
|
||||
select_model = {
|
||||
"blip2": MultimodalSearch.load_feature_extractor_model_blip2,
|
||||
@ -505,7 +505,7 @@ class MultimodalSearch(AnalysisMethod):
|
||||
sorted_lists (list): sorted list of similarity.
|
||||
"""
|
||||
if filter_number_of_images is None:
|
||||
filter_number_of_images = len(self)
|
||||
filter_number_of_images = len(self.subdict)
|
||||
if filter_val_limit is None:
|
||||
filter_val_limit = 0
|
||||
if filter_rel_error is None:
|
||||
@ -531,17 +531,17 @@ class MultimodalSearch(AnalysisMethod):
|
||||
and 100 * abs(max_val - similarity[key][q].item()) / max_val
|
||||
< filter_rel_error
|
||||
):
|
||||
self[image_keys[key]][
|
||||
self.subdict[image_keys[key]][
|
||||
"rank " + list(search_query[q].values())[0]
|
||||
] = places[q][key]
|
||||
self[image_keys[key]][
|
||||
self.subdict[image_keys[key]][
|
||||
list(search_query[q].values())[0]
|
||||
] = similarity[key][q].item()
|
||||
else:
|
||||
self[image_keys[key]][
|
||||
self.subdict[image_keys[key]][
|
||||
"rank " + list(search_query[q].values())[0]
|
||||
] = None
|
||||
self[image_keys[key]][list(search_query[q].values())[0]] = 0
|
||||
self.subdict[image_keys[key]][list(search_query[q].values())[0]] = 0
|
||||
return similarity, sorted_lists
|
||||
|
||||
def itm_text_precessing(self, search_query: list[dict[str, str]]) -> list:
|
||||
@ -580,7 +580,9 @@ class MultimodalSearch(AnalysisMethod):
|
||||
paths = []
|
||||
image_names = []
|
||||
for s in sorted(
|
||||
self.items(), key=lambda t: t[1][list(query.values())[0]], reverse=True
|
||||
self.subdict.items(),
|
||||
key=lambda t: t[1][list(query.values())[0]],
|
||||
reverse=True,
|
||||
):
|
||||
if s[1]["rank " + list(query.values())[0]] is None:
|
||||
break
|
||||
@ -896,17 +898,17 @@ class MultimodalSearch(AnalysisMethod):
|
||||
}
|
||||
for i, key in zip(range(len(image_keys)), sorted_lists[index_text_query]):
|
||||
if image_keys[key] in image_names:
|
||||
self[image_keys[key]][
|
||||
self.subdict[image_keys[key]][
|
||||
"itm " + list(search_query[index_text_query].values())[0]
|
||||
] = image_names_with_itm[image_keys[key]]
|
||||
self[image_keys[key]][
|
||||
self.subdict[image_keys[key]][
|
||||
"itm_rank " + list(search_query[index_text_query].values())[0]
|
||||
] = image_names_with_new_rank[image_keys[key]]
|
||||
else:
|
||||
self[image_keys[key]][
|
||||
self.subdict[image_keys[key]][
|
||||
"itm " + list(search_query[index_text_query].values())[0]
|
||||
] = 0
|
||||
self[image_keys[key]][
|
||||
self.subdict[image_keys[key]][
|
||||
"itm_rank " + list(search_query[index_text_query].values())[0]
|
||||
] = None
|
||||
|
||||
@ -966,7 +968,7 @@ class MultimodalSearch(AnalysisMethod):
|
||||
current_querry_rank = "rank " + list(query.values())[0]
|
||||
|
||||
for s in sorted(
|
||||
self.items(), key=lambda t: t[1][current_querry_val], reverse=True
|
||||
self.subdict.items(), key=lambda t: t[1][current_querry_val], reverse=True
|
||||
):
|
||||
if s[1][current_querry_rank] is None:
|
||||
break
|
||||
|
||||
@ -7,7 +7,7 @@ from lavis.models import load_model_and_preprocess
|
||||
class SummaryDetector(AnalysisMethod):
|
||||
def __init__(self, subdict: dict) -> None:
|
||||
super().__init__(subdict)
|
||||
self.summary_device = device("cuda" if cuda.is_available() else "cpu")
|
||||
self.summary_device = "cuda" if cuda.is_available() else "cpu"
|
||||
|
||||
def load_model_base(self):
|
||||
"""
|
||||
|
||||
107
notebooks/multimodal_search.ipynb
сгенерированный
107
notebooks/multimodal_search.ipynb
сгенерированный
@ -71,6 +71,16 @@
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a08bd3a9-e954-4a0e-ad64-6817abd3a25a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"images"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@ -83,6 +93,16 @@
|
||||
"mydict = mutils.initialize_dict(images)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4c091f95-07cf-42c3-82c8-5f3a3c5929f8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mydict"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "987540a8-d800-4c70-a76b-7bfabaf123fa",
|
||||
@ -130,6 +150,26 @@
|
||||
"To process the loaded images using the selected model, use the below code:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f6f2c9b1-4a91-47cb-86b5-2c9c67e4837b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"my_obj = ms.MultimodalSearch(mydict)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "16603ded-078e-4362-847b-57ad76829327",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"my_obj.subdict"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@ -146,19 +186,28 @@
|
||||
" image_keys,\n",
|
||||
" image_names,\n",
|
||||
" features_image_stacked,\n",
|
||||
") = ms.MultimodalSearch.parsing_images(\n",
|
||||
" mydict, \n",
|
||||
") = my_obj.parsing_images(\n",
|
||||
" model_type, \n",
|
||||
" path_to_saved_tensors=\"/content/drive/MyDrive/misinformation-data/\"\n",
|
||||
" path_to_save_tensors=\"/content/drive/MyDrive/misinformation-data/\",\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f236c3b1-c3a6-471a-9fc5-ef831b675286",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"features_image_stacked"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9ff8a894-566b-4c4f-acca-21c50b5b1f52",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The images are then processed and stored in a numerical representation, a tensor. These tensors do not change for the same image and same model - so if you run this analysis once, and save the tensors giving a path with the keyword `path_to_saved_tensors`, a file with filename `.<Number_of_images>_<model_name>_saved_features_image.pt` will be placed there.\n",
|
||||
"The images are then processed and stored in a numerical representation, a tensor. These tensors do not change for the same image and same model - so if you run this analysis once, and save the tensors giving a path with the keyword `path_to_save_tensors`, a file with filename `.<Number_of_images>_<model_name>_saved_features_image.pt` will be placed there.\n",
|
||||
"\n",
|
||||
"This will save you a lot of time if you want to analyse the same images with the same model but different questions. To run using the saved tensors, execute the code below, giving the path and name of the tensor file."
|
||||
]
|
||||
@ -179,10 +228,9 @@
|
||||
"# image_keys,\n",
|
||||
"# image_names,\n",
|
||||
"# features_image_stacked,\n",
|
||||
"# ) = ms.MultimodalSearch.parsing_images(\n",
|
||||
"# mydict,\n",
|
||||
"# ) = my_obj.parsing_images(\n",
|
||||
"# model_type,\n",
|
||||
"# path_to_load_tensors=\".5_blip_saved_features_image.pt\",\n",
|
||||
"# path_to_load_tensors=\"/content/drive/MyDrive/misinformation-data/5_clip_base_saved_features_image.pt\",\n",
|
||||
"# )"
|
||||
]
|
||||
},
|
||||
@ -240,8 +288,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"similarity, sorted_lists = ms.MultimodalSearch.multimodal_search(\n",
|
||||
" mydict,\n",
|
||||
"similarity, sorted_lists = my_obj.multimodal_search(\n",
|
||||
" model,\n",
|
||||
" vis_processors,\n",
|
||||
" txt_processors,\n",
|
||||
@ -253,6 +300,36 @@
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "65210ca2-b674-44bd-807a-4165e14bad74",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"similarity"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "557473df-e2b9-4ef0-9439-3daadf6741ac",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sorted_lists"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c93d7e88-594d-4095-b5f2-7bf01210dc61",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mydict"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e1cf7e46-0c2c-4fb2-b89a-ef585ccb9339",
|
||||
@ -290,8 +367,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ms.MultimodalSearch.show_results(\n",
|
||||
" mydict,\n",
|
||||
"my_obj.show_results(\n",
|
||||
" search_query3[0],\n",
|
||||
")"
|
||||
]
|
||||
@ -329,8 +405,7 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"itm_scores, image_gradcam_with_itm = ms.MultimodalSearch.image_text_match_reordering(\n",
|
||||
" mydict,\n",
|
||||
"itm_scores, image_gradcam_with_itm = my_obj.image_text_match_reordering(\n",
|
||||
" search_query3,\n",
|
||||
" itm_model,\n",
|
||||
" image_keys,\n",
|
||||
@ -357,8 +432,8 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ms.MultimodalSearch.show_results(\n",
|
||||
" mydict, search_query3[0], itm=True, image_gradcam_with_itm=image_gradcam_with_itm\n",
|
||||
"my_obj.show_results(\n",
|
||||
" search_query3[0], itm=True, image_gradcam_with_itm=image_gradcam_with_itm\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@ -448,7 +523,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
||||
Загрузка…
x
Ссылка в новой задаче
Block a user