зеркало из
https://github.com/ssciwr/AMMICO.git
synced 2025-10-29 21:16:06 +02:00
448 строки
12 KiB
Python
448 строки
12 KiB
Python
import pytest
|
|
import math
|
|
from PIL import Image
|
|
import numpy
|
|
from torch import device, cuda
|
|
import misinformation.multimodal_search as ms
|
|
|
|
|
|
# Images used by the tests in this module, keyed by image id; every entry
# stores the path of the image file under "filename".
testdict = {
    "IMG_2746": {"filename": "./test/data/IMG_2746.png"},
    "IMG_2809": {"filename": "./test/data/IMG_2809.png"},
}

# Relative tolerance handed to math.isclose when comparing floating-point
# results against the stored reference values.
related_error = 1e-2

# True when torch cannot see a CUDA device; the "cuda" parametrizations are
# skipped on this flag.
gpu_is_not_available = not cuda.is_available()

# Release any cached CUDA memory at import time, before a test loads a model.
cuda.empty_cache()
|
|
|
|
|
|
def test_read_img():
    """Check that ``MultimodalSearch.read_img`` loads IMG_2746 correctly.

    The image is read through the class-level function and converted to a
    numpy array; the element at ``[257][34]`` must equal the stored
    reference triple.
    """
    # read_img is called through the class, so an empty dict is supplied as
    # its first positional argument.
    my_dict = {}
    test_img = ms.MultimodalSearch.read_img(my_dict, testdict["IMG_2746"]["filename"])
    assert list(numpy.array(test_img)[257][34]) == [70, 66, 63]
|
|
|
|
|
|
# Reference values for the visual preprocessing step. test_parsing_images
# compares them with processed_pic[0, 0, 0, 25:27], i.e. two neighbouring
# entries of the tensor returned by the "eval" visual processor.
pre_proc_pic_blip2_blip_albef = [
    -1.0039474964141846,
    -1.0039474964141846,
]
pre_proc_pic_clip_vitl14 = [
    -0.7995694875717163,
    -0.7849710583686829,
]

pre_proc_pic_clip_vitl14_336 = [
    -0.7995694875717163,
    -0.7849710583686829,
]

# Reference output of the "eval" text processor for the test query. The
# blip2/blip/albef variant is lower-cased and has no trailing full stop; the
# clip variants are identical to the query string used in the test.
pre_proc_text_blip2_blip_albef = (
    "the bird sat on a tree located at the intersection of 23rd and 43rd streets"
)

pre_proc_text_clip_clip_vitl14_clip_vitl14_336 = (
    "The bird sat on a tree located at the intersection of 23rd and 43rd streets."
)
|
|
|
|
# Reference image features, one two-element list per model. In
# test_parsing_images they are compared with entries 10:12 of the first row of
# the stacked image features and with the image row of the query features.
pre_extracted_feature_img_blip2 = [
    0.04566730558872223,
    -0.042554520070552826,
]

pre_extracted_feature_img_blip = [
    -0.02480311505496502,
    0.05037587881088257,
]

pre_extracted_feature_img_albef = [
    0.08971136063337326,
    -0.10915573686361313,
]

pre_extracted_feature_img_clip = [
    0.01621132344007492,
    -0.004035486374050379,
]

# NOTE(review): same values as pre_extracted_feature_img_clip and not
# referenced in the visible part of this file; confirm whether it is still
# needed.
pre_extracted_feature_img_parsing_clip = [
    0.01621132344007492,
    -0.004035486374050379,
]

pre_extracted_feature_img_clip_vitl14 = [
    -0.023943455889821053,
    -0.021703708916902542,
]

pre_extracted_feature_img_clip_vitl14_336 = [
    -0.009511193260550499,
    -0.012618942186236382,
]

# Reference text features, one two-element list per model. They are compared
# with entries 10:12 of the text row of the query features.
pre_extracted_feature_text_blip2 = [
    -0.1384204626083374,
    -0.008662976324558258,
]

pre_extracted_feature_text_blip = [
    0.0118643119931221,
    -0.01291718054562807,
]

pre_extracted_feature_text_albef = [
    -0.06229640915989876,
    0.11278597265481949,
]

pre_extracted_feature_text_clip = [
    0.018169036135077477,
    0.03634127229452133,
]

pre_extracted_feature_text_clip_vitl14 = [
    -0.0055463071912527084,
    0.006908962037414312,
]

pre_extracted_feature_text_clip_vitl14_336 = [
    -0.008720514364540577,
    0.005284308455884457,
]
|
|
|
|
# Reference results for MultimodalSearch.multimodal_search, one pair per
# model: "simularity_*" is compared element-wise with the returned similarity
# values and "sorted_*" with the returned sorted list. (The spelling
# "simularity" is kept because the parametrization refers to these names.)
simularity_blip2 = [
    [0.05826476216316223, -0.02717375010251999],
    [0.06297147274017334, 0.47339022159576416],
]

sorted_blip2 = [
    [1, 0],
    [1, 0],
]

simularity_blip = [
    [0.15640679001808167, 0.752173662185669],
    [0.17233705520629883, 0.8448910117149353],
]

sorted_blip = [
    [1, 0],
    [1, 0],
]

simularity_albef = [
    [0.12321824580430984, 0.35511350631713867],
    [0.10870333760976791, 0.5143978595733643],
]

sorted_albef = [
    [0, 1],
    [1, 0],
]

simularity_clip = [
    [0.23923014104366302, 0.5325412750244141],
    [0.2310466319322586, 0.5910375714302063],
]

sorted_clip = [
    [1, 0],
    [1, 0],
]

simularity_clip_vitl14 = [
    [0.1051270067691803, 0.5184808373451233],
    [0.1277746558189392, 0.6841973662376404],
]

sorted_clip_vitl14 = [
    [1, 0],
    [1, 0],
]

simularity_clip_vitl14_336 = [
    [0.09391091763973236, 0.49337542057037354],
    [0.13700757920742035, 0.7003108263015747],
]

sorted_clip_vitl14_336 = [
    [1, 0],
    [1, 0],
]
|
|
|
|
# Reference values keyed by model name.
# NOTE(review): neither dictionary is referenced in the visible part of this
# file; presumably they are the expected ITM scores / grad-cam pixel values
# for a later test - confirm against their consumers.
dict_itm_scores_for_blib = {
    "blip_base": [
        0.07107225805521011,
        0.004100032616406679,
    ],
    "blip_large": [
        0.07890705019235611,
        0.00271016638725996,
    ],
    "blip2_coco": [
        0.0833505243062973,
        0.004216152708977461,
    ],
}

dict_image_gradcam_with_itm_for_blip = {
    "blip_base": [123.36285799741745, 132.31662154197693, 53.38280035299249],
    "blip_large": [119.99512910842896, 128.7044593691826, 55.552959859540515],
}
|
|
|
|
|
|
def _assert_values_close(actual, expected, rel_tol):
    """Assert each value of ``actual`` is close to the same-index entry of ``expected``.

    Only the first ``len(actual)`` entries of ``expected`` are inspected; the
    comparison uses ``math.isclose`` with the given relative tolerance.
    """
    for index, value in enumerate(actual):
        assert math.isclose(value, expected[index], rel_tol=rel_tol)


@pytest.mark.long
@pytest.mark.parametrize(
    (
        "pre_multimodal_device",
        "pre_model",
        "pre_proc_pic",
        "pre_proc_text",
        "pre_extracted_feature_img",
        "pre_extracted_feature_text",
        "pre_simularity",
        "pre_sorted",
    ),
    [
        # (
        #     device("cpu"),
        #     "blip2",
        #     pre_proc_pic_blip2_blip_albef,
        #     pre_proc_text_blip2_blip_albef,
        #     pre_extracted_feature_img_blip2,
        #     pre_extracted_feature_text_blip2,
        #     simularity_blip2,
        #     sorted_blip2,
        # ),
        # pytest.param(
        #     device("cuda"),
        #     "blip2",
        #     pre_proc_pic_blip2_blip_albef,
        #     pre_proc_text_blip2_blip_albef,
        #     pre_extracted_feature_img_blip2,
        #     pre_extracted_feature_text_blip2,
        #     simularity_blip2,
        #     sorted_blip2,
        #     marks=pytest.mark.skipif(
        #         gpu_is_not_available, reason="gpu_is_not_availible"
        #     ),
        # ),
        (
            device("cpu"),
            "blip",
            pre_proc_pic_blip2_blip_albef,
            pre_proc_text_blip2_blip_albef,
            pre_extracted_feature_img_blip,
            pre_extracted_feature_text_blip,
            simularity_blip,
            sorted_blip,
        ),
        pytest.param(
            device("cuda"),
            "blip",
            pre_proc_pic_blip2_blip_albef,
            pre_proc_text_blip2_blip_albef,
            pre_extracted_feature_img_blip,
            pre_extracted_feature_text_blip,
            simularity_blip,
            sorted_blip,
            marks=pytest.mark.skipif(
                gpu_is_not_available, reason="gpu_is_not_availible"
            ),
        ),
        (
            device("cpu"),
            "albef",
            pre_proc_pic_blip2_blip_albef,
            pre_proc_text_blip2_blip_albef,
            pre_extracted_feature_img_albef,
            pre_extracted_feature_text_albef,
            simularity_albef,
            sorted_albef,
        ),
        pytest.param(
            device("cuda"),
            "albef",
            pre_proc_pic_blip2_blip_albef,
            pre_proc_text_blip2_blip_albef,
            pre_extracted_feature_img_albef,
            pre_extracted_feature_text_albef,
            simularity_albef,
            sorted_albef,
            marks=pytest.mark.skipif(
                gpu_is_not_available, reason="gpu_is_not_availible"
            ),
        ),
        (
            device("cpu"),
            "clip_base",
            pre_proc_pic_clip_vitl14,
            pre_proc_text_clip_clip_vitl14_clip_vitl14_336,
            pre_extracted_feature_img_clip,
            pre_extracted_feature_text_clip,
            simularity_clip,
            sorted_clip,
        ),
        pytest.param(
            device("cuda"),
            "clip_base",
            pre_proc_pic_clip_vitl14,
            pre_proc_text_clip_clip_vitl14_clip_vitl14_336,
            pre_extracted_feature_img_clip,
            pre_extracted_feature_text_clip,
            simularity_clip,
            sorted_clip,
            marks=pytest.mark.skipif(
                gpu_is_not_available, reason="gpu_is_not_availible"
            ),
        ),
        (
            device("cpu"),
            "clip_vitl14",
            pre_proc_pic_clip_vitl14,
            pre_proc_text_clip_clip_vitl14_clip_vitl14_336,
            pre_extracted_feature_img_clip_vitl14,
            pre_extracted_feature_text_clip_vitl14,
            simularity_clip_vitl14,
            sorted_clip_vitl14,
        ),
        pytest.param(
            device("cuda"),
            "clip_vitl14",
            pre_proc_pic_clip_vitl14,
            pre_proc_text_clip_clip_vitl14_clip_vitl14_336,
            pre_extracted_feature_img_clip_vitl14,
            pre_extracted_feature_text_clip_vitl14,
            simularity_clip_vitl14,
            sorted_clip_vitl14,
            marks=pytest.mark.skipif(
                gpu_is_not_available, reason="gpu_is_not_availible"
            ),
        ),
        (
            device("cpu"),
            "clip_vitl14_336",
            pre_proc_pic_clip_vitl14_336,
            pre_proc_text_clip_clip_vitl14_clip_vitl14_336,
            pre_extracted_feature_img_clip_vitl14_336,
            pre_extracted_feature_text_clip_vitl14_336,
            simularity_clip_vitl14_336,
            sorted_clip_vitl14_336,
        ),
        pytest.param(
            device("cuda"),
            "clip_vitl14_336",
            pre_proc_pic_clip_vitl14_336,
            pre_proc_text_clip_clip_vitl14_clip_vitl14_336,
            pre_extracted_feature_img_clip_vitl14_336,
            pre_extracted_feature_text_clip_vitl14_336,
            simularity_clip_vitl14_336,
            sorted_clip_vitl14_336,
            marks=pytest.mark.skipif(
                gpu_is_not_available, reason="gpu_is_not_availible"
            ),
        ),
    ],
)
def test_parsing_images(
    pre_multimodal_device,
    pre_model,
    pre_proc_pic,
    pre_proc_text,
    pre_extracted_feature_img,
    pre_extracted_feature_text,
    pre_simularity,
    pre_sorted,
    tmp_path,
):
    """Run the multimodal-search pipeline for one model/device combination.

    The test parses the images in ``testdict``, runs the returned "eval"
    processors on a sample picture and query, builds query features and
    finally performs a search, comparing selected values of every stage with
    stored references.

    Args:
        pre_multimodal_device: torch device assigned to
            ``MultimodalSearch.multimodal_device`` for this run.
        pre_model: model name passed to the ``MultimodalSearch`` functions.
        pre_proc_pic: expected values of ``processed_pic[0, 0, 0, 25:27]``.
        pre_proc_text: expected output of the "eval" text processor.
        pre_extracted_feature_img: expected image-feature entries ``10:12``.
        pre_extracted_feature_text: expected text-feature entries ``10:12``.
        pre_simularity: expected similarity values returned by the search.
        pre_sorted: expected sorted list returned by the search.
        tmp_path: pytest temporary directory, used as ``path_to_saved_tensors``.
    """
    ms.MultimodalSearch.multimodal_device = pre_multimodal_device
    (
        model,
        vis_processor,
        txt_processor,
        image_keys,
        _,
        features_image_stacked,
    ) = ms.MultimodalSearch.parsing_images(
        testdict, pre_model, path_to_saved_tensors=tmp_path
    )

    _assert_values_close(
        features_image_stacked[0, 10:12].tolist(),
        pre_extracted_feature_img,
        related_error,
    )

    # Close the image file as soon as the RGB copy has been created.
    with Image.open(testdict["IMG_2746"]["filename"]) as raw_pic:
        test_pic = raw_pic.convert("RGB")
    test_querry = (
        "The bird sat on a tree located at the intersection of 23rd and 43rd streets."
    )
    processed_pic = (
        vis_processor["eval"](test_pic).unsqueeze(0).to(pre_multimodal_device)
    )
    processed_text = txt_processor["eval"](test_querry)

    _assert_values_close(
        processed_pic[0, 0, 0, 25:27].tolist(), pre_proc_pic, related_error
    )
    assert processed_text == pre_proc_text

    search_query = [
        {"text_input": test_querry},
        {"image": testdict["IMG_2746"]["filename"]},
    ]
    multi_features_stacked = ms.MultimodalSearch.querys_processing(
        testdict, search_query, model, txt_processor, vis_processor, pre_model
    )

    # Row 0 belongs to the text query, row 1 to the image query (same order
    # as search_query).
    _assert_values_close(
        multi_features_stacked[0, 10:12].tolist(),
        pre_extracted_feature_text,
        related_error,
    )
    _assert_values_close(
        multi_features_stacked[1, 10:12].tolist(),
        pre_extracted_feature_img,
        related_error,
    )

    search_query2 = [
        {"text_input": "A bus"},
        {"image": "../misinformation/test/data/IMG_3758.png"},
    ]

    similarity, sorted_list = ms.MultimodalSearch.multimodal_search(
        testdict,
        model,
        vis_processor,
        txt_processor,
        pre_model,
        image_keys,
        features_image_stacked,
        search_query2,
    )

    # NOTE(review): with related_error == 1e-2 this tolerance evaluates to
    # 1.0, for which math.isclose accepts any two values of the same sign -
    # confirm that such a loose check is intended.
    for expected_row, row in zip(pre_simularity, similarity.tolist()):
        _assert_values_close(row, expected_row, 100 * related_error)

    # Only the first two entries of every ranking are compared.
    for expected_row, row in zip(pre_sorted, sorted_list):
        for position, image_index in zip(range(2), row):
            assert image_index == expected_row[position]

    # Drop the references to the model and tensors before asking torch to
    # release its cached CUDA memory.
    del (
        model,
        vis_processor,
        txt_processor,
        similarity,
        features_image_stacked,
        processed_pic,
        multi_features_stacked,
    )
    cuda.empty_cache()
|