зеркало из
https://github.com/ssciwr/AMMICO.git
synced 2025-10-30 13:36:04 +02:00
added test_multimodal_search tests
Этот коммит содержится в:
родитель
ce45a2c030
Коммит
e12d50a195
@ -668,20 +668,7 @@ model_type = "blip"
|
|||||||
# model_type = "clip_vitl14_336"
|
# model_type = "clip_vitl14_336"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
pre_proc_pic_blip2_blip_albef = [
|
||||||
(
|
|
||||||
"pre_multimodal_device",
|
|
||||||
"pre_model",
|
|
||||||
"pre_proc_pic",
|
|
||||||
"pre_proc_text",
|
|
||||||
"pre_extracted_feature_img",
|
|
||||||
"pre_extracted_feature_text",
|
|
||||||
),
|
|
||||||
[
|
|
||||||
pytest.param(
|
|
||||||
device("cuda"),
|
|
||||||
"blip2",
|
|
||||||
[
|
|
||||||
-1.0039474964141846,
|
-1.0039474964141846,
|
||||||
-1.0039474964141846,
|
-1.0039474964141846,
|
||||||
-0.8433647751808167,
|
-0.8433647751808167,
|
||||||
@ -692,9 +679,42 @@ model_type = "blip"
|
|||||||
-0.6097899675369263,
|
-0.6097899675369263,
|
||||||
-0.7119789123535156,
|
-0.7119789123535156,
|
||||||
-1.0623412132263184,
|
-1.0623412132263184,
|
||||||
],
|
]
|
||||||
"the bird sat on a tree located at the intersection of 23rd and 43rd streets",
|
pre_proc_pic_clip_vitl14 = [
|
||||||
[
|
-0.7995694875717163,
|
||||||
|
-0.7849710583686829,
|
||||||
|
-0.7849710583686829,
|
||||||
|
-0.7703726291656494,
|
||||||
|
-0.7703726291656494,
|
||||||
|
-0.7849710583686829,
|
||||||
|
-0.7849710583686829,
|
||||||
|
-0.7703726291656494,
|
||||||
|
-0.7703726291656494,
|
||||||
|
-0.7703726291656494,
|
||||||
|
]
|
||||||
|
|
||||||
|
pre_proc_pic_clip_vitl14_336 = [
|
||||||
|
-0.7995694875717163,
|
||||||
|
-0.7849710583686829,
|
||||||
|
-0.7849710583686829,
|
||||||
|
-0.7849710583686829,
|
||||||
|
-0.7849710583686829,
|
||||||
|
-0.7849710583686829,
|
||||||
|
-0.7849710583686829,
|
||||||
|
-0.9163569211959839,
|
||||||
|
-1.149931788444519,
|
||||||
|
-1.0039474964141846,
|
||||||
|
]
|
||||||
|
|
||||||
|
pre_proc_text_blip2_blip_albef = (
|
||||||
|
"the bird sat on a tree located at the intersection of 23rd and 43rd streets"
|
||||||
|
)
|
||||||
|
|
||||||
|
pre_proc_text_clip_clip_vitl14_clip_vitl14_336 = (
|
||||||
|
"The bird sat on a tree located at the intersection of 23rd and 43rd streets."
|
||||||
|
)
|
||||||
|
|
||||||
|
pre_extracted_feature_img_blip2 = [
|
||||||
0.04566730558872223,
|
0.04566730558872223,
|
||||||
-0.042554520070552826,
|
-0.042554520070552826,
|
||||||
-0.06970272958278656,
|
-0.06970272958278656,
|
||||||
@ -705,8 +725,87 @@ model_type = "blip"
|
|||||||
-0.020045937970280647,
|
-0.020045937970280647,
|
||||||
0.12923966348171234,
|
0.12923966348171234,
|
||||||
0.006452132016420364,
|
0.006452132016420364,
|
||||||
],
|
]
|
||||||
[
|
|
||||||
|
pre_extracted_feature_img_blip = [
|
||||||
|
-0.02480311505496502,
|
||||||
|
0.05037587881088257,
|
||||||
|
0.039517853409051895,
|
||||||
|
-0.06994109600782394,
|
||||||
|
-0.12886561453342438,
|
||||||
|
0.047039758414030075,
|
||||||
|
-0.11620642244815826,
|
||||||
|
-0.003398326924070716,
|
||||||
|
-0.07324369996786118,
|
||||||
|
0.06994668394327164,
|
||||||
|
]
|
||||||
|
|
||||||
|
pre_extracted_feature_img_albef = [
|
||||||
|
0.08971136063337326,
|
||||||
|
-0.10915573686361313,
|
||||||
|
-0.020636577159166336,
|
||||||
|
0.048121627420186996,
|
||||||
|
-0.05943416804075241,
|
||||||
|
-0.129856139421463,
|
||||||
|
-0.0034469354432076216,
|
||||||
|
0.017888527363538742,
|
||||||
|
-0.03284582123160362,
|
||||||
|
-0.1037328764796257,
|
||||||
|
]
|
||||||
|
|
||||||
|
pre_extracted_feature_img_clip = [
|
||||||
|
0.01621132344007492,
|
||||||
|
-0.004035486374050379,
|
||||||
|
-0.04304071143269539,
|
||||||
|
-0.03459808602929115,
|
||||||
|
0.016922621056437492,
|
||||||
|
-0.025056276470422745,
|
||||||
|
-0.04178355261683464,
|
||||||
|
0.02165347896516323,
|
||||||
|
-0.003224249929189682,
|
||||||
|
0.020485712215304375,
|
||||||
|
]
|
||||||
|
|
||||||
|
pre_extracted_feature_img_parsing_clip = [
|
||||||
|
0.01621132344007492,
|
||||||
|
-0.004035486374050379,
|
||||||
|
-0.04304071143269539,
|
||||||
|
-0.03459808602929115,
|
||||||
|
0.016922621056437492,
|
||||||
|
-0.025056276470422745,
|
||||||
|
-0.04178355261683464,
|
||||||
|
0.02165347896516323,
|
||||||
|
-0.003224249929189682,
|
||||||
|
0.020485712215304375,
|
||||||
|
]
|
||||||
|
|
||||||
|
pre_extracted_feature_img_clip_vitl14 = [
|
||||||
|
-0.023943455889821053,
|
||||||
|
-0.021703708916902542,
|
||||||
|
0.035043686628341675,
|
||||||
|
0.019495919346809387,
|
||||||
|
0.014351222664117813,
|
||||||
|
-0.008634116500616074,
|
||||||
|
0.01610446907579899,
|
||||||
|
-0.003426523646339774,
|
||||||
|
0.011931191198527813,
|
||||||
|
0.0008691544644534588,
|
||||||
|
]
|
||||||
|
|
||||||
|
pre_extracted_feature_img_clip_vitl14_336 = [
|
||||||
|
-0.15060146152973175,
|
||||||
|
-0.1998099535703659,
|
||||||
|
0.5503129363059998,
|
||||||
|
0.2589969336986542,
|
||||||
|
-0.0182882659137249,
|
||||||
|
-0.12753525376319885,
|
||||||
|
0.018985718488693237,
|
||||||
|
-0.17110440135002136,
|
||||||
|
0.02220013737678528,
|
||||||
|
0.01086437702178955,
|
||||||
|
]
|
||||||
|
|
||||||
|
pre_extracted_feature_text_blip2 = [
|
||||||
-0.1384204626083374,
|
-0.1384204626083374,
|
||||||
-0.008662976324558258,
|
-0.008662976324558258,
|
||||||
0.006269007455557585,
|
0.006269007455557585,
|
||||||
@ -717,20 +816,185 @@ model_type = "blip"
|
|||||||
-0.11856459826231003,
|
-0.11856459826231003,
|
||||||
-0.058296192437410355,
|
-0.058296192437410355,
|
||||||
0.03699290752410889,
|
0.03699290752410889,
|
||||||
],
|
]
|
||||||
|
|
||||||
|
pre_extracted_feature_text_blip = [
|
||||||
|
0.0118643119931221,
|
||||||
|
-0.01291718054562807,
|
||||||
|
-0.0009687161073088646,
|
||||||
|
0.01428765058517456,
|
||||||
|
-0.05591396614909172,
|
||||||
|
0.07386433333158493,
|
||||||
|
-0.11475936323404312,
|
||||||
|
0.01620068959891796,
|
||||||
|
0.0062415082938969135,
|
||||||
|
0.0034833091776818037,
|
||||||
|
]
|
||||||
|
|
||||||
|
pre_extracted_feature_text_albef = [
|
||||||
|
-0.06229640915989876,
|
||||||
|
0.11278597265481949,
|
||||||
|
0.06628583371639252,
|
||||||
|
0.1649140566587448,
|
||||||
|
0.068987175822258,
|
||||||
|
0.006291372701525688,
|
||||||
|
0.03244050219655037,
|
||||||
|
-0.049556829035282135,
|
||||||
|
0.050752390176057816,
|
||||||
|
-0.0421440489590168,
|
||||||
|
]
|
||||||
|
|
||||||
|
pre_extracted_feature_text_clip = [
|
||||||
|
0.018169036135077477,
|
||||||
|
0.03634127229452133,
|
||||||
|
0.025660742074251175,
|
||||||
|
0.009149895049631596,
|
||||||
|
-0.035570453852415085,
|
||||||
|
0.033126577734947205,
|
||||||
|
-0.004808237310498953,
|
||||||
|
-0.0031453112605959177,
|
||||||
|
-0.02194291725754738,
|
||||||
|
0.024019461125135422,
|
||||||
|
]
|
||||||
|
|
||||||
|
pre_extracted_feature_text_clip_vitl14 = [
|
||||||
|
-0.0055463071912527084,
|
||||||
|
0.006908962037414312,
|
||||||
|
-0.019450219348073006,
|
||||||
|
-0.018097277730703354,
|
||||||
|
0.017567576840519905,
|
||||||
|
-0.03828490898013115,
|
||||||
|
-0.03781530633568764,
|
||||||
|
-0.023951737210154533,
|
||||||
|
0.01365653332322836,
|
||||||
|
-0.02341713197529316,
|
||||||
|
]
|
||||||
|
|
||||||
|
pre_extracted_feature_text_clip_vitl14_336 = [
|
||||||
|
-0.1172553077340126,
|
||||||
|
0.07105237245559692,
|
||||||
|
-0.283934086561203,
|
||||||
|
-0.24353823065757751,
|
||||||
|
0.22662702202796936,
|
||||||
|
-0.472959041595459,
|
||||||
|
-0.5191791653633118,
|
||||||
|
-0.29402273893356323,
|
||||||
|
0.22669515013694763,
|
||||||
|
-0.32044747471809387,
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
(
|
||||||
|
"pre_multimodal_device",
|
||||||
|
"pre_model",
|
||||||
|
"pre_proc_pic",
|
||||||
|
"pre_proc_text",
|
||||||
|
"pre_extracted_feature_img",
|
||||||
|
"pre_extracted_feature_text",
|
||||||
|
"pre_extracted_feature_img2",
|
||||||
|
),
|
||||||
|
[
|
||||||
|
pytest.param(
|
||||||
|
device("cuda"),
|
||||||
|
"blip2",
|
||||||
|
pre_proc_pic_blip2_blip_albef,
|
||||||
|
pre_proc_text_blip2_blip_albef,
|
||||||
|
pre_extracted_feature_img_blip2,
|
||||||
|
pre_extracted_feature_text_blip2,
|
||||||
|
pre_extracted_feature_img_blip2,
|
||||||
|
marks=pytest.mark.skipif(
|
||||||
|
gpu_is_not_available, reason="gpu_is_not_availible"
|
||||||
|
),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
device("cuda"),
|
||||||
|
"blip",
|
||||||
|
pre_proc_pic_blip2_blip_albef,
|
||||||
|
pre_proc_text_blip2_blip_albef,
|
||||||
|
pre_extracted_feature_img_blip,
|
||||||
|
pre_extracted_feature_text_blip,
|
||||||
|
pre_extracted_feature_img_blip,
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
device("cuda"),
|
||||||
|
"blip",
|
||||||
|
pre_proc_pic_blip2_blip_albef,
|
||||||
|
pre_proc_text_blip2_blip_albef,
|
||||||
|
pre_extracted_feature_img_blip,
|
||||||
|
pre_extracted_feature_text_blip,
|
||||||
|
pre_extracted_feature_img_blip,
|
||||||
|
marks=pytest.mark.skipif(
|
||||||
|
gpu_is_not_available, reason="gpu_is_not_availible"
|
||||||
|
),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
device("cpu"),
|
||||||
|
"albef",
|
||||||
|
pre_proc_pic_blip2_blip_albef,
|
||||||
|
pre_proc_text_blip2_blip_albef,
|
||||||
|
pre_extracted_feature_img_albef,
|
||||||
|
pre_extracted_feature_text_albef,
|
||||||
|
pre_extracted_feature_img_albef,
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
device("cuda"),
|
||||||
|
"albef",
|
||||||
|
pre_proc_pic_blip2_blip_albef,
|
||||||
|
pre_proc_text_blip2_blip_albef,
|
||||||
|
pre_extracted_feature_img_albef,
|
||||||
|
pre_extracted_feature_text_albef,
|
||||||
|
pre_extracted_feature_img_albef,
|
||||||
|
marks=pytest.mark.skipif(
|
||||||
|
gpu_is_not_available, reason="gpu_is_not_availible"
|
||||||
|
),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
device("cpu"),
|
||||||
|
"clip_base",
|
||||||
|
pre_proc_pic_clip_vitl14,
|
||||||
|
pre_proc_text_clip_clip_vitl14_clip_vitl14_336,
|
||||||
|
pre_extracted_feature_img_parsing_clip,
|
||||||
|
pre_extracted_feature_text_clip,
|
||||||
|
pre_extracted_feature_img_clip,
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
device("cuda"),
|
||||||
|
"clip_base",
|
||||||
|
pre_proc_pic_clip_vitl14,
|
||||||
|
pre_proc_text_clip_clip_vitl14_clip_vitl14_336,
|
||||||
|
pre_extracted_feature_img_parsing_clip,
|
||||||
|
pre_extracted_feature_text_clip,
|
||||||
|
pre_extracted_feature_img_clip,
|
||||||
|
marks=pytest.mark.skipif(
|
||||||
|
gpu_is_not_available, reason="gpu_is_not_availible"
|
||||||
|
),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
device("cpu"),
|
||||||
|
"clip_vitl14",
|
||||||
|
pre_proc_pic_clip_vitl14,
|
||||||
|
pre_proc_text_clip_clip_vitl14_clip_vitl14_336,
|
||||||
|
pre_extracted_feature_img_clip_vitl14,
|
||||||
|
pre_extracted_feature_text_clip_vitl14,
|
||||||
|
pre_extracted_feature_img_clip_vitl14,
|
||||||
|
),
|
||||||
|
pytest.param(
|
||||||
|
device("cuda"),
|
||||||
|
"clip_vitl14",
|
||||||
|
pre_proc_pic_clip_vitl14,
|
||||||
|
pre_proc_text_clip_clip_vitl14_clip_vitl14_336,
|
||||||
|
pre_extracted_feature_img_clip_vitl14,
|
||||||
|
pre_extracted_feature_text_clip_vitl14,
|
||||||
|
pre_extracted_feature_img_clip_vitl14,
|
||||||
marks=pytest.mark.skipif(
|
marks=pytest.mark.skipif(
|
||||||
gpu_is_not_available, reason="gpu_is_not_availible"
|
gpu_is_not_available, reason="gpu_is_not_availible"
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
# (device("cpu"),"blip"),
|
|
||||||
# (device("cpu"),"albef"),
|
|
||||||
# (device("cpu"),"clip_base"),
|
|
||||||
# (device("cpu"),"clip_vitl14"),
|
|
||||||
# (device("cpu"),"clip_vitl14_336"),
|
# (device("cpu"),"clip_vitl14_336"),
|
||||||
# pytest.param( device("cuda"),"blip", marks=pytest.mark.skipif(gpu_is_not_available, reason="gpu_is_not_availible"),),
|
#
|
||||||
# pytest.param( device("cuda"),"albef", marks=pytest.mark.skipif(gpu_is_not_available, reason="gpu_is_not_availible"),),
|
#
|
||||||
# pytest.param( device("cuda"),"clip_base", marks=pytest.mark.skipif(gpu_is_not_available, reason="gpu_is_not_availible"),),
|
#
|
||||||
# pytest.param( device("cuda"),"clip_vitl14", marks=pytest.mark.skipif(gpu_is_not_available, reason="gpu_is_not_availible"),),
|
|
||||||
# pytest.param( device("cuda"),"clip_vitl14_336", marks=pytest.mark.skipif(gpu_is_not_available, reason="gpu_is_not_availible"),),
|
# pytest.param( device("cuda"),"clip_vitl14_336", marks=pytest.mark.skipif(gpu_is_not_available, reason="gpu_is_not_availible"),),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
@ -741,6 +1005,7 @@ def test_parsing_images(
|
|||||||
pre_proc_text,
|
pre_proc_text,
|
||||||
pre_extracted_feature_img,
|
pre_extracted_feature_img,
|
||||||
pre_extracted_feature_text,
|
pre_extracted_feature_text,
|
||||||
|
pre_extracted_feature_img2,
|
||||||
):
|
):
|
||||||
mydict = {
|
mydict = {
|
||||||
"IMG_2746": {"filename": "./test/data/IMG_2746.png"},
|
"IMG_2746": {"filename": "./test/data/IMG_2746.png"},
|
||||||
@ -777,9 +1042,8 @@ def test_parsing_images(
|
|||||||
assert processed_text == pre_proc_text
|
assert processed_text == pre_proc_text
|
||||||
|
|
||||||
search_query = [
|
search_query = [
|
||||||
{
|
{"text_input": test_querry},
|
||||||
"text_input": "The bird sat on a tree located at the intersection of 23rd and 43rd streets."
|
{"image": TEST_IMAGE_2},
|
||||||
}
|
|
||||||
]
|
]
|
||||||
multi_features_stacked = ms.MultimodalSearch.querys_processing(
|
multi_features_stacked = ms.MultimodalSearch.querys_processing(
|
||||||
mydict, search_query, model, txt_processor, vis_processor, pre_model
|
mydict, search_query, model, txt_processor, vis_processor, pre_model
|
||||||
@ -791,5 +1055,11 @@ def test_parsing_images(
|
|||||||
is True
|
is True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
for i, num in zip(range(10), multi_features_stacked[1, 10:20].tolist()):
|
||||||
|
assert (
|
||||||
|
math.isclose(num, pre_extracted_feature_img2[i], rel_tol=related_error)
|
||||||
|
is True
|
||||||
|
)
|
||||||
|
|
||||||
del model, vis_processor, txt_processor
|
del model, vis_processor, txt_processor
|
||||||
cuda.empty_cache()
|
cuda.empty_cache()
|
||||||
|
|||||||
Загрузка…
x
Ссылка в новой задаче
Block a user