зеркало из
https://github.com/ssciwr/AMMICO.git
synced 2025-10-29 13:06:04 +02:00
fixed tests in multimodal
Этот коммит содержится в:
родитель
b9ad5b267d
Коммит
a00ac65868
@ -12,6 +12,8 @@ from IPython.display import display
|
||||
from lavis.models import load_model_and_preprocess, load_model, BlipBase
|
||||
from lavis.processors import load_processor
|
||||
|
||||
# from memory_profiler import profile
|
||||
|
||||
|
||||
class MultimodalSearch(AnalysisMethod):
|
||||
def __init__(self, subdict: dict) -> None:
|
||||
@ -339,13 +341,6 @@ class MultimodalSearch(AnalysisMethod):
|
||||
|
||||
return text_query_index
|
||||
|
||||
def itm_images_processing(self, image_paths, vis_processor):
|
||||
raw_images = [MultimodalSearch.read_img(self, path) for path in image_paths]
|
||||
images = [vis_processor(r_img) for r_img in raw_images]
|
||||
images_tensors = torch.stack(images).to(MultimodalSearch.multimodal_device)
|
||||
|
||||
return raw_images, images_tensors
|
||||
|
||||
def get_pathes_from_query(self, query):
|
||||
paths = []
|
||||
image_names = []
|
||||
@ -483,6 +478,7 @@ class MultimodalSearch(AnalysisMethod):
|
||||
vis_processor = load_processor("blip_image_eval").build(image_size=384)
|
||||
return itm_model, vis_processor
|
||||
|
||||
# @profile
|
||||
def image_text_match_reordering(
|
||||
self,
|
||||
search_query,
|
||||
@ -602,6 +598,20 @@ class MultimodalSearch(AnalysisMethod):
|
||||
image_gradcam_with_itm[
|
||||
list(search_query[index_text_query].values())[0]
|
||||
] = localimage_gradcam_with_itm
|
||||
del (
|
||||
itm_model,
|
||||
vis_processor_itm,
|
||||
text_processor,
|
||||
raw_images,
|
||||
images,
|
||||
tokenizer,
|
||||
queries_batch,
|
||||
queries_tok_batch,
|
||||
itm_score,
|
||||
)
|
||||
if need_grad_cam:
|
||||
del itm_output, gradcam, norm_img, grad_cam, avg_gradcam
|
||||
torch.cuda.empty_cache()
|
||||
return itm_scores2, image_gradcam_with_itm
|
||||
|
||||
def show_results(self, query, itm=False, image_gradcam_with_itm=False):
|
||||
|
||||
@ -6,20 +6,8 @@ from torch import device, cuda
|
||||
import misinformation.multimodal_search as ms
|
||||
|
||||
testdict = {
|
||||
"d755771b-225e-432f-802e-fb8dc850fff7": {
|
||||
"filename": "./test/data/d755771b-225e-432f-802e-fb8dc850fff7.png"
|
||||
},
|
||||
"IMG_2746": {"filename": "./test/data/IMG_2746.png"},
|
||||
"IMG_2750": {"filename": "./test/data/IMG_2750.png"},
|
||||
"IMG_2805": {"filename": "./test/data/IMG_2805.png"},
|
||||
"IMG_2806": {"filename": "./test/data/IMG_2806.png"},
|
||||
"IMG_2807": {"filename": "./test/data/IMG_2807.png"},
|
||||
"IMG_2808": {"filename": "./test/data/IMG_2808.png"},
|
||||
"IMG_2809": {"filename": "./test/data/IMG_2809.png"},
|
||||
"IMG_3755": {"filename": "./test/data/IMG_3755.jpg"},
|
||||
"IMG_3756": {"filename": "./test/data/IMG_3756.jpg"},
|
||||
"IMG_3757": {"filename": "./test/data/IMG_3757.jpg"},
|
||||
"pic1": {"filename": "./test/data/pic1.png"},
|
||||
}
|
||||
|
||||
related_error = 1e-2
|
||||
@ -38,39 +26,15 @@ def test_read_img():
|
||||
pre_proc_pic_blip2_blip_albef = [
|
||||
-1.0039474964141846,
|
||||
-1.0039474964141846,
|
||||
-0.8433647751808167,
|
||||
-0.6097899675369263,
|
||||
-0.5951915383338928,
|
||||
-0.6243883967399597,
|
||||
-0.6827820539474487,
|
||||
-0.6097899675369263,
|
||||
-0.7119789123535156,
|
||||
-1.0623412132263184,
|
||||
]
|
||||
pre_proc_pic_clip_vitl14 = [
|
||||
-0.7995694875717163,
|
||||
-0.7849710583686829,
|
||||
-0.7849710583686829,
|
||||
-0.7703726291656494,
|
||||
-0.7703726291656494,
|
||||
-0.7849710583686829,
|
||||
-0.7849710583686829,
|
||||
-0.7703726291656494,
|
||||
-0.7703726291656494,
|
||||
-0.7703726291656494,
|
||||
]
|
||||
|
||||
pre_proc_pic_clip_vitl14_336 = [
|
||||
-0.7995694875717163,
|
||||
-0.7849710583686829,
|
||||
-0.7849710583686829,
|
||||
-0.7849710583686829,
|
||||
-0.7849710583686829,
|
||||
-0.7849710583686829,
|
||||
-0.7849710583686829,
|
||||
-0.9163569211959839,
|
||||
-1.149931788444519,
|
||||
-1.0039474964141846,
|
||||
]
|
||||
|
||||
pre_proc_text_blip2_blip_albef = (
|
||||
@ -84,340 +48,146 @@ pre_proc_text_clip_clip_vitl14_clip_vitl14_336 = (
|
||||
pre_extracted_feature_img_blip2 = [
|
||||
0.04566730558872223,
|
||||
-0.042554520070552826,
|
||||
-0.06970272958278656,
|
||||
-0.009771779179573059,
|
||||
0.01446065679192543,
|
||||
0.10173682868480682,
|
||||
0.007092420011758804,
|
||||
-0.020045937970280647,
|
||||
0.12923966348171234,
|
||||
0.006452132016420364,
|
||||
]
|
||||
|
||||
pre_extracted_feature_img_blip = [
|
||||
-0.02480311505496502,
|
||||
0.05037587881088257,
|
||||
0.039517853409051895,
|
||||
-0.06994109600782394,
|
||||
-0.12886561453342438,
|
||||
0.047039758414030075,
|
||||
-0.11620642244815826,
|
||||
-0.003398326924070716,
|
||||
-0.07324369996786118,
|
||||
0.06994668394327164,
|
||||
]
|
||||
|
||||
pre_extracted_feature_img_albef = [
|
||||
0.08971136063337326,
|
||||
-0.10915573686361313,
|
||||
-0.020636577159166336,
|
||||
0.048121627420186996,
|
||||
-0.05943416804075241,
|
||||
-0.129856139421463,
|
||||
-0.0034469354432076216,
|
||||
0.017888527363538742,
|
||||
-0.03284582123160362,
|
||||
-0.1037328764796257,
|
||||
]
|
||||
|
||||
pre_extracted_feature_img_clip = [
|
||||
0.01621132344007492,
|
||||
-0.004035486374050379,
|
||||
-0.04304071143269539,
|
||||
-0.03459808602929115,
|
||||
0.016922621056437492,
|
||||
-0.025056276470422745,
|
||||
-0.04178355261683464,
|
||||
0.02165347896516323,
|
||||
-0.003224249929189682,
|
||||
0.020485712215304375,
|
||||
]
|
||||
|
||||
pre_extracted_feature_img_parsing_clip = [
|
||||
0.01621132344007492,
|
||||
-0.004035486374050379,
|
||||
-0.04304071143269539,
|
||||
-0.03459808602929115,
|
||||
0.016922621056437492,
|
||||
-0.025056276470422745,
|
||||
-0.04178355261683464,
|
||||
0.02165347896516323,
|
||||
-0.003224249929189682,
|
||||
0.020485712215304375,
|
||||
]
|
||||
|
||||
pre_extracted_feature_img_clip_vitl14 = [
|
||||
-0.023943455889821053,
|
||||
-0.021703708916902542,
|
||||
0.035043686628341675,
|
||||
0.019495919346809387,
|
||||
0.014351222664117813,
|
||||
-0.008634116500616074,
|
||||
0.01610446907579899,
|
||||
-0.003426523646339774,
|
||||
0.011931191198527813,
|
||||
0.0008691544644534588,
|
||||
]
|
||||
|
||||
pre_extracted_feature_img_clip_vitl14_336 = [
|
||||
-0.009511193260550499,
|
||||
-0.012618942186236382,
|
||||
0.034754861146211624,
|
||||
0.016356879845261574,
|
||||
-0.0011549904011189938,
|
||||
-0.008054453879594803,
|
||||
0.0011990377679467201,
|
||||
-0.010806051082909107,
|
||||
0.00140204350464046,
|
||||
0.0006861367146484554,
|
||||
]
|
||||
|
||||
pre_extracted_feature_text_blip2 = [
|
||||
-0.1384204626083374,
|
||||
-0.008662976324558258,
|
||||
0.006269007455557585,
|
||||
0.03151319921016693,
|
||||
0.060558050870895386,
|
||||
-0.03230040520429611,
|
||||
0.015861615538597107,
|
||||
-0.11856459826231003,
|
||||
-0.058296192437410355,
|
||||
0.03699290752410889,
|
||||
]
|
||||
|
||||
pre_extracted_feature_text_blip = [
|
||||
0.0118643119931221,
|
||||
-0.01291718054562807,
|
||||
-0.0009687161073088646,
|
||||
0.01428765058517456,
|
||||
-0.05591396614909172,
|
||||
0.07386433333158493,
|
||||
-0.11475936323404312,
|
||||
0.01620068959891796,
|
||||
0.0062415082938969135,
|
||||
0.0034833091776818037,
|
||||
]
|
||||
|
||||
pre_extracted_feature_text_albef = [
|
||||
-0.06229640915989876,
|
||||
0.11278597265481949,
|
||||
0.06628583371639252,
|
||||
0.1649140566587448,
|
||||
0.068987175822258,
|
||||
0.006291372701525688,
|
||||
0.03244050219655037,
|
||||
-0.049556829035282135,
|
||||
0.050752390176057816,
|
||||
-0.0421440489590168,
|
||||
]
|
||||
|
||||
pre_extracted_feature_text_clip = [
|
||||
0.018169036135077477,
|
||||
0.03634127229452133,
|
||||
0.025660742074251175,
|
||||
0.009149895049631596,
|
||||
-0.035570453852415085,
|
||||
0.033126577734947205,
|
||||
-0.004808237310498953,
|
||||
-0.0031453112605959177,
|
||||
-0.02194291725754738,
|
||||
0.024019461125135422,
|
||||
]
|
||||
|
||||
pre_extracted_feature_text_clip_vitl14 = [
|
||||
-0.0055463071912527084,
|
||||
0.006908962037414312,
|
||||
-0.019450219348073006,
|
||||
-0.018097277730703354,
|
||||
0.017567576840519905,
|
||||
-0.03828490898013115,
|
||||
-0.03781530633568764,
|
||||
-0.023951737210154533,
|
||||
0.01365653332322836,
|
||||
-0.02341713197529316,
|
||||
]
|
||||
|
||||
pre_extracted_feature_text_clip_vitl14_336 = [
|
||||
-0.008720514364540577,
|
||||
0.005284308455884457,
|
||||
-0.021116750314831734,
|
||||
-0.018112430348992348,
|
||||
0.01685470901429653,
|
||||
-0.03517491742968559,
|
||||
-0.038612402975559235,
|
||||
-0.021867064759135246,
|
||||
0.01685977540910244,
|
||||
-0.023832324892282486,
|
||||
]
|
||||
|
||||
simularity_blip2 = [
|
||||
[0.05826476216316223, -0.02717375010251999],
|
||||
[0.12869958579540253, 0.006344856694340706],
|
||||
[0.11073512583971024, 0.12327021360397339],
|
||||
[0.08743024617433548, 0.058944884687662125],
|
||||
[0.04591086134314537, 0.4905201494693756],
|
||||
[0.06297147274017334, 0.47339022159576416],
|
||||
[0.18486255407333374, 0.6350338459014893],
|
||||
[0.015455856919288635, 0.018462061882019043],
|
||||
[-0.008606988936662674, 0.00741103570908308],
|
||||
[-0.0415784977376461, -0.1267213076353073],
|
||||
[-0.025470387190580368, 0.1315656304359436],
|
||||
[-0.05090826004743576, 0.059172093868255615],
|
||||
]
|
||||
|
||||
sorted_blip2 = [
|
||||
[6, 1, 2, 3, 5, 0, 4, 7, 8, 10, 9, 11],
|
||||
[6, 4, 5, 10, 2, 11, 3, 7, 8, 1, 0, 9],
|
||||
[1, 0],
|
||||
[1, 0],
|
||||
]
|
||||
|
||||
simularity_blip = [
|
||||
[0.15640679001808167, 0.752173662185669],
|
||||
[0.15139800310134888, 0.7804810404777527],
|
||||
[0.13010388612747192, 0.755257248878479],
|
||||
[0.13746635615825653, 0.7618774175643921],
|
||||
[0.1756758838891983, 0.8531903624534607],
|
||||
[0.17233705520629883, 0.8448910117149353],
|
||||
[0.1970970332622528, 0.8916105628013611],
|
||||
[0.11693969368934631, 0.5833531618118286],
|
||||
[0.12386563420295715, 0.5981853604316711],
|
||||
[0.08427951484918594, 0.4962371587753296],
|
||||
[0.14193706214427948, 0.7613846659660339],
|
||||
[0.12051936239004135, 0.6492202281951904],
|
||||
]
|
||||
|
||||
sorted_blip = [
|
||||
[6, 4, 5, 0, 1, 10, 3, 2, 8, 11, 7, 9],
|
||||
[6, 4, 5, 1, 3, 10, 2, 0, 11, 8, 7, 9],
|
||||
[1, 0],
|
||||
[1, 0],
|
||||
]
|
||||
|
||||
simularity_albef = [
|
||||
[0.12321824580430984, 0.35511350631713867],
|
||||
[0.09512615948915482, 0.27168408036231995],
|
||||
[0.09053325653076172, 0.20215675234794617],
|
||||
[0.06335515528917313, 0.15055638551712036],
|
||||
[0.09604836255311966, 0.4658776521682739],
|
||||
[0.10870333760976791, 0.5143978595733643],
|
||||
[0.11748822033405304, 0.6542638540267944],
|
||||
[0.05688793584704399, 0.22170542180538177],
|
||||
[0.05597608536481857, 0.11963296681642532],
|
||||
[0.059643782675266266, 0.14969395101070404],
|
||||
[0.06690303236246109, 0.3149859607219696],
|
||||
[0.07909377664327621, 0.11911341547966003],
|
||||
]
|
||||
|
||||
sorted_albef = [
|
||||
[0, 6, 5, 4, 1, 2, 11, 10, 3, 9, 7, 8],
|
||||
[6, 5, 4, 0, 10, 1, 7, 2, 3, 9, 8, 11],
|
||||
[0, 1],
|
||||
[1, 0],
|
||||
]
|
||||
|
||||
simularity_clip = [
|
||||
[0.23923014104366302, 0.5325412750244141],
|
||||
[0.20101115107536316, 0.5112978219985962],
|
||||
[0.17522737383842468, 0.49811851978302],
|
||||
[0.20062290132045746, 0.5415266156196594],
|
||||
[0.22865726053714752, 0.5762109756469727],
|
||||
[0.2310466319322586, 0.5910375714302063],
|
||||
[0.2644523084163666, 0.7851459383964539],
|
||||
[0.21474510431289673, 0.4135811924934387],
|
||||
[0.16407863795757294, 0.1474374681711197],
|
||||
[0.19819433987140656, 0.26493316888809204],
|
||||
[0.19545596837997437, 0.5007457137107849],
|
||||
[0.1647854745388031, 0.45705708861351013],
|
||||
]
|
||||
|
||||
sorted_clip = [
|
||||
[6, 0, 5, 4, 7, 1, 3, 9, 10, 2, 11, 8],
|
||||
[6, 5, 4, 3, 0, 1, 10, 2, 11, 7, 9, 8],
|
||||
[1, 0],
|
||||
[1, 0],
|
||||
]
|
||||
|
||||
simularity_clip_vitl14 = [
|
||||
[0.1051270067691803, 0.5184808373451233],
|
||||
[0.09705893695354462, 0.49574509263038635],
|
||||
[0.11964304000139236, 0.5424358248710632],
|
||||
[0.13881900906562805, 0.5909714698791504],
|
||||
[0.12728188931941986, 0.6758255362510681],
|
||||
[0.1277746558189392, 0.6841973662376404],
|
||||
[0.18026694655418396, 0.803142786026001],
|
||||
[0.13977059721946716, 0.45957139134407043],
|
||||
[0.11180847883224487, 0.24822194874286652],
|
||||
[0.12296056002378464, 0.35143694281578064],
|
||||
[0.11596094071865082, 0.5704031586647034],
|
||||
[0.10174489766359329, 0.44422751665115356],
|
||||
]
|
||||
|
||||
sorted_clip_vitl14 = [
|
||||
[6, 7, 3, 5, 4, 9, 2, 10, 8, 0, 11, 1],
|
||||
[6, 5, 4, 3, 10, 2, 0, 1, 7, 11, 9, 8],
|
||||
[1, 0],
|
||||
[1, 0],
|
||||
]
|
||||
|
||||
simularity_clip_vitl14_336 = [
|
||||
[0.09391091763973236, 0.49337542057037354],
|
||||
[0.11103834211826324, 0.4881117343902588],
|
||||
[0.12891019880771637, 0.5501476526260376],
|
||||
[0.13288410007953644, 0.5498673915863037],
|
||||
[0.12357455492019653, 0.6749162077903748],
|
||||
[0.13700757920742035, 0.7003108263015747],
|
||||
[0.1788637489080429, 0.7713702321052551],
|
||||
[0.13260436058044434, 0.4300197660923004],
|
||||
[0.11666625738143921, 0.2334875613451004],
|
||||
[0.1316065937280655, 0.3291645646095276],
|
||||
[0.12374477833509445, 0.5632147192955017],
|
||||
[0.10333051532506943, 0.43023794889450073],
|
||||
]
|
||||
|
||||
sorted_clip_vitl14_336 = [
|
||||
[6, 5, 3, 7, 9, 2, 10, 4, 8, 1, 11, 0],
|
||||
[6, 5, 4, 10, 2, 3, 0, 1, 11, 7, 9, 8],
|
||||
[1, 0],
|
||||
[1, 0],
|
||||
]
|
||||
|
||||
dict_itm_scores_for_blib = {
|
||||
"blip_base": [
|
||||
0.07107225805521011,
|
||||
0.02078203856945038,
|
||||
0.02083236537873745,
|
||||
0.0042252070270478725,
|
||||
0.0002070252230623737,
|
||||
0.004100032616406679,
|
||||
0.0009893750539049506,
|
||||
0.00015318625082727522,
|
||||
1.9936736862291582e-05,
|
||||
4.0084025386022404e-05,
|
||||
0.0006117739249020815,
|
||||
4.1486648115096614e-05,
|
||||
],
|
||||
"blip_large": [
|
||||
0.07890705019235611,
|
||||
0.04954551160335541,
|
||||
0.05564938113093376,
|
||||
0.002710158471018076,
|
||||
0.0026644798927009106,
|
||||
0.01277624536305666,
|
||||
0.003585426602512598,
|
||||
0.0019450040999799967,
|
||||
0.0036240608897060156,
|
||||
0.0013280785642564297,
|
||||
0.015366943553090096,
|
||||
0.0030039174016565084,
|
||||
0.00271016638725996,
|
||||
],
|
||||
"blip2_coco": [
|
||||
0.0833505243062973,
|
||||
0.046232130378484726,
|
||||
0.04996354877948761,
|
||||
0.004187352955341339,
|
||||
2.5233526685042307e-05,
|
||||
0.002679687924683094,
|
||||
2.4826533262967132e-05,
|
||||
5.1878203521482646e-05,
|
||||
1.3434584616334178e-05,
|
||||
9.76747560343938e-06,
|
||||
7.34204331820365e-06,
|
||||
1.1423194337112363e-05,
|
||||
0.004216152708977461,
|
||||
],
|
||||
}
|
||||
|
||||
dict_image_gradcam_with_itm_for_blip = {
|
||||
"blip_base": [125.12124404, 132.07243145, 65.43589668],
|
||||
"blip_large": [118.75610679, 125.35366997, 69.63849807],
|
||||
"blip_base": [123.36285799741745, 132.31662154197693, 53.38280035299249],
|
||||
"blip_large": [119.99512910842896, 128.7044593691826, 55.552959859540515],
|
||||
}
|
||||
|
||||
|
||||
@ -594,7 +364,7 @@ def test_parsing_images(
|
||||
features_image_stacked,
|
||||
) = ms.MultimodalSearch.parsing_images(testdict, pre_model)
|
||||
|
||||
for i, num in zip(range(10), features_image_stacked[0, 10:20].tolist()):
|
||||
for i, num in zip(range(10), features_image_stacked[0, 10:12].tolist()):
|
||||
assert (
|
||||
math.isclose(num, pre_extracted_feature_img[i], rel_tol=related_error)
|
||||
is True
|
||||
@ -609,7 +379,7 @@ def test_parsing_images(
|
||||
)
|
||||
processed_text = txt_processor["eval"](test_querry)
|
||||
|
||||
for i, num in zip(range(10), processed_pic[0, 0, 0, 25:35].tolist()):
|
||||
for i, num in zip(range(10), processed_pic[0, 0, 0, 25:27].tolist()):
|
||||
assert math.isclose(num, pre_proc_pic[i], rel_tol=related_error) is True
|
||||
|
||||
assert processed_text == pre_proc_text
|
||||
@ -622,13 +392,13 @@ def test_parsing_images(
|
||||
testdict, search_query, model, txt_processor, vis_processor, pre_model
|
||||
)
|
||||
|
||||
for i, num in zip(range(10), multi_features_stacked[0, 10:20].tolist()):
|
||||
for i, num in zip(range(10), multi_features_stacked[0, 10:12].tolist()):
|
||||
assert (
|
||||
math.isclose(num, pre_extracted_feature_text[i], rel_tol=related_error)
|
||||
is True
|
||||
)
|
||||
|
||||
for i, num in zip(range(10), multi_features_stacked[1, 10:20].tolist()):
|
||||
for i, num in zip(range(10), multi_features_stacked[1, 10:12].tolist()):
|
||||
assert (
|
||||
math.isclose(num, pre_extracted_feature_img[i], rel_tol=related_error)
|
||||
is True
|
||||
@ -650,39 +420,132 @@ def test_parsing_images(
|
||||
search_query2,
|
||||
)
|
||||
|
||||
for i, num in zip(range(12), similarity.tolist()):
|
||||
for i, num in zip(range(len(pre_simularity)), similarity.tolist()):
|
||||
for j, num2 in zip(range(len(num)), num):
|
||||
assert (
|
||||
math.isclose(num2, pre_simularity[i][j], rel_tol=100 * related_error)
|
||||
is True
|
||||
)
|
||||
|
||||
for i, num in zip(range(2), sorted_list):
|
||||
for i, num in zip(range(len(pre_sorted)), sorted_list):
|
||||
for j, num2 in zip(range(2), num):
|
||||
assert num2 == pre_sorted[i][j]
|
||||
|
||||
del model, vis_processor, txt_processor
|
||||
del (
|
||||
model,
|
||||
vis_processor,
|
||||
txt_processor,
|
||||
similarity,
|
||||
features_image_stacked,
|
||||
processed_pic,
|
||||
multi_features_stacked,
|
||||
)
|
||||
cuda.empty_cache()
|
||||
|
||||
if pre_model == "blip":
|
||||
for itm_model in ["blip_base", "blip_large", "blip2_coco"]:
|
||||
(
|
||||
itm_scores,
|
||||
image_gradcam_with_itm,
|
||||
) = ms.MultimodalSearch.image_text_match_reordering(
|
||||
testdict,
|
||||
search_query2,
|
||||
itm_model,
|
||||
image_keys,
|
||||
sorted_list,
|
||||
batch_size=1,
|
||||
need_grad_cam=False,
|
||||
|
||||
def test_itm():
|
||||
test_my_dict = {
|
||||
"IMG_2746": {
|
||||
"filename": "../misinformation/test/data/IMG_2746.png",
|
||||
"rank A bus": 1,
|
||||
"A bus": 0.15640679001808167,
|
||||
"rank ../misinformation/test/data/IMG_3758.png": 1,
|
||||
"../misinformation/test/data/IMG_3758.png": 0.7533495426177979,
|
||||
},
|
||||
"IMG_2809": {
|
||||
"filename": "../misinformation/test/data/IMG_2809.png",
|
||||
"rank A bus": 0,
|
||||
"A bus": 0.1970970332622528,
|
||||
"rank ../misinformation/test/data/IMG_3758.png": 0,
|
||||
"../misinformation/test/data/IMG_3758.png": 0.8907483816146851,
|
||||
},
|
||||
}
|
||||
search_query3 = [
|
||||
{"text_input": "A bus"},
|
||||
{"image": "../misinformation/test/data/IMG_3758.png"},
|
||||
]
|
||||
image_keys = ["IMG_2746", "IMG_2809"]
|
||||
sorted_list = [[1, 0], [1, 0]]
|
||||
for itm_model in ["blip_base", "blip_large"]:
|
||||
(
|
||||
itm_scores,
|
||||
image_gradcam_with_itm,
|
||||
) = ms.MultimodalSearch.image_text_match_reordering(
|
||||
test_my_dict,
|
||||
search_query3,
|
||||
itm_model,
|
||||
image_keys,
|
||||
sorted_list,
|
||||
batch_size=1,
|
||||
need_grad_cam=True,
|
||||
)
|
||||
for i, itm in zip(
|
||||
range(len(dict_itm_scores_for_blib[itm_model])),
|
||||
dict_itm_scores_for_blib[itm_model],
|
||||
):
|
||||
assert (
|
||||
math.isclose(itm_scores[0].tolist()[i], itm, rel_tol=10 * related_error)
|
||||
is True
|
||||
)
|
||||
for i, itm in zip(
|
||||
range(len(dict_itm_scores_for_blib[itm_model])),
|
||||
dict_itm_scores_for_blib[itm_model],
|
||||
):
|
||||
assert (
|
||||
math.isclose(itm_scores[0].tolist()[i], itm, rel_tol=related_error)
|
||||
is True
|
||||
for i, grad_cam in zip(
|
||||
range(len(dict_image_gradcam_with_itm_for_blip[itm_model])),
|
||||
dict_image_gradcam_with_itm_for_blip[itm_model],
|
||||
):
|
||||
assert (
|
||||
math.isclose(
|
||||
image_gradcam_with_itm["A bus"]["IMG_2809"][0][0].tolist()[i],
|
||||
grad_cam,
|
||||
rel_tol=10 * related_error,
|
||||
)
|
||||
is True
|
||||
)
|
||||
del itm_scores, image_gradcam_with_itm
|
||||
cuda.empty_cache()
|
||||
|
||||
|
||||
def test_itm_blip2_coco():
|
||||
test_my_dict = {
|
||||
"IMG_2746": {
|
||||
"filename": "../misinformation/test/data/IMG_2746.png",
|
||||
"rank A bus": 1,
|
||||
"A bus": 0.15640679001808167,
|
||||
"rank ../misinformation/test/data/IMG_3758.png": 1,
|
||||
"../misinformation/test/data/IMG_3758.png": 0.7533495426177979,
|
||||
},
|
||||
"IMG_2809": {
|
||||
"filename": "../misinformation/test/data/IMG_2809.png",
|
||||
"rank A bus": 0,
|
||||
"A bus": 0.1970970332622528,
|
||||
"rank ../misinformation/test/data/IMG_3758.png": 0,
|
||||
"../misinformation/test/data/IMG_3758.png": 0.8907483816146851,
|
||||
},
|
||||
}
|
||||
search_query3 = [
|
||||
{"text_input": "A bus"},
|
||||
{"image": "../misinformation/test/data/IMG_3758.png"},
|
||||
]
|
||||
image_keys = ["IMG_2746", "IMG_2809"]
|
||||
sorted_list = [[1, 0], [1, 0]]
|
||||
|
||||
(
|
||||
itm_scores,
|
||||
image_gradcam_with_itm,
|
||||
) = ms.MultimodalSearch.image_text_match_reordering(
|
||||
test_my_dict,
|
||||
search_query3,
|
||||
"blip2_coco",
|
||||
image_keys,
|
||||
sorted_list,
|
||||
batch_size=1,
|
||||
need_grad_cam=False,
|
||||
)
|
||||
for i, itm in zip(
|
||||
range(len(dict_itm_scores_for_blib["blip2_coco"])),
|
||||
dict_itm_scores_for_blib["blip2_coco"],
|
||||
):
|
||||
assert (
|
||||
math.isclose(itm_scores[0].tolist()[i], itm, rel_tol=10 * related_error)
|
||||
is True
|
||||
)
|
||||
del itm_scores, image_gradcam_with_itm
|
||||
cuda.empty_cache()
|
||||
|
||||
Загрузка…
x
Ссылка в новой задаче
Block a user