зеркало из
				https://github.com/ssciwr/AMMICO.git
				synced 2025-10-31 14:06:04 +02:00 
			
		
		
		
	fixed tests in multimodal
Этот коммит содержится в:
		
							родитель
							
								
									b9ad5b267d
								
							
						
					
					
						Коммит
						a00ac65868
					
				| @ -12,6 +12,8 @@ from IPython.display import display | |||||||
| from lavis.models import load_model_and_preprocess, load_model, BlipBase | from lavis.models import load_model_and_preprocess, load_model, BlipBase | ||||||
| from lavis.processors import load_processor | from lavis.processors import load_processor | ||||||
| 
 | 
 | ||||||
|  | # from memory_profiler import profile | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| class MultimodalSearch(AnalysisMethod): | class MultimodalSearch(AnalysisMethod): | ||||||
|     def __init__(self, subdict: dict) -> None: |     def __init__(self, subdict: dict) -> None: | ||||||
| @ -339,13 +341,6 @@ class MultimodalSearch(AnalysisMethod): | |||||||
| 
 | 
 | ||||||
|         return text_query_index |         return text_query_index | ||||||
| 
 | 
 | ||||||
|     def itm_images_processing(self, image_paths, vis_processor): |  | ||||||
|         raw_images = [MultimodalSearch.read_img(self, path) for path in image_paths] |  | ||||||
|         images = [vis_processor(r_img) for r_img in raw_images] |  | ||||||
|         images_tensors = torch.stack(images).to(MultimodalSearch.multimodal_device) |  | ||||||
| 
 |  | ||||||
|         return raw_images, images_tensors |  | ||||||
| 
 |  | ||||||
|     def get_pathes_from_query(self, query): |     def get_pathes_from_query(self, query): | ||||||
|         paths = [] |         paths = [] | ||||||
|         image_names = [] |         image_names = [] | ||||||
| @ -483,6 +478,7 @@ class MultimodalSearch(AnalysisMethod): | |||||||
|         vis_processor = load_processor("blip_image_eval").build(image_size=384) |         vis_processor = load_processor("blip_image_eval").build(image_size=384) | ||||||
|         return itm_model, vis_processor |         return itm_model, vis_processor | ||||||
| 
 | 
 | ||||||
|  |     #    @profile | ||||||
|     def image_text_match_reordering( |     def image_text_match_reordering( | ||||||
|         self, |         self, | ||||||
|         search_query, |         search_query, | ||||||
| @ -602,6 +598,20 @@ class MultimodalSearch(AnalysisMethod): | |||||||
|             image_gradcam_with_itm[ |             image_gradcam_with_itm[ | ||||||
|                 list(search_query[index_text_query].values())[0] |                 list(search_query[index_text_query].values())[0] | ||||||
|             ] = localimage_gradcam_with_itm |             ] = localimage_gradcam_with_itm | ||||||
|  |             del ( | ||||||
|  |                 itm_model, | ||||||
|  |                 vis_processor_itm, | ||||||
|  |                 text_processor, | ||||||
|  |                 raw_images, | ||||||
|  |                 images, | ||||||
|  |                 tokenizer, | ||||||
|  |                 queries_batch, | ||||||
|  |                 queries_tok_batch, | ||||||
|  |                 itm_score, | ||||||
|  |             ) | ||||||
|  |             if need_grad_cam: | ||||||
|  |                 del itm_output, gradcam, norm_img, grad_cam, avg_gradcam | ||||||
|  |             torch.cuda.empty_cache() | ||||||
|         return itm_scores2, image_gradcam_with_itm |         return itm_scores2, image_gradcam_with_itm | ||||||
| 
 | 
 | ||||||
|     def show_results(self, query, itm=False, image_gradcam_with_itm=False): |     def show_results(self, query, itm=False, image_gradcam_with_itm=False): | ||||||
|  | |||||||
| @ -6,20 +6,8 @@ from torch import device, cuda | |||||||
| import misinformation.multimodal_search as ms | import misinformation.multimodal_search as ms | ||||||
| 
 | 
 | ||||||
| testdict = { | testdict = { | ||||||
|     "d755771b-225e-432f-802e-fb8dc850fff7": { |  | ||||||
|         "filename": "./test/data/d755771b-225e-432f-802e-fb8dc850fff7.png" |  | ||||||
|     }, |  | ||||||
|     "IMG_2746": {"filename": "./test/data/IMG_2746.png"}, |     "IMG_2746": {"filename": "./test/data/IMG_2746.png"}, | ||||||
|     "IMG_2750": {"filename": "./test/data/IMG_2750.png"}, |  | ||||||
|     "IMG_2805": {"filename": "./test/data/IMG_2805.png"}, |  | ||||||
|     "IMG_2806": {"filename": "./test/data/IMG_2806.png"}, |  | ||||||
|     "IMG_2807": {"filename": "./test/data/IMG_2807.png"}, |  | ||||||
|     "IMG_2808": {"filename": "./test/data/IMG_2808.png"}, |  | ||||||
|     "IMG_2809": {"filename": "./test/data/IMG_2809.png"}, |     "IMG_2809": {"filename": "./test/data/IMG_2809.png"}, | ||||||
|     "IMG_3755": {"filename": "./test/data/IMG_3755.jpg"}, |  | ||||||
|     "IMG_3756": {"filename": "./test/data/IMG_3756.jpg"}, |  | ||||||
|     "IMG_3757": {"filename": "./test/data/IMG_3757.jpg"}, |  | ||||||
|     "pic1": {"filename": "./test/data/pic1.png"}, |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| related_error = 1e-2 | related_error = 1e-2 | ||||||
| @ -38,39 +26,15 @@ def test_read_img(): | |||||||
| pre_proc_pic_blip2_blip_albef = [ | pre_proc_pic_blip2_blip_albef = [ | ||||||
|     -1.0039474964141846, |     -1.0039474964141846, | ||||||
|     -1.0039474964141846, |     -1.0039474964141846, | ||||||
|     -0.8433647751808167, |  | ||||||
|     -0.6097899675369263, |  | ||||||
|     -0.5951915383338928, |  | ||||||
|     -0.6243883967399597, |  | ||||||
|     -0.6827820539474487, |  | ||||||
|     -0.6097899675369263, |  | ||||||
|     -0.7119789123535156, |  | ||||||
|     -1.0623412132263184, |  | ||||||
| ] | ] | ||||||
| pre_proc_pic_clip_vitl14 = [ | pre_proc_pic_clip_vitl14 = [ | ||||||
|     -0.7995694875717163, |     -0.7995694875717163, | ||||||
|     -0.7849710583686829, |     -0.7849710583686829, | ||||||
|     -0.7849710583686829, |  | ||||||
|     -0.7703726291656494, |  | ||||||
|     -0.7703726291656494, |  | ||||||
|     -0.7849710583686829, |  | ||||||
|     -0.7849710583686829, |  | ||||||
|     -0.7703726291656494, |  | ||||||
|     -0.7703726291656494, |  | ||||||
|     -0.7703726291656494, |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| pre_proc_pic_clip_vitl14_336 = [ | pre_proc_pic_clip_vitl14_336 = [ | ||||||
|     -0.7995694875717163, |     -0.7995694875717163, | ||||||
|     -0.7849710583686829, |     -0.7849710583686829, | ||||||
|     -0.7849710583686829, |  | ||||||
|     -0.7849710583686829, |  | ||||||
|     -0.7849710583686829, |  | ||||||
|     -0.7849710583686829, |  | ||||||
|     -0.7849710583686829, |  | ||||||
|     -0.9163569211959839, |  | ||||||
|     -1.149931788444519, |  | ||||||
|     -1.0039474964141846, |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| pre_proc_text_blip2_blip_albef = ( | pre_proc_text_blip2_blip_albef = ( | ||||||
| @ -84,340 +48,146 @@ pre_proc_text_clip_clip_vitl14_clip_vitl14_336 = ( | |||||||
| pre_extracted_feature_img_blip2 = [ | pre_extracted_feature_img_blip2 = [ | ||||||
|     0.04566730558872223, |     0.04566730558872223, | ||||||
|     -0.042554520070552826, |     -0.042554520070552826, | ||||||
|     -0.06970272958278656, |  | ||||||
|     -0.009771779179573059, |  | ||||||
|     0.01446065679192543, |  | ||||||
|     0.10173682868480682, |  | ||||||
|     0.007092420011758804, |  | ||||||
|     -0.020045937970280647, |  | ||||||
|     0.12923966348171234, |  | ||||||
|     0.006452132016420364, |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| pre_extracted_feature_img_blip = [ | pre_extracted_feature_img_blip = [ | ||||||
|     -0.02480311505496502, |     -0.02480311505496502, | ||||||
|     0.05037587881088257, |     0.05037587881088257, | ||||||
|     0.039517853409051895, |  | ||||||
|     -0.06994109600782394, |  | ||||||
|     -0.12886561453342438, |  | ||||||
|     0.047039758414030075, |  | ||||||
|     -0.11620642244815826, |  | ||||||
|     -0.003398326924070716, |  | ||||||
|     -0.07324369996786118, |  | ||||||
|     0.06994668394327164, |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| pre_extracted_feature_img_albef = [ | pre_extracted_feature_img_albef = [ | ||||||
|     0.08971136063337326, |     0.08971136063337326, | ||||||
|     -0.10915573686361313, |     -0.10915573686361313, | ||||||
|     -0.020636577159166336, |  | ||||||
|     0.048121627420186996, |  | ||||||
|     -0.05943416804075241, |  | ||||||
|     -0.129856139421463, |  | ||||||
|     -0.0034469354432076216, |  | ||||||
|     0.017888527363538742, |  | ||||||
|     -0.03284582123160362, |  | ||||||
|     -0.1037328764796257, |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| pre_extracted_feature_img_clip = [ | pre_extracted_feature_img_clip = [ | ||||||
|     0.01621132344007492, |     0.01621132344007492, | ||||||
|     -0.004035486374050379, |     -0.004035486374050379, | ||||||
|     -0.04304071143269539, |  | ||||||
|     -0.03459808602929115, |  | ||||||
|     0.016922621056437492, |  | ||||||
|     -0.025056276470422745, |  | ||||||
|     -0.04178355261683464, |  | ||||||
|     0.02165347896516323, |  | ||||||
|     -0.003224249929189682, |  | ||||||
|     0.020485712215304375, |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| pre_extracted_feature_img_parsing_clip = [ | pre_extracted_feature_img_parsing_clip = [ | ||||||
|     0.01621132344007492, |     0.01621132344007492, | ||||||
|     -0.004035486374050379, |     -0.004035486374050379, | ||||||
|     -0.04304071143269539, |  | ||||||
|     -0.03459808602929115, |  | ||||||
|     0.016922621056437492, |  | ||||||
|     -0.025056276470422745, |  | ||||||
|     -0.04178355261683464, |  | ||||||
|     0.02165347896516323, |  | ||||||
|     -0.003224249929189682, |  | ||||||
|     0.020485712215304375, |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| pre_extracted_feature_img_clip_vitl14 = [ | pre_extracted_feature_img_clip_vitl14 = [ | ||||||
|     -0.023943455889821053, |     -0.023943455889821053, | ||||||
|     -0.021703708916902542, |     -0.021703708916902542, | ||||||
|     0.035043686628341675, |  | ||||||
|     0.019495919346809387, |  | ||||||
|     0.014351222664117813, |  | ||||||
|     -0.008634116500616074, |  | ||||||
|     0.01610446907579899, |  | ||||||
|     -0.003426523646339774, |  | ||||||
|     0.011931191198527813, |  | ||||||
|     0.0008691544644534588, |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| pre_extracted_feature_img_clip_vitl14_336 = [ | pre_extracted_feature_img_clip_vitl14_336 = [ | ||||||
|     -0.009511193260550499, |     -0.009511193260550499, | ||||||
|     -0.012618942186236382, |     -0.012618942186236382, | ||||||
|     0.034754861146211624, |  | ||||||
|     0.016356879845261574, |  | ||||||
|     -0.0011549904011189938, |  | ||||||
|     -0.008054453879594803, |  | ||||||
|     0.0011990377679467201, |  | ||||||
|     -0.010806051082909107, |  | ||||||
|     0.00140204350464046, |  | ||||||
|     0.0006861367146484554, |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| pre_extracted_feature_text_blip2 = [ | pre_extracted_feature_text_blip2 = [ | ||||||
|     -0.1384204626083374, |     -0.1384204626083374, | ||||||
|     -0.008662976324558258, |     -0.008662976324558258, | ||||||
|     0.006269007455557585, |  | ||||||
|     0.03151319921016693, |  | ||||||
|     0.060558050870895386, |  | ||||||
|     -0.03230040520429611, |  | ||||||
|     0.015861615538597107, |  | ||||||
|     -0.11856459826231003, |  | ||||||
|     -0.058296192437410355, |  | ||||||
|     0.03699290752410889, |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| pre_extracted_feature_text_blip = [ | pre_extracted_feature_text_blip = [ | ||||||
|     0.0118643119931221, |     0.0118643119931221, | ||||||
|     -0.01291718054562807, |     -0.01291718054562807, | ||||||
|     -0.0009687161073088646, |  | ||||||
|     0.01428765058517456, |  | ||||||
|     -0.05591396614909172, |  | ||||||
|     0.07386433333158493, |  | ||||||
|     -0.11475936323404312, |  | ||||||
|     0.01620068959891796, |  | ||||||
|     0.0062415082938969135, |  | ||||||
|     0.0034833091776818037, |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| pre_extracted_feature_text_albef = [ | pre_extracted_feature_text_albef = [ | ||||||
|     -0.06229640915989876, |     -0.06229640915989876, | ||||||
|     0.11278597265481949, |     0.11278597265481949, | ||||||
|     0.06628583371639252, |  | ||||||
|     0.1649140566587448, |  | ||||||
|     0.068987175822258, |  | ||||||
|     0.006291372701525688, |  | ||||||
|     0.03244050219655037, |  | ||||||
|     -0.049556829035282135, |  | ||||||
|     0.050752390176057816, |  | ||||||
|     -0.0421440489590168, |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| pre_extracted_feature_text_clip = [ | pre_extracted_feature_text_clip = [ | ||||||
|     0.018169036135077477, |     0.018169036135077477, | ||||||
|     0.03634127229452133, |     0.03634127229452133, | ||||||
|     0.025660742074251175, |  | ||||||
|     0.009149895049631596, |  | ||||||
|     -0.035570453852415085, |  | ||||||
|     0.033126577734947205, |  | ||||||
|     -0.004808237310498953, |  | ||||||
|     -0.0031453112605959177, |  | ||||||
|     -0.02194291725754738, |  | ||||||
|     0.024019461125135422, |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| pre_extracted_feature_text_clip_vitl14 = [ | pre_extracted_feature_text_clip_vitl14 = [ | ||||||
|     -0.0055463071912527084, |     -0.0055463071912527084, | ||||||
|     0.006908962037414312, |     0.006908962037414312, | ||||||
|     -0.019450219348073006, |  | ||||||
|     -0.018097277730703354, |  | ||||||
|     0.017567576840519905, |  | ||||||
|     -0.03828490898013115, |  | ||||||
|     -0.03781530633568764, |  | ||||||
|     -0.023951737210154533, |  | ||||||
|     0.01365653332322836, |  | ||||||
|     -0.02341713197529316, |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| pre_extracted_feature_text_clip_vitl14_336 = [ | pre_extracted_feature_text_clip_vitl14_336 = [ | ||||||
|     -0.008720514364540577, |     -0.008720514364540577, | ||||||
|     0.005284308455884457, |     0.005284308455884457, | ||||||
|     -0.021116750314831734, |  | ||||||
|     -0.018112430348992348, |  | ||||||
|     0.01685470901429653, |  | ||||||
|     -0.03517491742968559, |  | ||||||
|     -0.038612402975559235, |  | ||||||
|     -0.021867064759135246, |  | ||||||
|     0.01685977540910244, |  | ||||||
|     -0.023832324892282486, |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| simularity_blip2 = [ | simularity_blip2 = [ | ||||||
|     [0.05826476216316223, -0.02717375010251999], |     [0.05826476216316223, -0.02717375010251999], | ||||||
|     [0.12869958579540253, 0.006344856694340706], |  | ||||||
|     [0.11073512583971024, 0.12327021360397339], |  | ||||||
|     [0.08743024617433548, 0.058944884687662125], |  | ||||||
|     [0.04591086134314537, 0.4905201494693756], |  | ||||||
|     [0.06297147274017334, 0.47339022159576416], |     [0.06297147274017334, 0.47339022159576416], | ||||||
|     [0.18486255407333374, 0.6350338459014893], |  | ||||||
|     [0.015455856919288635, 0.018462061882019043], |  | ||||||
|     [-0.008606988936662674, 0.00741103570908308], |  | ||||||
|     [-0.0415784977376461, -0.1267213076353073], |  | ||||||
|     [-0.025470387190580368, 0.1315656304359436], |  | ||||||
|     [-0.05090826004743576, 0.059172093868255615], |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| sorted_blip2 = [ | sorted_blip2 = [ | ||||||
|     [6, 1, 2, 3, 5, 0, 4, 7, 8, 10, 9, 11], |     [1, 0], | ||||||
|     [6, 4, 5, 10, 2, 11, 3, 7, 8, 1, 0, 9], |     [1, 0], | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| simularity_blip = [ | simularity_blip = [ | ||||||
|     [0.15640679001808167, 0.752173662185669], |     [0.15640679001808167, 0.752173662185669], | ||||||
|     [0.15139800310134888, 0.7804810404777527], |  | ||||||
|     [0.13010388612747192, 0.755257248878479], |  | ||||||
|     [0.13746635615825653, 0.7618774175643921], |  | ||||||
|     [0.1756758838891983, 0.8531903624534607], |  | ||||||
|     [0.17233705520629883, 0.8448910117149353], |     [0.17233705520629883, 0.8448910117149353], | ||||||
|     [0.1970970332622528, 0.8916105628013611], |  | ||||||
|     [0.11693969368934631, 0.5833531618118286], |  | ||||||
|     [0.12386563420295715, 0.5981853604316711], |  | ||||||
|     [0.08427951484918594, 0.4962371587753296], |  | ||||||
|     [0.14193706214427948, 0.7613846659660339], |  | ||||||
|     [0.12051936239004135, 0.6492202281951904], |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| sorted_blip = [ | sorted_blip = [ | ||||||
|     [6, 4, 5, 0, 1, 10, 3, 2, 8, 11, 7, 9], |     [1, 0], | ||||||
|     [6, 4, 5, 1, 3, 10, 2, 0, 11, 8, 7, 9], |     [1, 0], | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| simularity_albef = [ | simularity_albef = [ | ||||||
|     [0.12321824580430984, 0.35511350631713867], |     [0.12321824580430984, 0.35511350631713867], | ||||||
|     [0.09512615948915482, 0.27168408036231995], |  | ||||||
|     [0.09053325653076172, 0.20215675234794617], |  | ||||||
|     [0.06335515528917313, 0.15055638551712036], |  | ||||||
|     [0.09604836255311966, 0.4658776521682739], |  | ||||||
|     [0.10870333760976791, 0.5143978595733643], |     [0.10870333760976791, 0.5143978595733643], | ||||||
|     [0.11748822033405304, 0.6542638540267944], |  | ||||||
|     [0.05688793584704399, 0.22170542180538177], |  | ||||||
|     [0.05597608536481857, 0.11963296681642532], |  | ||||||
|     [0.059643782675266266, 0.14969395101070404], |  | ||||||
|     [0.06690303236246109, 0.3149859607219696], |  | ||||||
|     [0.07909377664327621, 0.11911341547966003], |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| sorted_albef = [ | sorted_albef = [ | ||||||
|     [0, 6, 5, 4, 1, 2, 11, 10, 3, 9, 7, 8], |     [0, 1], | ||||||
|     [6, 5, 4, 0, 10, 1, 7, 2, 3, 9, 8, 11], |     [1, 0], | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| simularity_clip = [ | simularity_clip = [ | ||||||
|     [0.23923014104366302, 0.5325412750244141], |     [0.23923014104366302, 0.5325412750244141], | ||||||
|     [0.20101115107536316, 0.5112978219985962], |  | ||||||
|     [0.17522737383842468, 0.49811851978302], |  | ||||||
|     [0.20062290132045746, 0.5415266156196594], |  | ||||||
|     [0.22865726053714752, 0.5762109756469727], |  | ||||||
|     [0.2310466319322586, 0.5910375714302063], |     [0.2310466319322586, 0.5910375714302063], | ||||||
|     [0.2644523084163666, 0.7851459383964539], |  | ||||||
|     [0.21474510431289673, 0.4135811924934387], |  | ||||||
|     [0.16407863795757294, 0.1474374681711197], |  | ||||||
|     [0.19819433987140656, 0.26493316888809204], |  | ||||||
|     [0.19545596837997437, 0.5007457137107849], |  | ||||||
|     [0.1647854745388031, 0.45705708861351013], |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| sorted_clip = [ | sorted_clip = [ | ||||||
|     [6, 0, 5, 4, 7, 1, 3, 9, 10, 2, 11, 8], |     [1, 0], | ||||||
|     [6, 5, 4, 3, 0, 1, 10, 2, 11, 7, 9, 8], |     [1, 0], | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| simularity_clip_vitl14 = [ | simularity_clip_vitl14 = [ | ||||||
|     [0.1051270067691803, 0.5184808373451233], |     [0.1051270067691803, 0.5184808373451233], | ||||||
|     [0.09705893695354462, 0.49574509263038635], |  | ||||||
|     [0.11964304000139236, 0.5424358248710632], |  | ||||||
|     [0.13881900906562805, 0.5909714698791504], |  | ||||||
|     [0.12728188931941986, 0.6758255362510681], |  | ||||||
|     [0.1277746558189392, 0.6841973662376404], |     [0.1277746558189392, 0.6841973662376404], | ||||||
|     [0.18026694655418396, 0.803142786026001], |  | ||||||
|     [0.13977059721946716, 0.45957139134407043], |  | ||||||
|     [0.11180847883224487, 0.24822194874286652], |  | ||||||
|     [0.12296056002378464, 0.35143694281578064], |  | ||||||
|     [0.11596094071865082, 0.5704031586647034], |  | ||||||
|     [0.10174489766359329, 0.44422751665115356], |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| sorted_clip_vitl14 = [ | sorted_clip_vitl14 = [ | ||||||
|     [6, 7, 3, 5, 4, 9, 2, 10, 8, 0, 11, 1], |     [1, 0], | ||||||
|     [6, 5, 4, 3, 10, 2, 0, 1, 7, 11, 9, 8], |     [1, 0], | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| simularity_clip_vitl14_336 = [ | simularity_clip_vitl14_336 = [ | ||||||
|     [0.09391091763973236, 0.49337542057037354], |     [0.09391091763973236, 0.49337542057037354], | ||||||
|     [0.11103834211826324, 0.4881117343902588], |  | ||||||
|     [0.12891019880771637, 0.5501476526260376], |  | ||||||
|     [0.13288410007953644, 0.5498673915863037], |  | ||||||
|     [0.12357455492019653, 0.6749162077903748], |  | ||||||
|     [0.13700757920742035, 0.7003108263015747], |     [0.13700757920742035, 0.7003108263015747], | ||||||
|     [0.1788637489080429, 0.7713702321052551], |  | ||||||
|     [0.13260436058044434, 0.4300197660923004], |  | ||||||
|     [0.11666625738143921, 0.2334875613451004], |  | ||||||
|     [0.1316065937280655, 0.3291645646095276], |  | ||||||
|     [0.12374477833509445, 0.5632147192955017], |  | ||||||
|     [0.10333051532506943, 0.43023794889450073], |  | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| sorted_clip_vitl14_336 = [ | sorted_clip_vitl14_336 = [ | ||||||
|     [6, 5, 3, 7, 9, 2, 10, 4, 8, 1, 11, 0], |     [1, 0], | ||||||
|     [6, 5, 4, 10, 2, 3, 0, 1, 11, 7, 9, 8], |     [1, 0], | ||||||
| ] | ] | ||||||
| 
 | 
 | ||||||
| dict_itm_scores_for_blib = { | dict_itm_scores_for_blib = { | ||||||
|     "blip_base": [ |     "blip_base": [ | ||||||
|         0.07107225805521011, |         0.07107225805521011, | ||||||
|         0.02078203856945038, |  | ||||||
|         0.02083236537873745, |  | ||||||
|         0.0042252070270478725, |  | ||||||
|         0.0002070252230623737, |  | ||||||
|         0.004100032616406679, |         0.004100032616406679, | ||||||
|         0.0009893750539049506, |  | ||||||
|         0.00015318625082727522, |  | ||||||
|         1.9936736862291582e-05, |  | ||||||
|         4.0084025386022404e-05, |  | ||||||
|         0.0006117739249020815, |  | ||||||
|         4.1486648115096614e-05, |  | ||||||
|     ], |     ], | ||||||
|     "blip_large": [ |     "blip_large": [ | ||||||
|         0.07890705019235611, |         0.07890705019235611, | ||||||
|         0.04954551160335541, |         0.00271016638725996, | ||||||
|         0.05564938113093376, |  | ||||||
|         0.002710158471018076, |  | ||||||
|         0.0026644798927009106, |  | ||||||
|         0.01277624536305666, |  | ||||||
|         0.003585426602512598, |  | ||||||
|         0.0019450040999799967, |  | ||||||
|         0.0036240608897060156, |  | ||||||
|         0.0013280785642564297, |  | ||||||
|         0.015366943553090096, |  | ||||||
|         0.0030039174016565084, |  | ||||||
|     ], |     ], | ||||||
|     "blip2_coco": [ |     "blip2_coco": [ | ||||||
|         0.0833505243062973, |         0.0833505243062973, | ||||||
|         0.046232130378484726, |         0.004216152708977461, | ||||||
|         0.04996354877948761, |  | ||||||
|         0.004187352955341339, |  | ||||||
|         2.5233526685042307e-05, |  | ||||||
|         0.002679687924683094, |  | ||||||
|         2.4826533262967132e-05, |  | ||||||
|         5.1878203521482646e-05, |  | ||||||
|         1.3434584616334178e-05, |  | ||||||
|         9.76747560343938e-06, |  | ||||||
|         7.34204331820365e-06, |  | ||||||
|         1.1423194337112363e-05, |  | ||||||
|     ], |     ], | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| dict_image_gradcam_with_itm_for_blip = { | dict_image_gradcam_with_itm_for_blip = { | ||||||
|     "blip_base": [125.12124404, 132.07243145, 65.43589668], |     "blip_base": [123.36285799741745, 132.31662154197693, 53.38280035299249], | ||||||
|     "blip_large": [118.75610679, 125.35366997, 69.63849807], |     "blip_large": [119.99512910842896, 128.7044593691826, 55.552959859540515], | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -594,7 +364,7 @@ def test_parsing_images( | |||||||
|         features_image_stacked, |         features_image_stacked, | ||||||
|     ) = ms.MultimodalSearch.parsing_images(testdict, pre_model) |     ) = ms.MultimodalSearch.parsing_images(testdict, pre_model) | ||||||
| 
 | 
 | ||||||
|     for i, num in zip(range(10), features_image_stacked[0, 10:20].tolist()): |     for i, num in zip(range(10), features_image_stacked[0, 10:12].tolist()): | ||||||
|         assert ( |         assert ( | ||||||
|             math.isclose(num, pre_extracted_feature_img[i], rel_tol=related_error) |             math.isclose(num, pre_extracted_feature_img[i], rel_tol=related_error) | ||||||
|             is True |             is True | ||||||
| @ -609,7 +379,7 @@ def test_parsing_images( | |||||||
|     ) |     ) | ||||||
|     processed_text = txt_processor["eval"](test_querry) |     processed_text = txt_processor["eval"](test_querry) | ||||||
| 
 | 
 | ||||||
|     for i, num in zip(range(10), processed_pic[0, 0, 0, 25:35].tolist()): |     for i, num in zip(range(10), processed_pic[0, 0, 0, 25:27].tolist()): | ||||||
|         assert math.isclose(num, pre_proc_pic[i], rel_tol=related_error) is True |         assert math.isclose(num, pre_proc_pic[i], rel_tol=related_error) is True | ||||||
| 
 | 
 | ||||||
|     assert processed_text == pre_proc_text |     assert processed_text == pre_proc_text | ||||||
| @ -622,13 +392,13 @@ def test_parsing_images( | |||||||
|         testdict, search_query, model, txt_processor, vis_processor, pre_model |         testdict, search_query, model, txt_processor, vis_processor, pre_model | ||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
|     for i, num in zip(range(10), multi_features_stacked[0, 10:20].tolist()): |     for i, num in zip(range(10), multi_features_stacked[0, 10:12].tolist()): | ||||||
|         assert ( |         assert ( | ||||||
|             math.isclose(num, pre_extracted_feature_text[i], rel_tol=related_error) |             math.isclose(num, pre_extracted_feature_text[i], rel_tol=related_error) | ||||||
|             is True |             is True | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|     for i, num in zip(range(10), multi_features_stacked[1, 10:20].tolist()): |     for i, num in zip(range(10), multi_features_stacked[1, 10:12].tolist()): | ||||||
|         assert ( |         assert ( | ||||||
|             math.isclose(num, pre_extracted_feature_img[i], rel_tol=related_error) |             math.isclose(num, pre_extracted_feature_img[i], rel_tol=related_error) | ||||||
|             is True |             is True | ||||||
| @ -650,39 +420,132 @@ def test_parsing_images( | |||||||
|         search_query2, |         search_query2, | ||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
|     for i, num in zip(range(12), similarity.tolist()): |     for i, num in zip(range(len(pre_simularity)), similarity.tolist()): | ||||||
|         for j, num2 in zip(range(len(num)), num): |         for j, num2 in zip(range(len(num)), num): | ||||||
|             assert ( |             assert ( | ||||||
|                 math.isclose(num2, pre_simularity[i][j], rel_tol=100 * related_error) |                 math.isclose(num2, pre_simularity[i][j], rel_tol=100 * related_error) | ||||||
|                 is True |                 is True | ||||||
|             ) |             ) | ||||||
| 
 | 
 | ||||||
|     for i, num in zip(range(2), sorted_list): |     for i, num in zip(range(len(pre_sorted)), sorted_list): | ||||||
|         for j, num2 in zip(range(2), num): |         for j, num2 in zip(range(2), num): | ||||||
|             assert num2 == pre_sorted[i][j] |             assert num2 == pre_sorted[i][j] | ||||||
| 
 | 
 | ||||||
|     del model, vis_processor, txt_processor |     del ( | ||||||
|  |         model, | ||||||
|  |         vis_processor, | ||||||
|  |         txt_processor, | ||||||
|  |         similarity, | ||||||
|  |         features_image_stacked, | ||||||
|  |         processed_pic, | ||||||
|  |         multi_features_stacked, | ||||||
|  |     ) | ||||||
|     cuda.empty_cache() |     cuda.empty_cache() | ||||||
| 
 | 
 | ||||||
|     if pre_model == "blip": | 
 | ||||||
|         for itm_model in ["blip_base", "blip_large", "blip2_coco"]: | def test_itm(): | ||||||
|  |     test_my_dict = { | ||||||
|  |         "IMG_2746": { | ||||||
|  |             "filename": "../misinformation/test/data/IMG_2746.png", | ||||||
|  |             "rank A bus": 1, | ||||||
|  |             "A bus": 0.15640679001808167, | ||||||
|  |             "rank ../misinformation/test/data/IMG_3758.png": 1, | ||||||
|  |             "../misinformation/test/data/IMG_3758.png": 0.7533495426177979, | ||||||
|  |         }, | ||||||
|  |         "IMG_2809": { | ||||||
|  |             "filename": "../misinformation/test/data/IMG_2809.png", | ||||||
|  |             "rank A bus": 0, | ||||||
|  |             "A bus": 0.1970970332622528, | ||||||
|  |             "rank ../misinformation/test/data/IMG_3758.png": 0, | ||||||
|  |             "../misinformation/test/data/IMG_3758.png": 0.8907483816146851, | ||||||
|  |         }, | ||||||
|  |     } | ||||||
|  |     search_query3 = [ | ||||||
|  |         {"text_input": "A bus"}, | ||||||
|  |         {"image": "../misinformation/test/data/IMG_3758.png"}, | ||||||
|  |     ] | ||||||
|  |     image_keys = ["IMG_2746", "IMG_2809"] | ||||||
|  |     sorted_list = [[1, 0], [1, 0]] | ||||||
|  |     for itm_model in ["blip_base", "blip_large"]: | ||||||
|         ( |         ( | ||||||
|             itm_scores, |             itm_scores, | ||||||
|             image_gradcam_with_itm, |             image_gradcam_with_itm, | ||||||
|         ) = ms.MultimodalSearch.image_text_match_reordering( |         ) = ms.MultimodalSearch.image_text_match_reordering( | ||||||
|                 testdict, |             test_my_dict, | ||||||
|                 search_query2, |             search_query3, | ||||||
|             itm_model, |             itm_model, | ||||||
|             image_keys, |             image_keys, | ||||||
|             sorted_list, |             sorted_list, | ||||||
|             batch_size=1, |             batch_size=1, | ||||||
|                 need_grad_cam=False, |             need_grad_cam=True, | ||||||
|         ) |         ) | ||||||
|         for i, itm in zip( |         for i, itm in zip( | ||||||
|             range(len(dict_itm_scores_for_blib[itm_model])), |             range(len(dict_itm_scores_for_blib[itm_model])), | ||||||
|             dict_itm_scores_for_blib[itm_model], |             dict_itm_scores_for_blib[itm_model], | ||||||
|         ): |         ): | ||||||
|             assert ( |             assert ( | ||||||
|                     math.isclose(itm_scores[0].tolist()[i], itm, rel_tol=related_error) |                 math.isclose(itm_scores[0].tolist()[i], itm, rel_tol=10 * related_error) | ||||||
|                 is True |                 is True | ||||||
|             ) |             ) | ||||||
|  |         for i, grad_cam in zip( | ||||||
|  |             range(len(dict_image_gradcam_with_itm_for_blip[itm_model])), | ||||||
|  |             dict_image_gradcam_with_itm_for_blip[itm_model], | ||||||
|  |         ): | ||||||
|  |             assert ( | ||||||
|  |                 math.isclose( | ||||||
|  |                     image_gradcam_with_itm["A bus"]["IMG_2809"][0][0].tolist()[i], | ||||||
|  |                     grad_cam, | ||||||
|  |                     rel_tol=10 * related_error, | ||||||
|  |                 ) | ||||||
|  |                 is True | ||||||
|  |             ) | ||||||
|  |         del itm_scores, image_gradcam_with_itm | ||||||
|  |         cuda.empty_cache() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_itm_blip2_coco(): | ||||||
|  |     test_my_dict = { | ||||||
|  |         "IMG_2746": { | ||||||
|  |             "filename": "../misinformation/test/data/IMG_2746.png", | ||||||
|  |             "rank A bus": 1, | ||||||
|  |             "A bus": 0.15640679001808167, | ||||||
|  |             "rank ../misinformation/test/data/IMG_3758.png": 1, | ||||||
|  |             "../misinformation/test/data/IMG_3758.png": 0.7533495426177979, | ||||||
|  |         }, | ||||||
|  |         "IMG_2809": { | ||||||
|  |             "filename": "../misinformation/test/data/IMG_2809.png", | ||||||
|  |             "rank A bus": 0, | ||||||
|  |             "A bus": 0.1970970332622528, | ||||||
|  |             "rank ../misinformation/test/data/IMG_3758.png": 0, | ||||||
|  |             "../misinformation/test/data/IMG_3758.png": 0.8907483816146851, | ||||||
|  |         }, | ||||||
|  |     } | ||||||
|  |     search_query3 = [ | ||||||
|  |         {"text_input": "A bus"}, | ||||||
|  |         {"image": "../misinformation/test/data/IMG_3758.png"}, | ||||||
|  |     ] | ||||||
|  |     image_keys = ["IMG_2746", "IMG_2809"] | ||||||
|  |     sorted_list = [[1, 0], [1, 0]] | ||||||
|  | 
 | ||||||
|  |     ( | ||||||
|  |         itm_scores, | ||||||
|  |         image_gradcam_with_itm, | ||||||
|  |     ) = ms.MultimodalSearch.image_text_match_reordering( | ||||||
|  |         test_my_dict, | ||||||
|  |         search_query3, | ||||||
|  |         "blip2_coco", | ||||||
|  |         image_keys, | ||||||
|  |         sorted_list, | ||||||
|  |         batch_size=1, | ||||||
|  |         need_grad_cam=False, | ||||||
|  |     ) | ||||||
|  |     for i, itm in zip( | ||||||
|  |         range(len(dict_itm_scores_for_blib["blip2_coco"])), | ||||||
|  |         dict_itm_scores_for_blib["blip2_coco"], | ||||||
|  |     ): | ||||||
|  |         assert ( | ||||||
|  |             math.isclose(itm_scores[0].tolist()[i], itm, rel_tol=10 * related_error) | ||||||
|  |             is True | ||||||
|  |         ) | ||||||
|  |     del itm_scores, image_gradcam_with_itm | ||||||
|  |     cuda.empty_cache() | ||||||
|  | |||||||
		Загрузка…
	
	
			
			x
			
			
		
	
		Ссылка в новой задаче
	
	Block a user
	 Petr Andriushchenko
						Petr Andriushchenko