зеркало из
				https://github.com/ssciwr/AMMICO.git
				synced 2025-10-30 05:26:05 +02:00 
			
		
		
		
	added test_multimodal_search tests
Этот коммит содержится в:
		
							родитель
							
								
									ce45a2c030
								
							
						
					
					
						Коммит
						e12d50a195
					
				| @ -668,20 +668,7 @@ model_type = "blip" | ||||
| # model_type = "clip_vitl14_336" | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.parametrize( | ||||
|     ( | ||||
|         "pre_multimodal_device", | ||||
|         "pre_model", | ||||
|         "pre_proc_pic", | ||||
|         "pre_proc_text", | ||||
|         "pre_extracted_feature_img", | ||||
|         "pre_extracted_feature_text", | ||||
|     ), | ||||
|     [ | ||||
|         pytest.param( | ||||
|             device("cuda"), | ||||
|             "blip2", | ||||
|             [ | ||||
| pre_proc_pic_blip2_blip_albef = [ | ||||
|     -1.0039474964141846, | ||||
|     -1.0039474964141846, | ||||
|     -0.8433647751808167, | ||||
| @ -692,9 +679,42 @@ model_type = "blip" | ||||
|     -0.6097899675369263, | ||||
|     -0.7119789123535156, | ||||
|     -1.0623412132263184, | ||||
|             ], | ||||
|             "the bird sat on a tree located at the intersection of 23rd and 43rd streets", | ||||
|             [ | ||||
| ] | ||||
| pre_proc_pic_clip_vitl14 = [ | ||||
|     -0.7995694875717163, | ||||
|     -0.7849710583686829, | ||||
|     -0.7849710583686829, | ||||
|     -0.7703726291656494, | ||||
|     -0.7703726291656494, | ||||
|     -0.7849710583686829, | ||||
|     -0.7849710583686829, | ||||
|     -0.7703726291656494, | ||||
|     -0.7703726291656494, | ||||
|     -0.7703726291656494, | ||||
| ] | ||||
| 
 | ||||
| pre_proc_pic_clip_vitl14_336 = [ | ||||
|     -0.7995694875717163, | ||||
|     -0.7849710583686829, | ||||
|     -0.7849710583686829, | ||||
|     -0.7849710583686829, | ||||
|     -0.7849710583686829, | ||||
|     -0.7849710583686829, | ||||
|     -0.7849710583686829, | ||||
|     -0.9163569211959839, | ||||
|     -1.149931788444519, | ||||
|     -1.0039474964141846, | ||||
| ] | ||||
| 
 | ||||
| pre_proc_text_blip2_blip_albef = ( | ||||
|     "the bird sat on a tree located at the intersection of 23rd and 43rd streets" | ||||
| ) | ||||
| 
 | ||||
| pre_proc_text_clip_clip_vitl14_clip_vitl14_336 = ( | ||||
|     "The bird sat on a tree located at the intersection of 23rd and 43rd streets." | ||||
| ) | ||||
| 
 | ||||
| pre_extracted_feature_img_blip2 = [ | ||||
|     0.04566730558872223, | ||||
|     -0.042554520070552826, | ||||
|     -0.06970272958278656, | ||||
| @ -705,8 +725,87 @@ model_type = "blip" | ||||
|     -0.020045937970280647, | ||||
|     0.12923966348171234, | ||||
|     0.006452132016420364, | ||||
|             ], | ||||
|             [ | ||||
| ] | ||||
| 
 | ||||
| pre_extracted_feature_img_blip = [ | ||||
|     -0.02480311505496502, | ||||
|     0.05037587881088257, | ||||
|     0.039517853409051895, | ||||
|     -0.06994109600782394, | ||||
|     -0.12886561453342438, | ||||
|     0.047039758414030075, | ||||
|     -0.11620642244815826, | ||||
|     -0.003398326924070716, | ||||
|     -0.07324369996786118, | ||||
|     0.06994668394327164, | ||||
| ] | ||||
| 
 | ||||
| pre_extracted_feature_img_albef = [ | ||||
|     0.08971136063337326, | ||||
|     -0.10915573686361313, | ||||
|     -0.020636577159166336, | ||||
|     0.048121627420186996, | ||||
|     -0.05943416804075241, | ||||
|     -0.129856139421463, | ||||
|     -0.0034469354432076216, | ||||
|     0.017888527363538742, | ||||
|     -0.03284582123160362, | ||||
|     -0.1037328764796257, | ||||
| ] | ||||
| 
 | ||||
| pre_extracted_feature_img_clip = [ | ||||
|     0.01621132344007492, | ||||
|     -0.004035486374050379, | ||||
|     -0.04304071143269539, | ||||
|     -0.03459808602929115, | ||||
|     0.016922621056437492, | ||||
|     -0.025056276470422745, | ||||
|     -0.04178355261683464, | ||||
|     0.02165347896516323, | ||||
|     -0.003224249929189682, | ||||
|     0.020485712215304375, | ||||
| ] | ||||
| 
 | ||||
| pre_extracted_feature_img_parsing_clip = [ | ||||
|     0.01621132344007492, | ||||
|     -0.004035486374050379, | ||||
|     -0.04304071143269539, | ||||
|     -0.03459808602929115, | ||||
|     0.016922621056437492, | ||||
|     -0.025056276470422745, | ||||
|     -0.04178355261683464, | ||||
|     0.02165347896516323, | ||||
|     -0.003224249929189682, | ||||
|     0.020485712215304375, | ||||
| ] | ||||
| 
 | ||||
| pre_extracted_feature_img_clip_vitl14 = [ | ||||
|     -0.023943455889821053, | ||||
|     -0.021703708916902542, | ||||
|     0.035043686628341675, | ||||
|     0.019495919346809387, | ||||
|     0.014351222664117813, | ||||
|     -0.008634116500616074, | ||||
|     0.01610446907579899, | ||||
|     -0.003426523646339774, | ||||
|     0.011931191198527813, | ||||
|     0.0008691544644534588, | ||||
| ] | ||||
| 
 | ||||
| pre_extracted_feature_img_clip_vitl14_336 = [ | ||||
|     -0.15060146152973175, | ||||
|     -0.1998099535703659, | ||||
|     0.5503129363059998, | ||||
|     0.2589969336986542, | ||||
|     -0.0182882659137249, | ||||
|     -0.12753525376319885, | ||||
|     0.018985718488693237, | ||||
|     -0.17110440135002136, | ||||
|     0.02220013737678528, | ||||
|     0.01086437702178955, | ||||
| ] | ||||
| 
 | ||||
| pre_extracted_feature_text_blip2 = [ | ||||
|     -0.1384204626083374, | ||||
|     -0.008662976324558258, | ||||
|     0.006269007455557585, | ||||
| @ -717,20 +816,185 @@ model_type = "blip" | ||||
|     -0.11856459826231003, | ||||
|     -0.058296192437410355, | ||||
|     0.03699290752410889, | ||||
|             ], | ||||
| ] | ||||
| 
 | ||||
| pre_extracted_feature_text_blip = [ | ||||
|     0.0118643119931221, | ||||
|     -0.01291718054562807, | ||||
|     -0.0009687161073088646, | ||||
|     0.01428765058517456, | ||||
|     -0.05591396614909172, | ||||
|     0.07386433333158493, | ||||
|     -0.11475936323404312, | ||||
|     0.01620068959891796, | ||||
|     0.0062415082938969135, | ||||
|     0.0034833091776818037, | ||||
| ] | ||||
| 
 | ||||
| pre_extracted_feature_text_albef = [ | ||||
|     -0.06229640915989876, | ||||
|     0.11278597265481949, | ||||
|     0.06628583371639252, | ||||
|     0.1649140566587448, | ||||
|     0.068987175822258, | ||||
|     0.006291372701525688, | ||||
|     0.03244050219655037, | ||||
|     -0.049556829035282135, | ||||
|     0.050752390176057816, | ||||
|     -0.0421440489590168, | ||||
| ] | ||||
| 
 | ||||
| pre_extracted_feature_text_clip = [ | ||||
|     0.018169036135077477, | ||||
|     0.03634127229452133, | ||||
|     0.025660742074251175, | ||||
|     0.009149895049631596, | ||||
|     -0.035570453852415085, | ||||
|     0.033126577734947205, | ||||
|     -0.004808237310498953, | ||||
|     -0.0031453112605959177, | ||||
|     -0.02194291725754738, | ||||
|     0.024019461125135422, | ||||
| ] | ||||
| 
 | ||||
| pre_extracted_feature_text_clip_vitl14 = [ | ||||
|     -0.0055463071912527084, | ||||
|     0.006908962037414312, | ||||
|     -0.019450219348073006, | ||||
|     -0.018097277730703354, | ||||
|     0.017567576840519905, | ||||
|     -0.03828490898013115, | ||||
|     -0.03781530633568764, | ||||
|     -0.023951737210154533, | ||||
|     0.01365653332322836, | ||||
|     -0.02341713197529316, | ||||
| ] | ||||
| 
 | ||||
| pre_extracted_feature_text_clip_vitl14_336 = [ | ||||
|     -0.1172553077340126, | ||||
|     0.07105237245559692, | ||||
|     -0.283934086561203, | ||||
|     -0.24353823065757751, | ||||
|     0.22662702202796936, | ||||
|     -0.472959041595459, | ||||
|     -0.5191791653633118, | ||||
|     -0.29402273893356323, | ||||
|     0.22669515013694763, | ||||
|     -0.32044747471809387, | ||||
| ] | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.parametrize( | ||||
|     ( | ||||
|         "pre_multimodal_device", | ||||
|         "pre_model", | ||||
|         "pre_proc_pic", | ||||
|         "pre_proc_text", | ||||
|         "pre_extracted_feature_img", | ||||
|         "pre_extracted_feature_text", | ||||
|         "pre_extracted_feature_img2", | ||||
|     ), | ||||
|     [ | ||||
|         pytest.param( | ||||
|             device("cuda"), | ||||
|             "blip2", | ||||
|             pre_proc_pic_blip2_blip_albef, | ||||
|             pre_proc_text_blip2_blip_albef, | ||||
|             pre_extracted_feature_img_blip2, | ||||
|             pre_extracted_feature_text_blip2, | ||||
|             pre_extracted_feature_img_blip2, | ||||
|             marks=pytest.mark.skipif( | ||||
|                 gpu_is_not_available, reason="gpu_is_not_availible" | ||||
|             ), | ||||
|         ), | ||||
|         ( | ||||
|             device("cuda"), | ||||
|             "blip", | ||||
|             pre_proc_pic_blip2_blip_albef, | ||||
|             pre_proc_text_blip2_blip_albef, | ||||
|             pre_extracted_feature_img_blip, | ||||
|             pre_extracted_feature_text_blip, | ||||
|             pre_extracted_feature_img_blip, | ||||
|         ), | ||||
|         pytest.param( | ||||
|             device("cuda"), | ||||
|             "blip", | ||||
|             pre_proc_pic_blip2_blip_albef, | ||||
|             pre_proc_text_blip2_blip_albef, | ||||
|             pre_extracted_feature_img_blip, | ||||
|             pre_extracted_feature_text_blip, | ||||
|             pre_extracted_feature_img_blip, | ||||
|             marks=pytest.mark.skipif( | ||||
|                 gpu_is_not_available, reason="gpu_is_not_availible" | ||||
|             ), | ||||
|         ), | ||||
|         ( | ||||
|             device("cpu"), | ||||
|             "albef", | ||||
|             pre_proc_pic_blip2_blip_albef, | ||||
|             pre_proc_text_blip2_blip_albef, | ||||
|             pre_extracted_feature_img_albef, | ||||
|             pre_extracted_feature_text_albef, | ||||
|             pre_extracted_feature_img_albef, | ||||
|         ), | ||||
|         pytest.param( | ||||
|             device("cuda"), | ||||
|             "albef", | ||||
|             pre_proc_pic_blip2_blip_albef, | ||||
|             pre_proc_text_blip2_blip_albef, | ||||
|             pre_extracted_feature_img_albef, | ||||
|             pre_extracted_feature_text_albef, | ||||
|             pre_extracted_feature_img_albef, | ||||
|             marks=pytest.mark.skipif( | ||||
|                 gpu_is_not_available, reason="gpu_is_not_availible" | ||||
|             ), | ||||
|         ), | ||||
|         ( | ||||
|             device("cpu"), | ||||
|             "clip_base", | ||||
|             pre_proc_pic_clip_vitl14, | ||||
|             pre_proc_text_clip_clip_vitl14_clip_vitl14_336, | ||||
|             pre_extracted_feature_img_parsing_clip, | ||||
|             pre_extracted_feature_text_clip, | ||||
|             pre_extracted_feature_img_clip, | ||||
|         ), | ||||
|         pytest.param( | ||||
|             device("cuda"), | ||||
|             "clip_base", | ||||
|             pre_proc_pic_clip_vitl14, | ||||
|             pre_proc_text_clip_clip_vitl14_clip_vitl14_336, | ||||
|             pre_extracted_feature_img_parsing_clip, | ||||
|             pre_extracted_feature_text_clip, | ||||
|             pre_extracted_feature_img_clip, | ||||
|             marks=pytest.mark.skipif( | ||||
|                 gpu_is_not_available, reason="gpu_is_not_availible" | ||||
|             ), | ||||
|         ), | ||||
|         ( | ||||
|             device("cpu"), | ||||
|             "clip_vitl14", | ||||
|             pre_proc_pic_clip_vitl14, | ||||
|             pre_proc_text_clip_clip_vitl14_clip_vitl14_336, | ||||
|             pre_extracted_feature_img_clip_vitl14, | ||||
|             pre_extracted_feature_text_clip_vitl14, | ||||
|             pre_extracted_feature_img_clip_vitl14, | ||||
|         ), | ||||
|         pytest.param( | ||||
|             device("cuda"), | ||||
|             "clip_vitl14", | ||||
|             pre_proc_pic_clip_vitl14, | ||||
|             pre_proc_text_clip_clip_vitl14_clip_vitl14_336, | ||||
|             pre_extracted_feature_img_clip_vitl14, | ||||
|             pre_extracted_feature_text_clip_vitl14, | ||||
|             pre_extracted_feature_img_clip_vitl14, | ||||
|             marks=pytest.mark.skipif( | ||||
|                 gpu_is_not_available, reason="gpu_is_not_availible" | ||||
|             ), | ||||
|         ), | ||||
|         #        (device("cpu"),"blip"), | ||||
|         #        (device("cpu"),"albef"), | ||||
|         #        (device("cpu"),"clip_base"), | ||||
|         #        (device("cpu"),"clip_vitl14"), | ||||
|         #        (device("cpu"),"clip_vitl14_336"), | ||||
|         #        pytest.param( device("cuda"),"blip", marks=pytest.mark.skipif(gpu_is_not_available, reason="gpu_is_not_availible"),), | ||||
|         #        pytest.param( device("cuda"),"albef", marks=pytest.mark.skipif(gpu_is_not_available, reason="gpu_is_not_availible"),), | ||||
|         #        pytest.param( device("cuda"),"clip_base", marks=pytest.mark.skipif(gpu_is_not_available, reason="gpu_is_not_availible"),), | ||||
|         #        pytest.param( device("cuda"),"clip_vitl14", marks=pytest.mark.skipif(gpu_is_not_available, reason="gpu_is_not_availible"),), | ||||
|         # | ||||
|         # | ||||
|         # | ||||
|         #        pytest.param( device("cuda"),"clip_vitl14_336", marks=pytest.mark.skipif(gpu_is_not_available, reason="gpu_is_not_availible"),), | ||||
|     ], | ||||
| ) | ||||
| @ -741,6 +1005,7 @@ def test_parsing_images( | ||||
|     pre_proc_text, | ||||
|     pre_extracted_feature_img, | ||||
|     pre_extracted_feature_text, | ||||
|     pre_extracted_feature_img2, | ||||
| ): | ||||
|     mydict = { | ||||
|         "IMG_2746": {"filename": "./test/data/IMG_2746.png"}, | ||||
| @ -777,9 +1042,8 @@ def test_parsing_images( | ||||
|     assert processed_text == pre_proc_text | ||||
| 
 | ||||
|     search_query = [ | ||||
|         { | ||||
|             "text_input": "The bird sat on a tree located at the intersection of 23rd and 43rd streets." | ||||
|         } | ||||
|         {"text_input": test_querry}, | ||||
|         {"image": TEST_IMAGE_2}, | ||||
|     ] | ||||
|     multi_features_stacked = ms.MultimodalSearch.querys_processing( | ||||
|         mydict, search_query, model, txt_processor, vis_processor, pre_model | ||||
| @ -791,5 +1055,11 @@ def test_parsing_images( | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     for i, num in zip(range(10), multi_features_stacked[1, 10:20].tolist()): | ||||
|         assert ( | ||||
|             math.isclose(num, pre_extracted_feature_img2[i], rel_tol=related_error) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     del model, vis_processor, txt_processor | ||||
|     cuda.empty_cache() | ||||
|  | ||||
		Загрузка…
	
	
			
			x
			
			
		
	
		Ссылка в новой задаче
	
	Block a user
	 Petr Andriushchenko
						Petr Andriushchenko