зеркало из
				https://github.com/ssciwr/AMMICO.git
				synced 2025-10-30 21:46:04 +02:00 
			
		
		
		
	fixed test multimodal search on cpu and gpu machines
Этот коммит содержится в:
		
							родитель
							
								
									ea275cdd09
								
							
						
					
					
						Коммит
						b0cfab05e9
					
				| @ -1,4 +1,5 @@ | ||||
| import pytest | ||||
| import math | ||||
| from PIL import Image | ||||
| import numpy | ||||
| from torch import device, cuda, no_grad | ||||
| @ -17,6 +18,8 @@ TEST_IMAGE_9 = "./test/data/IMG_3755.jpg" | ||||
| TEST_IMAGE_10 = "./test/data/IMG_3756.jpg" | ||||
| TEST_IMAGE_11 = "./test/data/IMG_3757.jpg" | ||||
| TEST_IMAGE_12 = "./test/data/pic1.png" | ||||
| related_error = 1e-3 | ||||
| gpu_is_not_available = not cuda.is_available() | ||||
| 
 | ||||
| 
 | ||||
| def test_read_img(): | ||||
| @ -51,8 +54,7 @@ def test_load_feature_extractor_model_blip2(): | ||||
|             extracted_feature_text = model.extract_features( | ||||
|                 {"image": "", "text_input": processed_text}, mode="text" | ||||
|             ) | ||||
| 
 | ||||
|     assert processed_pic[0, 0, 0, 25:35].tolist() == [ | ||||
|     check_list_processed_pic = [ | ||||
|         -1.0039474964141846, | ||||
|         -1.0039474964141846, | ||||
|         -0.8433647751808167, | ||||
| @ -64,13 +66,18 @@ def test_load_feature_extractor_model_blip2(): | ||||
|         -0.7119789123535156, | ||||
|         -1.0623412132263184, | ||||
|     ] | ||||
|     for i, num in zip(range(10), processed_pic[0, 0, 0, 25:35].tolist()): | ||||
|         assert ( | ||||
|             math.isclose(num, check_list_processed_pic[i], rel_tol=related_error) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     assert ( | ||||
|         processed_text | ||||
|         == "the bird sat on a tree located at the intersection of 23rd and 43rd streets" | ||||
|     ) | ||||
| 
 | ||||
|     assert extracted_feature_img["image_embeds_proj"][0, 0, 10:20].tolist() == [ | ||||
|     check_list_extracted_feature_img = [ | ||||
|         0.04566730558872223, | ||||
|         -0.042554520070552826, | ||||
|         -0.06970272958278656, | ||||
| @ -82,8 +89,17 @@ def test_load_feature_extractor_model_blip2(): | ||||
|         0.12923966348171234, | ||||
|         0.006452132016420364, | ||||
|     ] | ||||
|     for i, num in zip( | ||||
|         range(10), extracted_feature_img["image_embeds_proj"][0, 0, 10:20].tolist() | ||||
|     ): | ||||
|         assert ( | ||||
|             math.isclose( | ||||
|                 num, check_list_extracted_feature_img[i], rel_tol=related_error | ||||
|             ) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     assert extracted_feature_text["text_embeds_proj"][0, 0, 10:20].tolist() == [ | ||||
|     check_list_extracted_feature_text = [ | ||||
|         -0.1384519338607788, | ||||
|         -0.008663734421133995, | ||||
|         0.006240826100111008, | ||||
| @ -95,14 +111,57 @@ def test_load_feature_extractor_model_blip2(): | ||||
|         -0.05823372304439545, | ||||
|         0.036941494792699814, | ||||
|     ] | ||||
|     for i, num in zip( | ||||
|         range(10), extracted_feature_text["text_embeds_proj"][0, 0, 10:20].tolist() | ||||
|     ): | ||||
|         assert ( | ||||
|             math.isclose( | ||||
|                 num, check_list_extracted_feature_text[i], rel_tol=related_error | ||||
|             ) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     image_paths = [TEST_IMAGE_2, TEST_IMAGE_3] | ||||
|     raw_images, images_tensors = ms.MultimodalSearch.read_and_process_images( | ||||
|         my_dict, image_paths, vis_processor | ||||
|     ) | ||||
| 
 | ||||
|     check_list_images_tensors = [ | ||||
|         -1.0039474964141846, | ||||
|         -1.0039474964141846, | ||||
|         -0.8433647751808167, | ||||
|         -0.6097899675369263, | ||||
|         -0.5951915383338928, | ||||
|         -0.6243883967399597, | ||||
|         -0.6827820539474487, | ||||
|         -0.6097899675369263, | ||||
|         -0.7119789123535156, | ||||
|         -1.0623412132263184, | ||||
|     ] | ||||
|     for i, num in zip(range(10), images_tensors[0, 0, 0, 0, 25:35].tolist()): | ||||
|         assert ( | ||||
|             math.isclose(num, check_list_images_tensors[i], rel_tol=related_error) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     del model, vis_processor, txt_processor | ||||
|     cuda.empty_cache() | ||||
| 
 | ||||
| 
 | ||||
| def test_load_feature_extractor_model_blip(): | ||||
| @pytest.mark.parametrize( | ||||
|     ("multimodal_device"), | ||||
|     [ | ||||
|         device("cpu"), | ||||
|         pytest.param( | ||||
|             device("cuda"), | ||||
|             marks=pytest.mark.skipif( | ||||
|                 gpu_is_not_available, reason="gpu_is_not_availible" | ||||
|             ), | ||||
|         ), | ||||
|     ], | ||||
| ) | ||||
| def test_load_feature_extractor_model_blip(multimodal_device): | ||||
|     my_dict = {} | ||||
|     multimodal_device = device("cuda" if cuda.is_available() else "cpu") | ||||
|     ( | ||||
|         model, | ||||
|         vis_processor, | ||||
| @ -125,7 +184,7 @@ def test_load_feature_extractor_model_blip(): | ||||
|             {"image": "", "text_input": processed_text}, mode="text" | ||||
|         ) | ||||
| 
 | ||||
|     assert processed_pic[0, 0, 0, 25:35].tolist() == [ | ||||
|     check_list_processed_pic = [ | ||||
|         -1.0039474964141846, | ||||
|         -1.0039474964141846, | ||||
|         -0.8433647751808167, | ||||
| @ -137,13 +196,18 @@ def test_load_feature_extractor_model_blip(): | ||||
|         -0.7119789123535156, | ||||
|         -1.0623412132263184, | ||||
|     ] | ||||
|     for i, num in zip(range(10), processed_pic[0, 0, 0, 25:35].tolist()): | ||||
|         assert ( | ||||
|             math.isclose(num, check_list_processed_pic[i], rel_tol=related_error) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     assert ( | ||||
|         processed_text | ||||
|         == "the bird sat on a tree located at the intersection of 23rd and 43rd streets" | ||||
|     ) | ||||
| 
 | ||||
|     assert extracted_feature_img["image_embeds_proj"][0, 0, 10:20].tolist() == [ | ||||
|     check_list_extracted_feature_img = [ | ||||
|         -0.02480311505496502, | ||||
|         0.05037587881088257, | ||||
|         0.039517853409051895, | ||||
| @ -155,8 +219,17 @@ def test_load_feature_extractor_model_blip(): | ||||
|         -0.07324369996786118, | ||||
|         0.06994668394327164, | ||||
|     ] | ||||
|     for i, num in zip( | ||||
|         range(10), extracted_feature_img["image_embeds_proj"][0, 0, 10:20].tolist() | ||||
|     ): | ||||
|         assert ( | ||||
|             math.isclose( | ||||
|                 num, check_list_extracted_feature_img[i], rel_tol=related_error | ||||
|             ) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     assert extracted_feature_text["text_embeds_proj"][0, 0, 10:20].tolist() == [ | ||||
|     check_list_extracted_feature_text = [ | ||||
|         0.0118643119931221, | ||||
|         -0.01291718054562807, | ||||
|         -0.0009687161073088646, | ||||
| @ -168,14 +241,34 @@ def test_load_feature_extractor_model_blip(): | ||||
|         0.0062415082938969135, | ||||
|         0.0034833091776818037, | ||||
|     ] | ||||
|     for i, num in zip( | ||||
|         range(10), extracted_feature_text["text_embeds_proj"][0, 0, 10:20].tolist() | ||||
|     ): | ||||
|         assert ( | ||||
|             math.isclose( | ||||
|                 num, check_list_extracted_feature_text[i], rel_tol=related_error | ||||
|             ) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     del model, vis_processor, txt_processor | ||||
|     cuda.empty_cache() | ||||
| 
 | ||||
| 
 | ||||
| def test_load_feature_extractor_model_albef(): | ||||
| @pytest.mark.parametrize( | ||||
|     ("multimodal_device"), | ||||
|     [ | ||||
|         device("cpu"), | ||||
|         pytest.param( | ||||
|             device("cuda"), | ||||
|             marks=pytest.mark.skipif( | ||||
|                 gpu_is_not_available, reason="gpu_is_not_availible" | ||||
|             ), | ||||
|         ), | ||||
|     ], | ||||
| ) | ||||
| def test_load_feature_extractor_model_albef(multimodal_device): | ||||
|     my_dict = {} | ||||
|     multimodal_device = device("cuda" if cuda.is_available() else "cpu") | ||||
|     ( | ||||
|         model, | ||||
|         vis_processor, | ||||
| @ -198,7 +291,7 @@ def test_load_feature_extractor_model_albef(): | ||||
|             {"image": "", "text_input": processed_text}, mode="text" | ||||
|         ) | ||||
| 
 | ||||
|     assert processed_pic[0, 0, 0, 25:35].tolist() == [ | ||||
|     check_list_processed_pic = [ | ||||
|         -1.0039474964141846, | ||||
|         -1.0039474964141846, | ||||
|         -0.8433647751808167, | ||||
| @ -210,13 +303,18 @@ def test_load_feature_extractor_model_albef(): | ||||
|         -0.7119789123535156, | ||||
|         -1.0623412132263184, | ||||
|     ] | ||||
|     for i, num in zip(range(10), processed_pic[0, 0, 0, 25:35].tolist()): | ||||
|         assert ( | ||||
|             math.isclose(num, check_list_processed_pic[i], rel_tol=related_error) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     assert ( | ||||
|         processed_text | ||||
|         == "the bird sat on a tree located at the intersection of 23rd and 43rd streets" | ||||
|     ) | ||||
| 
 | ||||
|     assert extracted_feature_img["image_embeds_proj"][0, 0, 10:20].tolist() == [ | ||||
|     check_list_extracted_feature_img = [ | ||||
|         0.08971136063337326, | ||||
|         -0.10915573686361313, | ||||
|         -0.020636577159166336, | ||||
| @ -228,8 +326,17 @@ def test_load_feature_extractor_model_albef(): | ||||
|         -0.03284582123160362, | ||||
|         -0.1037328764796257, | ||||
|     ] | ||||
|     for i, num in zip( | ||||
|         range(10), extracted_feature_img["image_embeds_proj"][0, 0, 10:20].tolist() | ||||
|     ): | ||||
|         assert ( | ||||
|             math.isclose( | ||||
|                 num, check_list_extracted_feature_img[i], rel_tol=related_error | ||||
|             ) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     assert extracted_feature_text["text_embeds_proj"][0, 0, 10:20].tolist() == [ | ||||
|     check_list_extracted_feature_text = [ | ||||
|         -0.06229640915989876, | ||||
|         0.11278597265481949, | ||||
|         0.06628583371639252, | ||||
| @ -241,14 +348,34 @@ def test_load_feature_extractor_model_albef(): | ||||
|         0.050752390176057816, | ||||
|         -0.0421440489590168, | ||||
|     ] | ||||
|     for i, num in zip( | ||||
|         range(10), extracted_feature_text["text_embeds_proj"][0, 0, 10:20].tolist() | ||||
|     ): | ||||
|         assert ( | ||||
|             math.isclose( | ||||
|                 num, check_list_extracted_feature_text[i], rel_tol=related_error | ||||
|             ) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     del model, vis_processor, txt_processor | ||||
|     cuda.empty_cache() | ||||
| 
 | ||||
| 
 | ||||
| def test_load_feature_extractor_model_clip_base(): | ||||
| @pytest.mark.parametrize( | ||||
|     ("multimodal_device"), | ||||
|     [ | ||||
|         device("cpu"), | ||||
|         pytest.param( | ||||
|             device("cuda"), | ||||
|             marks=pytest.mark.skipif( | ||||
|                 gpu_is_not_available, reason="gpu_is_not_availible" | ||||
|             ), | ||||
|         ), | ||||
|     ], | ||||
| ) | ||||
| def test_load_feature_extractor_model_clip_base(multimodal_device): | ||||
|     my_dict = {} | ||||
|     multimodal_device = device("cuda" if cuda.is_available() else "cpu") | ||||
|     ( | ||||
|         model, | ||||
|         vis_processor, | ||||
| @ -267,7 +394,7 @@ def test_load_feature_extractor_model_clip_base(): | ||||
|         extracted_feature_img = model.extract_features({"image": processed_pic}) | ||||
|         extracted_feature_text = model.extract_features({"text_input": processed_text}) | ||||
| 
 | ||||
|     assert processed_pic[0, 0, 0, 25:35].tolist() == [ | ||||
|     check_list_processed_pic = [ | ||||
|         -0.7995694875717163, | ||||
|         -0.7849710583686829, | ||||
|         -0.7849710583686829, | ||||
| @ -279,13 +406,18 @@ def test_load_feature_extractor_model_clip_base(): | ||||
|         -0.7703726291656494, | ||||
|         -0.7703726291656494, | ||||
|     ] | ||||
|     for i, num in zip(range(10), processed_pic[0, 0, 0, 25:35].tolist()): | ||||
|         assert ( | ||||
|             math.isclose(num, check_list_processed_pic[i], rel_tol=related_error) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     assert ( | ||||
|         processed_text | ||||
|         == "The bird sat on a tree located at the intersection of 23rd and 43rd streets." | ||||
|     ) | ||||
| 
 | ||||
|     assert extracted_feature_img[0, 10:20].tolist() == [ | ||||
|     check_list_extracted_feature_img = [ | ||||
|         0.15101124346256256, | ||||
|         -0.03759124130010605, | ||||
|         -0.40093156695365906, | ||||
| @ -297,8 +429,15 @@ def test_load_feature_extractor_model_clip_base(): | ||||
|         -0.030034437775611877, | ||||
|         0.19082790613174438, | ||||
|     ] | ||||
|     for i, num in zip(range(10), extracted_feature_img[0, 10:20].tolist()): | ||||
|         assert ( | ||||
|             math.isclose( | ||||
|                 num, check_list_extracted_feature_img[i], rel_tol=related_error | ||||
|             ) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     assert extracted_feature_text[0, 10:20].tolist() == [ | ||||
|     check_list_extracted_feature_text = [ | ||||
|         0.15391531586647034, | ||||
|         0.3078577518463135, | ||||
|         0.21737979352474213, | ||||
| @ -310,14 +449,32 @@ def test_load_feature_extractor_model_clip_base(): | ||||
|         -0.1858849972486496, | ||||
|         0.20347601175308228, | ||||
|     ] | ||||
|     for i, num in zip(range(10), extracted_feature_text[0, 10:20].tolist()): | ||||
|         assert ( | ||||
|             math.isclose( | ||||
|                 num, check_list_extracted_feature_text[i], rel_tol=related_error | ||||
|             ) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     del model, vis_processor, txt_processor | ||||
|     cuda.empty_cache() | ||||
| 
 | ||||
| 
 | ||||
| def test_load_feature_extractor_model_clip_vitl14(): | ||||
| @pytest.mark.parametrize( | ||||
|     ("multimodal_device"), | ||||
|     [ | ||||
|         device("cpu"), | ||||
|         pytest.param( | ||||
|             device("cuda"), | ||||
|             marks=pytest.mark.skipif( | ||||
|                 gpu_is_not_available, reason="gpu_is_not_availible" | ||||
|             ), | ||||
|         ), | ||||
|     ], | ||||
| ) | ||||
| def test_load_feature_extractor_model_clip_vitl14(multimodal_device): | ||||
|     my_dict = {} | ||||
|     multimodal_device = device("cuda" if cuda.is_available() else "cpu") | ||||
|     ( | ||||
|         model, | ||||
|         vis_processor, | ||||
| @ -336,7 +493,7 @@ def test_load_feature_extractor_model_clip_vitl14(): | ||||
|         extracted_feature_img = model.extract_features({"image": processed_pic}) | ||||
|         extracted_feature_text = model.extract_features({"text_input": processed_text}) | ||||
| 
 | ||||
|     assert processed_pic[0, 0, 0, 25:35].tolist() == [ | ||||
|     check_list_processed_pic = [ | ||||
|         -0.7995694875717163, | ||||
|         -0.7849710583686829, | ||||
|         -0.7849710583686829, | ||||
| @ -348,13 +505,18 @@ def test_load_feature_extractor_model_clip_vitl14(): | ||||
|         -0.7703726291656494, | ||||
|         -0.7703726291656494, | ||||
|     ] | ||||
|     for i, num in zip(range(10), processed_pic[0, 0, 0, 25:35].tolist()): | ||||
|         assert ( | ||||
|             math.isclose(num, check_list_processed_pic[i], rel_tol=related_error) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     assert ( | ||||
|         processed_text | ||||
|         == "The bird sat on a tree located at the intersection of 23rd and 43rd streets." | ||||
|     ) | ||||
| 
 | ||||
|     assert extracted_feature_img[0, 10:20].tolist() == [ | ||||
|     check_list_extracted_feature_img = [ | ||||
|         -0.3911527395248413, | ||||
|         -0.35456305742263794, | ||||
|         0.5724918842315674, | ||||
| @ -366,8 +528,15 @@ def test_load_feature_extractor_model_clip_vitl14(): | ||||
|         0.19491413235664368, | ||||
|         0.01419895887374878, | ||||
|     ] | ||||
|     for i, num in zip(range(10), extracted_feature_img[0, 10:20].tolist()): | ||||
|         assert ( | ||||
|             math.isclose( | ||||
|                 num, check_list_extracted_feature_img[i], rel_tol=related_error | ||||
|             ) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     assert extracted_feature_text[0, 10:20].tolist() == [ | ||||
|     check_list_extracted_feature_text = [ | ||||
|         -0.07539052516222, | ||||
|         0.0939129889011383, | ||||
|         -0.2643853425979614, | ||||
| @ -379,14 +548,32 @@ def test_load_feature_extractor_model_clip_vitl14(): | ||||
|         0.18563221395015717, | ||||
|         -0.3183072805404663, | ||||
|     ] | ||||
|     for i, num in zip(range(10), extracted_feature_text[0, 10:20].tolist()): | ||||
|         assert ( | ||||
|             math.isclose( | ||||
|                 num, check_list_extracted_feature_text[i], rel_tol=related_error | ||||
|             ) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     del model, vis_processor, txt_processor | ||||
|     cuda.empty_cache() | ||||
| 
 | ||||
| 
 | ||||
| def test_load_feature_extractor_model_clip_vitl14_336(): | ||||
| @pytest.mark.parametrize( | ||||
|     ("multimodal_device"), | ||||
|     [ | ||||
|         device("cpu"), | ||||
|         pytest.param( | ||||
|             device("cuda"), | ||||
|             marks=pytest.mark.skipif( | ||||
|                 gpu_is_not_available, reason="gpu_is_not_availible" | ||||
|             ), | ||||
|         ), | ||||
|     ], | ||||
| ) | ||||
| def test_load_feature_extractor_model_clip_vitl14_336(multimodal_device): | ||||
|     my_dict = {} | ||||
|     multimodal_device = device("cuda" if cuda.is_available() else "cpu") | ||||
|     ( | ||||
|         model, | ||||
|         vis_processor, | ||||
| @ -405,7 +592,7 @@ def test_load_feature_extractor_model_clip_vitl14_336(): | ||||
|         extracted_feature_img = model.extract_features({"image": processed_pic}) | ||||
|         extracted_feature_text = model.extract_features({"text_input": processed_text}) | ||||
| 
 | ||||
|     assert processed_pic[0, 0, 0, 25:35].tolist() == [ | ||||
|     check_list_processed_pic = [ | ||||
|         -0.7995694875717163, | ||||
|         -0.7849710583686829, | ||||
|         -0.7849710583686829, | ||||
| @ -417,13 +604,18 @@ def test_load_feature_extractor_model_clip_vitl14_336(): | ||||
|         -1.149931788444519, | ||||
|         -1.0039474964141846, | ||||
|     ] | ||||
|     for i, num in zip(range(10), processed_pic[0, 0, 0, 25:35].tolist()): | ||||
|         assert ( | ||||
|             math.isclose(num, check_list_processed_pic[i], rel_tol=related_error) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     assert ( | ||||
|         processed_text | ||||
|         == "The bird sat on a tree located at the intersection of 23rd and 43rd streets." | ||||
|     ) | ||||
| 
 | ||||
|     assert extracted_feature_img[0, 10:20].tolist() == [ | ||||
|     check_list_extracted_feature_img = [ | ||||
|         -0.15060146152973175, | ||||
|         -0.1998099535703659, | ||||
|         0.5503129363059998, | ||||
| @ -435,8 +627,15 @@ def test_load_feature_extractor_model_clip_vitl14_336(): | ||||
|         0.02220013737678528, | ||||
|         0.01086437702178955, | ||||
|     ] | ||||
|     for i, num in zip(range(10), extracted_feature_img[0, 10:20].tolist()): | ||||
|         assert ( | ||||
|             math.isclose( | ||||
|                 num, check_list_extracted_feature_img[i], rel_tol=related_error | ||||
|             ) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     assert extracted_feature_text[0, 10:20].tolist() == [ | ||||
|     check_list_extracted_feature_text = [ | ||||
|         -0.1172553077340126, | ||||
|         0.07105237245559692, | ||||
|         -0.283934086561203, | ||||
| @ -448,6 +647,13 @@ def test_load_feature_extractor_model_clip_vitl14_336(): | ||||
|         0.22669515013694763, | ||||
|         -0.32044747471809387, | ||||
|     ] | ||||
|     for i, num in zip(range(10), extracted_feature_text[0, 10:20].tolist()): | ||||
|         assert ( | ||||
|             math.isclose( | ||||
|                 num, check_list_extracted_feature_text[i], rel_tol=related_error | ||||
|             ) | ||||
|             is True | ||||
|         ) | ||||
| 
 | ||||
|     del model, vis_processor, txt_processor | ||||
|     cuda.empty_cache() | ||||
|  | ||||
		Загрузка…
	
	
			
			x
			
			
		
	
		Ссылка в новой задаче
	
	Block a user
	 Petr Andriushchenko
						Petr Andriushchenko