diff --git a/misinformation/test/test_multimodal_search.py b/misinformation/test/test_multimodal_search.py index cd460bf..6b606f1 100644 --- a/misinformation/test/test_multimodal_search.py +++ b/misinformation/test/test_multimodal_search.py @@ -1,4 +1,5 @@ import pytest +import math from PIL import Image import numpy from torch import device, cuda, no_grad @@ -17,6 +18,8 @@ TEST_IMAGE_9 = "./test/data/IMG_3755.jpg" TEST_IMAGE_10 = "./test/data/IMG_3756.jpg" TEST_IMAGE_11 = "./test/data/IMG_3757.jpg" TEST_IMAGE_12 = "./test/data/pic1.png" +related_error = 1e-3 +gpu_is_not_available = not cuda.is_available() def test_read_img(): @@ -51,8 +54,7 @@ def test_load_feature_extractor_model_blip2(): extracted_feature_text = model.extract_features( {"image": "", "text_input": processed_text}, mode="text" ) - - assert processed_pic[0, 0, 0, 25:35].tolist() == [ + check_list_processed_pic = [ -1.0039474964141846, -1.0039474964141846, -0.8433647751808167, @@ -64,13 +66,18 @@ def test_load_feature_extractor_model_blip2(): -0.7119789123535156, -1.0623412132263184, ] + for i, num in zip(range(10), processed_pic[0, 0, 0, 25:35].tolist()): + assert ( + math.isclose(num, check_list_processed_pic[i], rel_tol=related_error) + is True + ) assert ( processed_text == "the bird sat on a tree located at the intersection of 23rd and 43rd streets" ) - assert extracted_feature_img["image_embeds_proj"][0, 0, 10:20].tolist() == [ + check_list_extracted_feature_img = [ 0.04566730558872223, -0.042554520070552826, -0.06970272958278656, @@ -82,8 +89,17 @@ def test_load_feature_extractor_model_blip2(): 0.12923966348171234, 0.006452132016420364, ] + for i, num in zip( + range(10), extracted_feature_img["image_embeds_proj"][0, 0, 10:20].tolist() + ): + assert ( + math.isclose( + num, check_list_extracted_feature_img[i], rel_tol=related_error + ) + is True + ) - assert extracted_feature_text["text_embeds_proj"][0, 0, 10:20].tolist() == [ + check_list_extracted_feature_text = [ -0.1384519338607788, 
-0.008663734421133995, 0.006240826100111008, @@ -95,14 +111,57 @@ def test_load_feature_extractor_model_blip2(): -0.05823372304439545, 0.036941494792699814, ] + for i, num in zip( + range(10), extracted_feature_text["text_embeds_proj"][0, 0, 10:20].tolist() + ): + assert ( + math.isclose( + num, check_list_extracted_feature_text[i], rel_tol=related_error + ) + is True + ) + + image_paths = [TEST_IMAGE_2, TEST_IMAGE_3] + raw_images, images_tensors = ms.MultimodalSearch.read_and_process_images( + my_dict, image_paths, vis_processor + ) + + check_list_images_tensors = [ + -1.0039474964141846, + -1.0039474964141846, + -0.8433647751808167, + -0.6097899675369263, + -0.5951915383338928, + -0.6243883967399597, + -0.6827820539474487, + -0.6097899675369263, + -0.7119789123535156, + -1.0623412132263184, + ] + for i, num in zip(range(10), images_tensors[0, 0, 0, 0, 25:35].tolist()): + assert ( + math.isclose(num, check_list_images_tensors[i], rel_tol=related_error) + is True + ) del model, vis_processor, txt_processor cuda.empty_cache() -def test_load_feature_extractor_model_blip(): +@pytest.mark.parametrize( + ("multimodal_device"), + [ + device("cpu"), + pytest.param( + device("cuda"), + marks=pytest.mark.skipif( + gpu_is_not_available, reason="gpu_is_not_availible" + ), + ), + ], +) +def test_load_feature_extractor_model_blip(multimodal_device): my_dict = {} - multimodal_device = device("cuda" if cuda.is_available() else "cpu") ( model, vis_processor, @@ -125,7 +184,7 @@ def test_load_feature_extractor_model_blip(): {"image": "", "text_input": processed_text}, mode="text" ) - assert processed_pic[0, 0, 0, 25:35].tolist() == [ + check_list_processed_pic = [ -1.0039474964141846, -1.0039474964141846, -0.8433647751808167, @@ -137,13 +196,18 @@ def test_load_feature_extractor_model_blip(): -0.7119789123535156, -1.0623412132263184, ] + for i, num in zip(range(10), processed_pic[0, 0, 0, 25:35].tolist()): + assert ( + math.isclose(num, check_list_processed_pic[i], 
rel_tol=related_error) + is True + ) assert ( processed_text == "the bird sat on a tree located at the intersection of 23rd and 43rd streets" ) - assert extracted_feature_img["image_embeds_proj"][0, 0, 10:20].tolist() == [ + check_list_extracted_feature_img = [ -0.02480311505496502, 0.05037587881088257, 0.039517853409051895, @@ -155,8 +219,17 @@ def test_load_feature_extractor_model_blip(): -0.07324369996786118, 0.06994668394327164, ] + for i, num in zip( + range(10), extracted_feature_img["image_embeds_proj"][0, 0, 10:20].tolist() + ): + assert ( + math.isclose( + num, check_list_extracted_feature_img[i], rel_tol=related_error + ) + is True + ) - assert extracted_feature_text["text_embeds_proj"][0, 0, 10:20].tolist() == [ + check_list_extracted_feature_text = [ 0.0118643119931221, -0.01291718054562807, -0.0009687161073088646, @@ -168,14 +241,34 @@ def test_load_feature_extractor_model_blip(): 0.0062415082938969135, 0.0034833091776818037, ] + for i, num in zip( + range(10), extracted_feature_text["text_embeds_proj"][0, 0, 10:20].tolist() + ): + assert ( + math.isclose( + num, check_list_extracted_feature_text[i], rel_tol=related_error + ) + is True + ) del model, vis_processor, txt_processor cuda.empty_cache() -def test_load_feature_extractor_model_albef(): +@pytest.mark.parametrize( + ("multimodal_device"), + [ + device("cpu"), + pytest.param( + device("cuda"), + marks=pytest.mark.skipif( + gpu_is_not_available, reason="gpu_is_not_availible" + ), + ), + ], +) +def test_load_feature_extractor_model_albef(multimodal_device): my_dict = {} - multimodal_device = device("cuda" if cuda.is_available() else "cpu") ( model, vis_processor, @@ -198,7 +291,7 @@ def test_load_feature_extractor_model_albef(): {"image": "", "text_input": processed_text}, mode="text" ) - assert processed_pic[0, 0, 0, 25:35].tolist() == [ + check_list_processed_pic = [ -1.0039474964141846, -1.0039474964141846, -0.8433647751808167, @@ -210,13 +303,18 @@ def test_load_feature_extractor_model_albef(): 
-0.7119789123535156, -1.0623412132263184, ] + for i, num in zip(range(10), processed_pic[0, 0, 0, 25:35].tolist()): + assert ( + math.isclose(num, check_list_processed_pic[i], rel_tol=related_error) + is True + ) assert ( processed_text == "the bird sat on a tree located at the intersection of 23rd and 43rd streets" ) - assert extracted_feature_img["image_embeds_proj"][0, 0, 10:20].tolist() == [ + check_list_extracted_feature_img = [ 0.08971136063337326, -0.10915573686361313, -0.020636577159166336, @@ -228,8 +326,17 @@ def test_load_feature_extractor_model_albef(): -0.03284582123160362, -0.1037328764796257, ] + for i, num in zip( + range(10), extracted_feature_img["image_embeds_proj"][0, 0, 10:20].tolist() + ): + assert ( + math.isclose( + num, check_list_extracted_feature_img[i], rel_tol=related_error + ) + is True + ) - assert extracted_feature_text["text_embeds_proj"][0, 0, 10:20].tolist() == [ + check_list_extracted_feature_text = [ -0.06229640915989876, 0.11278597265481949, 0.06628583371639252, @@ -241,14 +348,34 @@ def test_load_feature_extractor_model_albef(): 0.050752390176057816, -0.0421440489590168, ] + for i, num in zip( + range(10), extracted_feature_text["text_embeds_proj"][0, 0, 10:20].tolist() + ): + assert ( + math.isclose( + num, check_list_extracted_feature_text[i], rel_tol=related_error + ) + is True + ) del model, vis_processor, txt_processor cuda.empty_cache() -def test_load_feature_extractor_model_clip_base(): +@pytest.mark.parametrize( + ("multimodal_device"), + [ + device("cpu"), + pytest.param( + device("cuda"), + marks=pytest.mark.skipif( + gpu_is_not_available, reason="gpu_is_not_availible" + ), + ), + ], +) +def test_load_feature_extractor_model_clip_base(multimodal_device): my_dict = {} - multimodal_device = device("cuda" if cuda.is_available() else "cpu") ( model, vis_processor, @@ -267,7 +394,7 @@ def test_load_feature_extractor_model_clip_base(): extracted_feature_img = model.extract_features({"image": processed_pic}) 
extracted_feature_text = model.extract_features({"text_input": processed_text}) - assert processed_pic[0, 0, 0, 25:35].tolist() == [ + check_list_processed_pic = [ -0.7995694875717163, -0.7849710583686829, -0.7849710583686829, @@ -279,13 +406,18 @@ def test_load_feature_extractor_model_clip_base(): -0.7703726291656494, -0.7703726291656494, ] + for i, num in zip(range(10), processed_pic[0, 0, 0, 25:35].tolist()): + assert ( + math.isclose(num, check_list_processed_pic[i], rel_tol=related_error) + is True + ) assert ( processed_text == "The bird sat on a tree located at the intersection of 23rd and 43rd streets." ) - assert extracted_feature_img[0, 10:20].tolist() == [ + check_list_extracted_feature_img = [ 0.15101124346256256, -0.03759124130010605, -0.40093156695365906, @@ -297,8 +429,15 @@ def test_load_feature_extractor_model_clip_base(): -0.030034437775611877, 0.19082790613174438, ] + for i, num in zip(range(10), extracted_feature_img[0, 10:20].tolist()): + assert ( + math.isclose( + num, check_list_extracted_feature_img[i], rel_tol=related_error + ) + is True + ) - assert extracted_feature_text[0, 10:20].tolist() == [ + check_list_extracted_feature_text = [ 0.15391531586647034, 0.3078577518463135, 0.21737979352474213, @@ -310,14 +449,32 @@ def test_load_feature_extractor_model_clip_base(): -0.1858849972486496, 0.20347601175308228, ] + for i, num in zip(range(10), extracted_feature_text[0, 10:20].tolist()): + assert ( + math.isclose( + num, check_list_extracted_feature_text[i], rel_tol=related_error + ) + is True + ) del model, vis_processor, txt_processor cuda.empty_cache() -def test_load_feature_extractor_model_clip_vitl14(): +@pytest.mark.parametrize( + ("multimodal_device"), + [ + device("cpu"), + pytest.param( + device("cuda"), + marks=pytest.mark.skipif( + gpu_is_not_available, reason="gpu_is_not_availible" + ), + ), + ], +) +def test_load_feature_extractor_model_clip_vitl14(multimodal_device): my_dict = {} - multimodal_device = device("cuda" if 
cuda.is_available() else "cpu") ( model, vis_processor, @@ -336,7 +493,7 @@ def test_load_feature_extractor_model_clip_vitl14(): extracted_feature_img = model.extract_features({"image": processed_pic}) extracted_feature_text = model.extract_features({"text_input": processed_text}) - assert processed_pic[0, 0, 0, 25:35].tolist() == [ + check_list_processed_pic = [ -0.7995694875717163, -0.7849710583686829, -0.7849710583686829, @@ -348,13 +505,18 @@ def test_load_feature_extractor_model_clip_vitl14(): -0.7703726291656494, -0.7703726291656494, ] + for i, num in zip(range(10), processed_pic[0, 0, 0, 25:35].tolist()): + assert ( + math.isclose(num, check_list_processed_pic[i], rel_tol=related_error) + is True + ) assert ( processed_text == "The bird sat on a tree located at the intersection of 23rd and 43rd streets." ) - assert extracted_feature_img[0, 10:20].tolist() == [ + check_list_extracted_feature_img = [ -0.3911527395248413, -0.35456305742263794, 0.5724918842315674, @@ -366,8 +528,15 @@ def test_load_feature_extractor_model_clip_vitl14(): 0.19491413235664368, 0.01419895887374878, ] + for i, num in zip(range(10), extracted_feature_img[0, 10:20].tolist()): + assert ( + math.isclose( + num, check_list_extracted_feature_img[i], rel_tol=related_error + ) + is True + ) - assert extracted_feature_text[0, 10:20].tolist() == [ + check_list_extracted_feature_text = [ -0.07539052516222, 0.0939129889011383, -0.2643853425979614, @@ -379,14 +548,32 @@ def test_load_feature_extractor_model_clip_vitl14(): 0.18563221395015717, -0.3183072805404663, ] + for i, num in zip(range(10), extracted_feature_text[0, 10:20].tolist()): + assert ( + math.isclose( + num, check_list_extracted_feature_text[i], rel_tol=related_error + ) + is True + ) del model, vis_processor, txt_processor cuda.empty_cache() -def test_load_feature_extractor_model_clip_vitl14_336(): +@pytest.mark.parametrize( + ("multimodal_device"), + [ + device("cpu"), + pytest.param( + device("cuda"), + marks=pytest.mark.skipif( 
+ gpu_is_not_available, reason="gpu_is_not_available" + ), + ), + ], +) +def test_load_feature_extractor_model_clip_vitl14_336(multimodal_device): my_dict = {} - multimodal_device = device("cuda" if cuda.is_available() else "cpu") ( model, vis_processor, @@ -405,7 +592,7 @@ def test_load_feature_extractor_model_clip_vitl14_336(): extracted_feature_img = model.extract_features({"image": processed_pic}) extracted_feature_text = model.extract_features({"text_input": processed_text}) - assert processed_pic[0, 0, 0, 25:35].tolist() == [ + check_list_processed_pic = [ -0.7995694875717163, -0.7849710583686829, -0.7849710583686829, @@ -417,13 +604,18 @@ def test_load_feature_extractor_model_clip_vitl14_336(): -1.149931788444519, -1.0039474964141846, ] + for i, num in zip(range(10), processed_pic[0, 0, 0, 25:35].tolist()): + assert ( + math.isclose(num, check_list_processed_pic[i], rel_tol=related_error) + is True + ) assert ( processed_text == "The bird sat on a tree located at the intersection of 23rd and 43rd streets."
) - assert extracted_feature_img[0, 10:20].tolist() == [ + check_list_extracted_feature_img = [ -0.15060146152973175, -0.1998099535703659, 0.5503129363059998, @@ -435,8 +627,15 @@ def test_load_feature_extractor_model_clip_vitl14_336(): 0.02220013737678528, 0.01086437702178955, ] + for i, num in zip(range(10), extracted_feature_img[0, 10:20].tolist()): + assert ( + math.isclose( + num, check_list_extracted_feature_img[i], rel_tol=related_error + ) + is True + ) - assert extracted_feature_text[0, 10:20].tolist() == [ + check_list_extracted_feature_text = [ -0.1172553077340126, 0.07105237245559692, -0.283934086561203, @@ -448,6 +647,13 @@ def test_load_feature_extractor_model_clip_vitl14_336(): 0.22669515013694763, -0.32044747471809387, ] + for i, num in zip(range(10), extracted_feature_text[0, 10:20].tolist()): + assert ( + math.isclose( + num, check_list_extracted_feature_text[i], rel_tol=related_error + ) + is True + ) del model, vis_processor, txt_processor cuda.empty_cache()