diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f9207b0..4c1c8d2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-22.04,windows-latest] + os: [ubuntu-22.04] python-version: [3.9] steps: - name: Checkout repository @@ -32,10 +32,11 @@ jobs: - name: Run pytest run: | cd misinformation - python -m pytest --cov=. --cov-report=xml + python -m pytest -m "not gcv" -svv --cov=. --cov-report=xml - name: Upload coverage if: matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.9' uses: codecov/codecov-action@v3 with: fail_ci_if_error: true files: misinformation/coverage.xml + verbose: true diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index a499ab3..58f94d7 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -3,8 +3,6 @@ name: Pages on: push: branches: [ main ] - pull_request: - branches: [ main ] workflow_dispatch: jobs: diff --git a/docs/source/notebooks/Example summary.ipynb b/docs/source/notebooks/Example summary.ipynb index d2a69b7..1cf40c1 100644 --- a/docs/source/notebooks/Example summary.ipynb +++ b/docs/source/notebooks/Example summary.ipynb @@ -20,7 +20,6 @@ "metadata": {}, "outputs": [], "source": [ - "import misinformation\n", "from misinformation import utils as mutils\n", "from misinformation import display as mdisplay\n", "import misinformation.summary as sm" @@ -74,7 +73,8 @@ "metadata": {}, "outputs": [], "source": [ - "summary_model, summary_vis_processors = mutils.load_model(\"base\")\n", + "obj = sm.SummaryDetector(mydict)\n", + "summary_model, summary_vis_processors = obj.load_model(\"base\")\n", "# summary_model, summary_vis_processors = mutils.load_model(\"large\")" ] }, @@ -96,7 +96,7 @@ "tags": [] }, "source": [ - "Convert the dictionary of dictionarys into a dictionary with lists:" + "Convert the dictionary of dictionaries into a dictionary with lists:" ] }, { @@ -256,7 +256,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, diff --git a/misinformation/faces.py b/misinformation/faces.py index 18265ac..a82da44 100644 --- a/misinformation/faces.py +++ b/misinformation/faces.py @@ -141,7 +141,7 @@ class EmotionDetector(utils.AnalysisMethod): DeepFace.analyze( img_path=face, actions=actions, - silent=True, + prog_bar=False, detector_backend="skip", ) ) @@ -192,6 +192,10 @@ class EmotionDetector(utils.AnalysisMethod): "Yes" if result[person]["wears_mask"] else "No" ) self.subdict["age"].append(result[person]["age"]) + # gender is now reported as a list of dictionaries + # each dict represents one face + # each dict contains probability for Woman and Man + # take only the higher prob result for each dict self.subdict["gender"].append(result[person]["gender"]) # race, emotion only detected if person does not wear mask if result[person]["wears_mask"]: diff --git a/misinformation/multimodal_search.py b/misinformation/multimodal_search.py index 3405493..ad4ea8f 100644 --- a/misinformation/multimodal_search.py +++ b/misinformation/multimodal_search.py @@ -174,7 +174,7 @@ class MultimodalSearch(AnalysisMethod): "Please, use one of the following models: blip2, blip, albef, clip_base, clip_vitl14, clip_vitl14_336" ) - raw_images, images_tensors = MultimodalSearch.read_and_process_images( + _, images_tensors = MultimodalSearch.read_and_process_images( self, image_names, vis_processors ) if path_to_saved_tensors is 
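The new faces.py comment anticipates DeepFace returning gender as a per-face dictionary of Woman/Man probabilities, yet the line below it still appends result[person]["gender"] unchanged. A minimal sketch of the reduction the comment describes, assuming that dictionary shape (dominant_gender is a hypothetical helper, not part of this patch):

```python
# Sketch only: reduce a {"Woman": prob, "Man": prob} mapping to the
# dominant label, as the faces.py comment suggests. The dict shape is
# an assumption taken from the comment, not from the pinned deepface.
def dominant_gender(gender_result):
    if isinstance(gender_result, dict):
        # pick the label with the highest reported probability
        return max(gender_result, key=gender_result.get)
    # older DeepFace releases already return a plain "Woman"/"Man" string
    return gender_result


print(dominant_gender({"Woman": 99.1, "Man": 0.9}))  # -> Woman
```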
None: @@ -213,7 +213,7 @@ class MultimodalSearch(AnalysisMethod): for query in search_query: if not (len(query) == 1) and (query in ("image", "text_input")): raise SyntaxError( - 'Each querry must contain either an "image" or a "text_input"' + 'Each query must contain either an "image" or a "text_input"' ) multi_sample = [] for query in search_query: diff --git a/misinformation/summary.py b/misinformation/summary.py index b348119..32cd7f4 100644 --- a/misinformation/summary.py +++ b/misinformation/summary.py @@ -7,36 +7,28 @@ from lavis.models import load_model_and_preprocess class SummaryDetector(AnalysisMethod): def __init__(self, subdict: dict) -> None: super().__init__(subdict) - - summary_device = device("cuda" if cuda.is_available() else "cpu") - summary_model, summary_vis_processors, _ = load_model_and_preprocess( - name="blip_caption", - model_type="base_coco", - is_eval=True, - device=summary_device, - ) + self.summary_device = device("cuda" if cuda.is_available() else "cpu") def load_model_base(self): - summary_device = device("cuda" if cuda.is_available() else "cpu") summary_model, summary_vis_processors, _ = load_model_and_preprocess( name="blip_caption", model_type="base_coco", is_eval=True, - device=summary_device, + device=self.summary_device, ) return summary_model, summary_vis_processors def load_model_large(self): - summary_device = device("cuda" if cuda.is_available() else "cpu") summary_model, summary_vis_processors, _ = load_model_and_preprocess( name="blip_caption", model_type="large_coco", is_eval=True, - device=summary_device, + device=self.summary_device, ) return summary_model, summary_vis_processors def load_model(self, model_type): + # self.summary_device = device("cuda" if cuda.is_available() else "cpu") select_model = { "base": SummaryDetector.load_model_base, "large": SummaryDetector.load_model_large, @@ -47,8 +39,7 @@ class SummaryDetector(AnalysisMethod): def analyse_image(self, summary_model=None, summary_vis_processors=None): if summary_model is None and summary_vis_processors is None: - summary_model = SummaryDetector.summary_model - summary_vis_processors = SummaryDetector.summary_vis_processors + summary_model, summary_vis_processors = self.load_model_base() path = self.subdict["filename"] raw_image = Image.open(path).convert("RGB") @@ -66,32 +57,33 @@ class SummaryDetector(AnalysisMethod): ) return self.subdict - ( - summary_VQA_model, - summary_VQA_vis_processors, - summary_VQA_txt_processors, - ) = load_model_and_preprocess( - name="blip_vqa", model_type="vqav2", is_eval=True, device=summary_device - ) - def analyse_questions(self, list_of_questions): - + ( + summary_VQA_model, + summary_VQA_vis_processors, + summary_VQA_txt_processors, + ) = load_model_and_preprocess( + name="blip_vqa", + model_type="vqav2", + is_eval=True, + device=self.summary_device, + ) if len(list_of_questions) > 0: path = self.subdict["filename"] raw_image = Image.open(path).convert("RGB") image = ( - self.summary_VQA_vis_processors["eval"](raw_image) + summary_VQA_vis_processors["eval"](raw_image) .unsqueeze(0) .to(self.summary_device) ) question_batch = [] for quest in list_of_questions: - question_batch.append(self.summary_VQA_txt_processors["eval"](quest)) + question_batch.append(summary_VQA_txt_processors["eval"](quest)) batch_size = len(list_of_questions) image_batch = image.repeat(batch_size, 1, 1, 1) with no_grad(): - answers_batch = self.summary_VQA_model.predict_answers( + answers_batch = summary_VQA_model.predict_answers( samples={"image": image_batch, 
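analyse_questions above batches every question against a single image by repeating the preprocessed image tensor along the batch dimension before the single predict_answers call. A shape-only sketch of that trick, with no model loaded (384 is a typical BLIP input size and is assumed here):

```python
# Shape-only illustration of the image/question batching in
# analyse_questions: one (1, C, H, W) tensor is repeated so each
# question in the batch sees the same image.
from torch import rand

image = rand(1, 3, 384, 384)  # stand-in for vis_processors["eval"](raw_image).unsqueeze(0)
questions = ["How many persons on the picture?", "What happens on the picture?"]
image_batch = image.repeat(len(questions), 1, 1, 1)
assert image_batch.shape == (2, 3, 384, 384)
```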
"text_input": question_batch}, inference_method="generate", ) diff --git a/misinformation/test/data/example_analysis_objects.json b/misinformation/test/data/example_analysis_objects.json index 9ab75d4..89e9e6b 100644 --- a/misinformation/test/data/example_analysis_objects.json +++ b/misinformation/test/data/example_analysis_objects.json @@ -1 +1 @@ -{"IMG_2746": {"filename": "./test/data/IMG_2809.png", "person": "yes", "bicycle": "no", "car": "yes", "motorcycle": "no", "airplane": "no", "bus": "yes", "train": "no", "truck": "no", "boat": "no", "traffic light": "no", "cell phone": "no"}} \ No newline at end of file +{"person": "yes", "bicycle": "no", "car": "yes", "motorcycle": "no", "airplane": "no", "bus": "yes", "train": "no", "truck": "no", "boat": "no", "traffic light": "no", "cell phone": "no"} diff --git a/misinformation/test/data/example_faces.json b/misinformation/test/data/example_faces.json index 8465ca1..2c984bd 100644 --- a/misinformation/test/data/example_faces.json +++ b/misinformation/test/data/example_faces.json @@ -1,5 +1,4 @@ { - "filename": "./test/data/IMG_2746.png", "face": "Yes", "multiple_faces": "Yes", "no_faces": 11, diff --git a/misinformation/test/data/text_IMG_3756.txt b/misinformation/test/data/text_IMG_3756.txt index 0e2c9bf..49690b1 100644 --- a/misinformation/test/data/text_IMG_3756.txt +++ b/misinformation/test/data/text_IMG_3756.txt @@ -3,10 +3,10 @@ The Quantum Theory of Nonrelativistic Collisions JOHN R. TAYLOR University of Colorado -postaldia Lanbidean +ostaliga Lanbidean 1 ilde ballenger stor goin -gd OOL, STVÍ 23 TL 02 +gdĐOL, SIVI 23 TL 02 de in obl och yd badalang a diff --git a/misinformation/test/data/text_translated_IMG_3756.txt b/misinformation/test/data/text_translated_IMG_3756.txt index 0e2c9bf..04479ee 100644 --- a/misinformation/test/data/text_translated_IMG_3756.txt +++ b/misinformation/test/data/text_translated_IMG_3756.txt @@ -3,12 +3,12 @@ The Quantum Theory of Nonrelativistic Collisions JOHN R. TAYLOR University of Colorado -postaldia Lanbidean +ostaliga Lanbidean 1 ilde -ballenger stor goin -gd OOL, STVÍ 23 TL 02 -de in obl -och yd badalang +balloons big goin +gdĐOL, SIVI 23 TL +there in obl +och yd change a Ber -ook Sy-RW enot go baldus \ No newline at end of file +ook Sy-RW isn't going anywhere \ No newline at end of file diff --git a/misinformation/test/pytest.ini b/misinformation/test/pytest.ini index 1b5c3a7..fc5b670 100644 --- a/misinformation/test/pytest.ini +++ b/misinformation/test/pytest.ini @@ -1,3 +1,4 @@ [pytest] markers = - gcv: mark google cloud vision tests - skip to save money. \ No newline at end of file + gcv: mark google cloud vision tests - skip to save money. + long: mark long running tests - skip to save compute resources. 
\ No newline at end of file diff --git a/misinformation/test/test_cropposts.py b/misinformation/test/test_cropposts.py index 1a97b8d..d75f385 100644 --- a/misinformation/test/test_cropposts.py +++ b/misinformation/test/test_cropposts.py @@ -2,20 +2,20 @@ import misinformation.cropposts as crpo import numpy as np from PIL import Image -TEST_IMAGE_1 = "./test/data/pic1.png" -TEST_IMAGE_2 = "./test/data/pic2.png" +TEST_IMAGE_1 = "pic1.png" +TEST_IMAGE_2 = "pic2.png" -def test_matching_points(): - ref_view = np.array(Image.open(TEST_IMAGE_2)) - view = np.array(Image.open(TEST_IMAGE_1)) - filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view) +def test_matching_points(get_path): + ref_view = np.array(Image.open(get_path + TEST_IMAGE_2)) + view = np.array(Image.open(get_path + TEST_IMAGE_1)) + filtered_matches, _, _ = crpo.matching_points(ref_view, view) assert len(filtered_matches) > 0 -def test_kp_from_matches(): - ref_view = np.array(Image.open(TEST_IMAGE_2)) - view = np.array(Image.open(TEST_IMAGE_1)) +def test_kp_from_matches(get_path): + ref_view = np.array(Image.open(get_path + TEST_IMAGE_2)) + view = np.array(Image.open(get_path + TEST_IMAGE_1)) filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view) kp1, kp2 = crpo.kp_from_matches(filtered_matches, kp1, kp2) @@ -25,9 +25,9 @@ def test_kp_from_matches(): assert kp2.shape[1] == 2 -def test_compute_crop_corner(): - ref_view = np.array(Image.open(TEST_IMAGE_2)) - view = np.array(Image.open(TEST_IMAGE_1)) +def test_compute_crop_corner(get_path): + ref_view = np.array(Image.open(get_path + TEST_IMAGE_2)) + view = np.array(Image.open(get_path + TEST_IMAGE_1)) filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view) corner = crpo.compute_crop_corner(filtered_matches, kp1, kp2) print(view.shape) @@ -38,9 +38,9 @@ def test_compute_crop_corner(): assert 0 <= h < view.shape[0] -def test_crop_posts_image(): - ref_view = np.array(Image.open(TEST_IMAGE_2)) - view = np.array(Image.open(TEST_IMAGE_1)) +def test_crop_posts_image(get_path): + ref_view = np.array(Image.open(get_path + TEST_IMAGE_2)) + view = np.array(Image.open(get_path + TEST_IMAGE_1)) rte = crpo.crop_posts_image(ref_view, view) assert rte is not None crop_view, match_num = rte @@ -48,16 +48,15 @@ def test_crop_posts_image(): assert crop_view.shape[0] * crop_view.shape[1] <= view.shape[0] * view.shape[1] -def test_crop_posts_from_refs(): - ref_view = np.array(Image.open(TEST_IMAGE_2)) - view = np.array(Image.open(TEST_IMAGE_1)) +def test_crop_posts_from_refs(get_path): + ref_view = np.array(Image.open(get_path + TEST_IMAGE_2)) + view = np.array(Image.open(get_path + TEST_IMAGE_1)) ref_views = [ref_view] crop_view = crpo.crop_posts_from_refs(ref_views, view) assert crop_view.shape[0] * crop_view.shape[1] <= view.shape[0] * view.shape[1] -def test_get_file_list(): +def test_get_file_list(get_path): ref_list = [] - ref_dir = "./test/data" - ref_list = crpo.get_file_list(ref_dir, ref_list, ext="png") + ref_list = crpo.get_file_list(get_path, ref_list, ext="png") assert len(ref_list) > 0 diff --git a/misinformation/test/test_display.py b/misinformation/test/test_display.py index fde8a29..7d7d45f 100644 --- a/misinformation/test/test_display.py +++ b/misinformation/test/test_display.py @@ -1,27 +1,28 @@ import json - -# import misinformation.display as misinf_display -import pytest - -misinf_display = pytest.importorskip("misinformation.display") +import misinformation.display as misinf_display -def test_explore_analysis_faces(): - mydict = {"IMG_2746": 
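The test refactor above threads a get_path fixture through test_cropposts.py (and the files that follow), but the fixture itself is not part of this diff. A minimal conftest.py sketch of what it presumably provides, inferred from usages such as get_path + "pic1.png" - note the trailing separator:

```python
# Assumed conftest.py fixture: returns the test data directory with a
# trailing slash so tests can concatenate bare filenames onto it.
import os

import pytest


@pytest.fixture
def get_path(request):
    mypath = os.path.dirname(request.module.__file__)
    return mypath + "/data/"
```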
{"filename": "./test/data/IMG_2746.png"}} - misinf_display.explore_analysis(mydict, identify="faces") - with open("./test/data/example_faces.json", "r") as file: +def test_explore_analysis_faces(get_path): + mydict = {"IMG_2746": {"filename": get_path + "IMG_2746.png"}} + temp = misinf_display.explore_analysis(mydict, identify="faces") # noqa + temp = None # noqa + with open(get_path + "example_faces.json", "r") as file: outs = json.load(file) - + mydict["IMG_2746"].pop("filename", None) for im_key in mydict.keys(): sub_dict = mydict[im_key] for key in sub_dict.keys(): assert sub_dict[key] == outs[key] -def test_explore_analysis_objects(): - mydict = {"IMG_2746": {"filename": "./test/data/IMG_2809.png"}} - misinf_display.explore_analysis(mydict, identify="objects") - with open("./test/data/example_analysis_objects.json", "r") as file: +def test_explore_analysis_objects(get_path): + mydict = {"IMG_2809": {"filename": get_path + "IMG_2809.png"}} + temp = misinf_display.explore_analysis(mydict, identify="objects") # noqa + temp = None # noqa + with open(get_path + "example_analysis_objects.json", "r") as file: outs = json.load(file) - - assert str(mydict) == str(outs) + mydict["IMG_2809"].pop("filename", None) + for im_key in mydict.keys(): + sub_dict = mydict[im_key] + for key in sub_dict.keys(): + assert sub_dict[key] == outs[key] diff --git a/misinformation/test/test_faces.py b/misinformation/test/test_faces.py index 9682c22..6cf31ab 100644 --- a/misinformation/test/test_faces.py +++ b/misinformation/test/test_faces.py @@ -1,16 +1,17 @@ import misinformation.faces as fc import json -from pytest import approx +import pytest -def test_analyse_faces(): +def test_analyse_faces(get_path): mydict = { - "filename": "./test/data/IMG_2746.png", + "filename": get_path + "IMG_2746.png", } - mydict = fc.EmotionDetector(mydict).analyse_image() + mydict.update(fc.EmotionDetector(mydict).analyse_image()) - with open("./test/data/example_faces.json", "r") as file: + with open(get_path + "example_faces.json", "r") as file: out_dict = json.load(file) - + # delete the filename key + mydict.pop("filename", None) for key in mydict.keys(): assert mydict[key] == out_dict[key] diff --git a/misinformation/test/test_multimodal_search.py b/misinformation/test/test_multimodal_search.py index badfc55..ca5f162 100644 --- a/misinformation/test/test_multimodal_search.py +++ b/misinformation/test/test_multimodal_search.py @@ -5,24 +5,13 @@ import numpy from torch import device, cuda import misinformation.multimodal_search as ms + testdict = { - "d755771b-225e-432f-802e-fb8dc850fff7": { - "filename": "./test/data/d755771b-225e-432f-802e-fb8dc850fff7.png" - }, "IMG_2746": {"filename": "./test/data/IMG_2746.png"}, - "IMG_2750": {"filename": "./test/data/IMG_2750.png"}, - "IMG_2805": {"filename": "./test/data/IMG_2805.png"}, - "IMG_2806": {"filename": "./test/data/IMG_2806.png"}, - "IMG_2807": {"filename": "./test/data/IMG_2807.png"}, - "IMG_2808": {"filename": "./test/data/IMG_2808.png"}, "IMG_2809": {"filename": "./test/data/IMG_2809.png"}, - "IMG_3755": {"filename": "./test/data/IMG_3755.jpg"}, - "IMG_3756": {"filename": "./test/data/IMG_3756.jpg"}, - "IMG_3757": {"filename": "./test/data/IMG_3757.jpg"}, - "pic1": {"filename": "./test/data/pic1.png"}, } -related_error = 1e-3 +related_error = 1e-2 gpu_is_not_available = not cuda.is_available() @@ -38,39 +27,15 @@ def test_read_img(): pre_proc_pic_blip2_blip_albef = [ -1.0039474964141846, -1.0039474964141846, - -0.8433647751808167, - -0.6097899675369263, - 
-0.5951915383338928, - -0.6243883967399597, - -0.6827820539474487, - -0.6097899675369263, - -0.7119789123535156, - -1.0623412132263184, ] pre_proc_pic_clip_vitl14 = [ -0.7995694875717163, -0.7849710583686829, - -0.7849710583686829, - -0.7703726291656494, - -0.7703726291656494, - -0.7849710583686829, - -0.7849710583686829, - -0.7703726291656494, - -0.7703726291656494, - -0.7703726291656494, ] pre_proc_pic_clip_vitl14_336 = [ -0.7995694875717163, -0.7849710583686829, - -0.7849710583686829, - -0.7849710583686829, - -0.7849710583686829, - -0.7849710583686829, - -0.7849710583686829, - -0.9163569211959839, - -1.149931788444519, - -1.0039474964141846, ] pre_proc_text_blip2_blip_albef = ( @@ -84,293 +49,150 @@ pre_proc_text_clip_clip_vitl14_clip_vitl14_336 = ( pre_extracted_feature_img_blip2 = [ 0.04566730558872223, -0.042554520070552826, - -0.06970272958278656, - -0.009771779179573059, - 0.01446065679192543, - 0.10173682868480682, - 0.007092420011758804, - -0.020045937970280647, - 0.12923966348171234, - 0.006452132016420364, ] pre_extracted_feature_img_blip = [ -0.02480311505496502, 0.05037587881088257, - 0.039517853409051895, - -0.06994109600782394, - -0.12886561453342438, - 0.047039758414030075, - -0.11620642244815826, - -0.003398326924070716, - -0.07324369996786118, - 0.06994668394327164, ] pre_extracted_feature_img_albef = [ 0.08971136063337326, -0.10915573686361313, - -0.020636577159166336, - 0.048121627420186996, - -0.05943416804075241, - -0.129856139421463, - -0.0034469354432076216, - 0.017888527363538742, - -0.03284582123160362, - -0.1037328764796257, ] pre_extracted_feature_img_clip = [ 0.01621132344007492, -0.004035486374050379, - -0.04304071143269539, - -0.03459808602929115, - 0.016922621056437492, - -0.025056276470422745, - -0.04178355261683464, - 0.02165347896516323, - -0.003224249929189682, - 0.020485712215304375, ] pre_extracted_feature_img_parsing_clip = [ 0.01621132344007492, -0.004035486374050379, - -0.04304071143269539, - -0.03459808602929115, - 0.016922621056437492, - -0.025056276470422745, - -0.04178355261683464, - 0.02165347896516323, - -0.003224249929189682, - 0.020485712215304375, ] pre_extracted_feature_img_clip_vitl14 = [ -0.023943455889821053, -0.021703708916902542, - 0.035043686628341675, - 0.019495919346809387, - 0.014351222664117813, - -0.008634116500616074, - 0.01610446907579899, - -0.003426523646339774, - 0.011931191198527813, - 0.0008691544644534588, ] pre_extracted_feature_img_clip_vitl14_336 = [ -0.009511193260550499, -0.012618942186236382, - 0.034754861146211624, - 0.016356879845261574, - -0.0011549904011189938, - -0.008054453879594803, - 0.0011990377679467201, - -0.010806051082909107, - 0.00140204350464046, - 0.0006861367146484554, ] pre_extracted_feature_text_blip2 = [ -0.1384204626083374, -0.008662976324558258, - 0.006269007455557585, - 0.03151319921016693, - 0.060558050870895386, - -0.03230040520429611, - 0.015861615538597107, - -0.11856459826231003, - -0.058296192437410355, - 0.03699290752410889, ] pre_extracted_feature_text_blip = [ 0.0118643119931221, -0.01291718054562807, - -0.0009687161073088646, - 0.01428765058517456, - -0.05591396614909172, - 0.07386433333158493, - -0.11475936323404312, - 0.01620068959891796, - 0.0062415082938969135, - 0.0034833091776818037, ] pre_extracted_feature_text_albef = [ -0.06229640915989876, 0.11278597265481949, - 0.06628583371639252, - 0.1649140566587448, - 0.068987175822258, - 0.006291372701525688, - 0.03244050219655037, - -0.049556829035282135, - 0.050752390176057816, - -0.0421440489590168, ] 
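The reference lists above are deliberately trimmed to their first two components; later in this file they are compared element-wise against freshly computed features using math.isclose with the relative tolerance related_error = 1e-2. A self-contained sketch of that comparison pattern, with illustrative numbers only:

```python
# Illustrative only: how the trimmed fixtures are checked against model
# output. The reference values mimic pre_extracted_feature_img_albef;
# "computed" stands in for a slice like features_image_stacked[0, 10:12].
import math

related_error = 1e-2  # relative tolerance, as set above
reference = [0.0897, -0.1092]
computed = [0.0899, -0.1087]
for ref, got in zip(reference, computed):
    assert math.isclose(got, ref, rel_tol=related_error)
```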
pre_extracted_feature_text_clip = [ 0.018169036135077477, 0.03634127229452133, - 0.025660742074251175, - 0.009149895049631596, - -0.035570453852415085, - 0.033126577734947205, - -0.004808237310498953, - -0.0031453112605959177, - -0.02194291725754738, - 0.024019461125135422, ] pre_extracted_feature_text_clip_vitl14 = [ -0.0055463071912527084, 0.006908962037414312, - -0.019450219348073006, - -0.018097277730703354, - 0.017567576840519905, - -0.03828490898013115, - -0.03781530633568764, - -0.023951737210154533, - 0.01365653332322836, - -0.02341713197529316, ] pre_extracted_feature_text_clip_vitl14_336 = [ -0.008720514364540577, 0.005284308455884457, - -0.021116750314831734, - -0.018112430348992348, - 0.01685470901429653, - -0.03517491742968559, - -0.038612402975559235, - -0.021867064759135246, - 0.01685977540910244, - -0.023832324892282486, ] simularity_blip2 = [ - [0.05826476216316223, -0.03215287625789642], - [0.12869958579540253, 0.005234059877693653], - [0.11073512583971024, 0.12327003479003906], - [0.08743024617433548, 0.05598106235265732], - [0.04591086134314537, 0.48981112241744995], - [0.06297147274017334, 0.4728018641471863], - [0.18486255407333374, 0.635167121887207], - [0.015356295742094517, 0.015282897278666496], - [-0.008485622704029083, 0.010882291942834854], - [-0.04328630864620209, -0.13117870688438416], - [-0.025470387190580368, 0.13175423443317413], - [-0.05090826004743576, 0.05902523919939995], + [0.05826476216316223, -0.02717375010251999], + [0.06297147274017334, 0.47339022159576416], ] sorted_blip2 = [ - [6, 1, 2, 3, 5, 0, 4, 7, 8, 10, 9, 11], - [6, 4, 5, 10, 2, 11, 3, 7, 8, 1, 0, 9], + [1, 0], + [1, 0], ] simularity_blip = [ [0.15640679001808167, 0.752173662185669], - [0.15139800310134888, 0.7804810404777527], - [0.13010388612747192, 0.755257248878479], - [0.13746635615825653, 0.7618774175643921], - [0.1756758838891983, 0.8531903624534607], [0.17233705520629883, 0.8448910117149353], - [0.1970970332622528, 0.8916105628013611], - [0.11693969368934631, 0.5833531618118286], - [0.12386563420295715, 0.5981853604316711], - [0.08427951484918594, 0.4962371587753296], - [0.14193706214427948, 0.7613846659660339], - [0.12051936239004135, 0.6492202281951904], ] sorted_blip = [ - [6, 4, 5, 0, 1, 10, 3, 2, 8, 11, 7, 9], - [6, 4, 5, 1, 3, 10, 2, 0, 11, 8, 7, 9], + [1, 0], + [1, 0], ] simularity_albef = [ [0.12321824580430984, 0.35511350631713867], - [0.09512615948915482, 0.27168408036231995], - [0.09053325653076172, 0.20215675234794617], - [0.06335515528917313, 0.15055638551712036], - [0.09604836255311966, 0.4658776521682739], [0.10870333760976791, 0.5143978595733643], - [0.11748822033405304, 0.6542638540267944], - [0.05688793584704399, 0.22170542180538177], - [0.05597608536481857, 0.11963296681642532], - [0.059643782675266266, 0.14969395101070404], - [0.06690303236246109, 0.3149859607219696], - [0.07909377664327621, 0.11911341547966003], ] sorted_albef = [ - [0, 6, 5, 4, 1, 2, 11, 10, 3, 9, 7, 8], - [6, 5, 4, 0, 10, 1, 7, 2, 3, 9, 8, 11], + [0, 1], + [1, 0], ] simularity_clip = [ [0.23923014104366302, 0.5325412750244141], - [0.20101115107536316, 0.5112978219985962], - [0.17522737383842468, 0.49811851978302], - [0.20062290132045746, 0.5415266156196594], - [0.22865726053714752, 0.5762109756469727], [0.2310466319322586, 0.5910375714302063], - [0.2644523084163666, 0.7851459383964539], - [0.21474510431289673, 0.4135811924934387], - [0.16407863795757294, 0.1474374681711197], - [0.19819433987140656, 0.26493316888809204], - [0.19545596837997437, 0.5007457137107849], - [0.1647854745388031, 
0.45705708861351013], ] sorted_clip = [ - [6, 0, 5, 4, 7, 1, 3, 9, 10, 2, 11, 8], - [6, 5, 4, 3, 0, 1, 10, 2, 11, 7, 9, 8], + [1, 0], + [1, 0], ] simularity_clip_vitl14 = [ [0.1051270067691803, 0.5184808373451233], - [0.09705893695354462, 0.49574509263038635], - [0.11964304000139236, 0.5424358248710632], - [0.13881900906562805, 0.5909714698791504], - [0.12728188931941986, 0.6758255362510681], [0.1277746558189392, 0.6841973662376404], - [0.18026694655418396, 0.803142786026001], - [0.13977059721946716, 0.45957139134407043], - [0.11180847883224487, 0.24822194874286652], - [0.12296056002378464, 0.35143694281578064], - [0.11596094071865082, 0.5704031586647034], - [0.10174489766359329, 0.44422751665115356], ] sorted_clip_vitl14 = [ - [6, 7, 3, 5, 4, 9, 2, 10, 8, 0, 11, 1], - [6, 5, 4, 3, 10, 2, 0, 1, 7, 11, 9, 8], + [1, 0], + [1, 0], ] simularity_clip_vitl14_336 = [ [0.09391091763973236, 0.49337542057037354], - [0.11103834211826324, 0.4881117343902588], - [0.12891019880771637, 0.5501476526260376], - [0.13288410007953644, 0.5498673915863037], - [0.12357455492019653, 0.6749162077903748], [0.13700757920742035, 0.7003108263015747], - [0.1788637489080429, 0.7713702321052551], - [0.13260436058044434, 0.4300197660923004], - [0.11666625738143921, 0.2334875613451004], - [0.1316065937280655, 0.3291645646095276], - [0.12374477833509445, 0.5632147192955017], - [0.10333051532506943, 0.43023794889450073], ] sorted_clip_vitl14_336 = [ - [6, 5, 3, 7, 9, 2, 10, 4, 8, 1, 11, 0], - [6, 5, 4, 10, 2, 3, 0, 1, 11, 7, 9, 8], + [1, 0], + [1, 0], ] +dict_itm_scores_for_blib = { + "blip_base": [ + 0.07107225805521011, + 0.004100032616406679, + ], + "blip_large": [ + 0.07890705019235611, + 0.00271016638725996, + ], + "blip2_coco": [ + 0.0833505243062973, + 0.004216152708977461, + ], +} +dict_image_gradcam_with_itm_for_blip = { + "blip_base": [123.36285799741745, 132.31662154197693, 53.38280035299249], + "blip_large": [119.99512910842896, 128.7044593691826, 55.552959859540515], +} + + +@pytest.mark.long @pytest.mark.parametrize( ( "pre_multimodal_device", @@ -383,19 +205,29 @@ sorted_clip_vitl14_336 = [ "pre_sorted", ), [ - pytest.param( - device("cuda"), - "blip2", - pre_proc_pic_blip2_blip_albef, - pre_proc_text_blip2_blip_albef, - pre_extracted_feature_img_blip2, - pre_extracted_feature_text_blip2, - simularity_blip2, - sorted_blip2, - marks=pytest.mark.skipif( - gpu_is_not_available, reason="gpu_is_not_availible" - ), - ), + # ( + # device("cpu"), + # "blip2", + # pre_proc_pic_blip2_blip_albef, + # pre_proc_text_blip2_blip_albef, + # pre_extracted_feature_img_blip2, + # pre_extracted_feature_text_blip2, + # simularity_blip2, + # sorted_blip2, + # ), + # pytest.param( + # device("cuda"), + # "blip2", + # pre_proc_pic_blip2_blip_albef, + # pre_proc_text_blip2_blip_albef, + # pre_extracted_feature_img_blip2, + # pre_extracted_feature_text_blip2, + # simularity_blip2, + # sorted_blip2, + # marks=pytest.mark.skipif( + # gpu_is_not_available, reason="gpu_is_not_availible" + # ), + # ), ( device("cpu"), "blip", @@ -530,11 +362,11 @@ def test_parsing_images( vis_processor, txt_processor, image_keys, - image_names, + _, features_image_stacked, ) = ms.MultimodalSearch.parsing_images(testdict, pre_model) - for i, num in zip(range(10), features_image_stacked[0, 10:20].tolist()): + for i, num in zip(range(10), features_image_stacked[0, 10:12].tolist()): assert ( math.isclose(num, pre_extracted_feature_img[i], rel_tol=related_error) is True @@ -549,7 +381,7 @@ def test_parsing_images( ) processed_text = 
txt_processor["eval"](test_querry) - for i, num in zip(range(10), processed_pic[0, 0, 0, 25:35].tolist()): + for i, num in zip(range(10), processed_pic[0, 0, 0, 25:27].tolist()): assert math.isclose(num, pre_proc_pic[i], rel_tol=related_error) is True assert processed_text == pre_proc_text @@ -562,13 +394,13 @@ def test_parsing_images( testdict, search_query, model, txt_processor, vis_processor, pre_model ) - for i, num in zip(range(10), multi_features_stacked[0, 10:20].tolist()): + for i, num in zip(range(10), multi_features_stacked[0, 10:12].tolist()): assert ( math.isclose(num, pre_extracted_feature_text[i], rel_tol=related_error) is True ) - for i, num in zip(range(10), multi_features_stacked[1, 10:20].tolist()): + for i, num in zip(range(10), multi_features_stacked[1, 10:12].tolist()): assert ( math.isclose(num, pre_extracted_feature_img[i], rel_tol=related_error) is True @@ -590,16 +422,24 @@ def test_parsing_images( search_query2, ) - for i, num in zip(range(12), similarity.tolist()): + for i, num in zip(range(len(pre_simularity)), similarity.tolist()): for j, num2 in zip(range(len(num)), num): assert ( math.isclose(num2, pre_simularity[i][j], rel_tol=100 * related_error) is True ) - for i, num in zip(range(2), sorted_list): + for i, num in zip(range(len(pre_sorted)), sorted_list): for j, num2 in zip(range(2), num): assert num2 == pre_sorted[i][j] - del model, vis_processor, txt_processor + del ( + model, + vis_processor, + txt_processor, + similarity, + features_image_stacked, + processed_pic, + multi_features_stacked, + ) cuda.empty_cache() diff --git a/misinformation/test/test_objects.py b/misinformation/test/test_objects.py index 21029ac..32bd161 100644 --- a/misinformation/test/test_objects.py +++ b/misinformation/test/test_objects.py @@ -31,6 +31,7 @@ def test_analyse_image_cvlib(get_path): with open(get_path + JSON_1, "r") as file: out_dict = json.load(file) + out_dict["filename"] = get_path + out_dict["filename"] for key in mydict.keys(): assert mydict[key] == out_dict[key] @@ -56,10 +57,11 @@ def test_init_default_objects(): def test_analyse_image_from_file_cvlib(get_path): file_path = get_path + TEST_IMAGE_1 - objs = ob_cvlib.ObjectCVLib().analyse_image_from_file(get_path + file_path) + objs = ob_cvlib.ObjectCVLib().analyse_image_from_file(file_path) with open(get_path + JSON_1, "r") as file: out_dict = json.load(file) + out_dict["filename"] = get_path + out_dict["filename"] for key in objs.keys(): assert objs[key] == out_dict[key] @@ -86,5 +88,6 @@ def test_analyse_image(get_path): ob.ObjectDetector(mydict).analyse_image() with open(get_path + JSON_1, "r") as file: out_dict = json.load(file) + out_dict["filename"] = get_path + out_dict["filename"] assert str(mydict) == str(out_dict) diff --git a/misinformation/test/test_summary.py b/misinformation/test/test_summary.py index b92ce59..c7ebf5c 100644 --- a/misinformation/test/test_summary.py +++ b/misinformation/test/test_summary.py @@ -1,166 +1,98 @@ import os +import pytest from torch import device, cuda from lavis.models import load_model_and_preprocess import misinformation.summary as sm -images = [ - "./test/data/d755771b-225e-432f-802e-fb8dc850fff7.png", - "./test/data/IMG_2746.png", - "./test/data/IMG_2750.png", - "./test/data/IMG_2805.png", - "./test/data/IMG_2806.png", - "./test/data/IMG_2807.png", - "./test/data/IMG_2808.png", - "./test/data/IMG_2809.png", - "./test/data/IMG_3755.jpg", - "./test/data/IMG_3756.jpg", - "./test/data/IMG_3757.jpg", - "./test/data/pic1.png", -] + +IMAGES = 
["d755771b-225e-432f-802e-fb8dc850fff7.png", "IMG_2746.png"] + +SUMMARY_DEVICE = device("cuda" if cuda.is_available() else "cpu") + +TEST_KWARGS = { + "run1": { + "name": "blip_caption", + "model_type": "base_coco", + "is_eval": True, + "device": SUMMARY_DEVICE, + }, + "run2": { + "name": "blip_caption", + "model_type": "base_coco", + "is_eval": True, + "device": SUMMARY_DEVICE, + }, + "run3": { + "name": "blip_caption", + "model_type": "large_coco", + "is_eval": True, + "device": SUMMARY_DEVICE, + }, +} -def test_analyse_image(): +@pytest.fixture +def get_dict(get_path): mydict = {} - for img_path in images: - id_ = os.path.splitext(os.path.basename(img_path))[0] - mydict[id_] = {"filename": img_path} + for img in IMAGES: + id_ = os.path.splitext(os.path.basename(img))[0] + mydict[id_] = {"filename": get_path + img} + return mydict - for key in mydict: - mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image() - keys = list(mydict.keys()) - assert len(mydict) == 12 - for key in keys: - assert len(mydict[key]["3_non-deterministic summary"]) == 3 - const_image_summary_list = [ - "a river running through a city next to tall buildings", - "a crowd of people standing on top of a tennis court", - "a crowd of people standing on top of a field", - "a room with a desk and a chair", - "a table with plastic containers on top of it", - "a view of a city with mountains in the background", - "a view of a city street from a window", - "a busy city street with cars and pedestrians", - "a close up of an open book with writing on it", - "a book that is open on a table", - "a yellow book with green lettering on it", - "a person running on a beach near a rock formation", - ] - - for i in range(len(const_image_summary_list)): - assert mydict[keys[i]]["const_image_summary"] == const_image_summary_list[i] - - del sm.SummaryDetector.summary_model, sm.SummaryDetector.summary_vis_processors - cuda.empty_cache() - - summary_device = device("cuda" if cuda.is_available() else "cpu") - summary_model, summary_vis_processors, _ = load_model_and_preprocess( - name="blip_caption", - model_type="base_coco", - is_eval=True, - device=summary_device, - ) - - for key in mydict: - mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image( - summary_model, summary_vis_processors +@pytest.mark.long +def test_analyse_image(get_dict): + reference_results = { + "run1": { + "d755771b-225e-432f-802e-fb8dc850fff7": "a river running through a city next to tall buildings", + "IMG_2746": "a crowd of people standing on top of a tennis court", + }, + "run2": { + "d755771b-225e-432f-802e-fb8dc850fff7": "a river running through a city next to tall buildings", + "IMG_2746": "a crowd of people standing on top of a tennis court", + }, + "run3": { + "d755771b-225e-432f-802e-fb8dc850fff7": "a river running through a town next to tall buildings", + "IMG_2746": "a crowd of people standing on top of a track", + }, + } + # test three different models + for test_run in TEST_KWARGS.keys(): + summary_model, summary_vis_processors, _ = load_model_and_preprocess( + **TEST_KWARGS[test_run] ) - keys = list(mydict.keys()) - - assert len(mydict) == 12 - for key in keys: - assert len(mydict[key]["3_non-deterministic summary"]) == 3 - - const_image_summary_list2 = [ - "a river running through a city next to tall buildings", - "a crowd of people standing on top of a tennis court", - "a crowd of people standing on top of a field", - "a room with a desk and a chair", - "a table with plastic containers on top of it", - "a view of a city with mountains in the 
background", - "a view of a city street from a window", - "a busy city street with cars and pedestrians", - "a close up of an open book with writing on it", - "a book that is open on a table", - "a yellow book with green lettering on it", - "a person running on a beach near a rock formation", - ] - - for i in range(len(const_image_summary_list2)): - assert mydict[keys[i]]["const_image_summary"] == const_image_summary_list2[i] - - del summary_model, summary_vis_processors - cuda.empty_cache() - - summary_model, summary_vis_processors, _ = load_model_and_preprocess( - name="blip_caption", - model_type="large_coco", - is_eval=True, - device=summary_device, - ) - - for key in mydict: - mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image( - summary_model, summary_vis_processors - ) - keys = list(mydict.keys()) - assert len(mydict) == 12 - for key in keys: - assert len(mydict[key]["3_non-deterministic summary"]) == 3 - - const_image_summary_list3 = [ - "a river running through a town next to tall buildings", - "a crowd of people standing on top of a track", - "a group of people standing on top of a track", - "a desk and chair in a small room", - "a table that has some chairs on top of it", - "a view of a city from a window of a building", - "a view of a city from a window", - "a city street filled with lots of traffic", - "an open book with german text on it", - "a close up of a book on a table", - "a book with a green cover on a table", - "a person running on a beach near the ocean", - ] - - for i in range(len(const_image_summary_list2)): - assert mydict[keys[i]]["const_image_summary"] == const_image_summary_list3[i] + # run two different images + for key in get_dict.keys(): + get_dict[key] = sm.SummaryDetector(get_dict[key]).analyse_image( + summary_model, summary_vis_processors + ) + assert len(get_dict) == 2 + for key in get_dict.keys(): + assert len(get_dict[key]["3_non-deterministic summary"]) == 3 + assert ( + get_dict[key]["const_image_summary"] == reference_results[test_run][key] + ) + cuda.empty_cache() + summary_model = None + summary_vis_processors = None -def test_analyse_questions(): - mydict = {} - for img_path in images: - id_ = os.path.splitext(os.path.basename(img_path))[0] - mydict[id_] = {"filename": img_path} - +def test_analyse_questions(get_dict): list_of_questions = [ "How many persons on the picture?", "What happends on the picture?", ] - for key in mydict: - mydict[key] = sm.SummaryDetector(mydict[key]).analyse_questions( + for key in get_dict: + get_dict[key] = sm.SummaryDetector(get_dict[key]).analyse_questions( list_of_questions ) - - keys = list(mydict.keys()) - assert len(mydict) == 12 - - list_of_questions_ans = [2, 100, "many", 0, 0, "none", "two", 5, 0, 0, 0, 1] - - list_of_questions_ans2 = [ - "flood", - "festival", - "people are flying kites", - "no one's home", - "chair is being moved", - "traffic jam", - "day time", - "traffic jam", - "nothing", - "nothing", - "nothing", - "running", - ] - - for i in range(len(list_of_questions_ans)): - assert mydict[keys[i]][list_of_questions[1]] == str(list_of_questions_ans2[i]) + assert len(get_dict) == 2 + list_of_questions_ans = ["2", "100"] + list_of_questions_ans2 = ["flood", "festival"] + test_answers = [] + test_answers2 = [] + for key in get_dict.keys(): + test_answers.append(get_dict[key][list_of_questions[0]]) + test_answers2.append(get_dict[key][list_of_questions[1]]) + assert sorted(test_answers) == sorted(list_of_questions_ans) + assert sorted(test_answers2) == sorted(list_of_questions_ans2) diff --git 
a/misinformation/test/test_text.py b/misinformation/test/test_text.py index b3c8675..8543504 100644 --- a/misinformation/test/test_text.py +++ b/misinformation/test/test_text.py @@ -20,7 +20,7 @@ def set_testdict(get_path): return testdict -LANGUAGES = ["de", "en", "en"] +LANGUAGES = ["de", "om", "en"] def test_TextDetector(set_testdict): diff --git a/misinformation/utils.py b/misinformation/utils.py index 1862b1d..36c7690 100644 --- a/misinformation/utils.py +++ b/misinformation/utils.py @@ -2,8 +2,6 @@ import glob import os from pandas import DataFrame import pooch -from torch import device, cuda -from lavis.models import load_model_and_preprocess class DownloadResource: @@ -108,34 +106,3 @@ if __name__ == "__main__": outdict = append_data_to_dict(mydict) df = dump_df(outdict) print(df.head(10)) - - -def load_model_base(): - summary_device = device("cuda" if cuda.is_available() else "cpu") - summary_model, summary_vis_processors, _ = load_model_and_preprocess( - name="blip_caption", - model_type="base_coco", - is_eval=True, - device=summary_device, - ) - return summary_model, summary_vis_processors - - -def load_model_large(): - summary_device = device("cuda" if cuda.is_available() else "cpu") - summary_model, summary_vis_processors, _ = load_model_and_preprocess( - name="blip_caption", - model_type="large_coco", - is_eval=True, - device=summary_device, - ) - return summary_model, summary_vis_processors - - -def load_model(model_type): - select_model = { - "base": load_model_base, - "large": load_model_large, - } - summary_model, summary_vis_processors = select_model[model_type]() - return summary_model, summary_vis_processors diff --git a/notebooks/image_summary.ipynb b/notebooks/image_summary.ipynb index d38bceb..04d53cc 100644 --- a/notebooks/image_summary.ipynb +++ b/notebooks/image_summary.ipynb @@ -17,7 +17,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import misinformation\n", @@ -36,7 +38,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "images = mutils.find_files(\n", @@ -48,7 +52,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "mydict = mutils.initialize_dict(images[0:10])" @@ -57,7 +63,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "mydict" @@ -80,22 +88,27 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "summary_model, summary_vis_processors = sm.SummaryDetector.load_model(mydict, \"base\")\n", + "obj = sm.SummaryDetector(mydict)\n", + "summary_model, summary_vis_processors = obj.load_model(model_type=\"base\")\n", "# summary_model, summary_vis_processors = mutils.load_model(\"large\")" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "for key in mydict:\n", " mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(\n", - " summary_model, summary_vis_processors\n", + " summary_model=summary_model, summary_vis_processors=summary_vis_processors\n", " )" ] }, @@ -130,7 +143,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "df.head(10)" @@ -168,7 +183,9 @@ { "cell_type": "code", "execution_count": 
null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "mdisplay.explore_analysis(mydict, identify=\"summary\")" @@ -279,7 +296,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.0" + "version": "3.9.16" }, "vscode": { "interpreter": { diff --git a/pyproject.toml b/pyproject.toml index 440ee43..fb15c17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,12 +24,12 @@ classifiers = [ dependencies = [ "bertopic", "cvlib", - "deepface @ git+https://github.com/iulusoy/deepface.git", + "deepface<=0.0.75", "googletrans==3.1.0a0", "grpcio", "importlib_metadata", "ipython", - "ipywidgets", + "ipywidgets<8.0.5", "ipykernel", "matplotlib", "numpy<=1.23.4", @@ -39,9 +39,10 @@ dependencies = [ "protobuf", "pytest", "pytest-cov", + "pytest-xdist", "requests", - "retina_face @ git+https://github.com/iulusoy/retinaface.git", - "salesforce-lavis @ git+https://github.com/iulusoy/LAVIS.git", + "retina_face", + "salesforce-lavis", "spacy", "spacytextblob", "tensorflow",
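The dependency block now pins deepface<=0.0.75 and ipywidgets<8.0.5 and swaps the git forks for PyPI releases; pytest-xdist is also added, which allows parallel test runs via pytest -n auto. A quick, hedged way to confirm that an environment honours the new upper bounds (the third-party packaging library is assumed to be installed):

```python
# Sanity-check installed versions against the pyproject.toml pins.
from importlib.metadata import version

from packaging.version import Version

pins = {"deepface": "0.0.75", "numpy": "1.23.4"}  # inclusive upper bounds
for pkg, bound in pins.items():
    assert Version(version(pkg)) <= Version(bound), f"{pkg} exceeds its pin"
```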