Mirror of https://github.com/ssciwr/AMMICO.git
synced 2025-10-29 21:16:06 +02:00
merge main to add_itm
This commit is contained in:
commit 46d5d43f0b
3  .github/workflows/ci.yml  vendored
@@ -14,7 +14,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-22.04,windows-latest]
+        os: [ubuntu-22.04]
         python-version: [3.9]
     steps:
       - name: Checkout repository
@@ -39,3 +39,4 @@ jobs:
       with:
         fail_ci_if_error: true
         files: misinformation/coverage.xml
+        verbose: true
2  .github/workflows/docs.yml  vendored
@@ -3,8 +3,6 @@ name: Pages
 on:
   push:
     branches: [ main ]
-  pull_request:
-    branches: [ main ]
   workflow_dispatch:
 
 jobs:
@@ -20,7 +20,6 @@
 "metadata": {},
 "outputs": [],
 "source": [
 "import misinformation\n",
 "from misinformation import utils as mutils\n",
 "from misinformation import display as mdisplay\n",
 "import misinformation.summary as sm"
@@ -74,7 +73,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"summary_model, summary_vis_processors = mutils.load_model(\"base\")\n",
+"obj = sm.SummaryDetector(mydict)\n",
+"summary_model, summary_vis_processors = obj.load_model(\"base\")\n",
 "# summary_model, summary_vis_processors = mutils.load_model(\"large\")"
 ]
 },
@@ -96,7 +96,7 @@
 "tags": []
 },
 "source": [
-"Convert the dictionary of dictionarys into a dictionary with lists:"
+"Convert the dictionary of dictionaries into a dictionary with lists:"
 ]
 },
 {
@@ -256,7 +256,7 @@
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "Python 3 (ipykernel)",
+"display_name": "Python 3",
 "language": "python",
 "name": "python3"
 },
misinformation/faces.py

@@ -141,7 +141,7 @@ class EmotionDetector(utils.AnalysisMethod):
                 DeepFace.analyze(
                     img_path=face,
                     actions=actions,
-                    silent=True,
+                    prog_bar=False,
                     detector_backend="skip",
                 )
             )
@@ -192,6 +192,10 @@ class EmotionDetector(utils.AnalysisMethod):
                 "Yes" if result[person]["wears_mask"] else "No"
             )
             self.subdict["age"].append(result[person]["age"])
+            # gender is now reported as a list of dictionaries
+            # each dict represents one face
+            # each dict contains probability for Woman and Man
+            # take only the higher prob result for each dict
             self.subdict["gender"].append(result[person]["gender"])
             # race, emotion only detected if person does not wear mask
             if result[person]["wears_mask"]:
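The new comments describe DeepFace's changed gender output: per face, a dict of label probabilities rather than a single string. A minimal sketch of the reduction the comments describe, taking the higher-probability label from one such dict (illustrative only, not the code AMMICO ships here):

    # gender_probs as DeepFace reports it for one face,
    # e.g. {"Woman": 3.2, "Man": 96.8}; pick the more probable label
    def dominant_gender(gender_probs: dict) -> str:
        return max(gender_probs, key=gender_probs.get)

    assert dominant_gender({"Woman": 3.2, "Man": 96.8}) == "Man"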
misinformation/multimodal_search.py

@@ -181,7 +181,7 @@ class MultimodalSearch(AnalysisMethod):
             "Please, use one of the following models: blip2, blip, albef, clip_base, clip_vitl14, clip_vitl14_336"
         )
 
-        raw_images, images_tensors = MultimodalSearch.read_and_process_images(
+        _, images_tensors = MultimodalSearch.read_and_process_images(
             self, image_names, vis_processors
         )
         if path_to_saved_tensors is None:
@@ -220,7 +220,7 @@ class MultimodalSearch(AnalysisMethod):
         for query in search_query:
             if not (len(query) == 1) and (query in ("image", "text_input")):
                 raise SyntaxError(
-                    'Each querry must contain either an "image" or a "text_input"'
+                    'Each query must contain either an "image" or a "text_input"'
                 )
         multi_sample = []
         for query in search_query:
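The corrected message spells out the contract: each entry of search_query is a dict with exactly one key, either "image" or "text_input". A well-formed query list would look like this (the shape is inferred from the error message above; the path is one of this repository's test images):

    search_query = [
        {"text_input": "a crowd of people on a tennis court"},
        {"image": "./test/data/IMG_2746.png"},
    ]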
misinformation/summary.py

@@ -7,36 +7,28 @@ from lavis.models import load_model_and_preprocess
 class SummaryDetector(AnalysisMethod):
     def __init__(self, subdict: dict) -> None:
         super().__init__(subdict)
-
-        summary_device = device("cuda" if cuda.is_available() else "cpu")
-        summary_model, summary_vis_processors, _ = load_model_and_preprocess(
-            name="blip_caption",
-            model_type="base_coco",
-            is_eval=True,
-            device=summary_device,
-        )
+        self.summary_device = device("cuda" if cuda.is_available() else "cpu")
 
     def load_model_base(self):
-        summary_device = device("cuda" if cuda.is_available() else "cpu")
         summary_model, summary_vis_processors, _ = load_model_and_preprocess(
             name="blip_caption",
             model_type="base_coco",
             is_eval=True,
-            device=summary_device,
+            device=self.summary_device,
         )
         return summary_model, summary_vis_processors
 
     def load_model_large(self):
-        summary_device = device("cuda" if cuda.is_available() else "cpu")
         summary_model, summary_vis_processors, _ = load_model_and_preprocess(
             name="blip_caption",
             model_type="large_coco",
             is_eval=True,
-            device=summary_device,
+            device=self.summary_device,
         )
         return summary_model, summary_vis_processors
 
     def load_model(self, model_type):
         # self.summary_device = device("cuda" if cuda.is_available() else "cpu")
         select_model = {
             "base": SummaryDetector.load_model_base,
             "large": SummaryDetector.load_model_large,
@@ -47,8 +39,7 @@ class SummaryDetector(AnalysisMethod):
     def analyse_image(self, summary_model=None, summary_vis_processors=None):
 
         if summary_model is None and summary_vis_processors is None:
-            summary_model = SummaryDetector.summary_model
-            summary_vis_processors = SummaryDetector.summary_vis_processors
+            summary_model, summary_vis_processors = self.load_model_base()
 
         path = self.subdict["filename"]
         raw_image = Image.open(path).convert("RGB")
@@ -66,32 +57,33 @@ class SummaryDetector(AnalysisMethod):
         )
         return self.subdict
 
+    def analyse_questions(self, list_of_questions):
         (
             summary_VQA_model,
             summary_VQA_vis_processors,
             summary_VQA_txt_processors,
         ) = load_model_and_preprocess(
-            name="blip_vqa", model_type="vqav2", is_eval=True, device=summary_device
+            name="blip_vqa",
+            model_type="vqav2",
+            is_eval=True,
+            device=self.summary_device,
         )
-
-    def analyse_questions(self, list_of_questions):
-
         if len(list_of_questions) > 0:
             path = self.subdict["filename"]
             raw_image = Image.open(path).convert("RGB")
             image = (
-                self.summary_VQA_vis_processors["eval"](raw_image)
+                summary_VQA_vis_processors["eval"](raw_image)
                 .unsqueeze(0)
                 .to(self.summary_device)
             )
             question_batch = []
             for quest in list_of_questions:
-                question_batch.append(self.summary_VQA_txt_processors["eval"](quest))
+                question_batch.append(summary_VQA_txt_processors["eval"](quest))
             batch_size = len(list_of_questions)
             image_batch = image.repeat(batch_size, 1, 1, 1)
 
             with no_grad():
-                answers_batch = self.summary_VQA_model.predict_answers(
+                answers_batch = summary_VQA_model.predict_answers(
                     samples={"image": image_batch, "text_input": question_batch},
                     inference_method="generate",
                 )
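Taken together, these hunks move SummaryDetector from eager, class-level model loading to lazy loading: __init__ only picks the device, load_model_base/load_model_large build the (model, vis_processors) pair on request, and analyse_image falls back to load_model_base when no pair is passed in. A sketch of the resulting call pattern, mirroring the notebook changes further down:

    import misinformation.summary as sm

    mydict = {"filename": "./test/data/IMG_2746.png"}
    obj = sm.SummaryDetector(mydict)

    # load once, then reuse the pair across many images ("base" or "large")
    summary_model, summary_vis_processors = obj.load_model("base")
    mydict = obj.analyse_image(summary_model, summary_vis_processors)

    # or let analyse_image load the base model on demand
    mydict = sm.SummaryDetector(mydict).analyse_image()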
misinformation/test/data/example_analysis_objects.json

@@ -1 +1 @@
-{"IMG_2746": {"filename": "./test/data/IMG_2809.png", "person": "yes", "bicycle": "no", "car": "yes", "motorcycle": "no", "airplane": "no", "bus": "yes", "train": "no", "truck": "no", "boat": "no", "traffic light": "no", "cell phone": "no"}}
+{"person": "yes", "bicycle": "no", "car": "yes", "motorcycle": "no", "airplane": "no", "bus": "yes", "train": "no", "truck": "no", "boat": "no", "traffic light": "no", "cell phone": "no"}
misinformation/test/data/example_faces.json

@@ -1,5 +1,4 @@
 {
-    "filename": "./test/data/IMG_2746.png",
     "face": "Yes",
     "multiple_faces": "Yes",
     "no_faces": 11,
16  misinformation/test/data/example_summary.txt  normal file
@@ -0,0 +1,16 @@
+I’m sorry, but I don’t want to be an emperor. That’s not my business. I don’t want to rule or conquer anyone. I should like to help everyone - if possible - Jew, Gentile - black man - white. We all want to help one another. Human beings are like that. We want to live by each other’s happiness - not by each other’s misery. We don’t want to hate and despise one another. In this world there is room for everyone. And the good earth is rich and can provide for everyone. The way of life can be free and beautiful, but we have lost the way.
+
+Greed has poisoned men’s souls, has barricaded the world with hate, has goose-stepped us into misery and bloodshed. We have developed speed, but we have shut ourselves in. Machinery that gives abundance has left us in want. Our knowledge has made us cynical. Our cleverness, hard and unkind. We think too much and feel too little. More than machinery we need humanity. More than cleverness we need kindness and gentleness. Without these qualities, life will be violent and all will be lost…
+
+The aeroplane and the radio have brought us closer together. The very nature of these inventions cries out for the goodness in men - cries out for universal brotherhood - for the unity of us all. Even now my voice is reaching millions throughout the world - millions of despairing men, women, and little children - victims of a system that makes men torture and imprison innocent people.
+
+
+To those who can hear me, I say - do not despair. The misery that is now upon us is but the passing of greed - the bitterness of men who fear the way of human progress. The hate of men will pass, and dictators die, and the power they took from the people will return to the people. And so long as men die, liberty will never perish…
+
+Soldiers! don’t give yourselves to brutes - men who despise you - enslave you - who regiment your lives - tell you what to do - what to think and what to feel! Who drill you - diet you - treat you like cattle, use you as cannon fodder. Don’t give yourselves to these unnatural men - machine men with machine minds and machine hearts! You are not machines! You are not cattle! You are men! You have the love of humanity in your hearts! You don’t hate! Only the unloved hate - the unloved and the unnatural! Soldiers! Don’t fight for slavery! Fight for liberty!
+
+In the 17th Chapter of St Luke it is written: “the Kingdom of God is within man” - not one man nor a group of men, but in all men! In you! You, the people have the power - the power to create machines. The power to create happiness! You, the people, have the power to make this life free and beautiful, to make this life a wonderful adventure.
+
+Then - in the name of democracy - let us use that power - let us all unite. Let us fight for a new world - a decent world that will give men a chance to work - that will give youth a future and old age a security. By the promise of these things, brutes have risen to power. But they lie! They do not fulfil that promise. They never will!
+
+Dictators free themselves but they enslave the people! Now let us fight to fulfil that promise! Let us fight to free the world - to do away with national barriers - to do away with greed, with hate and intolerance. Let us fight for a world of reason, a world where science and progress will lead to all men’s happiness. Soldiers! in the name of democracy, let us all unite!
@@ -3,10 +3,10 @@ The Quantum Theory of
 Nonrelativistic Collisions
 JOHN R. TAYLOR
 University of Colorado
-postaldia Lanbidean
+ostaliga Lanbidean
 1 ilde
 ballenger stor goin
-gd OOL, STVÍ 23 TL 02
+gdĐOL, SIVI 23 TL 02
 de in obl
 och yd badalang
 a
@@ -3,12 +3,12 @@ The Quantum Theory of
 Nonrelativistic Collisions
 JOHN R. TAYLOR
 University of Colorado
-postaldia Lanbidean
+ostaliga Lanbidean
 1 ilde
-ballenger stor goin
-gd OOL, STVÍ 23 TL 02
-de in obl
-och yd badalang
+balloons big goin
+gdĐOL, SIVI 23 TL
+there in obl
+och yd change
 a
 Ber
-ook Sy-RW enot go baldus
+ook Sy-RW isn't going anywhere
pytest.ini

@@ -1,3 +1,4 @@
 [pytest]
 markers =
     gcv: mark google cloud vision tests - skip to save money.
+    long: mark long running tests - skip to save compute resources.
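With the new long marker registered next to gcv, the expensive runs (such as the reworked summary tests below) can be deselected by marker expression, e.g. pytest -m "not long and not gcv" on the command line, or programmatically:

    import pytest

    # deselect tests marked gcv or long; equivalent to
    # running: pytest -m "not long and not gcv"
    pytest.main(["-m", "not long and not gcv"])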
misinformation/test/test_cropposts.py

@@ -2,20 +2,20 @@ import misinformation.cropposts as crpo
 import numpy as np
 from PIL import Image
 
-TEST_IMAGE_1 = "./test/data/pic1.png"
-TEST_IMAGE_2 = "./test/data/pic2.png"
+TEST_IMAGE_1 = "pic1.png"
+TEST_IMAGE_2 = "pic2.png"
 
 
-def test_matching_points():
-    ref_view = np.array(Image.open(TEST_IMAGE_2))
-    view = np.array(Image.open(TEST_IMAGE_1))
-    filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view)
+def test_matching_points(get_path):
+    ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
+    view = np.array(Image.open(get_path + TEST_IMAGE_1))
+    filtered_matches, _, _ = crpo.matching_points(ref_view, view)
     assert len(filtered_matches) > 0
 
 
-def test_kp_from_matches():
-    ref_view = np.array(Image.open(TEST_IMAGE_2))
-    view = np.array(Image.open(TEST_IMAGE_1))
+def test_kp_from_matches(get_path):
+    ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
+    view = np.array(Image.open(get_path + TEST_IMAGE_1))
     filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view)
     kp1, kp2 = crpo.kp_from_matches(filtered_matches, kp1, kp2)

@@ -25,9 +25,9 @@ def test_kp_from_matches():
     assert kp2.shape[1] == 2
 
 
-def test_compute_crop_corner():
-    ref_view = np.array(Image.open(TEST_IMAGE_2))
-    view = np.array(Image.open(TEST_IMAGE_1))
+def test_compute_crop_corner(get_path):
+    ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
+    view = np.array(Image.open(get_path + TEST_IMAGE_1))
     filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view)
     corner = crpo.compute_crop_corner(filtered_matches, kp1, kp2)
     print(view.shape)

@@ -38,9 +38,9 @@ def test_compute_crop_corner():
     assert 0 <= h < view.shape[0]
 
 
-def test_crop_posts_image():
-    ref_view = np.array(Image.open(TEST_IMAGE_2))
-    view = np.array(Image.open(TEST_IMAGE_1))
+def test_crop_posts_image(get_path):
+    ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
+    view = np.array(Image.open(get_path + TEST_IMAGE_1))
     rte = crpo.crop_posts_image(ref_view, view)
     assert rte is not None
     crop_view, match_num = rte

@@ -48,16 +48,15 @@ def test_crop_posts_image():
     assert crop_view.shape[0] * crop_view.shape[1] <= view.shape[0] * view.shape[1]
 
 
-def test_crop_posts_from_refs():
-    ref_view = np.array(Image.open(TEST_IMAGE_2))
-    view = np.array(Image.open(TEST_IMAGE_1))
+def test_crop_posts_from_refs(get_path):
+    ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
+    view = np.array(Image.open(get_path + TEST_IMAGE_1))
     ref_views = [ref_view]
     crop_view = crpo.crop_posts_from_refs(ref_views, view)
     assert crop_view.shape[0] * crop_view.shape[1] <= view.shape[0] * view.shape[1]
 
 
-def test_get_file_list():
+def test_get_file_list(get_path):
     ref_list = []
-    ref_dir = "./test/data"
-    ref_list = crpo.get_file_list(ref_dir, ref_list, ext="png")
+    ref_list = crpo.get_file_list(get_path, ref_list, ext="png")
     assert len(ref_list) > 0
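These tests now take a get_path fixture and prepend it to bare file names; the fixture itself is not part of this diff and presumably lives in a shared conftest.py. A hypothetical sketch of what such a fixture could look like (names and layout assumed, not taken from the repository):

    # hypothetical conftest.py; returns the test-data directory with a
    # trailing separator so that get_path + "pic1.png" resolves
    import os

    import pytest


    @pytest.fixture
    def get_path(request):
        return os.path.join(os.path.dirname(request.module.__file__), "data") + os.sep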
misinformation/test/test_display.py

@@ -1,27 +1,28 @@
 import json
 
+# import misinformation.display as misinf_display
 import pytest
 
-import misinformation.display as misinf_display
+misinf_display = pytest.importorskip("misinformation.display")
 
 
-def test_explore_analysis_faces():
-    mydict = {"IMG_2746": {"filename": "./test/data/IMG_2746.png"}}
-    misinf_display.explore_analysis(mydict, identify="faces")
-    with open("./test/data/example_faces.json", "r") as file:
+def test_explore_analysis_faces(get_path):
+    mydict = {"IMG_2746": {"filename": get_path + "IMG_2746.png"}}
+    temp = misinf_display.explore_analysis(mydict, identify="faces")  # noqa
+    temp = None  # noqa
+    with open(get_path + "example_faces.json", "r") as file:
         outs = json.load(file)
 
     mydict["IMG_2746"].pop("filename", None)
     for im_key in mydict.keys():
         sub_dict = mydict[im_key]
         for key in sub_dict.keys():
             assert sub_dict[key] == outs[key]
 
 
-def test_explore_analysis_objects():
-    mydict = {"IMG_2746": {"filename": "./test/data/IMG_2809.png"}}
-    misinf_display.explore_analysis(mydict, identify="objects")
-    with open("./test/data/example_analysis_objects.json", "r") as file:
+def test_explore_analysis_objects(get_path):
+    mydict = {"IMG_2809": {"filename": get_path + "IMG_2809.png"}}
+    temp = misinf_display.explore_analysis(mydict, identify="objects")  # noqa
+    temp = None  # noqa
+    with open(get_path + "example_analysis_objects.json", "r") as file:
         outs = json.load(file)
 
-    assert str(mydict) == str(outs)
+    mydict["IMG_2809"].pop("filename", None)
     for im_key in mydict.keys():
         sub_dict = mydict[im_key]
         for key in sub_dict.keys():
             assert sub_dict[key] == outs[key]
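pytest.importorskip returns the imported module, so binding the result to misinf_display keeps the old call sites working while skipping the whole test module when the import fails (for instance, when display's optional dependencies are missing). A minimal self-contained illustration of the pattern:

    import pytest

    # skips the whole module unless numpy imports cleanly; otherwise
    # this behaves like `import numpy as np`
    np = pytest.importorskip("numpy")


    def test_uses_numpy():
        assert np.array([1, 2]).sum() == 3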
misinformation/test/test_faces.py

@@ -1,16 +1,17 @@
 import misinformation.faces as fc
 import json
 from pytest import approx
+import pytest
 
 
-def test_analyse_faces():
+def test_analyse_faces(get_path):
     mydict = {
-        "filename": "./test/data/IMG_2746.png",
+        "filename": get_path + "IMG_2746.png",
     }
-    mydict = fc.EmotionDetector(mydict).analyse_image()
+    mydict.update(fc.EmotionDetector(mydict).analyse_image())
 
-    with open("./test/data/example_faces.json", "r") as file:
+    with open(get_path + "example_faces.json", "r") as file:
         out_dict = json.load(file)
 
     # delete the filename key
     mydict.pop("filename", None)
     for key in mydict.keys():
         assert mydict[key] == out_dict[key]
misinformation/test/test_multimodal_search.py

@@ -5,6 +5,7 @@ import numpy
 from torch import device, cuda
 import misinformation.multimodal_search as ms
 
+
 testdict = {
     "IMG_2746": {"filename": "./test/data/IMG_2746.png"},
     "IMG_2809": {"filename": "./test/data/IMG_2809.png"},

@@ -191,6 +192,7 @@ dict_image_gradcam_with_itm_for_blip = {
 }
 
 
+@pytest.mark.long
 @pytest.mark.parametrize(
     (
         "pre_multimodal_device",

@@ -203,29 +205,29 @@ dict_image_gradcam_with_itm_for_blip = {
         "pre_sorted",
     ),
     [
-        (
-            device("cpu"),
-            "blip2",
-            pre_proc_pic_blip2_blip_albef,
-            pre_proc_text_blip2_blip_albef,
-            pre_extracted_feature_img_blip2,
-            pre_extracted_feature_text_blip2,
-            simularity_blip2,
-            sorted_blip2,
-        ),
-        pytest.param(
-            device("cuda"),
-            "blip2",
-            pre_proc_pic_blip2_blip_albef,
-            pre_proc_text_blip2_blip_albef,
-            pre_extracted_feature_img_blip2,
-            pre_extracted_feature_text_blip2,
-            simularity_blip2,
-            sorted_blip2,
-            marks=pytest.mark.skipif(
-                gpu_is_not_available, reason="gpu_is_not_availible"
-            ),
-        ),
+        # (
+        #     device("cpu"),
+        #     "blip2",
+        #     pre_proc_pic_blip2_blip_albef,
+        #     pre_proc_text_blip2_blip_albef,
+        #     pre_extracted_feature_img_blip2,
+        #     pre_extracted_feature_text_blip2,
+        #     simularity_blip2,
+        #     sorted_blip2,
+        # ),
+        # pytest.param(
+        #     device("cuda"),
+        #     "blip2",
+        #     pre_proc_pic_blip2_blip_albef,
+        #     pre_proc_text_blip2_blip_albef,
+        #     pre_extracted_feature_img_blip2,
+        #     pre_extracted_feature_text_blip2,
+        #     simularity_blip2,
+        #     sorted_blip2,
+        #     marks=pytest.mark.skipif(
+        #         gpu_is_not_available, reason="gpu_is_not_availible"
+        #     ),
+        # ),
         (
             device("cpu"),
             "blip",

@@ -360,7 +362,7 @@ def test_parsing_images(
         vis_processor,
         txt_processor,
         image_keys,
-        image_names,
+        _,
         features_image_stacked,
     ) = ms.MultimodalSearch.parsing_images(testdict, pre_model)
 
misinformation/test/test_summary.py

@@ -1,166 +1,98 @@
 import os
+import pytest
 from torch import device, cuda
 from lavis.models import load_model_and_preprocess
 import misinformation.summary as sm
 
-images = [
-    "./test/data/d755771b-225e-432f-802e-fb8dc850fff7.png",
-    "./test/data/IMG_2746.png",
-    "./test/data/IMG_2750.png",
-    "./test/data/IMG_2805.png",
-    "./test/data/IMG_2806.png",
-    "./test/data/IMG_2807.png",
-    "./test/data/IMG_2808.png",
-    "./test/data/IMG_2809.png",
-    "./test/data/IMG_3755.jpg",
-    "./test/data/IMG_3756.jpg",
-    "./test/data/IMG_3757.jpg",
-    "./test/data/pic1.png",
-]
+IMAGES = ["d755771b-225e-432f-802e-fb8dc850fff7.png", "IMG_2746.png"]
+
+SUMMARY_DEVICE = device("cuda" if cuda.is_available() else "cpu")
+
+TEST_KWARGS = {
+    "run1": {
+        "name": "blip_caption",
+        "model_type": "base_coco",
+        "is_eval": True,
+        "device": SUMMARY_DEVICE,
+    },
+    "run2": {
+        "name": "blip_caption",
+        "model_type": "base_coco",
+        "is_eval": True,
+        "device": SUMMARY_DEVICE,
+    },
+    "run3": {
+        "name": "blip_caption",
+        "model_type": "large_coco",
+        "is_eval": True,
+        "device": SUMMARY_DEVICE,
+    },
+}
 
 
-def test_analyse_image():
+@pytest.fixture
+def get_dict(get_path):
     mydict = {}
-    for img_path in images:
-        id_ = os.path.splitext(os.path.basename(img_path))[0]
-        mydict[id_] = {"filename": img_path}
+    for img in IMAGES:
+        id_ = os.path.splitext(os.path.basename(img))[0]
+        mydict[id_] = {"filename": get_path + img}
+    return mydict
 
-    for key in mydict:
-        mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image()
-    keys = list(mydict.keys())
-    assert len(mydict) == 12
-    for key in keys:
-        assert len(mydict[key]["3_non-deterministic summary"]) == 3
-
-    const_image_summary_list = [
-        "a river running through a city next to tall buildings",
-        "a crowd of people standing on top of a tennis court",
-        "a crowd of people standing on top of a field",
-        "a room with a desk and a chair",
-        "a table with plastic containers on top of it",
-        "a view of a city with mountains in the background",
-        "a view of a city street from a window",
-        "a busy city street with cars and pedestrians",
-        "a close up of an open book with writing on it",
-        "a book that is open on a table",
-        "a yellow book with green lettering on it",
-        "a person running on a beach near a rock formation",
-    ]
-
-    for i in range(len(const_image_summary_list)):
-        assert mydict[keys[i]]["const_image_summary"] == const_image_summary_list[i]
-
-    del sm.SummaryDetector.summary_model, sm.SummaryDetector.summary_vis_processors
-    cuda.empty_cache()
-
-    summary_device = device("cuda" if cuda.is_available() else "cpu")
+
+@pytest.mark.long
+def test_analyse_image(get_dict):
+    reference_results = {
+        "run1": {
+            "d755771b-225e-432f-802e-fb8dc850fff7": "a river running through a city next to tall buildings",
+            "IMG_2746": "a crowd of people standing on top of a tennis court",
+        },
+        "run2": {
+            "d755771b-225e-432f-802e-fb8dc850fff7": "a river running through a city next to tall buildings",
+            "IMG_2746": "a crowd of people standing on top of a tennis court",
+        },
+        "run3": {
+            "d755771b-225e-432f-802e-fb8dc850fff7": "a river running through a town next to tall buildings",
+            "IMG_2746": "a crowd of people standing on top of a track",
+        },
+    }
+    # test three different models
+    for test_run in TEST_KWARGS.keys():
         summary_model, summary_vis_processors, _ = load_model_and_preprocess(
-            name="blip_caption",
-            model_type="base_coco",
-            is_eval=True,
-            device=summary_device,
+            **TEST_KWARGS[test_run]
         )
 
-    for key in mydict:
-        mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(
+        # run two different images
+        for key in get_dict.keys():
+            get_dict[key] = sm.SummaryDetector(get_dict[key]).analyse_image(
                 summary_model, summary_vis_processors
             )
-    keys = list(mydict.keys())
-
-    assert len(mydict) == 12
-    for key in keys:
-        assert len(mydict[key]["3_non-deterministic summary"]) == 3
-
-    const_image_summary_list2 = [
-        "a river running through a city next to tall buildings",
-        "a crowd of people standing on top of a tennis court",
-        "a crowd of people standing on top of a field",
-        "a room with a desk and a chair",
-        "a table with plastic containers on top of it",
-        "a view of a city with mountains in the background",
-        "a view of a city street from a window",
-        "a busy city street with cars and pedestrians",
-        "a close up of an open book with writing on it",
-        "a book that is open on a table",
-        "a yellow book with green lettering on it",
-        "a person running on a beach near a rock formation",
-    ]
-
-    for i in range(len(const_image_summary_list2)):
-        assert mydict[keys[i]]["const_image_summary"] == const_image_summary_list2[i]
-
-    del summary_model, summary_vis_processors
+        assert len(get_dict) == 2
+        for key in get_dict.keys():
+            assert len(get_dict[key]["3_non-deterministic summary"]) == 3
+            assert (
+                get_dict[key]["const_image_summary"] == reference_results[test_run][key]
+            )
         cuda.empty_cache()
 
-    summary_model, summary_vis_processors, _ = load_model_and_preprocess(
-        name="blip_caption",
-        model_type="large_coco",
-        is_eval=True,
-        device=summary_device,
-    )
-
-    for key in mydict:
-        mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(
-            summary_model, summary_vis_processors
-        )
-    keys = list(mydict.keys())
-    assert len(mydict) == 12
-    for key in keys:
-        assert len(mydict[key]["3_non-deterministic summary"]) == 3
-
-    const_image_summary_list3 = [
-        "a river running through a town next to tall buildings",
-        "a crowd of people standing on top of a track",
-        "a group of people standing on top of a track",
-        "a desk and chair in a small room",
-        "a table that has some chairs on top of it",
-        "a view of a city from a window of a building",
-        "a view of a city from a window",
-        "a city street with cars and people on it",
-        "an open book with german text on it",
-        "a close up of a book on a table",
-        "a book with a green cover on a table",
-        "a person running on a beach near the ocean",
-    ]
-
-    for i in range(len(const_image_summary_list2)):
-        assert mydict[keys[i]]["const_image_summary"] == const_image_summary_list3[i]
+        summary_model = None
+        summary_vis_processors = None
 
 
-def test_analyse_questions():
-    mydict = {}
-    for img_path in images:
-        id_ = os.path.splitext(os.path.basename(img_path))[0]
-        mydict[id_] = {"filename": img_path}
-
+def test_analyse_questions(get_dict):
     list_of_questions = [
         "How many persons on the picture?",
        "What happends on the picture?",
     ]
-    for key in mydict:
-        mydict[key] = sm.SummaryDetector(mydict[key]).analyse_questions(
+    for key in get_dict:
+        get_dict[key] = sm.SummaryDetector(get_dict[key]).analyse_questions(
            list_of_questions
        )
 
-    keys = list(mydict.keys())
-    assert len(mydict) == 12
-
-    list_of_questions_ans = [2, 100, "many", 0, 0, "none", "two", 5, 0, 0, 0, 1]
-
-    list_of_questions_ans2 = [
-        "flood",
-        "festival",
-        "people are flying kites",
-        "no one's home",
-        "chair is being moved",
-        "traffic jam",
-        "day time",
-        "traffic jam",
-        "nothing",
-        "nothing",
-        "nothing",
-        "running",
-    ]
-
-    for i in range(len(list_of_questions_ans)):
-        assert mydict[keys[i]][list_of_questions[1]] == str(list_of_questions_ans2[i])
+    assert len(get_dict) == 2
+    list_of_questions_ans = ["2", "100"]
+    list_of_questions_ans2 = ["flood", "festival"]
+    test_answers = []
+    test_answers2 = []
+    for key in get_dict.keys():
+        test_answers.append(get_dict[key][list_of_questions[0]])
+        test_answers2.append(get_dict[key][list_of_questions[1]])
+    assert sorted(test_answers) == sorted(list_of_questions_ans)
+    assert sorted(test_answers2) == sorted(list_of_questions_ans2)
misinformation/test/test_text.py

@@ -20,7 +20,7 @@ def set_testdict(get_path):
     return testdict
 
 
-LANGUAGES = ["de", "en", "en"]
+LANGUAGES = ["de", "om", "en"]
 
 
 def test_TextDetector(set_testdict):

@@ -116,6 +116,18 @@ def test_sentiment_analysis():
     assert test_obj.subdict["subjectivity"] == 0.6
 
 
+def test_text_summary(get_path):
+    mydict = {}
+    test_obj = tt.TextDetector(mydict, analyse_text=True)
+    ref_file = get_path + "example_summary.txt"
+    with open(ref_file, "r", encoding="utf8") as file:
+        reference_text = file.read()
+    test_obj.subdict["text_english"] = reference_text
+    test_obj.text_summary()
+    reference_summary = " I’m sorry, but I don’t want to be an emperor. That’s not my business. I should like to help everyone - if possible - Jew, Gentile - black man - white . We all want to help one another. In this world there is room for everyone. The way of life can be free and beautiful, but we have lost the way ."
+    assert mydict["summary_text"] == reference_summary
+
+
 def test_PostprocessText(set_testdict, get_path):
     reference_dict = "THE\nALGEBRAIC\nEIGENVALUE\nPROBLEM\nDOM\nNVS TIO\nMINA\nMonographs\non Numerical Analysis\nJ.. H. WILKINSON"
     reference_df = "Mathematische Formelsammlung\nfür Ingenieure und Naturwissenschaftler\nMit zahlreichen Abbildungen und Rechenbeispielen\nund einer ausführlichen Integraltafel\n3., verbesserte Auflage"
misinformation/text.py

@@ -9,6 +9,7 @@ from misinformation import utils
 import grpc
 import pandas as pd
 from bertopic import BERTopic
+from transformers import pipeline
 
 # make widgets work again
 # clean text has weird spaces and separation of "do n't"

@@ -119,6 +120,14 @@ class TextDetector(utils.AnalysisMethod):
         # where 0.0 is very objective and 1.0 is very subjective
         self.subdict["subjectivity"] = self.doc._.blob.subjectivity
 
+    def text_summary(self):
+        # use the transformers pipeline to summarize the text
+        pipe = pipeline("summarization")
+        self.subdict.update(pipe(self.subdict["text_english"])[0])
+
+    # def text_sentiment_transformers(self):
+    #     pipe = pipeline("text-classification")
+
 
 class PostprocessText:
     def __init__(
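text_summary runs the default Hugging Face summarization pipeline over the text_english field and merges the pipeline's output dict, which carries the summary_text key that the new test asserts on. A sketch of the call pattern, mirroring test_text.py (the tt import alias is assumed from that test file):

    import misinformation.text as tt

    mydict = {}
    test_obj = tt.TextDetector(mydict, analyse_text=True)
    test_obj.subdict["text_english"] = "Some sufficiently long English text ..."
    test_obj.text_summary()
    # subdict is the same dict object, so the summary lands in mydict
    print(mydict["summary_text"])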
39  notebooks/image_summary.ipynb  generated
@@ -17,7 +17,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"tags": []
+},
 "outputs": [],
 "source": [
 "import misinformation\n",

@@ -36,7 +38,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"tags": []
+},
 "outputs": [],
 "source": [
 "images = mutils.find_files(\n",

@@ -48,7 +52,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"tags": []
+},
 "outputs": [],
 "source": [
 "mydict = mutils.initialize_dict(images[0:10])"

@@ -57,7 +63,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"tags": []
+},
 "outputs": [],
 "source": [
 "mydict"

@@ -80,22 +88,27 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"tags": []
+},
 "outputs": [],
 "source": [
-"summary_model, summary_vis_processors = sm.SummaryDetector.load_model(mydict, \"base\")\n",
+"obj = sm.SummaryDetector(mydict)\n",
+"summary_model, summary_vis_processors = obj.load_model(model_type=\"base\")\n",
 "# summary_model, summary_vis_processors = mutils.load_model(\"large\")"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"tags": []
+},
 "outputs": [],
 "source": [
 "for key in mydict:\n",
 "    mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(\n",
-"        summary_model, summary_vis_processors\n",
+"        summary_model=summary_model, summary_vis_processors=summary_vis_processors\n",
 "    )"
 ]
 },

@@ -130,7 +143,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"tags": []
+},
 "outputs": [],
 "source": [
 "df.head(10)"

@@ -168,7 +183,9 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {},
+"metadata": {
+"tags": []
+},
 "outputs": [],
 "source": [
 "mdisplay.explore_analysis(mydict, identify=\"summary\")"

@@ -279,7 +296,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.0"
+"version": "3.9.16"
 },
 "vscode": {
 "interpreter": {
pyproject.toml

@@ -48,6 +48,7 @@ dependencies = [
     "tensorflow",
     "textblob",
     "torch",
+    "transformers",
     "google-cloud-vision",
     "setuptools",
     "opencv-contrib-python",