* reduce code smells

* account for transl. version change

* remove print statements

* remove imageai
This commit is contained in:
Inga Ulusoy 2022-12-31 22:15:40 +01:00, committed by GitHub
parent 4744ed5256
commit b585097f19
No key matching this signature was found
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 10 additions and 372 deletions

.github/workflows/ci.yml (vendored)
View file

@@ -31,7 +31,7 @@ jobs:
- name: Run pytest
run: |
cd misinformation
python -m pytest -s -m "not (imageai or gcv)" --cov=. --cov-report=xml
python -m pytest -s -m "not gcv" --cov=. --cov-report=xml
- name: Upload coverage
if: matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.9'
uses: codecov/codecov-action@v3
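
Note: with imageai gone, the CI command above only deselects the Google Cloud Vision tests. A minimal, hypothetical sketch of a test that the `-m "not gcv"` filter skips (the test name is invented; the gcv marker itself is registered in pytest.ini further down):

import pytest

@pytest.mark.gcv
def test_gcv_analysis():
    # Deselected in CI by `pytest -m "not gcv"` to avoid paid Cloud Vision calls.
    ...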

View file

@@ -1,147 +0,0 @@
import pandas as pd
import json
from misinformation import utils
from misinformation import faces
class LabelManager:
def __init__(self):
self.labels_code = None
self.labels = None
self.f_labels = None
self.f_labels_code = None
self.load()
def load(self):
self.labels_code = pd.read_excel(
"./misinformation/test/data/EUROPE_APRMAY20_data_variable_labels_coding.xlsx",
sheet_name="variable_labels_codings",
)
self.labels = pd.read_csv(
"./misinformation/test/data/Europe_APRMAY20data190722.csv",
sep=",",
decimal=".",
)
self.map = self.read_json("./misinformation/data/map_test_set.json")
def read_json(self, name):
with open("{}".format(name)) as f:
mydict = json.load(f)
return mydict
def get_orders(self):
return [i["order"] for i in self.map.values()]
def filter_from_order(self, orders: list):
cols = []
for order in orders:
col = self.labels_code.iloc[order - 1, 1]
cols.append(col.lower())
self.f_labels_code = self.labels_code.loc[
self.labels_code["order"].isin(orders)
]
self.f_labels = self.labels[cols]
def gen_dict(self):
labels_dict = {}
if self.f_labels is None:
print("No filtered labels found")
return labels_dict
cols = self.f_labels.columns.tolist()
for index, row in self.f_labels.iterrows():
row_dict = {}
for col in cols:
row_dict[col] = row[col]
labels_dict[row["pic_id"]] = row_dict
return labels_dict
def map_dict(self, mydict):
mapped_dict = {}
for id, subdict in mydict.items():
mapped_subdict = {}
mapped_subdict["id"] = id[0:-2]
mapped_subdict["pic_order"] = id[-1] if id[-2] == "0" else id[-2::]
mapped_subdict["pic_id"] = id
for key in self.map.keys():
# get the key name
mydict_name = self.map[key]["variable_mydict"]
mydict_value = self.map[key]["value_mydict"]
# find out which value was set
mydict_current = subdict[mydict_name]
# now map to new key-value pair
mapped_subdict[key] = 1 if mydict_current == mydict_value else 0
# substitute the values that are not boolean
if self.map[key]["variable_coding"] != "Bool":
mapped_subdict[key] = mydict_current
# take only first value in lists - this applies to faces,
# reported are up to three in a list, we compare only the
# largest one here
if isinstance(mydict_current, list):
mapped_subdict[key] = 1 if mydict_current[0] == mydict_value else 0
# also cut out the likelihood for detected emotion
if isinstance(mydict_current[0], tuple):
mapped_subdict[key] = (
1 if mydict_current[0][0] == mydict_value else 0
)
mapped_dict[id] = mapped_subdict
return mapped_dict
if __name__ == "__main__":
files = utils.find_files(
path="/home/inga/projects/misinformation-project/misinformation/misinformation/test/data/Europe APRMAY20 visual data/cropped images",
limit=500,
)
mydict = utils.initialize_dict(files)
# analyze faces
image_ids = [key for key in mydict.keys()]
for i in image_ids:
mydict[i] = faces.EmotionDetector(mydict[i]).analyse_image()
outdict = utils.append_data_to_dict(mydict)
df = utils.dump_df(outdict)
# print(df.head(10))
df.to_csv("mydict_out.csv")
# example of LabelManager for loading csv data to dict
lm = LabelManager()
# get the desired label numbers automatically
orders = lm.get_orders()
# map mydict to the specified variable names and values
mydict_map = lm.map_dict(mydict)
lm.filter_from_order([1, 2, 3] + orders)
labels = lm.gen_dict()
comp = {}
for key in labels.keys():
if str(key) not in mydict_map:
print("Key {} not found.".format(key))
continue
print("ref: {}".format(labels[key]))
print("com: {}".format(mydict_map[str(key)]))
for subkey in labels[key]:
if type(labels[key][subkey]) != int:
continue
if type(mydict_map[str(key)][subkey]) != int:
continue
comp[subkey] = comp.get(subkey, 0) + abs(
labels[key][subkey] - mydict_map[str(key)][subkey]
)
print("summary: ")
# why v9_5a not there - bec reads in as float from the csv
print(comp)
# summary:
# {'v9_4': 42, 'v9_5b': 1579, 'v9_6': 229, 'v9_7': 45, 'v9_8': 39, 'v9_8a': 31, 'v9_9': 58, 'v9_10': 33, 'v9_11': 22, 'v9_12': 2, 'v9_13': 24, 'v11_3': 39}
# Important here is:
# Overall positive - 'v9_8': 39 deviations
# Overall negative - 'v9_9': 58
# happy - 'v9_8a': 31
# fear - 'v9_10': 33
# angry - 'v9_11': 22
# disgust - 'v9_12': 2
# sad - 'v9_13': 24
# respect of rules = wears mask - 'v11_3': 39
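
The removed LabelManager reads its mapping from ./misinformation/data/map_test_set.json into self.map; map_dict and filter_from_order consume the per-key fields of that file. A hedged sketch of one assumed entry, with the field names taken from the code above and the values invented purely for illustration:

# Hypothetical mapping entry; only the field names are grounded in the code above.
map_entry = {
    "v9_8a": {
        "order": 12,                    # column position consumed by get_orders/filter_from_order
        "variable_mydict": "emotion",   # key looked up in the per-image analysis subdict
        "value_mydict": "happy",        # value that maps to 1; anything else maps to 0
        "variable_coding": "Bool",      # non-"Bool" entries keep the raw value instead
    }
}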

View file

@@ -44,7 +44,6 @@ def draw_matches(matches, img1, img2, kp1, kp2):
M = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)[0]
# Draw detected template in scene image
# h, w = img1.shape
h = img1.shape[0]
w = img1.shape[1]
pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]).reshape(
@@ -54,8 +53,6 @@ def draw_matches(matches, img1, img2, kp1, kp2):
img2 = cv2.polylines(img2, [np.int32(dst)], True, 255, 3, cv2.LINE_AA)
# h1, w1 = img1.shape
# h2, w2 = img2.shape
h1 = img1.shape[0]
h2 = img2.shape[0]
w1 = img1.shape[1]
@@ -85,7 +82,6 @@ def draw_matches(matches, img1, img2, kp1, kp2):
def matching_points(img1, img2):
img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
# sift = cv2.SIFT_create()
sift = cv2.xfeatures2d.SIFT_create()
kp1, des1 = sift.detectAndCompute(img1, None)
kp2, des2 = sift.detectAndCompute(img2, None)
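
A side note on the SIFT constructor kept here: cv2.xfeatures2d.SIFT_create() needs an opencv-contrib build, while OpenCV 4.4+ also exposes the commented-out cv2.SIFT_create() in the main module. A hedged fallback sketch if both builds must be supported:

import cv2

def create_sift():
    # Prefer the main-module constructor (OpenCV >= 4.4); fall back to contrib builds.
    if hasattr(cv2, "SIFT_create"):
        return cv2.SIFT_create()
    return cv2.xfeatures2d.SIFT_create()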

View file

@@ -253,17 +253,3 @@ class NocatchOutput(ipywidgets.Output):
def __exit__(self, *args, **kwargs):
super().__exit__(*args, **kwargs)
if __name__ == "__main__":
files = utils.find_files(
path="/home/inga/projects/misinformation-project/misinformation/data/test_no_text/"
)
# files = [
# "/home/inga/projects/misinformation-project/misinformation/data/test_no_text/102141_1_eng.png"
# ]
mydict = utils.initialize_dict(files)
image_ids = [key for key in mydict.keys()]
for i in image_ids:
mydict[i] = EmotionDetector(mydict[i]).analyse_image()
print(mydict)

View file

@@ -2,20 +2,13 @@ from misinformation.utils import AnalysisMethod
from misinformation.objects_cvlib import ObjectCVLib
from misinformation.objects_cvlib import init_default_objects
# from misinformation.objects_imageai import ObjectImageAI
class ObjectDetectorClient(AnalysisMethod):
def __init__(self):
# The detector is default to CVLib
# Here other libraries can be added
self.detector = ObjectCVLib()
def set_client_to_imageai(self):
# disable imageai temporarily
# self.detector = ObjectImageAI()
# maybe reactivate if new imageai release comes out
pass
def set_client_to_cvlib(self):
self.detector = ObjectCVLib()
@@ -46,7 +39,3 @@ class ObjectDetector(AnalysisMethod):
@staticmethod
def set_client_to_cvlib():
ObjectDetector.od_client.set_client_to_cvlib()
@staticmethod
def set_client_to_imageai():
ObjectDetector.od_client.set_client_to_imageai()
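
With set_client_to_imageai reduced to a no-op, ObjectDetectorClient always delegates to ObjectCVLib. A hedged usage sketch of that remaining path; the analyse_image(subdict) signature is assumed from the parallel ObjectImageAI class removed below:

from misinformation.objects_cvlib import ObjectCVLib

subdict = {"filename": "./test/data/IMG_2809.png"}
subdict = ObjectCVLib().analyse_image(subdict)
# subdict should now carry "yes"/"no" flags such as subdict["person"] or subdict["cell phone"].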

View file

@@ -1,114 +0,0 @@
from misinformation.utils import DownloadResource
from misinformation.objects_cvlib import ObjectsMethod
from misinformation.objects_cvlib import init_default_objects
from imageai.Detection import ObjectDetection
import cv2
import os
import pathlib
def objects_from_imageai(detections: list) -> dict:
objects = init_default_objects()
for obj in detections:
obj_name = obj["name"]
objects[obj_name] = "yes"
return objects
def objects_symlink_processor(name):
def _processor(fname, action, pooch):
if not os.path.exists(os.path.dirname(name)):
os.makedirs(os.path.dirname(name))
if not os.path.exists(name):
os.symlink(fname, name)
return fname
return _processor
pre_model_path = pathlib.Path.home().joinpath(
".misinformation", "objects", "resnet50_coco_best_v2.1.0.h5"
)
retina_objects_model = DownloadResource(
url="https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/resnet50_coco_best_v2.1.0.h5/",
known_hash="sha256:6518ad56a0cca4d1bd8cbba268dd4e299c7633efe7d15902d5acbb0ba180027c",
processor=objects_symlink_processor(pre_model_path),
)
class ObjectImageAI(ObjectsMethod):
def __init__(self):
# init imageai client
retina_objects_model.get()
if not os.path.exists(pre_model_path):
print("Download retina objects model failed.")
return
self.imgai_client = ObjectDetection()
self.imgai_client.setModelTypeAsRetinaNet()
self.imgai_client.setModelPath(pre_model_path)
self.imgai_client.loadModel()
self.custom = self.imgai_client.CustomObjects(
person=True,
bicycle=True,
car=True,
motorcycle=True,
airplane=True,
bus=True,
train=True,
truck=True,
boat=True,
traffic_light=True,
cell_phone=True,
)
def detect_objects_imageai(self, image_path, custom=True, min_prob=30):
"""Localize objects in the local image.
Args:
image_path: The path to the local file.
custom: If only detect user defined specific objects.
min_prob: Minimum probability that we trust as objects.
"""
img = cv2.imread(image_path)
if custom:
box_img, detections = self.imgai_client.detectCustomObjectsFromImage(
custom_objects=self.custom,
input_type="array",
input_image=img,
output_type="array",
minimum_percentage_probability=min_prob,
)
else:
box_img, detections = self.imgai_client.detectObjectsFromImage(
input_type="array",
input_image=img,
output_type="array",
minimum_percentage_probability=min_prob,
)
objects = objects_from_imageai(detections)
return objects
def analyse_image_from_file(self, image_path):
"""Localize objects in the local image.
Args:
image_path: The path to the local file.
"""
objects = self.detect_objects_imageai(image_path)
return objects
def analyse_image(self, subdict):
"""Localize objects in the local image.
Args:
subdict: The dictionary for an image expression instance.
"""
objects = self.analyse_image_from_file(subdict["filename"])
for key in objects:
subdict[key] = objects[key]
return subdict
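
The deleted module above wrapped imageai's RetinaNet detector behind the ObjectsMethod interface. For reference, a hedged sketch of the equivalent detection with cvlib, the backend the project keeps (the confidence value mirrors min_prob=30 and is illustrative only):

import cv2
import cvlib
from misinformation.objects_cvlib import init_default_objects

img = cv2.imread("./test/data/IMG_2809.png")
# cvlib returns parallel lists of bounding boxes, COCO labels and confidences.
bbox, labels, conf = cvlib.detect_common_objects(img, confidence=0.3)
objects = init_default_objects()
for name in labels:
    objects[name] = "yes"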

View file

@@ -1,14 +0,0 @@
{
"filename": "./test/data/IMG_2809.png",
"person": "yes",
"bicycle": "yes",
"car": "yes",
"motorcycle": "no",
"airplane": "no",
"bus": "yes",
"train": "no",
"truck": "no",
"boat": "no",
"traffic light": "no",
"cell phone": "no"
}

View file

@@ -1,4 +1,3 @@
[pytest]
markers =
- imageai: mark a test related to imageai.
gcv: mark google cloud vision tests - skip to save money.

View file

@@ -3,14 +3,11 @@ import pytest
import misinformation.objects as ob
import misinformation.objects_cvlib as ob_cvlib
# import misinformation.objects_imageai as ob_iai
OBJECT_1 = "cell phone"
OBJECT_2 = "motorcycle"
OBJECT_3 = "traffic light"
TEST_IMAGE_1 = "./test/data/IMG_2809.png"
JSON_1 = "./test/data/example_objects_cvlib.json"
JSON_2 = "./test/data/example_objects_imageai.json"
@pytest.fixture()
@@ -77,54 +74,6 @@ def test_detect_objects_cvlib():
assert objs[key] == out_dict[key]
@pytest.mark.imageai
def test_objects_from_imageai(default_objects):
objects_list = [OBJECT_1, OBJECT_2, OBJECT_3]
objs_input = [
{"name": OBJECT_1},
{"name": OBJECT_2},
{"name": OBJECT_3},
]
objects = ob_iai.objects_from_imageai(objs_input) # noqa: F821
out_objects = default_objects
for obj in objects_list:
out_objects[obj] = "yes"
assert str(objects) == str(out_objects)
@pytest.mark.imageai
def test_analyse_image_from_file_imageai():
file_path = TEST_IMAGE_1
objs = ob_iai.ObjectImageAI().analyse_image_from_file(file_path) # noqa: F821
with open(JSON_2, "r") as file:
out_dict = json.load(file)
for key in objs.keys():
assert objs[key] == out_dict[key]
@pytest.mark.imageai
def test_detect_objects_imageai():
file_path = TEST_IMAGE_1
objs = ob_iai.ObjectImageAI().detect_objects_imageai(file_path) # noqa: F821
with open(JSON_2, "r") as file:
out_dict = json.load(file)
for key in objs.keys():
assert objs[key] == out_dict[key]
@pytest.mark.imageai
def test_analyse_image_imageai():
mydict = {"filename": TEST_IMAGE_1}
ob_iai.ObjectImageAI().analyse_image(mydict) # noqa: F821
with open(JSON_2, "r") as file:
out_dict = json.load(file)
for key in mydict.keys():
assert mydict[key] == out_dict[key]
def test_set_keys(default_objects):
mydict = {"filename": TEST_IMAGE_1}
key_objs = ob.ObjectDetector(mydict).set_keys()

View file

@@ -41,7 +41,7 @@ class TextDetector(utils.AnalysisMethod):
# here check if text was found
self.subdict = {"text": texts}
if response.error.message:
- raise Exception(
+ raise ValueError(
"{}\nFor more info on error messages, check: "
"https://cloud.google.com/apis/design/errors".format(
response.error.message
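
For context, the changed raise sits in the Google Cloud Vision text-detection path. A hedged sketch of the typical call pattern around it, not the project's exact code, with an illustrative file path:

from google.cloud import vision

client = vision.ImageAnnotatorClient()
with open("image.png", "rb") as f:
    image = vision.Image(content=f.read())
response = client.text_detection(image=image)
texts = response.text_annotations
if response.error.message:
    # Raising ValueError instead of a bare Exception is the change in this hunk.
    raise ValueError(
        "{}\nFor more info on error messages, check: "
        "https://cloud.google.com/apis/design/errors".format(response.error.message)
    )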

View file

@@ -75,8 +75,8 @@ def find_files(path=None, pattern="*.png", recursive=True, limit=20):
def initialize_dict(filelist: list) -> dict:
mydict = {}
for img_path in filelist:
- id = os.path.splitext(os.path.basename(img_path))[0]
- mydict[id] = {"filename": img_path}
+ id_ = os.path.splitext(os.path.basename(img_path))[0]
+ mydict[id_] = {"filename": img_path}
return mydict
@@ -89,7 +89,6 @@ def append_data_to_dict(mydict: dict) -> dict:
for subdict in mydict.values():
for key in subdict.keys():
outdict[key].append(subdict[key])
- # mydict = {key: [mydict[key] for mydict in dictlist] for key in dictlist[0]}
return outdict
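
Read together with the __main__ blocks removed above, the intended use of these helpers is roughly the following pipeline (a hedged sketch assembled from calls shown elsewhere in this diff; the path, limit, and output filename are illustrative):

from misinformation import utils, faces

files = utils.find_files(path="./data/images", limit=20)
mydict = utils.initialize_dict(files)          # {"<image id>": {"filename": ...}, ...}
for image_id in list(mydict.keys()):
    mydict[image_id] = faces.EmotionDetector(mydict[image_id]).analyse_image()
outdict = utils.append_data_to_dict(mydict)    # one list per key, ready for a DataFrame
utils.dump_df(outdict).to_csv("analysis_out.csv")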

notebooks/objects_expression.ipynb (generated)
View file

@@ -11,7 +11,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebooks shows some preliminary work on detecting objects expressions with cvliv and imageai. It is mainly meant to explore its capabilities and to decide on future research directions. We package our code into a `misinformation` package that is imported here:"
"This notebooks shows some preliminary work on detecting objects expressions with cvlib. It is mainly meant to explore its capabilities and to decide on future research directions. We package our code into a `misinformation` package that is imported here:"
]
},
{
@@ -24,13 +24,6 @@
"import misinformation.objects as ob"
]
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "ObjectDetector currently support 2 clinet types: CLIENT_CVLIB and CLIENT_IMAGEAI, default is CLIENT_CVLIB."
- ]
- },
{
"cell_type": "markdown",
"metadata": {},
@@ -63,7 +56,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Detect objects with default client type: CLIENT_CVLIB."
"## Detect objects and directly write to csv"
]
},
{
@@ -129,6 +122,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Manually inspect what was detected\n",
"\n",
"To check the analysis, you can inspect the analyzed elements here. Loading the results takes a moment, so please be patient. If you are sure of what you are doing."
]
},
@@ -165,7 +160,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.9.5"
},
"vscode": {
"interpreter": {