From b585097f198ad5418090d0a4d63d959c4f0a08b4 Mon Sep 17 00:00:00 2001 From: Inga Ulusoy Date: Sat, 31 Dec 2022 22:15:40 +0100 Subject: [PATCH] remove obsolete features (#48) * reduce code smells * account for transl. version change * remove print statements * remove imageai --- .github/workflows/ci.yml | 2 +- misinformation/accuracy.py | 147 ------------------ misinformation/cropposts.py | 4 - misinformation/faces.py | 14 -- misinformation/objects.py | 13 +- misinformation/objects_imageai.py | 114 -------------- .../test/data/example_objects_imageai.json | 14 -- misinformation/test/pytest.ini | 1 - misinformation/test/test_objects.py | 51 ------ misinformation/text.py | 2 +- misinformation/utils.py | 5 +- notebooks/objects_expression.ipynb | 15 +- 12 files changed, 10 insertions(+), 372 deletions(-) delete mode 100644 misinformation/accuracy.py delete mode 100644 misinformation/objects_imageai.py delete mode 100644 misinformation/test/data/example_objects_imageai.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 78fa7a2..325787f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,7 +31,7 @@ jobs: - name: Run pytest run: | cd misinformation - python -m pytest -s -m "not (imageai or gcv)" --cov=. --cov-report=xml + python -m pytest -s -m "not gcv" --cov=. 
--cov-report=xml - name: Upload coverage if: matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.9' uses: codecov/codecov-action@v3 diff --git a/misinformation/accuracy.py b/misinformation/accuracy.py deleted file mode 100644 index 26dce18..0000000 --- a/misinformation/accuracy.py +++ /dev/null @@ -1,147 +0,0 @@ -import pandas as pd -import json - -from misinformation import utils -from misinformation import faces - - -class LabelManager: - def __init__(self): - self.labels_code = None - self.labels = None - self.f_labels = None - self.f_labels_code = None - self.load() - - def load(self): - self.labels_code = pd.read_excel( - "./misinformation/test/data/EUROPE_APRMAY20_data_variable_labels_coding.xlsx", - sheet_name="variable_labels_codings", - ) - self.labels = pd.read_csv( - "./misinformation/test/data/Europe_APRMAY20data190722.csv", - sep=",", - decimal=".", - ) - self.map = self.read_json("./misinformation/data/map_test_set.json") - - def read_json(self, name): - with open("{}".format(name)) as f: - mydict = json.load(f) - return mydict - - def get_orders(self): - return [i["order"] for i in self.map.values()] - - def filter_from_order(self, orders: list): - cols = [] - for order in orders: - col = self.labels_code.iloc[order - 1, 1] - cols.append(col.lower()) - - self.f_labels_code = self.labels_code.loc[ - self.labels_code["order"].isin(orders) - ] - self.f_labels = self.labels[cols] - - def gen_dict(self): - labels_dict = {} - if self.f_labels is None: - print("No filtered labels found") - return labels_dict - - cols = self.f_labels.columns.tolist() - for index, row in self.f_labels.iterrows(): - row_dict = {} - for col in cols: - row_dict[col] = row[col] - labels_dict[row["pic_id"]] = row_dict - - return labels_dict - - def map_dict(self, mydict): - mapped_dict = {} - for id, subdict in mydict.items(): - mapped_subdict = {} - mapped_subdict["id"] = id[0:-2] - mapped_subdict["pic_order"] = id[-1] if id[-2] == "0" else id[-2::] - 
mapped_subdict["pic_id"] = id - for key in self.map.keys(): - # get the key name - mydict_name = self.map[key]["variable_mydict"] - mydict_value = self.map[key]["value_mydict"] - # find out which value was set - mydict_current = subdict[mydict_name] - # now map to new key-value pair - mapped_subdict[key] = 1 if mydict_current == mydict_value else 0 - # substitute the values that are not boolean - if self.map[key]["variable_coding"] != "Bool": - mapped_subdict[key] = mydict_current - # take only first value in lists - this applies to faces, - # reported are up to three in a list, we compare only the - # largest one here - if isinstance(mydict_current, list): - mapped_subdict[key] = 1 if mydict_current[0] == mydict_value else 0 - # also cut out the likelihood for detected emotion - if isinstance(mydict_current[0], tuple): - mapped_subdict[key] = ( - 1 if mydict_current[0][0] == mydict_value else 0 - ) - mapped_dict[id] = mapped_subdict - return mapped_dict - - -if __name__ == "__main__": - files = utils.find_files( - path="/home/inga/projects/misinformation-project/misinformation/misinformation/test/data/Europe APRMAY20 visual data/cropped images", - limit=500, - ) - mydict = utils.initialize_dict(files) - # analyze faces - image_ids = [key for key in mydict.keys()] - for i in image_ids: - mydict[i] = faces.EmotionDetector(mydict[i]).analyse_image() - - outdict = utils.append_data_to_dict(mydict) - df = utils.dump_df(outdict) - # print(df.head(10)) - df.to_csv("mydict_out.csv") - - # example of LabelManager for loading csv data to dict - lm = LabelManager() - # get the desired label numbers automatically - orders = lm.get_orders() - # map mydict to the specified variable names and values - mydict_map = lm.map_dict(mydict) - lm.filter_from_order([1, 2, 3] + orders) - - labels = lm.gen_dict() - comp = {} - for key in labels.keys(): - if str(key) not in mydict_map: - print("Key {} not found.".format(key)) - continue - print("ref: {}".format(labels[key])) - print("com: 
{}".format(mydict_map[str(key)])) - for subkey in labels[key]: - if type(labels[key][subkey]) != int: - continue - if type(mydict_map[str(key)][subkey]) != int: - continue - comp[subkey] = comp.get(subkey, 0) + abs( - labels[key][subkey] - mydict_map[str(key)][subkey] - ) - print("summary: ") - # why v9_5a not there - bec reads in as float from the csv - print(comp) - # summary: - # {'v9_4': 42, 'v9_5b': 1579, 'v9_6': 229, 'v9_7': 45, 'v9_8': 39, 'v9_8a': 31, 'v9_9': 58, 'v9_10': 33, 'v9_11': 22, 'v9_12': 2, 'v9_13': 24, 'v11_3': 39} - # Important here is: - # Overall positive - 'v9_8': 39 deviations - # Overall negative - 'v9_9': 58 - # happy - 'v9_8a': 31 - # fear - 'v9_10': 33 - # angry - 'v9_11': 22 - # disgust - 'v9_12': 2 - # sad - 'v9_13': 24 - # respect of rules = wears mask - 'v11_3': 39 diff --git a/misinformation/cropposts.py b/misinformation/cropposts.py index ed745bd..c36ff69 100644 --- a/misinformation/cropposts.py +++ b/misinformation/cropposts.py @@ -44,7 +44,6 @@ def draw_matches(matches, img1, img2, kp1, kp2): M = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)[0] # Draw detected template in scene image - # h, w = img1.shape h = img1.shape[0] w = img1.shape[1] pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]).reshape( @@ -54,8 +53,6 @@ def draw_matches(matches, img1, img2, kp1, kp2): img2 = cv2.polylines(img2, [np.int32(dst)], True, 255, 3, cv2.LINE_AA) - # h1, w1 = img1.shape - # h2, w2 = img2.shape h1 = img1.shape[0] h2 = img2.shape[0] w1 = img1.shape[1] @@ -85,7 +82,6 @@ def draw_matches(matches, img1, img2, kp1, kp2): def matching_points(img1, img2): img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY) img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY) - # sift = cv2.SIFT_create() sift = cv2.xfeatures2d.SIFT_create() kp1, des1 = sift.detectAndCompute(img1, None) kp2, des2 = sift.detectAndCompute(img2, None) diff --git a/misinformation/faces.py b/misinformation/faces.py index 58c66d6..76ac369 100644 --- a/misinformation/faces.py +++ 
b/misinformation/faces.py @@ -253,17 +253,3 @@ class NocatchOutput(ipywidgets.Output): def __exit__(self, *args, **kwargs): super().__exit__(*args, **kwargs) - - -if __name__ == "__main__": - files = utils.find_files( - path="/home/inga/projects/misinformation-project/misinformation/data/test_no_text/" - ) - # files = [ - # "/home/inga/projects/misinformation-project/misinformation/data/test_no_text/102141_1_eng.png" - # ] - mydict = utils.initialize_dict(files) - image_ids = [key for key in mydict.keys()] - for i in image_ids: - mydict[i] = EmotionDetector(mydict[i]).analyse_image() - print(mydict) diff --git a/misinformation/objects.py b/misinformation/objects.py index 4657d94..0c94d8d 100644 --- a/misinformation/objects.py +++ b/misinformation/objects.py @@ -2,20 +2,13 @@ from misinformation.utils import AnalysisMethod from misinformation.objects_cvlib import ObjectCVLib from misinformation.objects_cvlib import init_default_objects -# from misinformation.objects_imageai import ObjectImageAI - class ObjectDetectorClient(AnalysisMethod): def __init__(self): # The detector is default to CVLib + # Here other libraries can be added self.detector = ObjectCVLib() - def set_client_to_imageai(self): - # disable imageai temporarily - # self.detector = ObjectImageAI() - # maybe reactivate if new imageai release comes out - pass - def set_client_to_cvlib(self): self.detector = ObjectCVLib() @@ -46,7 +39,3 @@ class ObjectDetector(AnalysisMethod): @staticmethod def set_client_to_cvlib(): ObjectDetector.od_client.set_client_to_cvlib() - - @staticmethod - def set_client_to_imageai(): - ObjectDetector.od_client.set_client_to_imageai() diff --git a/misinformation/objects_imageai.py b/misinformation/objects_imageai.py deleted file mode 100644 index 3f5aa78..0000000 --- a/misinformation/objects_imageai.py +++ /dev/null @@ -1,114 +0,0 @@ -from misinformation.utils import DownloadResource -from misinformation.objects_cvlib import ObjectsMethod -from misinformation.objects_cvlib 
import init_default_objects -from imageai.Detection import ObjectDetection - -import cv2 -import os -import pathlib - - -def objects_from_imageai(detections: list) -> dict: - objects = init_default_objects() - for obj in detections: - obj_name = obj["name"] - objects[obj_name] = "yes" - return objects - - -def objects_symlink_processor(name): - def _processor(fname, action, pooch): - if not os.path.exists(os.path.dirname(name)): - os.makedirs(os.path.dirname(name)) - - if not os.path.exists(name): - os.symlink(fname, name) - return fname - - return _processor - - -pre_model_path = pathlib.Path.home().joinpath( - ".misinformation", "objects", "resnet50_coco_best_v2.1.0.h5" -) - - -retina_objects_model = DownloadResource( - url="https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/resnet50_coco_best_v2.1.0.h5/", - known_hash="sha256:6518ad56a0cca4d1bd8cbba268dd4e299c7633efe7d15902d5acbb0ba180027c", - processor=objects_symlink_processor(pre_model_path), -) - - -class ObjectImageAI(ObjectsMethod): - def __init__(self): - # init imageai client - retina_objects_model.get() - if not os.path.exists(pre_model_path): - print("Download retina objects model failed.") - return - self.imgai_client = ObjectDetection() - self.imgai_client.setModelTypeAsRetinaNet() - self.imgai_client.setModelPath(pre_model_path) - self.imgai_client.loadModel() - self.custom = self.imgai_client.CustomObjects( - person=True, - bicycle=True, - car=True, - motorcycle=True, - airplane=True, - bus=True, - train=True, - truck=True, - boat=True, - traffic_light=True, - cell_phone=True, - ) - - def detect_objects_imageai(self, image_path, custom=True, min_prob=30): - """Localize objects in the local image. - - Args: - image_path: The path to the local file. - custom: If only detect user defined specific objects. - min_prob: Minimum probability that we trust as objects. 
- """ - img = cv2.imread(image_path) - if custom: - box_img, detections = self.imgai_client.detectCustomObjectsFromImage( - custom_objects=self.custom, - input_type="array", - input_image=img, - output_type="array", - minimum_percentage_probability=min_prob, - ) - else: - box_img, detections = self.imgai_client.detectObjectsFromImage( - input_type="array", - input_image=img, - output_type="array", - minimum_percentage_probability=min_prob, - ) - objects = objects_from_imageai(detections) - return objects - - def analyse_image_from_file(self, image_path): - """Localize objects in the local image. - - Args: - image_path: The path to the local file. - """ - objects = self.detect_objects_imageai(image_path) - return objects - - def analyse_image(self, subdict): - """Localize objects in the local image. - - Args: - subdict: The dictionary for an image expression instance. - """ - objects = self.analyse_image_from_file(subdict["filename"]) - for key in objects: - subdict[key] = objects[key] - - return subdict diff --git a/misinformation/test/data/example_objects_imageai.json b/misinformation/test/data/example_objects_imageai.json deleted file mode 100644 index 7aca40c..0000000 --- a/misinformation/test/data/example_objects_imageai.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "filename": "./test/data/IMG_2809.png", - "person": "yes", - "bicycle": "yes", - "car": "yes", - "motorcycle": "no", - "airplane": "no", - "bus": "yes", - "train": "no", - "truck": "no", - "boat": "no", - "traffic light": "no", - "cell phone": "no" -} \ No newline at end of file diff --git a/misinformation/test/pytest.ini b/misinformation/test/pytest.ini index c1d744a..1b5c3a7 100644 --- a/misinformation/test/pytest.ini +++ b/misinformation/test/pytest.ini @@ -1,4 +1,3 @@ [pytest] markers = - imageai: mark a test related to imageai. gcv: mark google cloud vision tests - skip to save money. 
\ No newline at end of file diff --git a/misinformation/test/test_objects.py b/misinformation/test/test_objects.py index 0845c05..c73312a 100644 --- a/misinformation/test/test_objects.py +++ b/misinformation/test/test_objects.py @@ -3,14 +3,11 @@ import pytest import misinformation.objects as ob import misinformation.objects_cvlib as ob_cvlib -# import misinformation.objects_imageai as ob_iai - OBJECT_1 = "cell phone" OBJECT_2 = "motorcycle" OBJECT_3 = "traffic light" TEST_IMAGE_1 = "./test/data/IMG_2809.png" JSON_1 = "./test/data/example_objects_cvlib.json" -JSON_2 = "./test/data/example_objects_imageai.json" @pytest.fixture() @@ -77,54 +74,6 @@ def test_detect_objects_cvlib(): assert objs[key] == out_dict[key] -@pytest.mark.imageai -def test_objects_from_imageai(default_objects): - objects_list = [OBJECT_1, OBJECT_2, OBJECT_3] - objs_input = [ - {"name": OBJECT_1}, - {"name": OBJECT_2}, - {"name": OBJECT_3}, - ] - objects = ob_iai.objects_from_imageai(objs_input) # noqa: F821 - out_objects = default_objects - for obj in objects_list: - out_objects[obj] = "yes" - - assert str(objects) == str(out_objects) - - -@pytest.mark.imageai -def test_analyse_image_from_file_imageai(): - file_path = TEST_IMAGE_1 - objs = ob_iai.ObjectImageAI().analyse_image_from_file(file_path) # noqa: F821 - - with open(JSON_2, "r") as file: - out_dict = json.load(file) - for key in objs.keys(): - assert objs[key] == out_dict[key] - - -@pytest.mark.imageai -def test_detect_objects_imageai(): - file_path = TEST_IMAGE_1 - objs = ob_iai.ObjectImageAI().detect_objects_imageai(file_path) # noqa: F821 - - with open(JSON_2, "r") as file: - out_dict = json.load(file) - for key in objs.keys(): - assert objs[key] == out_dict[key] - - -@pytest.mark.imageai -def test_analyse_image_imageai(): - mydict = {"filename": TEST_IMAGE_1} - ob_iai.ObjectImageAI().analyse_image(mydict) # noqa: F821 - with open(JSON_2, "r") as file: - out_dict = json.load(file) - for key in mydict.keys(): - assert mydict[key] == 
out_dict[key] - - def test_set_keys(default_objects): mydict = {"filename": TEST_IMAGE_1} key_objs = ob.ObjectDetector(mydict).set_keys() diff --git a/misinformation/text.py b/misinformation/text.py index 3c5ab71..4aeccac 100644 --- a/misinformation/text.py +++ b/misinformation/text.py @@ -41,7 +41,7 @@ class TextDetector(utils.AnalysisMethod): # here check if text was found self.subdict = {"text": texts} if response.error.message: - raise Exception( + raise ValueError( "{}\nFor more info on error messages, check: " "https://cloud.google.com/apis/design/errors".format( response.error.message diff --git a/misinformation/utils.py b/misinformation/utils.py index b5fe19d..36c7690 100644 --- a/misinformation/utils.py +++ b/misinformation/utils.py @@ -75,8 +75,8 @@ def find_files(path=None, pattern="*.png", recursive=True, limit=20): def initialize_dict(filelist: list) -> dict: mydict = {} for img_path in filelist: - id = os.path.splitext(os.path.basename(img_path))[0] - mydict[id] = {"filename": img_path} + id_ = os.path.splitext(os.path.basename(img_path))[0] + mydict[id_] = {"filename": img_path} return mydict @@ -89,7 +89,6 @@ def append_data_to_dict(mydict: dict) -> dict: for subdict in mydict.values(): for key in subdict.keys(): outdict[key].append(subdict[key]) - # mydict = {key: [mydict[key] for mydict in dictlist] for key in dictlist[0]} return outdict diff --git a/notebooks/objects_expression.ipynb b/notebooks/objects_expression.ipynb index 6a92294..96e27e6 100644 --- a/notebooks/objects_expression.ipynb +++ b/notebooks/objects_expression.ipynb @@ -11,7 +11,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This notebooks shows some preliminary work on detecting objects expressions with cvliv and imageai. It is mainly meant to explore its capabilities and to decide on future research directions. 
We package our code into a `misinformation` package that is imported here:" + "This notebook shows some preliminary work on detecting objects expressions with cvlib. It is mainly meant to explore its capabilities and to decide on future research directions. We package our code into a `misinformation` package that is imported here:" ] }, { @@ -24,13 +24,6 @@ "import misinformation.objects as ob" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "ObjectDetector currently support 2 clinet types: CLIENT_CVLIB and CLIENT_IMAGEAI, default is CLIENT_CVLIB." - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -63,7 +56,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Detect objects with default client type: CLIENT_CVLIB." + "## Detect objects and directly write to csv" ] }, { @@ -129,6 +122,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "## Manually inspect what was detected\n", + "\n", "To check the analysis, you can inspect the analyzed elements here. Loading the results takes a moment, so please be patient. If you are sure of what you are doing." ] }, @@ -165,7 +160,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.9.5" }, "vscode": { "interpreter": {