diff --git a/.gitignore b/.gitignore index 11ec5ce..8090558 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,3 @@ -data - # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 60525ab..98f1e2b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/kynan/nbstripout - rev: 0.6.0 + rev: 0.6.1 hooks: - id: nbstripout files: ".ipynb" @@ -17,6 +17,6 @@ repos: hooks: - id: flake8 - repo: https://github.com/s-weigand/flake8-nb - rev: v0.5.2 + rev: v0.5.0 hooks: - id: flake8-nb \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 01f265f..af5c26f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,8 +1,8 @@ -FROM jupyter/base-notebook:2022-06-06 +FROM jupyter/base-notebook # Install system dependencies for computer vision packages USER root -RUN apt update && apt install -y build-essential libgl1 libglib2.0-0 libsm6 libxrender1 libxext6 tesseract-ocr +RUN apt update && apt install -y build-essential libgl1 libglib2.0-0 libsm6 libxrender1 libxext6 USER $NB_USER # Copy the repository into the container @@ -24,6 +24,9 @@ ENV XDG_DATA_HOME=/opt/misinformation/data RUN rm -rf $HOME/work RUN cp /opt/misinformation/notebooks/*.ipynb $HOME +ARG GOOGLE_CREDS +ENV GOOGLE_APPLICATION_CREDENTIALS=credentials.json +RUN echo ${GOOGLE_CREDS} > $GOOGLE_APPLICATION_CREDENTIALS # Bundle the pre-built models (that are downloaded on demand) into the # Docker image. RUN misinformation_prefetch_models diff --git a/README.md b/README.md index c6f14c7..fca56de 100644 --- a/README.md +++ b/README.md @@ -12,4 +12,20 @@ Use the pre-processed social media posts (image files) and process to collect in 1. 5-Color analysis of the images This development will serve the fight to combat misinformation, by providing more comprehensive data about its content and techniques. 
-The ultimate goal of this project is to develop a computer-assisted toolset to investigate the content of disinformation campaigns worldwide. \ No newline at end of file +The ultimate goal of this project is to develop a computer-assisted toolset to investigate the content of disinformation campaigns worldwide. + +# Installation + +The `misinformation` package can be installed using pip: Navigate into your package folder `misinformation/` and execute +``` +pip install . +``` +This will install the package and its dependencies locally. + +# Usage + +There are sample notebooks in the `misinformation/notebooks` folder for you to explore the package usage: +1. Facial analysis: Use the notebook `facial_expressions.ipynb` to identify whether there are faces in the image, whether they are wearing masks, and, if they are not wearing masks, also the race, gender and dominant emotion. +1. Object analysis: Use the notebook `objects_expression.ipynb` to identify certain objects in the image. Currently, the following objects are being identified: person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, traffic light, cell phone. + +There are further notebooks that are currently of exploratory nature (`colors_expression` to identify certain colors in the image, `get-text-from-image` to extract text that is contained in an image). 
\ No newline at end of file diff --git a/misinformation/__init__.py b/misinformation/__init__.py index 1827151..dd3f43c 100644 --- a/misinformation/__init__.py +++ b/misinformation/__init__.py @@ -1,4 +1,8 @@ -from importlib import metadata +try: + from importlib import metadata +except ImportError: + # Running on pre-3.8 Python; use importlib-metadata package + import importlib_metadata as metadata # type: ignore # Export the version defined in project metadata @@ -6,4 +10,9 @@ __version__ = metadata.version(__package__) del metadata from misinformation.display import explore_analysis -from misinformation.utils import find_files +from misinformation.utils import ( + find_files, + initialize_dict, + append_data_to_dict, + dump_df, +) diff --git a/misinformation/accuracy.py b/misinformation/accuracy.py new file mode 100644 index 0000000..b9e47fa --- /dev/null +++ b/misinformation/accuracy.py @@ -0,0 +1,109 @@ +import pandas as pd +import json + +from misinformation import utils +from misinformation import faces + + +class LabelManager: + def __init__(self): + self.labels_code = None + self.labels = None + self.f_labels = None + self.f_labels_code = None + self.load() + + def load(self): + self.labels_code = pd.read_excel( + "./misinformation/test/data/EUROPE_APRMAY20_data_variable_labels_coding.xlsx", + sheet_name="variable_labels_codings", + ) + self.labels = pd.read_csv( + "./misinformation/test/data/Europe_APRMAY20data190722.csv", + sep=",", + decimal=".", + ) + self.map = self.read_json("./misinformation/data/map_test_set.json") + + def read_json(self, name): + with open("{}".format(name)) as f: + mydict = json.load(f) + return mydict + + def get_orders(self): + return [i["order"] for i in self.map.values()] + + def filter_from_order(self, orders: list): + cols = [] + for order in orders: + col = self.labels_code.iloc[order - 1, 1] + cols.append(col.lower()) + + self.f_labels_code = self.labels_code.loc[ + self.labels_code["order"].isin(orders) + ] + 
self.f_labels = self.labels[cols] + + def gen_dict(self): + labels_dict = {} + if self.f_labels is None: + print("No filtered labels found") + return labels_dict + + cols = self.f_labels.columns.tolist() + for index, row in self.f_labels.iterrows(): + row_dict = {} + for col in cols: + row_dict[col] = row[col] + labels_dict[row["pic_id"]] = row_dict + + return labels_dict + + def map_dict(self, mydict): + mapped_dict = {} + for id, subdict in mydict.items(): + mapped_subdict = {} + mapped_subdict["id"] = id[0:-2] + mapped_subdict["pic_order"] = id[-1] if id[-2] == "0" else id[-2::] + mapped_subdict["pic_id"] = id + for key in self.map.keys(): + # get the key name + mydict_name = self.map[key]["variable_mydict"] + mydict_value = self.map[key]["value_mydict"] + # find out which value was set + mydict_current = subdict[mydict_name] + # now map to new key-value pair + mapped_subdict[key] = 1 if mydict_current == mydict_value else 0 + # substitute the values that are not boolean + if self.map[key]["variable_coding"] != "Bool": + mapped_subdict[key] = mydict_current + mapped_dict[id] = mapped_subdict + return mapped_dict + + +if __name__ == "__main__": + files = utils.find_files( + path="/home/inga/projects/misinformation-project/misinformation/misinformation/test/data/Europe APRMAY20 visual data/cropped images" + ) + mydict = utils.initialize_dict(files) + # analyze faces + image_ids = [key for key in mydict.keys()] + for i in image_ids: + mydict[i] = faces.EmotionDetector(mydict[i]).analyse_image() + + outdict = utils.append_data_to_dict(mydict) + df = utils.dump_df(outdict) + # print(df.head(10)) + df.to_csv("mydict_out.csv") + + # example of LabelManager for loading csv data to dict + lm = LabelManager() + # get the desired label numbers automatically + orders = lm.get_orders() + # map mydict to the specified variable names and values + mydict_map = lm.map_dict(mydict) + print(mydict_map) + lm.filter_from_order([1, 2, 3] + orders) + + labels = lm.gen_dict() + 
print(labels) diff --git a/misinformation/data/map_test_set.json b/misinformation/data/map_test_set.json new file mode 100644 index 0000000..eea5690 --- /dev/null +++ b/misinformation/data/map_test_set.json @@ -0,0 +1,127 @@ +{ + "V9_4": { + "order": 169, + "variable_label": "4=PICTURE_SPECIFIC_VisualONLY", + "variable_explanation": "Person visible", + "variable_coding": "Bool", + "variable_comment": "Yes if there's someone shown", + "variable_mydict": "face", + "value_mydict": "Yes" + }, + "V9_5a": { + "order": 170, + "variable_label": "5a=PICTURE_SPECIFIC_VisualONLY", + "variable_explanation": "More than one person shown", + "variable_coding": "Bool", + "variable_comment": "Yes if there are several individuals who appear in the post (do not count profile pictures)", + "variable_mydict": "multiple_faces", + "value_mydict": "Yes" + }, + "V9_5b": { + "order": 171, + "variable_label": "5b=PICTURE_SPECIFIC_VisualONLY", + "variable_explanation": "How many people shown?", + "variable_coding": "Int", + "variable_comment": "If more than 15, put 99", + "variable_mydict": "no_faces", + "value_mydict": "0" + }, + "V9_6": { + "order": 172, + "variable_label": "6=PICTURE_SPECIFIC_VisualONLY", + "variable_explanation": "Face fully visible", + "variable_coding": "Bool", + "variable_comment": "Yes if you can see all their face (no mask on)", + "variable_mydict": "wears_mask", + "value_mydict": "No" + }, + "V9_7": { + "order": 173, + "variable_label": "7=PICTURE_SPECIFIC_VisualONLY", + "variable_explanation": "Face ONLY partially visible", + "variable_coding": "Bool", + "variable_comment": "Yes if you can only see part of their face, including when they are wearing a mask", + "variable_mydict": "wears_mask", + "value_mydict": "Yes" + }, + "V9_8": { + "order": 174, + "variable_label": "8=PICTURE_SPECIFIC_VisualONLY", + "variable_explanation": "Facial positive expression", + "variable_coding": "Bool", + "variable_comment": "Yes if they display some kind of positive facial expression 
(smiling, happy, relieved, hopeful etc.)", + "variable_mydict": "emotion (category)", + "value_mydict": "Positive" + }, + "V9_8a": { + "order": 175, + "variable_label": "8a=PICTURE_SPECIFIC_VisualONLY", + "variable_explanation": "Positive expression: happiness", + "variable_coding": "Bool", + "variable_comment": "Yes if they display happiness", + "variable_mydict": "emotion", + "value_mydict": "happy" + }, + "V9_9": { + "order": 176, + "variable_label": "9=PICTURE_SPECIFIC_VisualONLY", + "variable_explanation": "Facial negative expression", + "variable_coding": "Bool", + "variable_comment": "Yes if they display some kind of negative facial expression (crying, showing ager, fear, disgust etc.)", + "variable_mydict": "emotion (category)", + "value_mydict": "Negative" + }, + "V9_10": { + "order": 177, + "variable_label": "10=PICTURE_SPECIFIC_VisualONLY", + "variable_explanation": "Negative expression: anxiety", + "variable_coding": "Bool", + "variable_comment": "Yes if they show fear or anxiety. If you can't tell, choose No=0", + "variable_mydict": "emotion", + "value_mydict": "fear" + }, + "V9_11": { + "order": 178, + "variable_label": "11=PICTURE_SPECIFIC_VisualONLY", + "variable_explanation": "Negative expression: anger", + "variable_coding": "Bool", + "variable_comment": "Yes if they show anger or outrage. If you can't tell, choose No=0", + "variable_mydict": "emotion", + "value_mydict": "angry" + }, + "V9_12": { + "order": 179, + "variable_label": "12=PICTURE_SPECIFIC_VisualONLY", + "variable_explanation": "Negative expression: disgust", + "variable_coding": "Bool", + "variable_comment": "Yes if they show disgust. 
If you can't tell, choose No=0", + "variable_mydict": "emotion", + "value_mydict": "disgust" + }, + "V9_13": { + "order": 180, + "variable_label": "13=PICTURE_SPECIFIC_VisualONLY", + "variable_explanation": "Negative expression: other, specify", + "variable_coding": "Bool", + "variable_comment": "Yes if they show any other negative emotion, please specify. If you can't tell, choose No=0", + "variable_mydict": "emotion", + "value_mydict": "sad" + }, + "V9_13_text": { + "order": 181, + "variable_label": "13=PICTURE_SPECIFIC_VisualONLY", + "variable_explanation": "Negative expression: other, specify", + "variable_coding": "Str", + "variable_mydict": "emotion", + "value_mydict": "" + }, + "V11_3": { + "order": 189, + "variable_label": "111_3=PICTURE_SPECIFIC_VisualONLY", + "variable_explanation": "Respect of the rules", + "variable_coding": "Bool", + "variable_comment": "Yes if the post shows mask wearing, vaccine taking, social distancing, any proof of respecting the rules", + "variable_mydict": "wears_mask", + "value_mydict": "Yes" + } +} \ No newline at end of file diff --git a/misinformation/display.py b/misinformation/display.py index 47df631..9ae13ee 100644 --- a/misinformation/display.py +++ b/misinformation/display.py @@ -3,6 +3,7 @@ from IPython.display import display import misinformation.faces as faces import misinformation.text as text +import misinformation.objects as objects class JSONContainer: @@ -22,6 +23,7 @@ def explore_analysis(mydict, identify="faces"): identify_dict = { "faces": faces.EmotionDetector, "text-on-image": text.TextDetector, + "objects": objects.ObjectDetector, } # create a list containing the image ids for the widget # image_paths = [mydict[key]["filename"] for key in mydict.keys()] diff --git a/misinformation/faces.py b/misinformation/faces.py index 08555b7..61ad546 100644 --- a/misinformation/faces.py +++ b/misinformation/faces.py @@ -86,6 +86,8 @@ class EmotionDetector(utils.AnalysisMethod): def set_keys(self) -> dict: params = { 
"face": "No", + "multiple_faces": "No", + "no_faces": 0, "wears_mask": ["No"], "age": [None], "gender": [None], @@ -145,7 +147,9 @@ class EmotionDetector(utils.AnalysisMethod): # Sort the faces by sight to prioritize prominent faces faces = list(reversed(sorted(faces, key=lambda f: f.shape[0] * f.shape[1]))) - self.subdict["face"] = "yes" + self.subdict["face"] = "Yes" + self.subdict["multiple_faces"] = "Yes" if len(faces) > 1 else "No" + self.subdict["no_faces"] = len(faces) if len(faces) <= 15 else 99 # note number of faces being identified result = {"number_faces": len(faces) if len(faces) <= 3 else 3} # We limit ourselves to three faces diff --git a/misinformation/objects.py b/misinformation/objects.py new file mode 100644 index 0000000..4657d94 --- /dev/null +++ b/misinformation/objects.py @@ -0,0 +1,52 @@ +from misinformation.utils import AnalysisMethod +from misinformation.objects_cvlib import ObjectCVLib +from misinformation.objects_cvlib import init_default_objects + +# from misinformation.objects_imageai import ObjectImageAI + + +class ObjectDetectorClient(AnalysisMethod): + def __init__(self): + # The detector is default to CVLib + self.detector = ObjectCVLib() + + def set_client_to_imageai(self): + # disable imageai temporarily + # self.detector = ObjectImageAI() + # maybe reactivate if new imageai release comes out + pass + + def set_client_to_cvlib(self): + self.detector = ObjectCVLib() + + def analyse_image(self, subdict=None): + """Localize objects in the local image. + + Args: + subdict: The dictionary for an image expression instance. 
+ """ + + return self.detector.analyse_image(subdict) + + +class ObjectDetector(AnalysisMethod): + od_client = ObjectDetectorClient() + + def __init__(self, subdict: dict): + super().__init__(subdict) + self.subdict.update(self.set_keys()) + + def set_keys(self): + return init_default_objects() + + def analyse_image(self): + self.subdict = ObjectDetector.od_client.analyse_image(self.subdict) + return self.subdict + + @staticmethod + def set_client_to_cvlib(): + ObjectDetector.od_client.set_client_to_cvlib() + + @staticmethod + def set_client_to_imageai(): + ObjectDetector.od_client.set_client_to_imageai() diff --git a/misinformation/objects_cvlib.py b/misinformation/objects_cvlib.py new file mode 100644 index 0000000..076aa77 --- /dev/null +++ b/misinformation/objects_cvlib.py @@ -0,0 +1,77 @@ +import cv2 +import cvlib as cv + + +def objects_from_cvlib(objects_list: list) -> dict: + objects = init_default_objects() + for key in objects: + if key in objects_list: + objects[key] = "yes" + return objects + + +def init_default_objects(): + objects = { + "person": "no", + "bicycle": "no", + "car": "no", + "motorcycle": "no", + "airplane": "no", + "bus": "no", + "train": "no", + "truck": "no", + "boat": "no", + "traffic light": "no", + "cell phone": "no", + } + return objects + + +class ObjectsMethod: + """Base class to be inherited by all objects methods.""" + + def __init__(self): + # initialize in child class + pass + + def analyse_image(self, subdict): + raise NotImplementedError() + + +class ObjectCVLib(ObjectsMethod): + def __init__(self, client_type=1): + # as long as imageai is not activated this remains empty + pass + + def detect_objects_cvlib(self, image_path): + """Localize objects in the local image. + + Args: + image_path: The path to the local file. 
+ """ + img = cv2.imread(image_path) + bbox, label, conf = cv.detect_common_objects(img) + # output_image = draw_bbox(im, bbox, label, conf) + objects = objects_from_cvlib(label) + return objects + + def analyse_image_from_file(self, image_path): + """Localize objects in the local image. + + Args: + image_path: The path to the local file. + """ + objects = self.detect_objects_cvlib(image_path) + return objects + + def analyse_image(self, subdict): + """Localize objects in the local image. + + Args: + subdict: The dictionary for an image expression instance. + """ + objects = self.analyse_image_from_file(subdict["filename"]) + for key in objects: + subdict[key] = objects[key] + + return subdict diff --git a/misinformation/objects_imageai.py b/misinformation/objects_imageai.py new file mode 100644 index 0000000..3f5aa78 --- /dev/null +++ b/misinformation/objects_imageai.py @@ -0,0 +1,114 @@ +from misinformation.utils import DownloadResource +from misinformation.objects_cvlib import ObjectsMethod +from misinformation.objects_cvlib import init_default_objects +from imageai.Detection import ObjectDetection + +import cv2 +import os +import pathlib + + +def objects_from_imageai(detections: list) -> dict: + objects = init_default_objects() + for obj in detections: + obj_name = obj["name"] + objects[obj_name] = "yes" + return objects + + +def objects_symlink_processor(name): + def _processor(fname, action, pooch): + if not os.path.exists(os.path.dirname(name)): + os.makedirs(os.path.dirname(name)) + + if not os.path.exists(name): + os.symlink(fname, name) + return fname + + return _processor + + +pre_model_path = pathlib.Path.home().joinpath( + ".misinformation", "objects", "resnet50_coco_best_v2.1.0.h5" +) + + +retina_objects_model = DownloadResource( + url="https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/resnet50_coco_best_v2.1.0.h5/", + known_hash="sha256:6518ad56a0cca4d1bd8cbba268dd4e299c7633efe7d15902d5acbb0ba180027c", + 
processor=objects_symlink_processor(pre_model_path), +) + + +class ObjectImageAI(ObjectsMethod): + def __init__(self): + # init imageai client + retina_objects_model.get() + if not os.path.exists(pre_model_path): + print("Download retina objects model failed.") + return + self.imgai_client = ObjectDetection() + self.imgai_client.setModelTypeAsRetinaNet() + self.imgai_client.setModelPath(pre_model_path) + self.imgai_client.loadModel() + self.custom = self.imgai_client.CustomObjects( + person=True, + bicycle=True, + car=True, + motorcycle=True, + airplane=True, + bus=True, + train=True, + truck=True, + boat=True, + traffic_light=True, + cell_phone=True, + ) + + def detect_objects_imageai(self, image_path, custom=True, min_prob=30): + """Localize objects in the local image. + + Args: + image_path: The path to the local file. + custom: If only detect user defined specific objects. + min_prob: Minimum probability that we trust as objects. + """ + img = cv2.imread(image_path) + if custom: + box_img, detections = self.imgai_client.detectCustomObjectsFromImage( + custom_objects=self.custom, + input_type="array", + input_image=img, + output_type="array", + minimum_percentage_probability=min_prob, + ) + else: + box_img, detections = self.imgai_client.detectObjectsFromImage( + input_type="array", + input_image=img, + output_type="array", + minimum_percentage_probability=min_prob, + ) + objects = objects_from_imageai(detections) + return objects + + def analyse_image_from_file(self, image_path): + """Localize objects in the local image. + + Args: + image_path: The path to the local file. + """ + objects = self.detect_objects_imageai(image_path) + return objects + + def analyse_image(self, subdict): + """Localize objects in the local image. + + Args: + subdict: The dictionary for an image expression instance. 
+ """ + objects = self.analyse_image_from_file(subdict["filename"]) + for key in objects: + subdict[key] = objects[key] + + return subdict diff --git a/misinformation/test/data/IMG_1730.png b/misinformation/test/data/IMG_1730.png new file mode 100644 index 0000000..7d131b2 Binary files /dev/null and b/misinformation/test/data/IMG_1730.png differ diff --git a/misinformation/test/data/IMG_2746.png b/misinformation/test/data/IMG_2746.png new file mode 100644 index 0000000..63d97d4 Binary files /dev/null and b/misinformation/test/data/IMG_2746.png differ diff --git a/misinformation/test/data/IMG_2750.png b/misinformation/test/data/IMG_2750.png new file mode 100644 index 0000000..ae7923f Binary files /dev/null and b/misinformation/test/data/IMG_2750.png differ diff --git a/misinformation/test/data/IMG_2805.png b/misinformation/test/data/IMG_2805.png new file mode 100644 index 0000000..28c4996 Binary files /dev/null and b/misinformation/test/data/IMG_2805.png differ diff --git a/misinformation/test/data/IMG_2806.png b/misinformation/test/data/IMG_2806.png new file mode 100644 index 0000000..e9670e8 Binary files /dev/null and b/misinformation/test/data/IMG_2806.png differ diff --git a/misinformation/test/data/IMG_2807.png b/misinformation/test/data/IMG_2807.png new file mode 100644 index 0000000..f68facd Binary files /dev/null and b/misinformation/test/data/IMG_2807.png differ diff --git a/misinformation/test/data/IMG_2808.png b/misinformation/test/data/IMG_2808.png new file mode 100644 index 0000000..da80a45 Binary files /dev/null and b/misinformation/test/data/IMG_2808.png differ diff --git a/misinformation/test/data/IMG_2809.png b/misinformation/test/data/IMG_2809.png new file mode 100644 index 0000000..689525e Binary files /dev/null and b/misinformation/test/data/IMG_2809.png differ diff --git a/misinformation/test/data/d755771b-225e-432f-802e-fb8dc850fff7.png b/misinformation/test/data/d755771b-225e-432f-802e-fb8dc850fff7.png new file mode 100644 index 
0000000..2bc2527 Binary files /dev/null and b/misinformation/test/data/d755771b-225e-432f-802e-fb8dc850fff7.png differ diff --git a/misinformation/test/data/example_append_data_to_dict_in.json b/misinformation/test/data/example_append_data_to_dict_in.json new file mode 100644 index 0000000..284e085 --- /dev/null +++ b/misinformation/test/data/example_append_data_to_dict_in.json @@ -0,0 +1,37 @@ +{"image01": + { + "filename": "./data/image01.jpg", + "person": "yes", + "bicycle": "no", + "car": "no", + "motorcycle": "no", + "airplane": "no", + "bus": "no", + "train": "no", + "truck": "no", + "boat": "no", + "traffic light": "no", + "cell phone": "yes", + "gender": "male", + "wears_mask": "no", + "race": "asian" + }, +"image02": + { + "filename": "./data/image02.jpg", + "person": "no", + "bicycle": "no", + "car": "yes", + "motorcycle": "no", + "airplane": "no", + "bus": "yes", + "train": "no", + "truck": "yes", + "boat": "no", + "traffic light": "yes", + "cell phone": "no", + "gender": "male", + "wears_mask": "no", + "race": "asian" + } +} \ No newline at end of file diff --git a/misinformation/test/data/example_append_data_to_dict_out.json b/misinformation/test/data/example_append_data_to_dict_out.json new file mode 100644 index 0000000..1a75369 --- /dev/null +++ b/misinformation/test/data/example_append_data_to_dict_out.json @@ -0,0 +1,17 @@ +{ + "filename": ["./data/image01.jpg", "./data/image02.jpg"], + "person": ["yes", "no"], + "bicycle": ["no", "no"], + "car": ["no", "yes"], + "motorcycle": ["no", "no"], + "airplane": ["no", "no"], + "bus": ["no", "yes"], + "train": ["no", "no"], + "truck": ["no", "yes"], + "boat": ["no", "no"], + "traffic light": ["no", "yes"], + "cell phone": ["yes", "no"], + "gender": ["male", "male"], + "wears_mask": ["no", "no"], + "race": ["asian", "asian"] +} \ No newline at end of file diff --git a/misinformation/test/data/example_dump_df.csv b/misinformation/test/data/example_dump_df.csv new file mode 100644 index 0000000..373c6f8 --- 
/dev/null +++ b/misinformation/test/data/example_dump_df.csv @@ -0,0 +1,3 @@ +,filename,person,bicycle,car,motorcycle,airplane,bus,train,truck,boat,traffic light,cell phone,gender,wears_mask,race +0,./data/image01.jpg,yes,no,no,no,no,no,no,no,no,no,yes,male,no,asian +1,./data/image02.jpg,no,no,yes,no,no,yes,no,yes,no,yes,no,male,no,asian diff --git a/misinformation/test/data/example_faces.json b/misinformation/test/data/example_faces.json new file mode 100644 index 0000000..cc44e77 --- /dev/null +++ b/misinformation/test/data/example_faces.json @@ -0,0 +1,12 @@ +{ + "filename": "./test/data/IMG_2746.png", + "face": "Yes", + "multiple_faces": "Yes", + "no_faces": 11, + "wears_mask": ["No", "No", "Yes"], + "age": [36, 35, 33], + "gender": ["Man", "Man", "Man"], + "race": ["white", "white", null], + "emotion": [["sad", 73.24264486090212], ["fear", 84.20093247879356], null], + "emotion (category)": ["Negative", "Negative", null] +} \ No newline at end of file diff --git a/misinformation/test/data/example_objects_cvlib.json b/misinformation/test/data/example_objects_cvlib.json new file mode 100644 index 0000000..17bed1e --- /dev/null +++ b/misinformation/test/data/example_objects_cvlib.json @@ -0,0 +1,14 @@ +{ + "filename": "./test/data/IMG_2809.png", + "person": "yes", + "bicycle": "no", + "car": "yes", + "motorcycle": "no", + "airplane": "no", + "bus": "yes", + "train": "no", + "truck": "no", + "boat": "no", + "traffic light": "no", + "cell phone": "no" +} \ No newline at end of file diff --git a/misinformation/test/data/example_objects_imageai.txt b/misinformation/test/data/example_objects_imageai.txt new file mode 100644 index 0000000..a4daa8e --- /dev/null +++ b/misinformation/test/data/example_objects_imageai.txt @@ -0,0 +1 @@ +{'image_objects': {'filename': './misinformation/test/data/IMG_2809.png', 'person': 'yes', 'bicycle': 'yes', 'car': 'yes', 'motorcycle': 'no', 'airplane': 'no', 'bus': 'yes', 'train': 'no', 'truck': 'no', 'boat': 'no', 'traffic light': 
'no', 'cell phone': 'no'}} \ No newline at end of file diff --git a/misinformation/test/data/example_utils_init_dict.json b/misinformation/test/data/example_utils_init_dict.json new file mode 100644 index 0000000..088ab6b --- /dev/null +++ b/misinformation/test/data/example_utils_init_dict.json @@ -0,0 +1,6 @@ +{ + "image_faces": { + "filename": "/test/data/image_faces.jpg"}, + "image_objects": + {"filename": "/test/data/image_objects.jpg"} +} \ No newline at end of file diff --git a/misinformation/test/test_faces.py b/misinformation/test/test_faces.py new file mode 100644 index 0000000..5d839b0 --- /dev/null +++ b/misinformation/test/test_faces.py @@ -0,0 +1,25 @@ +import misinformation.faces as fc +import json + + +def test_analyse_faces(): + mydict = { + "filename": "./test/data/IMG_2746.png", + } + mydict = fc.EmotionDetector(mydict).analyse_image() + print(mydict) + + with open("./test/data/example_faces.json", "r") as file: + out_dict = json.load(file) + + for key in mydict.keys(): + if key != "emotion": + assert mydict[key] == out_dict[key] + # json can't handle tuples natively + for i in range(0, len(mydict["emotion"])): + temp = ( + list(mydict["emotion"][i]) + if type(mydict["emotion"][i]) == tuple + else mydict["emotion"][i] + ) + assert temp == out_dict["emotion"][i] diff --git a/misinformation/test/test_objects.py b/misinformation/test/test_objects.py new file mode 100644 index 0000000..b15adb5 --- /dev/null +++ b/misinformation/test/test_objects.py @@ -0,0 +1,31 @@ +import json +import pytest +import misinformation +import misinformation.objects as ob +import misinformation.objects_cvlib as ob_cvlib + + +@pytest.fixture() +def default_objects(): + return ob.init_default_objects() + + +def test_objects_from_cvlib(default_objects): + objects_list = ["cell phone", "motorcycle", "traffic light"] + objects = ob_cvlib.objects_from_cvlib(objects_list) + out_objects = default_objects + for obj in objects_list: + out_objects[obj] = "yes" + + assert 
str(objects) == str(out_objects) + + +def test_analyse_image_cvlib(): + mydict = {"filename": "./test/data/IMG_2809.png"} + ob_cvlib.ObjectCVLib().analyse_image(mydict) + + with open("./test/data/example_objects_cvlib.json", "r") as file: + out_dict = json.load(file) + for key in mydict.keys(): + print(key) + assert mydict[key] == out_dict[key] diff --git a/misinformation/test/test_utils.py b/misinformation/test/test_utils.py new file mode 100644 index 0000000..ec0f9ff --- /dev/null +++ b/misinformation/test/test_utils.py @@ -0,0 +1,40 @@ +import json +import pandas as pd +import misinformation.utils as ut + + +def test_find_files(): + result = ut.find_files( + path="./test/data/", pattern="*.png", recursive=True, limit=10 + ) + assert len(result) > 0 + + +def test_initialize_dict(): + result = [ + "/test/data/image_faces.jpg", + "/test/data/image_objects.jpg", + ] + mydict = ut.initialize_dict(result) + with open("./test/data/example_utils_init_dict.json", "r") as file: + out_dict = json.load(file) + assert mydict == out_dict + + +def test_append_data_to_dict(): + with open("./test/data/example_append_data_to_dict_in.json", "r") as file: + mydict = json.load(file) + outdict = ut.append_data_to_dict(mydict) + print(outdict) + with open("./test/data/example_append_data_to_dict_out.json", "r") as file: + example_outdict = json.load(file) + + assert outdict == example_outdict + + +def test_dump_df(): + with open("./test/data/example_append_data_to_dict_out.json", "r") as file: + outdict = json.load(file) + df = ut.dump_df(outdict) + out_df = pd.read_csv("./test/data/example_dump_df.csv", index_col=[0]) + pd.testing.assert_frame_equal(df, out_df) diff --git a/misinformation/utils.py b/misinformation/utils.py index 3b5031f..cb59a42 100644 --- a/misinformation/utils.py +++ b/misinformation/utils.py @@ -1,109 +1,109 @@ -import glob -import os -from pandas import DataFrame -import pooch - - -class DownloadResource: - """A remote resource that needs on demand downloading - 
- We use this as a wrapper to the pooch library. The wrapper registers - each data file and allows prefetching through the CLI entry point - misinformation_prefetch_models. - """ - - # We store a list of defined resouces in a class variable, allowing - # us prefetching from a CLI e.g. to bundle into a Docker image - resources = [] - - def __init__(self, **kwargs): - DownloadResource.resources.append(self) - self.kwargs = kwargs - - def get(self): - return pooch.retrieve(**self.kwargs) - - -def misinformation_prefetch_models(): - """Prefetch all the download resources""" - for res in DownloadResource.resources: - res.get() - - -class AnalysisMethod: - """Base class to be inherited by all analysis methods.""" - - def __init__(self, subdict) -> None: - self.subdict = subdict - # define keys that will be set by the analysis - - def set_keys(self): - raise NotImplementedError() - - def analyse_image(self): - raise NotImplementedError() - - -def find_files(path=None, pattern="*.png", recursive=True, limit=20): - """Find image files on the file system - - :param path: - The base directory where we are looking for the images. Defaults - to None, which uses the XDG data directory if set or the current - working directory otherwise. - :param pattern: - The naming pattern that the filename should match. Defaults to - "*.png". Can be used to allow other patterns or to only include - specific prefixes or suffixes. - :param recursive: - Whether to recurse into subdirectories. - :param limit: - The maximum number of images to be found. Defaults to 20. - To return all images, set to None. 
- """ - if path is None: - path = os.environ.get("XDG_DATA_HOME", ".") - - result = list(glob.glob(f"{path}/{pattern}", recursive=recursive)) - - if limit is not None: - result = result[:limit] - - return result - - -def initialize_dict(filelist: list) -> dict: - mydict = {} - for img_path in filelist: - id = img_path.split(".")[0].split("/")[-1] - mydict[id] = {"filename": img_path} - return mydict - - -def append_data_to_dict(mydict: dict) -> dict: - """Append entries from list of dictionaries to keys in global dict.""" - - # first initialize empty list for each key that is present - outdict = {key: [] for key in list(mydict.values())[0].keys()} - # now append the values to each key in a list - for subdict in mydict.values(): - for key in subdict.keys(): - outdict[key].append(subdict[key]) - # mydict = {key: [mydict[key] for mydict in dictlist] for key in dictlist[0]} - return outdict - - -def dump_df(mydict: dict) -> DataFrame: - """Utility to dump the dictionary into a dataframe.""" - return DataFrame.from_dict(mydict) - - -if __name__ == "__main__": - files = find_files( - path="/home/inga/projects/misinformation-project/misinformation/data/test_no_text/" - ) - mydict = initialize_dict(files) - outdict = {} - outdict = append_data_to_dict(mydict) - df = dump_df(outdict) - print(df.head(10)) +import glob +import os +from pandas import DataFrame +import pooch + + +class DownloadResource: + """A remote resource that needs on demand downloading + + We use this as a wrapper to the pooch library. The wrapper registers + each data file and allows prefetching through the CLI entry point + misinformation_prefetch_models. + """ + + # We store a list of defined resouces in a class variable, allowing + # us prefetching from a CLI e.g. 
to bundle into a Docker image + resources = [] + + def __init__(self, **kwargs): + DownloadResource.resources.append(self) + self.kwargs = kwargs + + def get(self): + return pooch.retrieve(**self.kwargs) + + +def misinformation_prefetch_models(): + """Prefetch all the download resources""" + for res in DownloadResource.resources: + res.get() + + +class AnalysisMethod: + """Base class to be inherited by all analysis methods.""" + + def __init__(self, subdict) -> None: + self.subdict = subdict + # define keys that will be set by the analysis + + def set_keys(self): + raise NotImplementedError() + + def analyse_image(self): + raise NotImplementedError() + + +def find_files(path=None, pattern="*.png", recursive=True, limit=20): + """Find image files on the file system + + :param path: + The base directory where we are looking for the images. Defaults + to None, which uses the XDG data directory if set or the current + working directory otherwise. + :param pattern: + The naming pattern that the filename should match. Defaults to + "*.png". Can be used to allow other patterns or to only include + specific prefixes or suffixes. + :param recursive: + Whether to recurse into subdirectories. + :param limit: + The maximum number of images to be found. Defaults to 20. + To return all images, set to None. 
+ """ + if path is None: + path = os.environ.get("XDG_DATA_HOME", ".") + + result = list(glob.glob(f"{path}/{pattern}", recursive=recursive)) + + if limit is not None: + result = result[:limit] + + return result + + +def initialize_dict(filelist: list) -> dict: + mydict = {} + for img_path in filelist: + id = img_path.split(".")[0].split("/")[-1] + mydict[id] = {"filename": img_path} + return mydict + + +def append_data_to_dict(mydict: dict) -> dict: + """Append entries from list of dictionaries to keys in global dict.""" + + # first initialize empty list for each key that is present + outdict = {key: [] for key in list(mydict.values())[0].keys()} + # now append the values to each key in a list + for subdict in mydict.values(): + for key in subdict.keys(): + outdict[key].append(subdict[key]) + # mydict = {key: [mydict[key] for mydict in dictlist] for key in dictlist[0]} + return outdict + + +def dump_df(mydict: dict) -> DataFrame: + """Utility to dump the dictionary into a dataframe.""" + return DataFrame.from_dict(mydict) + + +if __name__ == "__main__": + files = find_files( + path="/home/inga/projects/misinformation-project/misinformation/data/test_no_text/" + ) + mydict = initialize_dict(files) + outdict = {} + outdict = append_data_to_dict(mydict) + df = dump_df(outdict) + print(df.head(10)) diff --git a/notebooks/colors_expression.ipynb b/notebooks/colors_expression.ipynb new file mode 100644 index 0000000..d058a25 --- /dev/null +++ b/notebooks/colors_expression.ipynb @@ -0,0 +1,152 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook shows primary color analysis of color image using K-Means algorithm.\n", + "The output are N primary colors and their corresponding percentage." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.cluster import KMeans\n", + "import matplotlib.pyplot as plt\n", + "import cv2\n", + "import numpy as np\n", + "import requests" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def centroid_histogram(clt):\n", + " # grab the number of different clusters and create a histogram\n", + " # based on the number of pixels assigned to each cluster\n", + " numLabels = np.arange(0, len(np.unique(clt.labels_)) + 1)\n", + " (hist, _) = np.histogram(clt.labels_, bins=numLabels)\n", + "\n", + " # normalize the histogram, such that it sums to one\n", + " hist = hist.astype(\"float\")\n", + " hist /= hist.sum()\n", + "\n", + " # return the histogram\n", + " return hist\n", + "\n", + "\n", + "def plot_colors(hist, centroids):\n", + " # initialize the bar chart representing the relative frequency\n", + " # of each of the colors\n", + " bar = np.zeros((50, 300, 3), dtype=\"uint8\")\n", + " startX = 0\n", + " # loop over the percentage of each cluster and the color of\n", + " # each cluster\n", + " for (percent, color) in zip(hist, centroids):\n", + " # plot the relative percentage of each cluster\n", + " endX = startX + (percent * 300)\n", + " cv2.rectangle(\n", + " bar, (int(startX), 0), (int(endX), 50), color.astype(\"uint8\").tolist(), -1\n", + " )\n", + " startX = endX\n", + "\n", + " # return the bar chart\n", + " return bar" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# load the image and convert it from BGR to RGB so that\n", + "# we can dispaly it with matplotlib\n", + "# image_path = './data/blue.jpg'\n", + "# image = cv2.imread(image_path)\n", + "\n", + "file = requests.get(\n", + " \"https://heibox.uni-heidelberg.de/thumbnail/537e6da0a8b44069bc96/1024/images/100361_asm.png\"\n", + ")\n", + "image = 
cv2.imdecode(np.fromstring(file.content, np.uint8), 1)\n", + "\n", + "# BGR-->RGB cv to matplotlib show\n", + "image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", + "\n", + "# show our image\n", + "plt.figure()\n", + "plt.axis(\"off\")\n", + "plt.imshow(image)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# reshape the image to be a list of pixels\n", + "image = image.reshape((image.shape[0] * image.shape[1], 3))\n", + "\n", + "# cluster the pixel intensities\n", + "clt = KMeans(n_clusters=8)\n", + "clt.fit(image)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# build a histogram of clusters and then create a figure\n", + "# representing the number of pixels labeled to each color\n", + "hist = centroid_histogram(clt)\n", + "bar = plot_colors(hist, clt.cluster_centers_)\n", + "\n", + "# show our color bart\n", + "plt.figure()\n", + "plt.axis(\"off\")\n", + "plt.imshow(bar)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for (percent, color) in zip(hist, clt.cluster_centers_):\n", + " print(\"color:\", color, \" percentage:\", percent)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/facial_expressions.ipynb b/notebooks/facial_expressions.ipynb index 8bfceda..3117ee7 100644 --- a/notebooks/facial_expressions.ipynb +++ b/notebooks/facial_expressions.ipynb @@ -42,7 +42,7 @@ "outputs": [], "source": [ "images = misinformation.find_files(\n", - " 
path=\"/home/jovyan/shared/data/test_no_text/\",\n", + " path=\"/home/inga/projects/misinformation-project/misinformation/data/test_no_text/\",\n", " limit=1000,\n", ")" ] diff --git a/notebooks/obj_dect_cvlib/image.jpg b/notebooks/obj_dect_cvlib/image.jpg new file mode 100644 index 0000000..7fbca16 Binary files /dev/null and b/notebooks/obj_dect_cvlib/image.jpg differ diff --git a/notebooks/obj_dect_cvlib/image02.jpg b/notebooks/obj_dect_cvlib/image02.jpg new file mode 100644 index 0000000..327319f Binary files /dev/null and b/notebooks/obj_dect_cvlib/image02.jpg differ diff --git a/notebooks/obj_dect_cvlib/objdect-cvlib.ipynb b/notebooks/obj_dect_cvlib/objdect-cvlib.ipynb new file mode 100644 index 0000000..796d6c8 --- /dev/null +++ b/notebooks/obj_dect_cvlib/objdect-cvlib.ipynb @@ -0,0 +1,103 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ImageAI for Object Detection\n", + "http://imageai.org/#features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A simple, high level, easy-to-use open source Computer Vision library for Python.\n", + "\n", + "It was developed with a focus on enabling easy and fast experimentation. Being able to go from an idea to prototype with least amount of delay is key to doing good research.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
cvlib detect_common_objects pretrained on coco dataset.
\n", + "Underneath it uses YOLOv3 model trained on COCO dataset capable of detecting 80 common objects in context." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import cv2\n", + "import matplotlib.pyplot as plt\n", + "import cvlib as cv\n", + "from cvlib.object_detection import draw_bbox" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "im = cv2.imread(\"image.jpg\")\n", + "\n", + "bbox, label, conf = cv.detect_common_objects(im)\n", + "\n", + "output_image = draw_bbox(im, bbox, label, conf)\n", + "\n", + "plt.imshow(output_image)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "im = cv2.imread(\"image02.jpg\")\n", + "\n", + "bbox, label, conf = cv.detect_common_objects(im)\n", + "\n", + "output_image = draw_bbox(im, bbox, label, conf)\n", + "\n", + "plt.imshow(output_image)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/obj_dect_cvlib/yolov3.txt b/notebooks/obj_dect_cvlib/yolov3.txt new file mode 100644 index 0000000..1f42c8e --- /dev/null +++ b/notebooks/obj_dect_cvlib/yolov3.txt @@ -0,0 +1,80 @@ +person +bicycle +car +motorcycle +airplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella 
+handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +couch +potted plant +bed +dining table +toilet +tv +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush \ No newline at end of file diff --git a/notebooks/obj_dect_imageai/image.jpg b/notebooks/obj_dect_imageai/image.jpg new file mode 100644 index 0000000..7fbca16 Binary files /dev/null and b/notebooks/obj_dect_imageai/image.jpg differ diff --git a/notebooks/obj_dect_imageai/imagenew.jpg b/notebooks/obj_dect_imageai/imagenew.jpg new file mode 100644 index 0000000..9d652bd Binary files /dev/null and b/notebooks/obj_dect_imageai/imagenew.jpg differ diff --git a/notebooks/obj_dect_imageai/obj_dect_imageai.ipynb b/notebooks/obj_dect_imageai/obj_dect_imageai.ipynb new file mode 100644 index 0000000..d533d4a --- /dev/null +++ b/notebooks/obj_dect_imageai/obj_dect_imageai.ipynb @@ -0,0 +1,147 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ImageAI for Object Detection\n", + "http://imageai.org/#features" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ImageAI provides API to recognize 1000 different objects in a picture using pre-trained models that were trained on the ImageNet-1000 dataset. The model implementations provided are SqueezeNet, ResNet, InceptionV3 and DenseNet.\n", + "\n", + "ImageAI provides API to detect, locate and identify 80 most common objects in everyday life in a picture using pre-trained models that were trained on the COCO Dataset. The model implementations provided include RetinaNet, YOLOv3 and TinyYOLOv3." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are 80 possible objects that you can detect with the\n", + "ObjectDetection class, and they are as seen below.\n", + "\n", + " person, bicycle, car, motorcycle, airplane,\n", + " bus, train, truck, boat, traffic light, fire hydrant, stop_sign,\n", + " parking meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra,\n", + " giraffe, backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard,\n", + " sports ball, kite, baseball bat, baseball glove, skateboard, surfboard, tennis racket,\n", + " bottle, wine glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange,\n", + " broccoli, carrot, hot dog, pizza, donut, cake, chair, couch, potted plant, bed,\n", + " dining table, toilet, tv, laptop, mouse, remote, keyboard, cell phone, microwave,\n", + " oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear, hair dryer,\n", + " toothbrush." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "requirements:
\n", + "tensorflow==1.15.0
\n", + "numpy==1.19.5
\n", + "scipy==1.4.1
\n", + "keras==2.1.0
\n", + "imageai==2.0.2
\n", + "\n", + "Or update to newest version, see https://github.com/OlafenwaMoses/ImageAI
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Download the RetinaNet model file for object detection\n", + "\n", + "https://github.com/OlafenwaMoses/ImageAI/releases/download/1.0/resnet50_coco_best_v2.0.1.h5" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from imageai.Detection import ObjectDetection\n", + "import matplotlib.pyplot as plt\n", + "import skimage.io\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "execution_path = os.getcwd()\n", + "\n", + "detector = ObjectDetection()\n", + "detector.setModelTypeAsRetinaNet()\n", + "detector.setModelPath(os.path.join(execution_path, \"resnet50_coco_best_v2.0.1.h5\"))\n", + "detector.loadModel()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "detections = detector.detectObjectsFromImage(\n", + " input_image=os.path.join(execution_path, \"image.jpg\"),\n", + " output_image_path=os.path.join(execution_path, \"imagenew.jpg\"),\n", + ")\n", + "\n", + "for eachObject in detections:\n", + " print(eachObject[\"name\"], \" : \", eachObject[\"percentage_probability\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "image = skimage.io.imread(\"image.jpg\")\n", + "imagenew = skimage.io.imread(\"imagenew.jpg\")\n", + "\n", + "_, axis = plt.subplots(1, 2)\n", + "axis[0].imshow(image, cmap=\"gray\")\n", + "axis[1].imshow(imagenew, cmap=\"gray\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": 
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/objects_expression.ipynb b/notebooks/objects_expression.ipynb new file mode 100644 index 0000000..2bc71db --- /dev/null +++ b/notebooks/objects_expression.ipynb @@ -0,0 +1,178 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Objects Expression recognition" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebooks shows some preliminary work on detecting objects expressions with cvliv and imageai. It is mainly meant to explore its capabilities and to decide on future research directions. We package our code into a `misinformation` package that is imported here:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import misinformation\n", + "import misinformation.objects as ob" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ObjectDetector currently support 2 clinet types: CLIENT_CVLIB and CLIENT_IMAGEAI, default is CLIENT_CVLIB." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set an image path as input file path." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "images = misinformation.find_files(\n", + " path=\"/home/inga/projects/misinformation-project/misinformation/data/test_no_text/\",\n", + " limit=1000,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mydict = misinformation.utils.initialize_dict(images)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Detect objects with default client type: CLIENT_CVLIB." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for key in mydict:\n", + " mydict[key] = ob.ObjectDetector(mydict[key]).analyse_image()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Convert the dictionary of dictionaries into a dictionary with lists:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "outdict = misinformation.utils.append_data_to_dict(mydict)\n", + "df = misinformation.utils.dump_df(outdict)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check the dataframe:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Write the CSV file:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_csv(\"./data_out.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To check the analysis, you can inspect the analyzed elements here. Loading the results takes a moment, so please be patient. If you are sure of what you are doing." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "misinformation.explore_analysis(mydict, identify=\"objects\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + }, + "vscode": { + "interpreter": { + "hash": "f1142466f556ab37fe2d38e2897a16796906208adb09fea90ba58bdf8a56f0ba" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pyproject.toml b/pyproject.toml index 7396dab..a83dd3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] requires = [ - "setuptools>=61", + "setuptools==61", ] build-backend = "setuptools.build_meta" @@ -13,7 +13,7 @@ maintainers = [ { name = "Inga Ulusoy", email = "ssc@iwr.uni-heidelberg.de" }, { name = "Dominic Kempf", email = "ssc@iwr.uni-heidelberg.de" }, ] -requires-python = ">=3.8" +requires-python = ">=3.9" license = { text = "MIT" } classifiers = [ "Programming Language :: Python :: 3", @@ -21,11 +21,20 @@ classifiers = [ "License :: OSI Approved :: MIT License", ] dependencies = [ - "deepface", - "ipywidgets >=8", - "pooch", - "retina-face", "google-cloud-vision", + "cvlib", + "deepface", + "ipywidgets", + "numpy", + "opencv_python", + "pandas", + "pooch", + "protobuf", + "retina_face", + "setuptools", + "tensorflow", + "keras", + "openpyxl", ] [project.scripts] diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index 8061b3a..0000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,11 +0,0 @@ -deepface -ipywidgets>=8 -pooch -retina-face -opencv-python 
-matplotlib -numpy -keras-ocr -tensorflow -google-cloud-vision -pytesseract \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 4823307..80c4370 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,14 @@ +google-cloud-vision +cvlib deepface -ipywidgets>=8 +ipywidgets +numpy +opencv_python +pandas pooch -retina-face -google-cloud-vision \ No newline at end of file +protobuf +retina_face +setuptools +tensorflow +keras +openpyxl \ No newline at end of file