* reduce code smells

* account for transl. version change

* remove print statements

* remove imageai
This commit is contained in:
Inga Ulusoy 2022-12-31 22:15:40 +01:00, committed by GitHub
parent 4744ed5256
commit b585097f19
No key matching this signature was found
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 10 additions and 372 deletions

.github/workflows/ci.yml (vendored)
View file

@@ -31,7 +31,7 @@ jobs:
- name: Run pytest
run: |
cd misinformation
python -m pytest -s -m "not (imageai or gcv)" --cov=. --cov-report=xml
python -m pytest -s -m "not gcv" --cov=. --cov-report=xml
- name: Upload coverage
if: matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.9'
uses: codecov/codecov-action@v3
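
Note: with imageai gone, the CI command above only deselects the Google Cloud Vision tests. A minimal, hypothetical sketch of a test that the `-m "not gcv"` filter skips (the test name is invented; the gcv marker itself is registered in pytest.ini further down):

import pytest

@pytest.mark.gcv
def test_gcv_analysis():
    # Deselected in CI by `pytest -m "not gcv"` to avoid paid Cloud Vision calls.
    ...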

View file

@@ -1,147 +0,0 @@
import pandas as pd
import json
from misinformation import utils
from misinformation import faces
class LabelManager:
def __init__(self):
self.labels_code = None
self.labels = None
self.f_labels = None
self.f_labels_code = None
self.load()
def load(self):
self.labels_code = pd.read_excel(
"./misinformation/test/data/EUROPE_APRMAY20_data_variable_labels_coding.xlsx",
sheet_name="variable_labels_codings",
)
self.labels = pd.read_csv(
"./misinformation/test/data/Europe_APRMAY20data190722.csv",
sep=",",
decimal=".",
)
self.map = self.read_json("./misinformation/data/map_test_set.json")
def read_json(self, name):
with open("{}".format(name)) as f:
mydict = json.load(f)
return mydict
def get_orders(self):
return [i["order"] for i in self.map.values()]
def filter_from_order(self, orders: list):
cols = []
for order in orders:
col = self.labels_code.iloc[order - 1, 1]
cols.append(col.lower())
self.f_labels_code = self.labels_code.loc[
self.labels_code["order"].isin(orders)
]
self.f_labels = self.labels[cols]
def gen_dict(self):
labels_dict = {}
if self.f_labels is None:
print("No filtered labels found")
return labels_dict
cols = self.f_labels.columns.tolist()
for index, row in self.f_labels.iterrows():
row_dict = {}
for col in cols:
row_dict[col] = row[col]
labels_dict[row["pic_id"]] = row_dict
return labels_dict
def map_dict(self, mydict):
mapped_dict = {}
for id, subdict in mydict.items():
mapped_subdict = {}
mapped_subdict["id"] = id[0:-2]
mapped_subdict["pic_order"] = id[-1] if id[-2] == "0" else id[-2::]
mapped_subdict["pic_id"] = id
for key in self.map.keys():
# get the key name
mydict_name = self.map[key]["variable_mydict"]
mydict_value = self.map[key]["value_mydict"]
# find out which value was set
mydict_current = subdict[mydict_name]
# now map to new key-value pair
mapped_subdict[key] = 1 if mydict_current == mydict_value else 0
# substitute the values that are not boolean
if self.map[key]["variable_coding"] != "Bool":
mapped_subdict[key] = mydict_current
# take only first value in lists - this applies to faces,
# reported are up to three in a list, we compare only the
# largest one here
if isinstance(mydict_current, list):
mapped_subdict[key] = 1 if mydict_current[0] == mydict_value else 0
# also cut out the likelihood for detected emotion
if isinstance(mydict_current[0], tuple):
mapped_subdict[key] = (
1 if mydict_current[0][0] == mydict_value else 0
)
mapped_dict[id] = mapped_subdict
return mapped_dict
if __name__ == "__main__":
files = utils.find_files(
path="/home/inga/projects/misinformation-project/misinformation/misinformation/test/data/Europe APRMAY20 visual data/cropped images",
limit=500,
)
mydict = utils.initialize_dict(files)
# analyze faces
image_ids = [key for key in mydict.keys()]
for i in image_ids:
mydict[i] = faces.EmotionDetector(mydict[i]).analyse_image()
outdict = utils.append_data_to_dict(mydict)
df = utils.dump_df(outdict)
# print(df.head(10))
df.to_csv("mydict_out.csv")
# example of LabelManager for loading csv data to dict
lm = LabelManager()
# get the desired label numbers automatically
orders = lm.get_orders()
# map mydict to the specified variable names and values
mydict_map = lm.map_dict(mydict)
lm.filter_from_order([1, 2, 3] + orders)
labels = lm.gen_dict()
comp = {}
for key in labels.keys():
if str(key) not in mydict_map:
print("Key {} not found.".format(key))
continue
print("ref: {}".format(labels[key]))
print("com: {}".format(mydict_map[str(key)]))
for subkey in labels[key]:
if type(labels[key][subkey]) != int:
continue
if type(mydict_map[str(key)][subkey]) != int:
continue
comp[subkey] = comp.get(subkey, 0) + abs(
labels[key][subkey] - mydict_map[str(key)][subkey]
)
print("summary: ")
# why v9_5a not there - bec reads in as float from the csv
print(comp)
# summary:
# {'v9_4': 42, 'v9_5b': 1579, 'v9_6': 229, 'v9_7': 45, 'v9_8': 39, 'v9_8a': 31, 'v9_9': 58, 'v9_10': 33, 'v9_11': 22, 'v9_12': 2, 'v9_13': 24, 'v11_3': 39}
# Important here is:
# Overall positive - 'v9_8': 39 deviations
# Overall negative - 'v9_9': 58
# happy - 'v9_8a': 31
# fear - 'v9_10': 33
# angry - 'v9_11': 22
# disgust - 'v9_12': 2
# sad - 'v9_13': 24
# respect of rules = wears mask - 'v11_3': 39
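
The removed LabelManager reads its mapping from ./misinformation/data/map_test_set.json into self.map; map_dict and filter_from_order consume the per-key fields of that file. A hedged sketch of one assumed entry, with the field names taken from the code above and the values invented purely for illustration:

# Hypothetical mapping entry; only the field names are grounded in the code above.
map_entry = {
    "v9_8a": {
        "order": 12,                    # column position consumed by get_orders/filter_from_order
        "variable_mydict": "emotion",   # key looked up in the per-image analysis subdict
        "value_mydict": "happy",        # value that maps to 1; anything else maps to 0
        "variable_coding": "Bool",      # non-"Bool" entries keep the raw value instead
    }
}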

View file

@@ -44,7 +44,6 @@ def draw_matches(matches, img1, img2, kp1, kp2):
M = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)[0]
# Draw detected template in scene image
# h, w = img1.shape
h = img1.shape[0]
w = img1.shape[1]
pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]).reshape(
@@ -54,8 +53,6 @@ def draw_matches(matches, img1, img2, kp1, kp2):
img2 = cv2.polylines(img2, [np.int32(dst)], True, 255, 3, cv2.LINE_AA)
# h1, w1 = img1.shape
# h2, w2 = img2.shape
h1 = img1.shape[0]
h2 = img2.shape[0]
w1 = img1.shape[1]
@@ -85,7 +82,6 @@ def draw_matches(matches, img1, img2, kp1, kp2):
def matching_points(img1, img2):
img1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
img2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
# sift = cv2.SIFT_create()
sift = cv2.xfeatures2d.SIFT_create()
kp1, des1 = sift.detectAndCompute(img1, None)
kp2, des2 = sift.detectAndCompute(img2, None)
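
A side note on the SIFT constructor kept here: cv2.xfeatures2d.SIFT_create() needs an opencv-contrib build, while OpenCV 4.4+ also exposes the commented-out cv2.SIFT_create() in the main module. A hedged fallback sketch if both builds must be supported:

import cv2

def create_sift():
    # Prefer the main-module constructor (OpenCV >= 4.4); fall back to contrib builds.
    if hasattr(cv2, "SIFT_create"):
        return cv2.SIFT_create()
    return cv2.xfeatures2d.SIFT_create()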

View file

@@ -253,17 +253,3 @@ class NocatchOutput(ipywidgets.Output):
def __exit__(self, *args, **kwargs):
super().__exit__(*args, **kwargs)
if __name__ == "__main__":
files = utils.find_files(
path="/home/inga/projects/misinformation-project/misinformation/data/test_no_text/"
)
# files = [
# "/home/inga/projects/misinformation-project/misinformation/data/test_no_text/102141_1_eng.png"
# ]
mydict = utils.initialize_dict(files)
image_ids = [key for key in mydict.keys()]
for i in image_ids:
mydict[i] = EmotionDetector(mydict[i]).analyse_image()
print(mydict)

View file

@@ -2,20 +2,13 @@ from misinformation.utils import AnalysisMethod
from misinformation.objects_cvlib import ObjectCVLib
from misinformation.objects_cvlib import init_default_objects
# from misinformation.objects_imageai import ObjectImageAI
class ObjectDetectorClient(AnalysisMethod):
def __init__(self):
# The detector is default to CVLib
# Here other libraries can be added
self.detector = ObjectCVLib()
def set_client_to_imageai(self):
# disable imageai temporarily
# self.detector = ObjectImageAI()
# maybe reactivate if new imageai release comes out
pass
def set_client_to_cvlib(self):
self.detector = ObjectCVLib()
@@ -46,7 +39,3 @@ class ObjectDetector(AnalysisMethod):
@staticmethod
def set_client_to_cvlib():
ObjectDetector.od_client.set_client_to_cvlib()
@staticmethod
def set_client_to_imageai():
ObjectDetector.od_client.set_client_to_imageai()
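
With set_client_to_imageai reduced to a no-op, ObjectDetectorClient always delegates to ObjectCVLib. A hedged usage sketch of that remaining path; the analyse_image(subdict) signature is assumed from the parallel ObjectImageAI class removed below:

from misinformation.objects_cvlib import ObjectCVLib

subdict = {"filename": "./test/data/IMG_2809.png"}
subdict = ObjectCVLib().analyse_image(subdict)
# subdict should now carry "yes"/"no" flags such as subdict["person"] or subdict["cell phone"].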

View file

@@ -1,114 +0,0 @@
from misinformation.utils import DownloadResource
from misinformation.objects_cvlib import ObjectsMethod
from misinformation.objects_cvlib import init_default_objects
from imageai.Detection import ObjectDetection
import cv2
import os
import pathlib
def objects_from_imageai(detections: list) -> dict:
objects = init_default_objects()
for obj in detections:
obj_name = obj["name"]
objects[obj_name] = "yes"
return objects
def objects_symlink_processor(name):
def _processor(fname, action, pooch):
if not os.path.exists(os.path.dirname(name)):
os.makedirs(os.path.dirname(name))
if not os.path.exists(name):
os.symlink(fname, name)
return fname
return _processor
pre_model_path = pathlib.Path.home().joinpath(
".misinformation", "objects", "resnet50_coco_best_v2.1.0.h5"
)
retina_objects_model = DownloadResource(
url="https://github.com/OlafenwaMoses/ImageAI/releases/download/essentials-v5/resnet50_coco_best_v2.1.0.h5/",
known_hash="sha256:6518ad56a0cca4d1bd8cbba268dd4e299c7633efe7d15902d5acbb0ba180027c",
processor=objects_symlink_processor(pre_model_path),
)
class ObjectImageAI(ObjectsMethod):
def __init__(self):
# init imageai client
retina_objects_model.get()
if not os.path.exists(pre_model_path):
print("Download retina objects model failed.")
return
self.imgai_client = ObjectDetection()
self.imgai_client.setModelTypeAsRetinaNet()
self.imgai_client.setModelPath(pre_model_path)
self.imgai_client.loadModel()
self.custom = self.imgai_client.CustomObjects(
person=True,
bicycle=True,
car=True,
motorcycle=True,
airplane=True,
bus=True,
train=True,
truck=True,
boat=True,
traffic_light=True,
cell_phone=True,
)
def detect_objects_imageai(self, image_path, custom=True, min_prob=30):
"""Localize objects in the local image.
Args:
image_path: The path to the local file.
custom: If only detect user defined specific objects.
min_prob: Minimum probability that we trust as objects.
"""
img = cv2.imread(image_path)
if custom:
box_img, detections = self.imgai_client.detectCustomObjectsFromImage(
custom_objects=self.custom,
input_type="array",
input_image=img,
output_type="array",
minimum_percentage_probability=min_prob,
)
else:
box_img, detections = self.imgai_client.detectObjectsFromImage(
input_type="array",
input_image=img,
output_type="array",
minimum_percentage_probability=min_prob,
)
objects = objects_from_imageai(detections)
return objects
def analyse_image_from_file(self, image_path):
"""Localize objects in the local image.
Args:
image_path: The path to the local file.
"""
objects = self.detect_objects_imageai(image_path)
return objects
def analyse_image(self, subdict):
"""Localize objects in the local image.
Args:
subdict: The dictionary for an image expression instance.
"""
objects = self.analyse_image_from_file(subdict["filename"])
for key in objects:
subdict[key] = objects[key]
return subdict
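
The deleted module above wrapped imageai's RetinaNet detector behind the ObjectsMethod interface. For reference, a hedged sketch of the equivalent detection with cvlib, the backend the project keeps (the confidence value mirrors min_prob=30 and is illustrative only):

import cv2
import cvlib
from misinformation.objects_cvlib import init_default_objects

img = cv2.imread("./test/data/IMG_2809.png")
# cvlib returns parallel lists of bounding boxes, COCO labels and confidences.
bbox, labels, conf = cvlib.detect_common_objects(img, confidence=0.3)
objects = init_default_objects()
for name in labels:
    objects[name] = "yes"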

View file

@@ -1,14 +0,0 @@
{
"filename": "./test/data/IMG_2809.png",
"person": "yes",
"bicycle": "yes",
"car": "yes",
"motorcycle": "no",
"airplane": "no",
"bus": "yes",
"train": "no",
"truck": "no",
"boat": "no",
"traffic light": "no",
"cell phone": "no"
}

View file

@@ -1,4 +1,3 @@
[pytest]
markers =
- imageai: mark a test related to imageai.
gcv: mark google cloud vision tests - skip to save money.

View file

@@ -3,14 +3,11 @@ import pytest
import misinformation.objects as ob
import misinformation.objects_cvlib as ob_cvlib
# import misinformation.objects_imageai as ob_iai
OBJECT_1 = "cell phone"
OBJECT_2 = "motorcycle"
OBJECT_3 = "traffic light"
TEST_IMAGE_1 = "./test/data/IMG_2809.png"
JSON_1 = "./test/data/example_objects_cvlib.json"
JSON_2 = "./test/data/example_objects_imageai.json"
@pytest.fixture()
@@ -77,54 +74,6 @@ def test_detect_objects_cvlib():
assert objs[key] == out_dict[key]
@pytest.mark.imageai
def test_objects_from_imageai(default_objects):
objects_list = [OBJECT_1, OBJECT_2, OBJECT_3]
objs_input = [
{"name": OBJECT_1},
{"name": OBJECT_2},
{"name": OBJECT_3},
]
objects = ob_iai.objects_from_imageai(objs_input) # noqa: F821
out_objects = default_objects
for obj in objects_list:
out_objects[obj] = "yes"
assert str(objects) == str(out_objects)
@pytest.mark.imageai
def test_analyse_image_from_file_imageai():
file_path = TEST_IMAGE_1
objs = ob_iai.ObjectImageAI().analyse_image_from_file(file_path) # noqa: F821
with open(JSON_2, "r") as file:
out_dict = json.load(file)
for key in objs.keys():
assert objs[key] == out_dict[key]
@pytest.mark.imageai
def test_detect_objects_imageai():
file_path = TEST_IMAGE_1
objs = ob_iai.ObjectImageAI().detect_objects_imageai(file_path) # noqa: F821
with open(JSON_2, "r") as file:
out_dict = json.load(file)
for key in objs.keys():
assert objs[key] == out_dict[key]
@pytest.mark.imageai
def test_analyse_image_imageai():
mydict = {"filename": TEST_IMAGE_1}
ob_iai.ObjectImageAI().analyse_image(mydict) # noqa: F821
with open(JSON_2, "r") as file:
out_dict = json.load(file)
for key in mydict.keys():
assert mydict[key] == out_dict[key]
def test_set_keys(default_objects):
mydict = {"filename": TEST_IMAGE_1}
key_objs = ob.ObjectDetector(mydict).set_keys()

View file

@@ -41,7 +41,7 @@ class TextDetector(utils.AnalysisMethod):
# here check if text was found
self.subdict = {"text": texts}
if response.error.message:
- raise Exception(
+ raise ValueError(
"{}\nFor more info on error messages, check: "
"https://cloud.google.com/apis/design/errors".format(
response.error.message
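
For context, the changed raise sits in the Google Cloud Vision text-detection path. A hedged sketch of the typical call pattern around it, not the project's exact code, with an illustrative file path:

from google.cloud import vision

client = vision.ImageAnnotatorClient()
with open("image.png", "rb") as f:
    image = vision.Image(content=f.read())
response = client.text_detection(image=image)
texts = response.text_annotations
if response.error.message:
    # Raising ValueError instead of a bare Exception is the change in this hunk.
    raise ValueError(
        "{}\nFor more info on error messages, check: "
        "https://cloud.google.com/apis/design/errors".format(response.error.message)
    )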

View file

@@ -75,8 +75,8 @@ def find_files(path=None, pattern="*.png", recursive=True, limit=20):
def initialize_dict(filelist: list) -> dict:
mydict = {}
for img_path in filelist:
- id = os.path.splitext(os.path.basename(img_path))[0]
- mydict[id] = {"filename": img_path}
+ id_ = os.path.splitext(os.path.basename(img_path))[0]
+ mydict[id_] = {"filename": img_path}
return mydict
@@ -89,7 +89,6 @@ def append_data_to_dict(mydict: dict) -> dict:
for subdict in mydict.values():
for key in subdict.keys():
outdict[key].append(subdict[key])
- # mydict = {key: [mydict[key] for mydict in dictlist] for key in dictlist[0]}
return outdict
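
Read together with the __main__ blocks removed above, the intended use of these helpers is roughly the following pipeline (a hedged sketch assembled from calls shown elsewhere in this diff; the path, limit, and output filename are illustrative):

from misinformation import utils, faces

files = utils.find_files(path="./data/images", limit=20)
mydict = utils.initialize_dict(files)          # {"<image id>": {"filename": ...}, ...}
for image_id in list(mydict.keys()):
    mydict[image_id] = faces.EmotionDetector(mydict[image_id]).analyse_image()
outdict = utils.append_data_to_dict(mydict)    # one list per key, ready for a DataFrame
utils.dump_df(outdict).to_csv("analysis_out.csv")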

notebooks/objects_expression.ipynb (generated)
View file

@@ -11,7 +11,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebooks shows some preliminary work on detecting objects expressions with cvliv and imageai. It is mainly meant to explore its capabilities and to decide on future research directions. We package our code into a `misinformation` package that is imported here:"
"This notebooks shows some preliminary work on detecting objects expressions with cvlib. It is mainly meant to explore its capabilities and to decide on future research directions. We package our code into a `misinformation` package that is imported here:"
]
},
{
@@ -24,13 +24,6 @@
"import misinformation.objects as ob"
]
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "ObjectDetector currently support 2 clinet types: CLIENT_CVLIB and CLIENT_IMAGEAI, default is CLIENT_CVLIB."
- ]
- },
{
"cell_type": "markdown",
"metadata": {},
@@ -63,7 +56,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Detect objects with default client type: CLIENT_CVLIB."
"## Detect objects and directly write to csv"
]
},
{
@@ -129,6 +122,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Manually inspect what was detected\n",
"\n",
"To check the analysis, you can inspect the analyzed elements here. Loading the results takes a moment, so please be patient. If you are sure of what you are doing."
]
},
@@ -165,7 +160,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.9.5"
},
"vscode": {
"interpreter": {