AMMICO/misinformation/accuracy.py
xiaohemaikoo fdcb228294
M objdect (#23)
* colors expression by KMean algorithm

* object detection by imageai

* object detection by cvlib

* add encapsulation of object detection

* remove encapsulation of objdetect v0

* objects expression to dict

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* added imageai to requirements

* add objects to dictionary

* update for AnalysisMethod baseline

* add object detection support to the explore_analysis display

* extend python version of misinf to allow imageai

* account for older python

* use global functionality for dict to csv convert

* update for docker build

* docker will build now but ipywidgets still not working

* test code

* include test data folder in repo

* add some sample images

* load csv labels to dict

* add test data

* retrigger checks

* add map to human coding

* get orders from dict, missing dep

* add module to test accuracy

* retrigger checks

* retrigger checks

* now removing imageai

* removed imageai

* move labelmanager to analyse

* multiple faces in mydict

* fix pre-commit issues

* map mydict

* hide imageai

* objects default using cvlib, isolate and disable imageai

* correct python version

* refactor faces tests

* refactor objects tests

* sonarcloud issues

* refactor utils tests

* address code smells

* update readme

* update notebook without imageai

Co-authored-by: Ma Xianghe <825074348@qq.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: iulusoy <inga.ulusoy@uni-heidelberg.de>
2022-10-04 11:34:44 +02:00

110 lines
3.5 KiB
Python

import pandas as pd
import json
from misinformation import utils
from misinformation import faces
class LabelManager:
    """Load human-coded labels and map analysis results onto their variables.

    Attributes:
        labels_code: DataFrame read from the ``variable_labels_codings`` sheet
            of the coding workbook.
        labels: DataFrame read from the labels CSV file.
        f_labels_code: Rows of ``labels_code`` selected by
            ``filter_from_order`` (``None`` until it has been called).
        f_labels: Columns of ``labels`` selected by ``filter_from_order``
            (``None`` until it has been called).
        map: Dict loaded from the JSON mapping file. Each value is read as a
            dict with the keys ``order``, ``variable_mydict``,
            ``variable_coding`` and, for ``"Bool"`` entries, ``value_mydict``.
    """

    # Default input locations; they are relative to the working directory.
    LABELS_CODE_PATH = (
        "./misinformation/test/data/"
        "EUROPE_APRMAY20_data_variable_labels_coding.xlsx"
    )
    LABELS_PATH = "./misinformation/test/data/Europe_APRMAY20data190722.csv"
    MAP_PATH = "./misinformation/data/map_test_set.json"

    def __init__(self, labels_code_path=None, labels_path=None, map_path=None):
        """Create the manager and immediately load all input files.

        Args:
            labels_code_path: Optional path of the coding workbook.
            labels_path: Optional path of the labels CSV file.
            map_path: Optional path of the JSON mapping file.

        Any path left as ``None`` falls back to the class-level default.
        """
        self.labels_code = None
        self.labels = None
        self.f_labels = None
        self.f_labels_code = None
        self.map = None
        self.load(labels_code_path, labels_path, map_path)

    def load(self, labels_code_path=None, labels_path=None, map_path=None):
        """Read the coding workbook, the labels CSV and the JSON mapping.

        Args:
            labels_code_path: Optional path of the coding workbook.
            labels_path: Optional path of the labels CSV file.
            map_path: Optional path of the JSON mapping file.

        Any path left as ``None`` falls back to the class-level default.
        """
        if labels_code_path is None:
            labels_code_path = self.LABELS_CODE_PATH
        if labels_path is None:
            labels_path = self.LABELS_PATH
        if map_path is None:
            map_path = self.MAP_PATH

        self.labels_code = pd.read_excel(
            labels_code_path,
            sheet_name="variable_labels_codings",
        )
        self.labels = pd.read_csv(
            labels_path,
            sep=",",
            decimal=".",
        )
        self.map = self.read_json(map_path)

    def read_json(self, name):
        """Return the parsed content of the JSON file ``name``."""
        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(name, encoding="utf-8") as f:
            return json.load(f)

    def get_orders(self):
        """Return the ``order`` value of every entry in the mapping."""
        return [entry["order"] for entry in self.map.values()]

    def filter_from_order(self, orders: list):
        """Select the label columns and coding rows for the given orders.

        Args:
            orders: Order numbers of the variables to keep.

        The variable name of an order is taken positionally from row
        ``order - 1``, second column, of ``labels_code`` and lower-cased to
        obtain the column name in ``labels``.
        NOTE(review): this assumes the coding sheet rows are sorted by order
        starting at 1 - confirm against the workbook.
        """
        cols = [self.labels_code.iloc[order - 1, 1].lower() for order in orders]
        self.f_labels_code = self.labels_code.loc[
            self.labels_code["order"].isin(orders)
        ]
        self.f_labels = self.labels[cols]

    def gen_dict(self):
        """Return the filtered labels as ``{pic_id: {column: value}}``.

        An empty dict is returned (and a message printed) when
        ``filter_from_order`` has not been called yet.

        Raises:
            KeyError: If ``pic_id`` is not among the filtered columns.
        """
        labels_dict = {}
        if self.f_labels is None:
            print("No filtered labels found")
            return labels_dict

        cols = self.f_labels.columns.tolist()
        for _, row in self.f_labels.iterrows():
            labels_dict[row["pic_id"]] = {col: row[col] for col in cols}
        return labels_dict

    def map_dict(self, mydict):
        """Translate analysis results into the variables of the mapping.

        Args:
            mydict: Dict of ``{picture id: result dict}``. Picture ids are
                sliced as strings and need at least two characters.

        Returns:
            Dict keyed by picture id. Each value holds ``id`` (the picture id
            without its last two characters), ``pic_order`` (the last two
            characters, or only the last one if the second to last is
            ``"0"``), ``pic_id`` and one entry per key of the mapping.

        Raises:
            KeyError: If a result dict lacks a variable named by the mapping.
        """
        mapped_dict = {}
        for pic_id, subdict in mydict.items():
            mapped_subdict = {
                "id": pic_id[0:-2],
                "pic_order": pic_id[-1] if pic_id[-2] == "0" else pic_id[-2:],
                "pic_id": pic_id,
            }
            for key, spec in self.map.items():
                # find out which value the analysis produced
                current = subdict[spec["variable_mydict"]]
                if spec["variable_coding"] == "Bool":
                    # boolean variables are coded 1 when the expected value
                    # was found and 0 otherwise
                    mapped_subdict[key] = (
                        1 if current == spec["value_mydict"] else 0
                    )
                else:
                    # every other coding passes the raw value through
                    mapped_subdict[key] = current
            mapped_dict[pic_id] = mapped_subdict
        return mapped_dict
if __name__ == "__main__":
    # Demo run: analyse the cropped test images and compare against the
    # human-coded labels.
    image_dir = "/home/inga/projects/misinformation-project/misinformation/misinformation/test/data/Europe APRMAY20 visual data/cropped images"
    files = utils.find_files(path=image_dir)
    mydict = utils.initialize_dict(files)

    # run the face analysis on every image; iterate over a snapshot of the
    # keys because the entries are replaced while looping
    for image_id in list(mydict):
        detector = faces.EmotionDetector(mydict[image_id])
        mydict[image_id] = detector.analyse_image()

    # flatten the per-image results and write them out as csv
    outdict = utils.append_data_to_dict(mydict)
    df = utils.dump_df(outdict)
    df.to_csv("mydict_out.csv")

    # LabelManager example: load the coded csv data into a dict
    lm = LabelManager()
    # the label numbers of interest come from the mapping file
    orders = lm.get_orders()

    # translate mydict to the coded variable names and values
    mydict_map = lm.map_dict(mydict)
    print(mydict_map)

    # orders 1, 2 and 3 are always requested in addition to the mapped ones
    lm.filter_from_order([1, 2, 3] + orders)
    labels = lm.gen_dict()
    print(labels)