зеркало из
https://github.com/ssciwr/AMMICO.git
synced 2025-10-28 20:54:14 +02:00
* colors expression by KMean algorithm * object detection by imageai * object detection by cvlib * add encapsulation of object detection * remove encapsulation of objdetect v0 * objects expression to dict * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * added imageai to requirements * add objects to dictionary * update for AnalysisMethod baseline * add objects dection support explore_analysis display * extend python version of misinf to allow imageai * account for older python * use global functionality for dict to csv convert * update for docker build * docker will build now but ipywidgets still not working * test code * include test data folder in repo * add some sample images * load cvs labels to dict * add test data * retrigger checks * add map to human coding * get orders from dict, missing dep * add module to test accuracy * retrigger checks * retrigger checks * now removing imageai * removed imageai * move labelmanager to analyse * multiple faces in mydict * fix pre-commit issues * map mydict * hide imageai * objects default using cvlib, isolate and disable imageai * correct python version * refactor faces tests * refactor objects tests * sonarcloud issues * refactor utils tests * address code smells * update readme * update notebook without imageai Co-authored-by: Ma Xianghe <825074348@qq.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: iulusoy <inga.ulusoy@uni-heidelberg.de>
110 строки
3.5 KiB
Python
110 строки
3.5 KiB
Python
import pandas as pd
|
|
import json
|
|
|
|
from misinformation import utils
|
|
from misinformation import faces
|
|
|
|
|
|
class LabelManager:
    """Load human-coded label data and map analysis results onto its variables.

    The constructor immediately calls :meth:`load`, which reads three files
    from paths relative to the current working directory: an Excel coding
    table, a CSV file with the label data and a JSON mapping file.
    """

    def __init__(self):
        # Coding table read from the Excel workbook by load().
        self.labels_code = None
        # Label data read from the CSV file by load().
        self.labels = None
        # Filtered counterparts, populated by filter_from_order().
        self.f_labels = None
        self.f_labels_code = None
        # Mapping read from the JSON file by load(); declared here so the
        # attribute always exists, like the other four.
        self.map = None
        self.load()

    def load(self):
        """Read the coding table, the label data and the JSON mapping from disk."""
        self.labels_code = pd.read_excel(
            "./misinformation/test/data/EUROPE_APRMAY20_data_variable_labels_coding.xlsx",
            sheet_name="variable_labels_codings",
        )
        self.labels = pd.read_csv(
            "./misinformation/test/data/Europe_APRMAY20data190722.csv",
            sep=",",
            decimal=".",
        )
        self.map = self.read_json("./misinformation/data/map_test_set.json")

    def read_json(self, name):
        """Return the parsed content of the JSON file at path ``name``.

        The file is decoded as UTF-8 (the JSON interchange encoding) instead
        of the platform default, so non-ASCII content loads identically on
        every system.

        Raises:
            OSError: if the file cannot be opened.
            json.JSONDecodeError: if the file does not contain valid JSON.
        """
        with open(name, encoding="utf-8") as f:
            return json.load(f)

    def get_orders(self):
        """Return the ``"order"`` value of every entry in the mapping."""
        return [entry["order"] for entry in self.map.values()]

    def filter_from_order(self, orders: list):
        """Restrict the coding table and the label data to the given orders.

        For each order, the value at row ``order - 1``, column index 1 of the
        coding table is lower-cased and used as a column name of the label
        data.  The results are stored in ``f_labels_code`` (coding-table rows
        whose ``"order"`` is in ``orders``) and ``f_labels`` (the selected
        label columns).

        NOTE(review): this presumes ``order`` is the 1-based row position in
        the coding table and that column 1 holds the variable name -- confirm
        against the spreadsheet.
        """
        cols = [self.labels_code.iloc[order - 1, 1].lower() for order in orders]

        self.f_labels_code = self.labels_code.loc[
            self.labels_code["order"].isin(orders)
        ]
        self.f_labels = self.labels[cols]

    def gen_dict(self):
        """Return the filtered label data as ``{pic_id: {column: value}}``.

        Returns an empty dict (after printing a notice) when
        :meth:`filter_from_order` has not been called yet.  The filtered data
        must contain a ``"pic_id"`` column; rows sharing a ``pic_id``
        overwrite each other, the last one wins.
        """
        if self.f_labels is None:
            print("No filtered labels found")
            return {}

        cols = self.f_labels.columns.tolist()
        return {
            row["pic_id"]: {col: row[col] for col in cols}
            for _, row in self.f_labels.iterrows()
        }

    def map_dict(self, mydict):
        """Translate analysis results into the variables named in the mapping.

        Args:
            mydict: ``{picture id: {analysis key: value}}``.  Each picture id
                must be a string of at least two characters.

        Returns:
            A dict keyed by picture id.  Every entry holds ``"id"`` (the
            picture id without its last two characters), ``"pic_order"`` (the
            last two characters, with a single leading ``"0"`` dropped),
            ``"pic_id"`` (the unchanged picture id) and one item per mapping
            key: ``1``/``0`` for ``"Bool"`` variables depending on whether the
            analysis value equals ``"value_mydict"``, otherwise the raw
            analysis value.
        """
        mapped_dict = {}
        for pic_id, subdict in mydict.items():
            mapped_subdict = {
                "id": pic_id[0:-2],
                "pic_order": pic_id[-1] if pic_id[-2] == "0" else pic_id[-2:],
                "pic_id": pic_id,
            }
            for key, spec in self.map.items():
                # which analysis key to look at and which value counts as "set"
                mydict_name = spec["variable_mydict"]
                mydict_value = spec["value_mydict"]
                # the value the analysis actually produced
                mydict_current = subdict[mydict_name]
                if spec["variable_coding"] != "Bool":
                    # Non-boolean variables carry the raw value; the equality
                    # test is skipped because it can fail for array-like values.
                    mapped_subdict[key] = mydict_current
                else:
                    mapped_subdict[key] = 1 if mydict_current == mydict_value else 0
            mapped_dict[pic_id] = mapped_subdict
        return mapped_dict
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo run: detect emotions on the cropped test images, dump the results
    # to CSV, then compare against the human-coded labels.
    # NOTE(review): the path below is absolute and specific to one developer
    # machine; the script only works where that directory exists.
    files = utils.find_files(
        path="/home/inga/projects/misinformation-project/misinformation/misinformation/test/data/Europe APRMAY20 visual data/cropped images"
    )
    mydict = utils.initialize_dict(files)

    # Replace every entry with the result of the face/emotion analysis.
    # The keys are snapshotted first because entries are reassigned in the loop.
    for image_id in list(mydict.keys()):
        detector = faces.EmotionDetector(mydict[image_id])
        mydict[image_id] = detector.analyse_image()

    # Write the analysis results to a CSV file in the working directory.
    outdict = utils.append_data_to_dict(mydict)
    df = utils.dump_df(outdict)
    df.to_csv("mydict_out.csv")

    # LabelManager reads the label files as soon as it is constructed.
    lm = LabelManager()
    # The orders of interest come from the mapping itself.
    orders = lm.get_orders()

    # Express the analysis results with the mapped variable names and values.
    mydict_map = lm.map_dict(mydict)
    print(mydict_map)

    # Orders 1-3 are always kept in addition to the mapped ones.
    lm.filter_from_order([1, 2, 3, *orders])
    labels = lm.gen_dict()
    print(labels)
|