зеркало из
				https://github.com/ssciwr/AMMICO.git
				synced 2025-10-30 21:46:04 +02:00 
			
		
		
		
	 37d07da98a
			
		
	
	
		37d07da98a
		
			
		
	
	
	
	
		
			
			* Create ci.yml * include pytest * Update pyproject.toml * include pytest-cov * use approx in pytest * Update test_faces.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add coverage yaml * reduce passing grade * use copy instead of symlink on windows * crude attempt at calculating deviations Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
		
			
				
	
	
		
			148 строки
		
	
	
		
			5.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			148 строки
		
	
	
		
			5.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import pandas as pd
 | |
| import json
 | |
| 
 | |
| from misinformation import utils
 | |
| from misinformation import faces
 | |
| 
 | |
| 
 | |
class LabelManager:
    """Load reference label tables and map analysis results onto them.

    On construction the manager reads three files (see ``load``):

    * ``labels_code`` - a table read from an Excel sheet; this class uses its
      ``"order"`` column and the value in its second column (lower-cased) as
      the column name to select from ``labels``.
    * ``labels`` - a table read from a CSV file.
    * ``map`` - a JSON object whose values are dicts with the keys
      ``"order"``, ``"variable_mydict"``, ``"value_mydict"`` and
      ``"variable_coding"``.

    ``f_labels`` and ``f_labels_code`` hold the filtered views produced by
    ``filter_from_order`` and stay ``None`` until that method is called.
    """

    # Default input locations, relative to the current working directory.
    DEFAULT_LABELS_CODE_PATH = (
        "./misinformation/test/data/EUROPE_APRMAY20_data_variable_labels_coding.xlsx"
    )
    DEFAULT_LABELS_PATH = "./misinformation/test/data/Europe_APRMAY20data190722.csv"
    DEFAULT_MAP_PATH = "./misinformation/data/map_test_set.json"

    def __init__(self):
        self.labels_code = None
        self.labels = None
        self.f_labels = None
        self.f_labels_code = None
        # Declared here so the attribute exists even before load() has run.
        self.map = None
        self.load()

    def load(self, labels_code_path=None, labels_path=None, map_path=None):
        """Read the coding table, the label table and the JSON mapping.

        Args:
            labels_code_path: Excel file with a ``variable_labels_codings``
                sheet. Defaults to ``DEFAULT_LABELS_CODE_PATH``.
            labels_path: comma-separated CSV file with the labels. Defaults
                to ``DEFAULT_LABELS_PATH``.
            map_path: JSON mapping file. Defaults to ``DEFAULT_MAP_PATH``.
        """
        if labels_code_path is None:
            labels_code_path = self.DEFAULT_LABELS_CODE_PATH
        if labels_path is None:
            labels_path = self.DEFAULT_LABELS_PATH
        if map_path is None:
            map_path = self.DEFAULT_MAP_PATH

        self.labels_code = pd.read_excel(
            labels_code_path,
            sheet_name="variable_labels_codings",
        )
        self.labels = pd.read_csv(
            labels_path,
            sep=",",
            decimal=".",
        )
        self.map = self.read_json(map_path)

    def read_json(self, name):
        """Return the parsed content of the JSON file at path ``name``."""
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default locale encoding.
        with open(name, encoding="utf-8") as f:
            return json.load(f)

    def get_orders(self):
        """Return the ``"order"`` entry of every item in the mapping."""
        return [entry["order"] for entry in self.map.values()]

    def filter_from_order(self, orders: list):
        """Restrict the label tables to the variables given by ``orders``.

        Each order ``o`` is looked up positionally: row ``o - 1``, second
        column of ``labels_code``; the lower-cased value is used as a column
        name of ``labels``. The results are stored in ``f_labels`` (selected
        columns of ``labels``) and ``f_labels_code`` (rows of ``labels_code``
        whose ``"order"`` value is in ``orders``).
        """
        cols = [self.labels_code.iloc[order - 1, 1].lower() for order in orders]

        self.f_labels_code = self.labels_code.loc[
            self.labels_code["order"].isin(orders)
        ]
        self.f_labels = self.labels[cols]

    def gen_dict(self):
        """Convert the filtered labels into ``{pic_id: {column: value}}``.

        Returns an empty dict (after printing a notice) when
        ``filter_from_order`` has not been called yet. The filtered table
        must contain a ``"pic_id"`` column; it provides the outer keys.
        """
        labels_dict = {}
        if self.f_labels is None:
            print("No filtered labels found")
            return labels_dict

        cols = self.f_labels.columns.tolist()
        # iterrows() is kept deliberately: it determines the scalar types of
        # the values that end up in the dict.
        for _, row in self.f_labels.iterrows():
            labels_dict[row["pic_id"]] = {col: row[col] for col in cols}

        return labels_dict

    @staticmethod
    def _map_value(current, expected, coding):
        """Translate one analysis value into its mapped representation."""
        if isinstance(current, list):
            # Only the first list entry is compared; an empty list has
            # nothing that could match.
            if not current:
                return 0
            first = current[0]
            # Entries may be tuples whose first item is the value of
            # interest; the remaining items are ignored.
            if isinstance(first, tuple):
                first = first[0]
            return 1 if first == expected else 0
        if coding != "Bool":
            # Non-boolean variables are passed through unchanged.
            return current
        return 1 if current == expected else 0

    def map_dict(self, mydict):
        """Map analysis results to the variable names defined in ``self.map``.

        Args:
            mydict: ``{pic_id: result_dict}`` where ``pic_id`` is a string of
                at least two characters and ``result_dict`` contains every
                ``"variable_mydict"`` name referenced by the mapping.

        Returns:
            ``{pic_id: mapped}`` where ``mapped`` holds ``"id"`` (``pic_id``
            without its last two characters), ``"pic_order"`` (the last two
            characters, or only the last one if the second to last is
            ``"0"``), ``"pic_id"``, and one entry per mapping key: ``1``/``0``
            for ``"Bool"`` variables and list values, the raw value
            otherwise.
        """
        mapped_dict = {}
        for pic_id, subdict in mydict.items():
            mapped_subdict = {
                "id": pic_id[0:-2],
                "pic_order": pic_id[-1] if pic_id[-2] == "0" else pic_id[-2:],
                "pic_id": pic_id,
            }
            for key, spec in self.map.items():
                mapped_subdict[key] = self._map_value(
                    subdict[spec["variable_mydict"]],
                    spec["value_mydict"],
                    spec["variable_coding"],
                )
            mapped_dict[pic_id] = mapped_subdict
        return mapped_dict
 | |
| 
 | |
| 
 | |
if __name__ == "__main__":
    # Gather the input images (capped at 500 files) and create one result
    # entry per file.
    files = utils.find_files(
        path="/home/inga/projects/misinformation-project/misinformation/misinformation/test/data/Europe APRMAY20 visual data/cropped images",
        limit=500,
    )
    mydict = utils.initialize_dict(files)

    # Run the face / emotion analysis on every entry. The keys are
    # snapshotted first because the entries are reassigned inside the loop.
    for image_id in list(mydict.keys()):
        detector = faces.EmotionDetector(mydict[image_id])
        mydict[image_id] = detector.analyse_image()

    # Persist the raw analysis output as CSV.
    outdict = utils.append_data_to_dict(mydict)
    df = utils.dump_df(outdict)
    df.to_csv("mydict_out.csv")

    # Load the reference labels, map the analysis output onto the same
    # variable names, and keep the first three label columns plus every
    # column referenced by the mapping.
    lm = LabelManager()
    orders = lm.get_orders()
    mydict_map = lm.map_dict(mydict)
    lm.filter_from_order([1, 2, 3] + orders)

    labels = lm.gen_dict()

    # Accumulate, per variable, the absolute deviation between reference and
    # computed value over all images present in both data sets.
    comp = {}
    for key, reference in labels.items():
        lookup = str(key)
        if lookup not in mydict_map:
            print("Key {} not found.".format(key))
            continue
        computed = mydict_map[lookup]
        print("ref: {}".format(reference))
        print("com: {}".format(computed))
        for subkey, ref_value in reference.items():
            # Only exact Python ints on both sides are compared; anything
            # else (e.g. floats read from the CSV) is skipped.
            if type(ref_value) is not int or type(computed[subkey]) is not int:
                continue
            comp[subkey] = comp.get(subkey, 0) + abs(ref_value - computed[subkey])

    print("summary: ")
    # v9_5a is missing from the summary because it is read in as float from
    # the csv and is therefore skipped by the int check above.
    print(comp)
    # Recorded summary of a previous run:
    # {'v9_4': 42, 'v9_5b': 1579, 'v9_6': 229, 'v9_7': 45, 'v9_8': 39, 'v9_8a': 31, 'v9_9': 58, 'v9_10': 33, 'v9_11': 22, 'v9_12': 2, 'v9_13': 24, 'v11_3': 39}
    # Important here is:
    # Overall positive - 'v9_8': 39 deviations
    # Overall negative - 'v9_9': 58
    # happy - 'v9_8a': 31
    # fear - 'v9_10': 33
    # angry - 'v9_11': 22
    # disgust - 'v9_12': 2
    # sad - 'v9_13': 24
    # respect of rules = wears mask - 'v11_3': 39
 |