Inga Ulusoy d4cda187e3
Facial expression dict (#24)
* convert into dict output

* faces class

* return cleaned dict

* empty methods that are required

* with dict updates

* with dominant emotion confidence as tuple

* multiple images

* update notebook
2022-08-17 23:00:46 +02:00

110 lines
3.2 KiB
Python

import glob
import os
from pandas import DataFrame
import pooch
class DownloadResource:
    """Wrapper around ``pooch.retrieve`` for data files downloaded on demand.

    Each instance registers itself in the class-level ``resources`` list
    when it is constructed. This allows all declared downloads to be
    prefetched through the CLI entry point
    ``misinformation_prefetch_models``.
    """

    # Registry of all resources defined so far, kept as a class variable so
    # that they can be prefetched from a CLI, e.g. to bundle them into a
    # Docker image.
    resources = []

    def __init__(self, **kwargs):
        # The keyword arguments are stored untouched and forwarded verbatim
        # to pooch.retrieve when the resource is requested.
        self.kwargs = kwargs
        DownloadResource.resources.append(self)

    def get(self):
        """Call ``pooch.retrieve`` with the stored arguments and return its result."""
        retrieve_args = self.kwargs
        return pooch.retrieve(**retrieve_args)
def misinformation_prefetch_models():
    """Fetch every resource registered in ``DownloadResource.resources``."""
    registered = DownloadResource.resources
    for resource in registered:
        resource.get()
class AnalysisMethod:
    """Common base class for all analysis methods.

    The ``set_keys`` and ``analyse_image`` methods defined here only raise
    ``NotImplementedError``; inheriting classes provide the real versions.
    """

    def __init__(self, subdict) -> None:
        # The dictionary is stored as passed in; no copy is made.
        self.subdict = subdict

    def set_keys(self):
        """Define the keys that will be set by the analysis."""
        raise NotImplementedError

    def analyse_image(self):
        """Run the analysis; not implemented in the base class."""
        raise NotImplementedError
def find_files(path=None, pattern="*.png", recursive=True, limit=20):
    """Find image files on the file system

    :param path:
        The base directory where we are looking for the images. Defaults
        to None, which uses the XDG data directory if set or the current
        working directory otherwise.
    :param pattern:
        The naming pattern that the filename should match. Defaults to
        "*.png". Can be used to allow other patterns or to only include
        specific prefixes or suffixes.
    :param recursive:
        Whether to recurse into subdirectories. If true and the pattern
        does not already contain a "**" component, files in the base
        directory itself and in all of its subdirectories are matched.
    :param limit:
        The maximum number of images to be found. Defaults to 20.
        To return all images, set to None.
    :return:
        A list of matching paths, in the order reported by ``glob``.
    """
    if path is None:
        path = os.environ.get("XDG_DATA_HOME", ".")

    # glob only descends into subdirectories through an explicit "**"
    # component; recursive=True on its own does not change which files a
    # plain "*.png" pattern matches. Patterns that already carry "**" are
    # left alone so the caller's intent is preserved.
    if recursive and "**" not in pattern:
        pattern = f"**/{pattern}"

    result = glob.glob(f"{path}/{pattern}", recursive=recursive)
    if limit is not None:
        result = result[:limit]
    return result
def initialize_dict(filelist: list) -> dict:
    """Create the nested result dictionary for a list of image paths.

    :param filelist:
        Paths of the image files.
    :return:
        A dictionary with one entry per file. The key is the file name
        without its directory and without its final extension; the value
        is a dictionary of the form ``{"filename": <path>}``. Files that
        share the same key overwrite each other (last one wins).
    """
    mydict = {}
    for img_path in filelist:
        # Derive the key from the base name only. Splitting the full path on
        # the first "." yields an empty key for paths such as "./img.png"
        # and truncates at any dot inside a directory or file name.
        image_id = os.path.splitext(os.path.basename(img_path))[0]
        mydict[image_id] = {"filename": img_path}
    return mydict
def append_data_to_dict(mydict: dict) -> dict:
    """Regroup a dictionary of sub-dictionaries into one list of values per key.

    :param mydict:
        Dictionary whose values are themselves dictionaries.
    :return:
        A dictionary that maps each key of the first sub-dictionary to the
        list of values found under that key across all sub-dictionaries,
        in iteration order of ``mydict``. An empty input gives an empty
        dictionary.
    :raises KeyError:
        If a later sub-dictionary contains a key that the first one
        does not have.
    """
    # The set of output keys is taken from the first entry. Falling back to
    # an empty dict avoids an IndexError when there are no entries at all.
    first_entry = next(iter(mydict.values()), {})
    outdict = {key: [] for key in first_entry}
    # Now append the values of every entry to the list of the matching key.
    for subdict in mydict.values():
        for key, value in subdict.items():
            outdict[key].append(value)
    return outdict
def dump_df(mydict: dict) -> DataFrame:
    """Convert the dictionary into a pandas ``DataFrame``.

    Thin wrapper around ``DataFrame.from_dict`` called with its default
    arguments.
    """
    frame = DataFrame.from_dict(mydict)
    return frame
if __name__ == "__main__":
    # Manual smoke test: collect the images, build the per-image dictionary,
    # regroup it by key and print the first rows of the resulting table.
    image_paths = find_files(
        path="/home/inga/projects/misinformation-project/misinformation/data/test_no_text/"
    )
    per_image = initialize_dict(image_paths)
    per_key = append_data_to_dict(per_image)
    frame = dump_df(per_key)
    print(frame.head(10))