xiaohemaikoo fdcb228294
M objdect (#23)
* colors expression by KMean algorithm

* object detection by imageai

* object detection by cvlib

* add encapsulation of object detection

* remove encapsulation of objdetect v0

* objects expression to dict

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* added imageai to requirements

* add objects to dictionary

* update for AnalysisMethod baseline

* add object detection support to the explore_analysis display

* extend python version of misinf to allow imageai

* account for older python

* use global functionality for dict to csv convert

* update for docker build

* docker will build now but ipywidgets still not working

* test code

* include test data folder in repo

* add some sample images

* load csv labels to dict

* add test data

* retrigger checks

* add map to human coding

* get orders from dict, missing dep

* add module to test accuracy

* retrigger checks

* retrigger checks

* now removing imageai

* removed imageai

* move labelmanager to analyse

* multiple faces in mydict

* fix pre-commit issues

* map mydict

* hide imageai

* objects default using cvlib, isolate and disable imageai

* correct python version

* refactor faces tests

* refactor objects tests

* sonarcloud issues

* refactor utils tests

* address code smells

* update readme

* update notebook without imageai

Co-authored-by: Ma Xianghe <825074348@qq.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: iulusoy <inga.ulusoy@uni-heidelberg.de>
2022-10-04 11:34:44 +02:00

110 строки
3.3 KiB
Python

import glob
import os
from pandas import DataFrame
import pooch
class DownloadResource:
    """Remote resource that is downloaded on demand through ``pooch``.

    Each instance remembers the keyword arguments it was created with and
    registers itself in the class-level ``resources`` list. The constructor
    does not download anything; ``get`` forwards the stored arguments to
    ``pooch.retrieve``. The registry allows prefetching through the CLI
    entry point ``misinformation_prefetch_models``.
    """

    # Registry of all defined resources, kept as a class variable so that
    # they can be prefetched from a CLI, e.g. to bundle them into a Docker
    # image.
    resources = []

    def __init__(self, **kwargs):
        """Store the keyword arguments for ``pooch.retrieve`` and register."""
        self.kwargs = kwargs
        DownloadResource.resources.append(self)

    def get(self):
        """Return ``pooch.retrieve`` called with the stored arguments."""
        retrieve_args = self.kwargs
        return pooch.retrieve(**retrieve_args)
def misinformation_prefetch_models():
    """Fetch every resource registered on ``DownloadResource``.

    Calls ``get()`` on each entry of ``DownloadResource.resources`` in the
    order the resources were registered; the return values are discarded.
    """
    registered = DownloadResource.resources
    for resource in registered:
        resource.get()
class AnalysisMethod:
    """Common base class from which all analysis methods inherit."""

    def __init__(self, subdict) -> None:
        """Keep a reference to ``subdict`` on the instance."""
        self.subdict = subdict

    def set_keys(self):
        """Define the keys that will be set by the analysis.

        Not implemented in the base class.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError

    def analyse_image(self):
        """Run the analysis; not implemented in the base class.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError
def find_files(path=None, pattern="*.png", recursive=True, limit=20):
    """Find image files on the file system.

    :param path:
        The base directory where we are looking for the images. Defaults
        to None, which uses the XDG data directory (``XDG_DATA_HOME``) if
        set or the current working directory otherwise.
    :param pattern:
        The naming pattern that the filename should match. Defaults to
        "*.png". Can be used to allow other patterns or to only include
        specific prefixes or suffixes. A pattern that already contains
        ``**`` is passed to ``glob`` unchanged.
    :param recursive:
        Whether to recurse into subdirectories. When true, files directly
        inside ``path`` are matched as well as files in any subdirectory.
    :param limit:
        The maximum number of images to be found. Defaults to 20.
        To return all images, set to None.
    :return:
        A list of matching paths, each starting with ``path``. The order
        is whatever ``glob`` yields and is not sorted.
    """
    if path is None:
        path = os.environ.get("XDG_DATA_HOME", ".")

    # glob only descends into subdirectories when the pattern itself
    # contains "**"; the ``recursive`` flag alone merely enables that
    # syntax. Insert it here so ``recursive=True`` does what it promises.
    if recursive and "**" not in pattern:
        search = f"{path}/**/{pattern}"
    else:
        search = f"{path}/{pattern}"

    result = glob.glob(search, recursive=recursive)
    if limit is not None:
        result = result[:limit]
    return result
def initialize_dict(filelist: list) -> dict:
    """Create the nested result dictionary for a list of image files.

    :param filelist:
        Paths of the image files.
    :return:
        A dict with one entry per file. The key is the file name without
        its directory and without its final extension; the value is a
        dict holding the original path under ``"filename"``. Files that
        share the same base name overwrite each other (last one wins).
    """
    mydict = {}
    for img_path in filelist:
        # Derive the id from the base name only. Splitting the whole path
        # at the first "." breaks for paths such as "./img.png" or
        # directories containing dots, and makes the ids collide.
        image_id = os.path.splitext(os.path.basename(img_path))[0]
        mydict[image_id] = {"filename": img_path}
    return mydict
def append_data_to_dict(mydict: dict) -> dict:
    """Collect the values of a nested dict into one list per inner key.

    :param mydict:
        Mapping of identifier to sub-dictionary. All sub-dictionaries are
        expected to share the keys of the first one.
    :return:
        A dict mapping every key of the first sub-dictionary to the list
        of values found under that key, in the iteration order of
        ``mydict``. An empty ``mydict`` yields an empty dict.
    :raises KeyError:
        If a later sub-dictionary contains a key that the first one lacks.
    """
    # Nothing to transpose; avoid indexing into an empty sequence.
    if not mydict:
        return {}

    # The first sub-dictionary defines the set of output keys.
    first = next(iter(mydict.values()))
    outdict = {key: [] for key in first}

    for subdict in mydict.values():
        for key, value in subdict.items():
            outdict[key].append(value)
    return outdict
def dump_df(mydict: dict) -> DataFrame:
    """Convert ``mydict`` into a pandas ``DataFrame`` via ``from_dict``."""
    frame = DataFrame.from_dict(mydict)
    return frame
if __name__ == "__main__":
    # Manual smoke test of the helpers above: find images, build the nested
    # dict, transpose it into per-key lists and print the first rows.
    # NOTE: the path below is specific to one developer's machine.
    files = find_files(
        path="/home/inga/projects/misinformation-project/misinformation/data/test_no_text/"
    )
    mydict = initialize_dict(files)
    outdict = append_data_to_dict(mydict)
    df = dump_df(outdict)
    print(df.head(10))