Petr Andriushchenko a5c43b6488
Test debugging (#62)
* deleted lavis from utils

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fixed test_objects

* added 'not gcv' to CI

* fixed multimodal search and summary tests

* disable doc build on PR for now

* restrict ipywidgets version to avoid dummycomm error

* limit deepface version

* original repositories for retinaface lavis

* update gcv test results

* update display test outputs

* update test env

* run all tests

* wo xdist to avoid segfault

* remove widgets ref

* skip long-running tests

* skip long

* verbose codecov upload

* refactor summary test 2

* finish summary test refactor

* reduce memory overhead of SummaryDetector

* remove VQA models from self

* remove VQA models from self

* update notebook for changes

* update notebook for changes

* fixed multimodal search tests

* fixed tests in multimodal search after precommit

* run all tests

* update doc notebook for summary changes

* skip long-running multimodal

* exclude blip2 from testing

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Inga Ulusoy <inga.ulusoy@uni-heidelberg.de>
2023-03-30 10:33:05 +02:00

109 строки
3.2 KiB
Python

import glob
import os
from pandas import DataFrame
import pooch
class DownloadResource:
"""A remote resource that needs on demand downloading
We use this as a wrapper to the pooch library. The wrapper registers
each data file and allows prefetching through the CLI entry point
misinformation_prefetch_models.
"""
# We store a list of defined resouces in a class variable, allowing
# us prefetching from a CLI e.g. to bundle into a Docker image
resources = []
def __init__(self, **kwargs):
DownloadResource.resources.append(self)
self.kwargs = kwargs
def get(self):
return pooch.retrieve(**self.kwargs)
def misinformation_prefetch_models():
"""Prefetch all the download resources"""
for res in DownloadResource.resources:
res.get()
class AnalysisMethod:
"""Base class to be inherited by all analysis methods."""
def __init__(self, subdict) -> None:
self.subdict = subdict
# define keys that will be set by the analysis
def set_keys(self):
raise NotImplementedError()
def analyse_image(self):
raise NotImplementedError()
def find_files(path=None, pattern="*.png", recursive=True, limit=20):
"""Find image files on the file system
:param path:
The base directory where we are looking for the images. Defaults
to None, which uses the XDG data directory if set or the current
working directory otherwise.
:param pattern:
The naming pattern that the filename should match. Defaults to
"*.png". Can be used to allow other patterns or to only include
specific prefixes or suffixes.
:param recursive:
Whether to recurse into subdirectories.
:param limit:
The maximum number of images to be found. Defaults to 20.
To return all images, set to None.
"""
if path is None:
path = os.environ.get("XDG_DATA_HOME", ".")
result = list(glob.glob(f"{path}/{pattern}", recursive=recursive))
if limit is not None:
result = result[:limit]
return result
def initialize_dict(filelist: list) -> dict:
mydict = {}
for img_path in filelist:
id_ = os.path.splitext(os.path.basename(img_path))[0]
mydict[id_] = {"filename": img_path}
return mydict
def append_data_to_dict(mydict: dict) -> dict:
"""Append entries from list of dictionaries to keys in global dict."""
# first initialize empty list for each key that is present
outdict = {key: [] for key in list(mydict.values())[0].keys()}
# now append the values to each key in a list
for subdict in mydict.values():
for key in subdict.keys():
outdict[key].append(subdict[key])
return outdict
def dump_df(mydict: dict) -> DataFrame:
"""Utility to dump the dictionary into a dataframe."""
return DataFrame.from_dict(mydict)
if __name__ == "__main__":
files = find_files(
path="/home/inga/projects/misinformation-project/misinformation/data/test_no_text/"
)
mydict = initialize_dict(files)
outdict = {}
outdict = append_data_to_dict(mydict)
df = dump_df(outdict)
print(df.head(10))