* deleted lavis from utils

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fixed test_objects

* added 'not gcv' to CI

* fixed multimodal search and summary tests

* disable doc build on PR for now

* restrict ipywidgets version to avoid dummycomm error

* limit deepface version

* original repositories for retinaface lavis

* update gcv test results

* update display test outputs

* update test env

* run all tests

* wo xdist to avoid segfault

* remove widgets ref

* skip long-running tests

* skip long

* verbose codecov upload

* refactor summary test 2

* finish summary test refactor

* reduce memory overhead of SummaryDetector

* remove VQA models from self

* remove VQA models from self

* update notebook for changes

* update notebook for changes

* fixed multimodal search tests

* fixed tests in multimodal search after precommit

* run all tests

* update doc notebook for summary changes

* skip long-running multimodal

* exclude blip2 from testing

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Inga Ulusoy <inga.ulusoy@uni-heidelberg.de>
Этот коммит содержится в:
Petr Andriushchenko 2023-03-30 10:33:05 +02:00 коммит произвёл GitHub
родитель 0ca9366980
Коммит a5c43b6488
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
21 изменённых файлов: 280 добавлений и 524 удалений

5
.github/workflows/ci.yml поставляемый
Просмотреть файл

@ -14,7 +14,7 @@ jobs:
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
strategy: strategy:
matrix: matrix:
os: [ubuntu-22.04,windows-latest] os: [ubuntu-22.04]
python-version: [3.9] python-version: [3.9]
steps: steps:
- name: Checkout repository - name: Checkout repository
@ -32,10 +32,11 @@ jobs:
- name: Run pytest - name: Run pytest
run: | run: |
cd misinformation cd misinformation
python -m pytest --cov=. --cov-report=xml python -m pytest -m "not gcv" -svv --cov=. --cov-report=xml
- name: Upload coverage - name: Upload coverage
if: matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.9' if: matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.9'
uses: codecov/codecov-action@v3 uses: codecov/codecov-action@v3
with: with:
fail_ci_if_error: true fail_ci_if_error: true
files: misinformation/coverage.xml files: misinformation/coverage.xml
verbose: true

2
.github/workflows/docs.yml поставляемый
Просмотреть файл

@ -3,8 +3,6 @@ name: Pages
on: on:
push: push:
branches: [ main ] branches: [ main ]
pull_request:
branches: [ main ]
workflow_dispatch: workflow_dispatch:
jobs: jobs:

Просмотреть файл

@ -20,7 +20,6 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import misinformation\n",
"from misinformation import utils as mutils\n", "from misinformation import utils as mutils\n",
"from misinformation import display as mdisplay\n", "from misinformation import display as mdisplay\n",
"import misinformation.summary as sm" "import misinformation.summary as sm"
@ -74,7 +73,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"summary_model, summary_vis_processors = mutils.load_model(\"base\")\n", "obj = sm.SummaryDetector(mydict)\n",
"summary_model, summary_vis_processors = obj.load_model(\"base\")\n",
"# summary_model, summary_vis_processors = mutils.load_model(\"large\")" "# summary_model, summary_vis_processors = mutils.load_model(\"large\")"
] ]
}, },
@ -96,7 +96,7 @@
"tags": [] "tags": []
}, },
"source": [ "source": [
"Convert the dictionary of dictionarys into a dictionary with lists:" "Convert the dictionary of dictionaries into a dictionary with lists:"
] ]
}, },
{ {
@ -256,7 +256,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3 (ipykernel)", "display_name": "Python 3",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

Просмотреть файл

@ -141,7 +141,7 @@ class EmotionDetector(utils.AnalysisMethod):
DeepFace.analyze( DeepFace.analyze(
img_path=face, img_path=face,
actions=actions, actions=actions,
silent=True, prog_bar=False,
detector_backend="skip", detector_backend="skip",
) )
) )
@ -192,6 +192,10 @@ class EmotionDetector(utils.AnalysisMethod):
"Yes" if result[person]["wears_mask"] else "No" "Yes" if result[person]["wears_mask"] else "No"
) )
self.subdict["age"].append(result[person]["age"]) self.subdict["age"].append(result[person]["age"])
# gender is now reported as a list of dictionaries
# each dict represents one face
# each dict contains probability for Woman and Man
# take only the higher prob result for each dict
self.subdict["gender"].append(result[person]["gender"]) self.subdict["gender"].append(result[person]["gender"])
# race, emotion only detected if person does not wear mask # race, emotion only detected if person does not wear mask
if result[person]["wears_mask"]: if result[person]["wears_mask"]:

Просмотреть файл

@ -174,7 +174,7 @@ class MultimodalSearch(AnalysisMethod):
"Please, use one of the following models: blip2, blip, albef, clip_base, clip_vitl14, clip_vitl14_336" "Please, use one of the following models: blip2, blip, albef, clip_base, clip_vitl14, clip_vitl14_336"
) )
raw_images, images_tensors = MultimodalSearch.read_and_process_images( _, images_tensors = MultimodalSearch.read_and_process_images(
self, image_names, vis_processors self, image_names, vis_processors
) )
if path_to_saved_tensors is None: if path_to_saved_tensors is None:
@ -213,7 +213,7 @@ class MultimodalSearch(AnalysisMethod):
for query in search_query: for query in search_query:
if not (len(query) == 1) and (query in ("image", "text_input")): if not (len(query) == 1) and (query in ("image", "text_input")):
raise SyntaxError( raise SyntaxError(
'Each querry must contain either an "image" or a "text_input"' 'Each query must contain either an "image" or a "text_input"'
) )
multi_sample = [] multi_sample = []
for query in search_query: for query in search_query:

Просмотреть файл

@ -7,36 +7,28 @@ from lavis.models import load_model_and_preprocess
class SummaryDetector(AnalysisMethod): class SummaryDetector(AnalysisMethod):
def __init__(self, subdict: dict) -> None: def __init__(self, subdict: dict) -> None:
super().__init__(subdict) super().__init__(subdict)
self.summary_device = device("cuda" if cuda.is_available() else "cpu")
summary_device = device("cuda" if cuda.is_available() else "cpu")
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption",
model_type="base_coco",
is_eval=True,
device=summary_device,
)
def load_model_base(self): def load_model_base(self):
summary_device = device("cuda" if cuda.is_available() else "cpu")
summary_model, summary_vis_processors, _ = load_model_and_preprocess( summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption", name="blip_caption",
model_type="base_coco", model_type="base_coco",
is_eval=True, is_eval=True,
device=summary_device, device=self.summary_device,
) )
return summary_model, summary_vis_processors return summary_model, summary_vis_processors
def load_model_large(self): def load_model_large(self):
summary_device = device("cuda" if cuda.is_available() else "cpu")
summary_model, summary_vis_processors, _ = load_model_and_preprocess( summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption", name="blip_caption",
model_type="large_coco", model_type="large_coco",
is_eval=True, is_eval=True,
device=summary_device, device=self.summary_device,
) )
return summary_model, summary_vis_processors return summary_model, summary_vis_processors
def load_model(self, model_type): def load_model(self, model_type):
# self.summary_device = device("cuda" if cuda.is_available() else "cpu")
select_model = { select_model = {
"base": SummaryDetector.load_model_base, "base": SummaryDetector.load_model_base,
"large": SummaryDetector.load_model_large, "large": SummaryDetector.load_model_large,
@ -47,8 +39,7 @@ class SummaryDetector(AnalysisMethod):
def analyse_image(self, summary_model=None, summary_vis_processors=None): def analyse_image(self, summary_model=None, summary_vis_processors=None):
if summary_model is None and summary_vis_processors is None: if summary_model is None and summary_vis_processors is None:
summary_model = SummaryDetector.summary_model summary_model, summary_vis_processors = self.load_model_base()
summary_vis_processors = SummaryDetector.summary_vis_processors
path = self.subdict["filename"] path = self.subdict["filename"]
raw_image = Image.open(path).convert("RGB") raw_image = Image.open(path).convert("RGB")
@ -66,32 +57,33 @@ class SummaryDetector(AnalysisMethod):
) )
return self.subdict return self.subdict
(
summary_VQA_model,
summary_VQA_vis_processors,
summary_VQA_txt_processors,
) = load_model_and_preprocess(
name="blip_vqa", model_type="vqav2", is_eval=True, device=summary_device
)
def analyse_questions(self, list_of_questions): def analyse_questions(self, list_of_questions):
(
summary_VQA_model,
summary_VQA_vis_processors,
summary_VQA_txt_processors,
) = load_model_and_preprocess(
name="blip_vqa",
model_type="vqav2",
is_eval=True,
device=self.summary_device,
)
if len(list_of_questions) > 0: if len(list_of_questions) > 0:
path = self.subdict["filename"] path = self.subdict["filename"]
raw_image = Image.open(path).convert("RGB") raw_image = Image.open(path).convert("RGB")
image = ( image = (
self.summary_VQA_vis_processors["eval"](raw_image) summary_VQA_vis_processors["eval"](raw_image)
.unsqueeze(0) .unsqueeze(0)
.to(self.summary_device) .to(self.summary_device)
) )
question_batch = [] question_batch = []
for quest in list_of_questions: for quest in list_of_questions:
question_batch.append(self.summary_VQA_txt_processors["eval"](quest)) question_batch.append(summary_VQA_txt_processors["eval"](quest))
batch_size = len(list_of_questions) batch_size = len(list_of_questions)
image_batch = image.repeat(batch_size, 1, 1, 1) image_batch = image.repeat(batch_size, 1, 1, 1)
with no_grad(): with no_grad():
answers_batch = self.summary_VQA_model.predict_answers( answers_batch = summary_VQA_model.predict_answers(
samples={"image": image_batch, "text_input": question_batch}, samples={"image": image_batch, "text_input": question_batch},
inference_method="generate", inference_method="generate",
) )

Просмотреть файл

@ -1 +1 @@
{"IMG_2746": {"filename": "./test/data/IMG_2809.png", "person": "yes", "bicycle": "no", "car": "yes", "motorcycle": "no", "airplane": "no", "bus": "yes", "train": "no", "truck": "no", "boat": "no", "traffic light": "no", "cell phone": "no"}} {"person": "yes", "bicycle": "no", "car": "yes", "motorcycle": "no", "airplane": "no", "bus": "yes", "train": "no", "truck": "no", "boat": "no", "traffic light": "no", "cell phone": "no"}

Просмотреть файл

@ -1,5 +1,4 @@
{ {
"filename": "./test/data/IMG_2746.png",
"face": "Yes", "face": "Yes",
"multiple_faces": "Yes", "multiple_faces": "Yes",
"no_faces": 11, "no_faces": 11,

Просмотреть файл

@ -3,10 +3,10 @@ The Quantum Theory of
Nonrelativistic Collisions Nonrelativistic Collisions
JOHN R. TAYLOR JOHN R. TAYLOR
University of Colorado University of Colorado
postaldia Lanbidean ostaliga Lanbidean
1 ilde 1 ilde
ballenger stor goin ballenger stor goin
gd OOL, STVÍ 23 TL 02 gdĐOL, SIVI 23 TL 02
de in obl de in obl
och yd badalang och yd badalang
a a

Просмотреть файл

@ -3,12 +3,12 @@ The Quantum Theory of
Nonrelativistic Collisions Nonrelativistic Collisions
JOHN R. TAYLOR JOHN R. TAYLOR
University of Colorado University of Colorado
postaldia Lanbidean ostaliga Lanbidean
1 ilde 1 ilde
ballenger stor goin balloons big goin
gd OOL, STVÍ 23 TL 02 gdĐOL, SIVI 23 TL
de in obl there in obl
och yd badalang och yd change
a a
Ber Ber
ook Sy-RW enot go baldus ook Sy-RW isn't going anywhere

Просмотреть файл

@ -1,3 +1,4 @@
[pytest] [pytest]
markers = markers =
gcv: mark google cloud vision tests - skip to save money. gcv: mark google cloud vision tests - skip to save money.
long: mark long running tests - skip to save compute resources.

Просмотреть файл

@ -2,20 +2,20 @@ import misinformation.cropposts as crpo
import numpy as np import numpy as np
from PIL import Image from PIL import Image
TEST_IMAGE_1 = "./test/data/pic1.png" TEST_IMAGE_1 = "pic1.png"
TEST_IMAGE_2 = "./test/data/pic2.png" TEST_IMAGE_2 = "pic2.png"
def test_matching_points(): def test_matching_points(get_path):
ref_view = np.array(Image.open(TEST_IMAGE_2)) ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
view = np.array(Image.open(TEST_IMAGE_1)) view = np.array(Image.open(get_path + TEST_IMAGE_1))
filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view) filtered_matches, _, _ = crpo.matching_points(ref_view, view)
assert len(filtered_matches) > 0 assert len(filtered_matches) > 0
def test_kp_from_matches(): def test_kp_from_matches(get_path):
ref_view = np.array(Image.open(TEST_IMAGE_2)) ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
view = np.array(Image.open(TEST_IMAGE_1)) view = np.array(Image.open(get_path + TEST_IMAGE_1))
filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view) filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view)
kp1, kp2 = crpo.kp_from_matches(filtered_matches, kp1, kp2) kp1, kp2 = crpo.kp_from_matches(filtered_matches, kp1, kp2)
@ -25,9 +25,9 @@ def test_kp_from_matches():
assert kp2.shape[1] == 2 assert kp2.shape[1] == 2
def test_compute_crop_corner(): def test_compute_crop_corner(get_path):
ref_view = np.array(Image.open(TEST_IMAGE_2)) ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
view = np.array(Image.open(TEST_IMAGE_1)) view = np.array(Image.open(get_path + TEST_IMAGE_1))
filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view) filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view)
corner = crpo.compute_crop_corner(filtered_matches, kp1, kp2) corner = crpo.compute_crop_corner(filtered_matches, kp1, kp2)
print(view.shape) print(view.shape)
@ -38,9 +38,9 @@ def test_compute_crop_corner():
assert 0 <= h < view.shape[0] assert 0 <= h < view.shape[0]
def test_crop_posts_image(): def test_crop_posts_image(get_path):
ref_view = np.array(Image.open(TEST_IMAGE_2)) ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
view = np.array(Image.open(TEST_IMAGE_1)) view = np.array(Image.open(get_path + TEST_IMAGE_1))
rte = crpo.crop_posts_image(ref_view, view) rte = crpo.crop_posts_image(ref_view, view)
assert rte is not None assert rte is not None
crop_view, match_num = rte crop_view, match_num = rte
@ -48,16 +48,15 @@ def test_crop_posts_image():
assert crop_view.shape[0] * crop_view.shape[1] <= view.shape[0] * view.shape[1] assert crop_view.shape[0] * crop_view.shape[1] <= view.shape[0] * view.shape[1]
def test_crop_posts_from_refs(): def test_crop_posts_from_refs(get_path):
ref_view = np.array(Image.open(TEST_IMAGE_2)) ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
view = np.array(Image.open(TEST_IMAGE_1)) view = np.array(Image.open(get_path + TEST_IMAGE_1))
ref_views = [ref_view] ref_views = [ref_view]
crop_view = crpo.crop_posts_from_refs(ref_views, view) crop_view = crpo.crop_posts_from_refs(ref_views, view)
assert crop_view.shape[0] * crop_view.shape[1] <= view.shape[0] * view.shape[1] assert crop_view.shape[0] * crop_view.shape[1] <= view.shape[0] * view.shape[1]
def test_get_file_list(): def test_get_file_list(get_path):
ref_list = [] ref_list = []
ref_dir = "./test/data" ref_list = crpo.get_file_list(get_path, ref_list, ext="png")
ref_list = crpo.get_file_list(ref_dir, ref_list, ext="png")
assert len(ref_list) > 0 assert len(ref_list) > 0

Просмотреть файл

@ -1,27 +1,28 @@
import json import json
import misinformation.display as misinf_display
# import misinformation.display as misinf_display
import pytest
misinf_display = pytest.importorskip("misinformation.display")
def test_explore_analysis_faces(): def test_explore_analysis_faces(get_path):
mydict = {"IMG_2746": {"filename": "./test/data/IMG_2746.png"}} mydict = {"IMG_2746": {"filename": get_path + "IMG_2746.png"}}
misinf_display.explore_analysis(mydict, identify="faces") temp = misinf_display.explore_analysis(mydict, identify="faces") # noqa
with open("./test/data/example_faces.json", "r") as file: temp = None # noqa
with open(get_path + "example_faces.json", "r") as file:
outs = json.load(file) outs = json.load(file)
mydict["IMG_2746"].pop("filename", None)
for im_key in mydict.keys(): for im_key in mydict.keys():
sub_dict = mydict[im_key] sub_dict = mydict[im_key]
for key in sub_dict.keys(): for key in sub_dict.keys():
assert sub_dict[key] == outs[key] assert sub_dict[key] == outs[key]
def test_explore_analysis_objects(): def test_explore_analysis_objects(get_path):
mydict = {"IMG_2746": {"filename": "./test/data/IMG_2809.png"}} mydict = {"IMG_2809": {"filename": get_path + "IMG_2809.png"}}
misinf_display.explore_analysis(mydict, identify="objects") temp = misinf_display.explore_analysis(mydict, identify="objects") # noqa
with open("./test/data/example_analysis_objects.json", "r") as file: temp = None # noqa
with open(get_path + "example_analysis_objects.json", "r") as file:
outs = json.load(file) outs = json.load(file)
mydict["IMG_2809"].pop("filename", None)
assert str(mydict) == str(outs) for im_key in mydict.keys():
sub_dict = mydict[im_key]
for key in sub_dict.keys():
assert sub_dict[key] == outs[key]

Просмотреть файл

@ -1,16 +1,17 @@
import misinformation.faces as fc import misinformation.faces as fc
import json import json
from pytest import approx import pytest
def test_analyse_faces(): def test_analyse_faces(get_path):
mydict = { mydict = {
"filename": "./test/data/IMG_2746.png", "filename": get_path + "IMG_2746.png",
} }
mydict = fc.EmotionDetector(mydict).analyse_image() mydict.update(fc.EmotionDetector(mydict).analyse_image())
with open("./test/data/example_faces.json", "r") as file: with open(get_path + "example_faces.json", "r") as file:
out_dict = json.load(file) out_dict = json.load(file)
# delete the filename key
mydict.pop("filename", None)
for key in mydict.keys(): for key in mydict.keys():
assert mydict[key] == out_dict[key] assert mydict[key] == out_dict[key]

Просмотреть файл

@ -5,24 +5,13 @@ import numpy
from torch import device, cuda from torch import device, cuda
import misinformation.multimodal_search as ms import misinformation.multimodal_search as ms
testdict = { testdict = {
"d755771b-225e-432f-802e-fb8dc850fff7": {
"filename": "./test/data/d755771b-225e-432f-802e-fb8dc850fff7.png"
},
"IMG_2746": {"filename": "./test/data/IMG_2746.png"}, "IMG_2746": {"filename": "./test/data/IMG_2746.png"},
"IMG_2750": {"filename": "./test/data/IMG_2750.png"},
"IMG_2805": {"filename": "./test/data/IMG_2805.png"},
"IMG_2806": {"filename": "./test/data/IMG_2806.png"},
"IMG_2807": {"filename": "./test/data/IMG_2807.png"},
"IMG_2808": {"filename": "./test/data/IMG_2808.png"},
"IMG_2809": {"filename": "./test/data/IMG_2809.png"}, "IMG_2809": {"filename": "./test/data/IMG_2809.png"},
"IMG_3755": {"filename": "./test/data/IMG_3755.jpg"},
"IMG_3756": {"filename": "./test/data/IMG_3756.jpg"},
"IMG_3757": {"filename": "./test/data/IMG_3757.jpg"},
"pic1": {"filename": "./test/data/pic1.png"},
} }
related_error = 1e-3 related_error = 1e-2
gpu_is_not_available = not cuda.is_available() gpu_is_not_available = not cuda.is_available()
@ -38,39 +27,15 @@ def test_read_img():
pre_proc_pic_blip2_blip_albef = [ pre_proc_pic_blip2_blip_albef = [
-1.0039474964141846, -1.0039474964141846,
-1.0039474964141846, -1.0039474964141846,
-0.8433647751808167,
-0.6097899675369263,
-0.5951915383338928,
-0.6243883967399597,
-0.6827820539474487,
-0.6097899675369263,
-0.7119789123535156,
-1.0623412132263184,
] ]
pre_proc_pic_clip_vitl14 = [ pre_proc_pic_clip_vitl14 = [
-0.7995694875717163, -0.7995694875717163,
-0.7849710583686829, -0.7849710583686829,
-0.7849710583686829,
-0.7703726291656494,
-0.7703726291656494,
-0.7849710583686829,
-0.7849710583686829,
-0.7703726291656494,
-0.7703726291656494,
-0.7703726291656494,
] ]
pre_proc_pic_clip_vitl14_336 = [ pre_proc_pic_clip_vitl14_336 = [
-0.7995694875717163, -0.7995694875717163,
-0.7849710583686829, -0.7849710583686829,
-0.7849710583686829,
-0.7849710583686829,
-0.7849710583686829,
-0.7849710583686829,
-0.7849710583686829,
-0.9163569211959839,
-1.149931788444519,
-1.0039474964141846,
] ]
pre_proc_text_blip2_blip_albef = ( pre_proc_text_blip2_blip_albef = (
@ -84,293 +49,150 @@ pre_proc_text_clip_clip_vitl14_clip_vitl14_336 = (
pre_extracted_feature_img_blip2 = [ pre_extracted_feature_img_blip2 = [
0.04566730558872223, 0.04566730558872223,
-0.042554520070552826, -0.042554520070552826,
-0.06970272958278656,
-0.009771779179573059,
0.01446065679192543,
0.10173682868480682,
0.007092420011758804,
-0.020045937970280647,
0.12923966348171234,
0.006452132016420364,
] ]
pre_extracted_feature_img_blip = [ pre_extracted_feature_img_blip = [
-0.02480311505496502, -0.02480311505496502,
0.05037587881088257, 0.05037587881088257,
0.039517853409051895,
-0.06994109600782394,
-0.12886561453342438,
0.047039758414030075,
-0.11620642244815826,
-0.003398326924070716,
-0.07324369996786118,
0.06994668394327164,
] ]
pre_extracted_feature_img_albef = [ pre_extracted_feature_img_albef = [
0.08971136063337326, 0.08971136063337326,
-0.10915573686361313, -0.10915573686361313,
-0.020636577159166336,
0.048121627420186996,
-0.05943416804075241,
-0.129856139421463,
-0.0034469354432076216,
0.017888527363538742,
-0.03284582123160362,
-0.1037328764796257,
] ]
pre_extracted_feature_img_clip = [ pre_extracted_feature_img_clip = [
0.01621132344007492, 0.01621132344007492,
-0.004035486374050379, -0.004035486374050379,
-0.04304071143269539,
-0.03459808602929115,
0.016922621056437492,
-0.025056276470422745,
-0.04178355261683464,
0.02165347896516323,
-0.003224249929189682,
0.020485712215304375,
] ]
pre_extracted_feature_img_parsing_clip = [ pre_extracted_feature_img_parsing_clip = [
0.01621132344007492, 0.01621132344007492,
-0.004035486374050379, -0.004035486374050379,
-0.04304071143269539,
-0.03459808602929115,
0.016922621056437492,
-0.025056276470422745,
-0.04178355261683464,
0.02165347896516323,
-0.003224249929189682,
0.020485712215304375,
] ]
pre_extracted_feature_img_clip_vitl14 = [ pre_extracted_feature_img_clip_vitl14 = [
-0.023943455889821053, -0.023943455889821053,
-0.021703708916902542, -0.021703708916902542,
0.035043686628341675,
0.019495919346809387,
0.014351222664117813,
-0.008634116500616074,
0.01610446907579899,
-0.003426523646339774,
0.011931191198527813,
0.0008691544644534588,
] ]
pre_extracted_feature_img_clip_vitl14_336 = [ pre_extracted_feature_img_clip_vitl14_336 = [
-0.009511193260550499, -0.009511193260550499,
-0.012618942186236382, -0.012618942186236382,
0.034754861146211624,
0.016356879845261574,
-0.0011549904011189938,
-0.008054453879594803,
0.0011990377679467201,
-0.010806051082909107,
0.00140204350464046,
0.0006861367146484554,
] ]
pre_extracted_feature_text_blip2 = [ pre_extracted_feature_text_blip2 = [
-0.1384204626083374, -0.1384204626083374,
-0.008662976324558258, -0.008662976324558258,
0.006269007455557585,
0.03151319921016693,
0.060558050870895386,
-0.03230040520429611,
0.015861615538597107,
-0.11856459826231003,
-0.058296192437410355,
0.03699290752410889,
] ]
pre_extracted_feature_text_blip = [ pre_extracted_feature_text_blip = [
0.0118643119931221, 0.0118643119931221,
-0.01291718054562807, -0.01291718054562807,
-0.0009687161073088646,
0.01428765058517456,
-0.05591396614909172,
0.07386433333158493,
-0.11475936323404312,
0.01620068959891796,
0.0062415082938969135,
0.0034833091776818037,
] ]
pre_extracted_feature_text_albef = [ pre_extracted_feature_text_albef = [
-0.06229640915989876, -0.06229640915989876,
0.11278597265481949, 0.11278597265481949,
0.06628583371639252,
0.1649140566587448,
0.068987175822258,
0.006291372701525688,
0.03244050219655037,
-0.049556829035282135,
0.050752390176057816,
-0.0421440489590168,
] ]
pre_extracted_feature_text_clip = [ pre_extracted_feature_text_clip = [
0.018169036135077477, 0.018169036135077477,
0.03634127229452133, 0.03634127229452133,
0.025660742074251175,
0.009149895049631596,
-0.035570453852415085,
0.033126577734947205,
-0.004808237310498953,
-0.0031453112605959177,
-0.02194291725754738,
0.024019461125135422,
] ]
pre_extracted_feature_text_clip_vitl14 = [ pre_extracted_feature_text_clip_vitl14 = [
-0.0055463071912527084, -0.0055463071912527084,
0.006908962037414312, 0.006908962037414312,
-0.019450219348073006,
-0.018097277730703354,
0.017567576840519905,
-0.03828490898013115,
-0.03781530633568764,
-0.023951737210154533,
0.01365653332322836,
-0.02341713197529316,
] ]
pre_extracted_feature_text_clip_vitl14_336 = [ pre_extracted_feature_text_clip_vitl14_336 = [
-0.008720514364540577, -0.008720514364540577,
0.005284308455884457, 0.005284308455884457,
-0.021116750314831734,
-0.018112430348992348,
0.01685470901429653,
-0.03517491742968559,
-0.038612402975559235,
-0.021867064759135246,
0.01685977540910244,
-0.023832324892282486,
] ]
simularity_blip2 = [ simularity_blip2 = [
[0.05826476216316223, -0.03215287625789642], [0.05826476216316223, -0.02717375010251999],
[0.12869958579540253, 0.005234059877693653], [0.06297147274017334, 0.47339022159576416],
[0.11073512583971024, 0.12327003479003906],
[0.08743024617433548, 0.05598106235265732],
[0.04591086134314537, 0.48981112241744995],
[0.06297147274017334, 0.4728018641471863],
[0.18486255407333374, 0.635167121887207],
[0.015356295742094517, 0.015282897278666496],
[-0.008485622704029083, 0.010882291942834854],
[-0.04328630864620209, -0.13117870688438416],
[-0.025470387190580368, 0.13175423443317413],
[-0.05090826004743576, 0.05902523919939995],
] ]
sorted_blip2 = [ sorted_blip2 = [
[6, 1, 2, 3, 5, 0, 4, 7, 8, 10, 9, 11], [1, 0],
[6, 4, 5, 10, 2, 11, 3, 7, 8, 1, 0, 9], [1, 0],
] ]
simularity_blip = [ simularity_blip = [
[0.15640679001808167, 0.752173662185669], [0.15640679001808167, 0.752173662185669],
[0.15139800310134888, 0.7804810404777527],
[0.13010388612747192, 0.755257248878479],
[0.13746635615825653, 0.7618774175643921],
[0.1756758838891983, 0.8531903624534607],
[0.17233705520629883, 0.8448910117149353], [0.17233705520629883, 0.8448910117149353],
[0.1970970332622528, 0.8916105628013611],
[0.11693969368934631, 0.5833531618118286],
[0.12386563420295715, 0.5981853604316711],
[0.08427951484918594, 0.4962371587753296],
[0.14193706214427948, 0.7613846659660339],
[0.12051936239004135, 0.6492202281951904],
] ]
sorted_blip = [ sorted_blip = [
[6, 4, 5, 0, 1, 10, 3, 2, 8, 11, 7, 9], [1, 0],
[6, 4, 5, 1, 3, 10, 2, 0, 11, 8, 7, 9], [1, 0],
] ]
simularity_albef = [ simularity_albef = [
[0.12321824580430984, 0.35511350631713867], [0.12321824580430984, 0.35511350631713867],
[0.09512615948915482, 0.27168408036231995],
[0.09053325653076172, 0.20215675234794617],
[0.06335515528917313, 0.15055638551712036],
[0.09604836255311966, 0.4658776521682739],
[0.10870333760976791, 0.5143978595733643], [0.10870333760976791, 0.5143978595733643],
[0.11748822033405304, 0.6542638540267944],
[0.05688793584704399, 0.22170542180538177],
[0.05597608536481857, 0.11963296681642532],
[0.059643782675266266, 0.14969395101070404],
[0.06690303236246109, 0.3149859607219696],
[0.07909377664327621, 0.11911341547966003],
] ]
sorted_albef = [ sorted_albef = [
[0, 6, 5, 4, 1, 2, 11, 10, 3, 9, 7, 8], [0, 1],
[6, 5, 4, 0, 10, 1, 7, 2, 3, 9, 8, 11], [1, 0],
] ]
simularity_clip = [ simularity_clip = [
[0.23923014104366302, 0.5325412750244141], [0.23923014104366302, 0.5325412750244141],
[0.20101115107536316, 0.5112978219985962],
[0.17522737383842468, 0.49811851978302],
[0.20062290132045746, 0.5415266156196594],
[0.22865726053714752, 0.5762109756469727],
[0.2310466319322586, 0.5910375714302063], [0.2310466319322586, 0.5910375714302063],
[0.2644523084163666, 0.7851459383964539],
[0.21474510431289673, 0.4135811924934387],
[0.16407863795757294, 0.1474374681711197],
[0.19819433987140656, 0.26493316888809204],
[0.19545596837997437, 0.5007457137107849],
[0.1647854745388031, 0.45705708861351013],
] ]
sorted_clip = [ sorted_clip = [
[6, 0, 5, 4, 7, 1, 3, 9, 10, 2, 11, 8], [1, 0],
[6, 5, 4, 3, 0, 1, 10, 2, 11, 7, 9, 8], [1, 0],
] ]
simularity_clip_vitl14 = [ simularity_clip_vitl14 = [
[0.1051270067691803, 0.5184808373451233], [0.1051270067691803, 0.5184808373451233],
[0.09705893695354462, 0.49574509263038635],
[0.11964304000139236, 0.5424358248710632],
[0.13881900906562805, 0.5909714698791504],
[0.12728188931941986, 0.6758255362510681],
[0.1277746558189392, 0.6841973662376404], [0.1277746558189392, 0.6841973662376404],
[0.18026694655418396, 0.803142786026001],
[0.13977059721946716, 0.45957139134407043],
[0.11180847883224487, 0.24822194874286652],
[0.12296056002378464, 0.35143694281578064],
[0.11596094071865082, 0.5704031586647034],
[0.10174489766359329, 0.44422751665115356],
] ]
sorted_clip_vitl14 = [ sorted_clip_vitl14 = [
[6, 7, 3, 5, 4, 9, 2, 10, 8, 0, 11, 1], [1, 0],
[6, 5, 4, 3, 10, 2, 0, 1, 7, 11, 9, 8], [1, 0],
] ]
simularity_clip_vitl14_336 = [ simularity_clip_vitl14_336 = [
[0.09391091763973236, 0.49337542057037354], [0.09391091763973236, 0.49337542057037354],
[0.11103834211826324, 0.4881117343902588],
[0.12891019880771637, 0.5501476526260376],
[0.13288410007953644, 0.5498673915863037],
[0.12357455492019653, 0.6749162077903748],
[0.13700757920742035, 0.7003108263015747], [0.13700757920742035, 0.7003108263015747],
[0.1788637489080429, 0.7713702321052551],
[0.13260436058044434, 0.4300197660923004],
[0.11666625738143921, 0.2334875613451004],
[0.1316065937280655, 0.3291645646095276],
[0.12374477833509445, 0.5632147192955017],
[0.10333051532506943, 0.43023794889450073],
] ]
sorted_clip_vitl14_336 = [ sorted_clip_vitl14_336 = [
[6, 5, 3, 7, 9, 2, 10, 4, 8, 1, 11, 0], [1, 0],
[6, 5, 4, 10, 2, 3, 0, 1, 11, 7, 9, 8], [1, 0],
] ]
dict_itm_scores_for_blib = {
"blip_base": [
0.07107225805521011,
0.004100032616406679,
],
"blip_large": [
0.07890705019235611,
0.00271016638725996,
],
"blip2_coco": [
0.0833505243062973,
0.004216152708977461,
],
}
dict_image_gradcam_with_itm_for_blip = {
"blip_base": [123.36285799741745, 132.31662154197693, 53.38280035299249],
"blip_large": [119.99512910842896, 128.7044593691826, 55.552959859540515],
}
@pytest.mark.long
@pytest.mark.parametrize( @pytest.mark.parametrize(
( (
"pre_multimodal_device", "pre_multimodal_device",
@ -383,19 +205,29 @@ sorted_clip_vitl14_336 = [
"pre_sorted", "pre_sorted",
), ),
[ [
pytest.param( # (
device("cuda"), # device("cpu"),
"blip2", # "blip2",
pre_proc_pic_blip2_blip_albef, # pre_proc_pic_blip2_blip_albef,
pre_proc_text_blip2_blip_albef, # pre_proc_text_blip2_blip_albef,
pre_extracted_feature_img_blip2, # pre_extracted_feature_img_blip2,
pre_extracted_feature_text_blip2, # pre_extracted_feature_text_blip2,
simularity_blip2, # simularity_blip2,
sorted_blip2, # sorted_blip2,
marks=pytest.mark.skipif( # ),
gpu_is_not_available, reason="gpu_is_not_availible" # pytest.param(
), # device("cuda"),
), # "blip2",
# pre_proc_pic_blip2_blip_albef,
# pre_proc_text_blip2_blip_albef,
# pre_extracted_feature_img_blip2,
# pre_extracted_feature_text_blip2,
# simularity_blip2,
# sorted_blip2,
# marks=pytest.mark.skipif(
# gpu_is_not_available, reason="gpu_is_not_availible"
# ),
# ),
( (
device("cpu"), device("cpu"),
"blip", "blip",
@ -530,11 +362,11 @@ def test_parsing_images(
vis_processor, vis_processor,
txt_processor, txt_processor,
image_keys, image_keys,
image_names, _,
features_image_stacked, features_image_stacked,
) = ms.MultimodalSearch.parsing_images(testdict, pre_model) ) = ms.MultimodalSearch.parsing_images(testdict, pre_model)
for i, num in zip(range(10), features_image_stacked[0, 10:20].tolist()): for i, num in zip(range(10), features_image_stacked[0, 10:12].tolist()):
assert ( assert (
math.isclose(num, pre_extracted_feature_img[i], rel_tol=related_error) math.isclose(num, pre_extracted_feature_img[i], rel_tol=related_error)
is True is True
@ -549,7 +381,7 @@ def test_parsing_images(
) )
processed_text = txt_processor["eval"](test_querry) processed_text = txt_processor["eval"](test_querry)
for i, num in zip(range(10), processed_pic[0, 0, 0, 25:35].tolist()): for i, num in zip(range(10), processed_pic[0, 0, 0, 25:27].tolist()):
assert math.isclose(num, pre_proc_pic[i], rel_tol=related_error) is True assert math.isclose(num, pre_proc_pic[i], rel_tol=related_error) is True
assert processed_text == pre_proc_text assert processed_text == pre_proc_text
@ -562,13 +394,13 @@ def test_parsing_images(
testdict, search_query, model, txt_processor, vis_processor, pre_model testdict, search_query, model, txt_processor, vis_processor, pre_model
) )
for i, num in zip(range(10), multi_features_stacked[0, 10:20].tolist()): for i, num in zip(range(10), multi_features_stacked[0, 10:12].tolist()):
assert ( assert (
math.isclose(num, pre_extracted_feature_text[i], rel_tol=related_error) math.isclose(num, pre_extracted_feature_text[i], rel_tol=related_error)
is True is True
) )
for i, num in zip(range(10), multi_features_stacked[1, 10:20].tolist()): for i, num in zip(range(10), multi_features_stacked[1, 10:12].tolist()):
assert ( assert (
math.isclose(num, pre_extracted_feature_img[i], rel_tol=related_error) math.isclose(num, pre_extracted_feature_img[i], rel_tol=related_error)
is True is True
@ -590,16 +422,24 @@ def test_parsing_images(
search_query2, search_query2,
) )
for i, num in zip(range(12), similarity.tolist()): for i, num in zip(range(len(pre_simularity)), similarity.tolist()):
for j, num2 in zip(range(len(num)), num): for j, num2 in zip(range(len(num)), num):
assert ( assert (
math.isclose(num2, pre_simularity[i][j], rel_tol=100 * related_error) math.isclose(num2, pre_simularity[i][j], rel_tol=100 * related_error)
is True is True
) )
for i, num in zip(range(2), sorted_list): for i, num in zip(range(len(pre_sorted)), sorted_list):
for j, num2 in zip(range(2), num): for j, num2 in zip(range(2), num):
assert num2 == pre_sorted[i][j] assert num2 == pre_sorted[i][j]
del model, vis_processor, txt_processor del (
model,
vis_processor,
txt_processor,
similarity,
features_image_stacked,
processed_pic,
multi_features_stacked,
)
cuda.empty_cache() cuda.empty_cache()

Просмотреть файл

@ -31,6 +31,7 @@ def test_analyse_image_cvlib(get_path):
with open(get_path + JSON_1, "r") as file: with open(get_path + JSON_1, "r") as file:
out_dict = json.load(file) out_dict = json.load(file)
out_dict["filename"] = get_path + out_dict["filename"]
for key in mydict.keys(): for key in mydict.keys():
assert mydict[key] == out_dict[key] assert mydict[key] == out_dict[key]
@ -56,10 +57,11 @@ def test_init_default_objects():
def test_analyse_image_from_file_cvlib(get_path): def test_analyse_image_from_file_cvlib(get_path):
file_path = get_path + TEST_IMAGE_1 file_path = get_path + TEST_IMAGE_1
objs = ob_cvlib.ObjectCVLib().analyse_image_from_file(get_path + file_path) objs = ob_cvlib.ObjectCVLib().analyse_image_from_file(file_path)
with open(get_path + JSON_1, "r") as file: with open(get_path + JSON_1, "r") as file:
out_dict = json.load(file) out_dict = json.load(file)
out_dict["filename"] = get_path + out_dict["filename"]
for key in objs.keys(): for key in objs.keys():
assert objs[key] == out_dict[key] assert objs[key] == out_dict[key]
@ -86,5 +88,6 @@ def test_analyse_image(get_path):
ob.ObjectDetector(mydict).analyse_image() ob.ObjectDetector(mydict).analyse_image()
with open(get_path + JSON_1, "r") as file: with open(get_path + JSON_1, "r") as file:
out_dict = json.load(file) out_dict = json.load(file)
out_dict["filename"] = get_path + out_dict["filename"]
assert str(mydict) == str(out_dict) assert str(mydict) == str(out_dict)

Просмотреть файл

@ -1,166 +1,98 @@
import os import os
import pytest
from torch import device, cuda from torch import device, cuda
from lavis.models import load_model_and_preprocess from lavis.models import load_model_and_preprocess
import misinformation.summary as sm import misinformation.summary as sm
images = [
"./test/data/d755771b-225e-432f-802e-fb8dc850fff7.png", IMAGES = ["d755771b-225e-432f-802e-fb8dc850fff7.png", "IMG_2746.png"]
"./test/data/IMG_2746.png",
"./test/data/IMG_2750.png", SUMMARY_DEVICE = device("cuda" if cuda.is_available() else "cpu")
"./test/data/IMG_2805.png",
"./test/data/IMG_2806.png", TEST_KWARGS = {
"./test/data/IMG_2807.png", "run1": {
"./test/data/IMG_2808.png", "name": "blip_caption",
"./test/data/IMG_2809.png", "model_type": "base_coco",
"./test/data/IMG_3755.jpg", "is_eval": True,
"./test/data/IMG_3756.jpg", "device": SUMMARY_DEVICE,
"./test/data/IMG_3757.jpg", },
"./test/data/pic1.png", "run2": {
] "name": "blip_caption",
"model_type": "base_coco",
"is_eval": True,
"device": SUMMARY_DEVICE,
},
"run3": {
"name": "blip_caption",
"model_type": "large_coco",
"is_eval": True,
"device": SUMMARY_DEVICE,
},
}
def test_analyse_image(): @pytest.fixture
def get_dict(get_path):
mydict = {} mydict = {}
for img_path in images: for img in IMAGES:
id_ = os.path.splitext(os.path.basename(img_path))[0] id_ = os.path.splitext(os.path.basename(img))[0]
mydict[id_] = {"filename": img_path} mydict[id_] = {"filename": get_path + img}
return mydict
for key in mydict:
mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image()
keys = list(mydict.keys())
assert len(mydict) == 12
for key in keys:
assert len(mydict[key]["3_non-deterministic summary"]) == 3
const_image_summary_list = [ @pytest.mark.long
"a river running through a city next to tall buildings", def test_analyse_image(get_dict):
"a crowd of people standing on top of a tennis court", reference_results = {
"a crowd of people standing on top of a field", "run1": {
"a room with a desk and a chair", "d755771b-225e-432f-802e-fb8dc850fff7": "a river running through a city next to tall buildings",
"a table with plastic containers on top of it", "IMG_2746": "a crowd of people standing on top of a tennis court",
"a view of a city with mountains in the background", },
"a view of a city street from a window", "run2": {
"a busy city street with cars and pedestrians", "d755771b-225e-432f-802e-fb8dc850fff7": "a river running through a city next to tall buildings",
"a close up of an open book with writing on it", "IMG_2746": "a crowd of people standing on top of a tennis court",
"a book that is open on a table", },
"a yellow book with green lettering on it", "run3": {
"a person running on a beach near a rock formation", "d755771b-225e-432f-802e-fb8dc850fff7": "a river running through a town next to tall buildings",
] "IMG_2746": "a crowd of people standing on top of a track",
},
for i in range(len(const_image_summary_list)): }
assert mydict[keys[i]]["const_image_summary"] == const_image_summary_list[i] # test three different models
for test_run in TEST_KWARGS.keys():
del sm.SummaryDetector.summary_model, sm.SummaryDetector.summary_vis_processors summary_model, summary_vis_processors, _ = load_model_and_preprocess(
cuda.empty_cache() **TEST_KWARGS[test_run]
summary_device = device("cuda" if cuda.is_available() else "cpu")
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption",
model_type="base_coco",
is_eval=True,
device=summary_device,
)
for key in mydict:
mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(
summary_model, summary_vis_processors
) )
keys = list(mydict.keys()) # run two different images
for key in get_dict.keys():
assert len(mydict) == 12 get_dict[key] = sm.SummaryDetector(get_dict[key]).analyse_image(
for key in keys: summary_model, summary_vis_processors
assert len(mydict[key]["3_non-deterministic summary"]) == 3 )
assert len(get_dict) == 2
const_image_summary_list2 = [ for key in get_dict.keys():
"a river running through a city next to tall buildings", assert len(get_dict[key]["3_non-deterministic summary"]) == 3
"a crowd of people standing on top of a tennis court", assert (
"a crowd of people standing on top of a field", get_dict[key]["const_image_summary"] == reference_results[test_run][key]
"a room with a desk and a chair", )
"a table with plastic containers on top of it", cuda.empty_cache()
"a view of a city with mountains in the background", summary_model = None
"a view of a city street from a window", summary_vis_processors = None
"a busy city street with cars and pedestrians",
"a close up of an open book with writing on it",
"a book that is open on a table",
"a yellow book with green lettering on it",
"a person running on a beach near a rock formation",
]
for i in range(len(const_image_summary_list2)):
assert mydict[keys[i]]["const_image_summary"] == const_image_summary_list2[i]
del summary_model, summary_vis_processors
cuda.empty_cache()
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption",
model_type="large_coco",
is_eval=True,
device=summary_device,
)
for key in mydict:
mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(
summary_model, summary_vis_processors
)
keys = list(mydict.keys())
assert len(mydict) == 12
for key in keys:
assert len(mydict[key]["3_non-deterministic summary"]) == 3
const_image_summary_list3 = [
"a river running through a town next to tall buildings",
"a crowd of people standing on top of a track",
"a group of people standing on top of a track",
"a desk and chair in a small room",
"a table that has some chairs on top of it",
"a view of a city from a window of a building",
"a view of a city from a window",
"a city street filled with lots of traffic",
"an open book with german text on it",
"a close up of a book on a table",
"a book with a green cover on a table",
"a person running on a beach near the ocean",
]
for i in range(len(const_image_summary_list2)):
assert mydict[keys[i]]["const_image_summary"] == const_image_summary_list3[i]
def test_analyse_questions(): def test_analyse_questions(get_dict):
mydict = {}
for img_path in images:
id_ = os.path.splitext(os.path.basename(img_path))[0]
mydict[id_] = {"filename": img_path}
list_of_questions = [ list_of_questions = [
"How many persons on the picture?", "How many persons on the picture?",
"What happends on the picture?", "What happends on the picture?",
] ]
for key in mydict: for key in get_dict:
mydict[key] = sm.SummaryDetector(mydict[key]).analyse_questions( get_dict[key] = sm.SummaryDetector(get_dict[key]).analyse_questions(
list_of_questions list_of_questions
) )
assert len(get_dict) == 2
keys = list(mydict.keys()) list_of_questions_ans = ["2", "100"]
assert len(mydict) == 12 list_of_questions_ans2 = ["flood", "festival"]
test_answers = []
list_of_questions_ans = [2, 100, "many", 0, 0, "none", "two", 5, 0, 0, 0, 1] test_answers2 = []
for key in get_dict.keys():
list_of_questions_ans2 = [ test_answers.append(get_dict[key][list_of_questions[0]])
"flood", test_answers2.append(get_dict[key][list_of_questions[1]])
"festival", assert sorted(test_answers) == sorted(list_of_questions_ans)
"people are flying kites", assert sorted(test_answers2) == sorted(list_of_questions_ans2)
"no one's home",
"chair is being moved",
"traffic jam",
"day time",
"traffic jam",
"nothing",
"nothing",
"nothing",
"running",
]
for i in range(len(list_of_questions_ans)):
assert mydict[keys[i]][list_of_questions[1]] == str(list_of_questions_ans2[i])

Просмотреть файл

@ -20,7 +20,7 @@ def set_testdict(get_path):
return testdict return testdict
LANGUAGES = ["de", "en", "en"] LANGUAGES = ["de", "om", "en"]
def test_TextDetector(set_testdict): def test_TextDetector(set_testdict):

Просмотреть файл

@ -2,8 +2,6 @@ import glob
import os import os
from pandas import DataFrame from pandas import DataFrame
import pooch import pooch
from torch import device, cuda
from lavis.models import load_model_and_preprocess
class DownloadResource: class DownloadResource:
@ -108,34 +106,3 @@ if __name__ == "__main__":
outdict = append_data_to_dict(mydict) outdict = append_data_to_dict(mydict)
df = dump_df(outdict) df = dump_df(outdict)
print(df.head(10)) print(df.head(10))
def load_model_base():
summary_device = device("cuda" if cuda.is_available() else "cpu")
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption",
model_type="base_coco",
is_eval=True,
device=summary_device,
)
return summary_model, summary_vis_processors
def load_model_large():
summary_device = device("cuda" if cuda.is_available() else "cpu")
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption",
model_type="large_coco",
is_eval=True,
device=summary_device,
)
return summary_model, summary_vis_processors
def load_model(model_type):
select_model = {
"base": load_model_base,
"large": load_model_large,
}
summary_model, summary_vis_processors = select_model[model_type]()
return summary_model, summary_vis_processors

39
notebooks/image_summary.ipynb сгенерированный
Просмотреть файл

@ -17,7 +17,9 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"tags": []
},
"outputs": [], "outputs": [],
"source": [ "source": [
"import misinformation\n", "import misinformation\n",
@ -36,7 +38,9 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"tags": []
},
"outputs": [], "outputs": [],
"source": [ "source": [
"images = mutils.find_files(\n", "images = mutils.find_files(\n",
@ -48,7 +52,9 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"tags": []
},
"outputs": [], "outputs": [],
"source": [ "source": [
"mydict = mutils.initialize_dict(images[0:10])" "mydict = mutils.initialize_dict(images[0:10])"
@ -57,7 +63,9 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"tags": []
},
"outputs": [], "outputs": [],
"source": [ "source": [
"mydict" "mydict"
@ -80,22 +88,27 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"tags": []
},
"outputs": [], "outputs": [],
"source": [ "source": [
"summary_model, summary_vis_processors = sm.SummaryDetector.load_model(mydict, \"base\")\n", "obj = sm.SummaryDetector(mydict)\n",
"summary_model, summary_vis_processors = obj.load_model(model_type=\"base\")\n",
"# summary_model, summary_vis_processors = mutils.load_model(\"large\")" "# summary_model, summary_vis_processors = mutils.load_model(\"large\")"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"tags": []
},
"outputs": [], "outputs": [],
"source": [ "source": [
"for key in mydict:\n", "for key in mydict:\n",
" mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(\n", " mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(\n",
" summary_model, summary_vis_processors\n", " summary_model=summary_model, summary_vis_processors=summary_vis_processors\n",
" )" " )"
] ]
}, },
@ -130,7 +143,9 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"tags": []
},
"outputs": [], "outputs": [],
"source": [ "source": [
"df.head(10)" "df.head(10)"
@ -168,7 +183,9 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"tags": []
},
"outputs": [], "outputs": [],
"source": [ "source": [
"mdisplay.explore_analysis(mydict, identify=\"summary\")" "mdisplay.explore_analysis(mydict, identify=\"summary\")"
@ -279,7 +296,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.9.0" "version": "3.9.16"
}, },
"vscode": { "vscode": {
"interpreter": { "interpreter": {

Просмотреть файл

@ -24,12 +24,12 @@ classifiers = [
dependencies = [ dependencies = [
"bertopic", "bertopic",
"cvlib", "cvlib",
"deepface @ git+https://github.com/iulusoy/deepface.git", "deepface<=0.0.75",
"googletrans==3.1.0a0", "googletrans==3.1.0a0",
"grpcio", "grpcio",
"importlib_metadata", "importlib_metadata",
"ipython", "ipython",
"ipywidgets", "ipywidgets<8.0.5",
"ipykernel", "ipykernel",
"matplotlib", "matplotlib",
"numpy<=1.23.4", "numpy<=1.23.4",
@ -39,9 +39,10 @@ dependencies = [
"protobuf", "protobuf",
"pytest", "pytest",
"pytest-cov", "pytest-cov",
"pytest-xdist",
"requests", "requests",
"retina_face @ git+https://github.com/iulusoy/retinaface.git", "retina_face",
"salesforce-lavis @ git+https://github.com/iulusoy/LAVIS.git", "salesforce-lavis",
"spacy", "spacy",
"spacytextblob", "spacytextblob",
"tensorflow", "tensorflow",