* deleted lavis from utils

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fixed test_objects

* added 'not gcv' to CI

* fixed multimodal search and summary tests

* disable doc build on PR for now

* restrict ipywidgets version to avoid DummyComm error

* limit deepface version

* use original repositories for retinaface and lavis

* update gcv test results

* update display test outputs

* update test env

* run all tests

* run without xdist to avoid segfault

* remove widgets ref

* skip long-running tests

* skip long

* verbose codecov upload

* refactor summary test 2

* finish summary test refactor

* reduce memory overhead of SummaryDetector

* remove VQA models from self

* remove VQA models from self

* update notebook for changes

* update notebook for changes

* fixed multimodal search tests

* fixed tests in multimodal search after precommit

* run all tests

* update doc notebook for summary changes

* skip long-running multimodal tests

* exclude blip2 from testing

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Inga Ulusoy <inga.ulusoy@uni-heidelberg.de>
This commit is contained in:
Petr Andriushchenko 2023-03-30 10:33:05 +02:00 committed by GitHub
parent 0ca9366980
commit a5c43b6488
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
21 changed files with 280 additions and 524 deletions

5
.github/workflows/ci.yml vendored
View file

@ -14,7 +14,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-22.04,windows-latest]
os: [ubuntu-22.04]
python-version: [3.9]
steps:
- name: Checkout repository
@ -32,10 +32,11 @@ jobs:
- name: Run pytest
run: |
cd misinformation
python -m pytest --cov=. --cov-report=xml
python -m pytest -m "not gcv" -svv --cov=. --cov-report=xml
- name: Upload coverage
if: matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.9'
uses: codecov/codecov-action@v3
with:
fail_ci_if_error: true
files: misinformation/coverage.xml
verbose: true

2
.github/workflows/docs.yml vendored
View file

@ -3,8 +3,6 @@ name: Pages
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
workflow_dispatch:
jobs:

View file

@ -20,7 +20,6 @@
"metadata": {},
"outputs": [],
"source": [
"import misinformation\n",
"from misinformation import utils as mutils\n",
"from misinformation import display as mdisplay\n",
"import misinformation.summary as sm"
@ -74,7 +73,8 @@
"metadata": {},
"outputs": [],
"source": [
"summary_model, summary_vis_processors = mutils.load_model(\"base\")\n",
"obj = sm.SummaryDetector(mydict)\n",
"summary_model, summary_vis_processors = obj.load_model(\"base\")\n",
"# summary_model, summary_vis_processors = mutils.load_model(\"large\")"
]
},
@ -96,7 +96,7 @@
"tags": []
},
"source": [
"Convert the dictionary of dictionarys into a dictionary with lists:"
"Convert the dictionary of dictionaries into a dictionary with lists:"
]
},
{
@ -256,7 +256,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},

View file

@ -141,7 +141,7 @@ class EmotionDetector(utils.AnalysisMethod):
DeepFace.analyze(
img_path=face,
actions=actions,
silent=True,
prog_bar=False,
detector_backend="skip",
)
)
@ -192,6 +192,10 @@ class EmotionDetector(utils.AnalysisMethod):
"Yes" if result[person]["wears_mask"] else "No"
)
self.subdict["age"].append(result[person]["age"])
# gender is now reported as a list of dictionaries
# each dict represents one face
# each dict contains probability for Woman and Man
# take only the higher prob result for each dict
self.subdict["gender"].append(result[person]["gender"])
# race, emotion only detected if person does not wear mask
if result[person]["wears_mask"]:
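
Note on the added comment: deepface is assumed here to report gender as one probability dict per detected face, e.g. {"Woman": 3.2, "Man": 96.8}. A minimal sketch of reducing such a dict to a single label, as the comment describes (the helper name is hypothetical, not part of this diff):

```python
def dominant_gender(face_probs: dict) -> str:
    # face_probs is assumed to look like {"Woman": 3.2, "Man": 96.8};
    # keep only the higher-probability label, per the comment above
    return max(face_probs, key=face_probs.get)
```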

View file

@ -174,7 +174,7 @@ class MultimodalSearch(AnalysisMethod):
"Please, use one of the following models: blip2, blip, albef, clip_base, clip_vitl14, clip_vitl14_336"
)
raw_images, images_tensors = MultimodalSearch.read_and_process_images(
_, images_tensors = MultimodalSearch.read_and_process_images(
self, image_names, vis_processors
)
if path_to_saved_tensors is None:
@ -213,7 +213,7 @@ class MultimodalSearch(AnalysisMethod):
for query in search_query:
if not (len(query) == 1) and (query in ("image", "text_input")):
raise SyntaxError(
'Each querry must contain either an "image" or a "text_input"'
'Each query must contain either an "image" or a "text_input"'
)
multi_sample = []
for query in search_query:

View file

@ -7,36 +7,28 @@ from lavis.models import load_model_and_preprocess
class SummaryDetector(AnalysisMethod):
def __init__(self, subdict: dict) -> None:
super().__init__(subdict)
summary_device = device("cuda" if cuda.is_available() else "cpu")
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption",
model_type="base_coco",
is_eval=True,
device=summary_device,
)
self.summary_device = device("cuda" if cuda.is_available() else "cpu")
def load_model_base(self):
summary_device = device("cuda" if cuda.is_available() else "cpu")
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption",
model_type="base_coco",
is_eval=True,
device=summary_device,
device=self.summary_device,
)
return summary_model, summary_vis_processors
def load_model_large(self):
summary_device = device("cuda" if cuda.is_available() else "cpu")
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption",
model_type="large_coco",
is_eval=True,
device=summary_device,
device=self.summary_device,
)
return summary_model, summary_vis_processors
def load_model(self, model_type):
# self.summary_device = device("cuda" if cuda.is_available() else "cpu")
select_model = {
"base": SummaryDetector.load_model_base,
"large": SummaryDetector.load_model_large,
@ -47,8 +39,7 @@ class SummaryDetector(AnalysisMethod):
def analyse_image(self, summary_model=None, summary_vis_processors=None):
if summary_model is None and summary_vis_processors is None:
summary_model = SummaryDetector.summary_model
summary_vis_processors = SummaryDetector.summary_vis_processors
summary_model, summary_vis_processors = self.load_model_base()
path = self.subdict["filename"]
raw_image = Image.open(path).convert("RGB")
@ -66,32 +57,33 @@ class SummaryDetector(AnalysisMethod):
)
return self.subdict
(
summary_VQA_model,
summary_VQA_vis_processors,
summary_VQA_txt_processors,
) = load_model_and_preprocess(
name="blip_vqa", model_type="vqav2", is_eval=True, device=summary_device
)
def analyse_questions(self, list_of_questions):
(
summary_VQA_model,
summary_VQA_vis_processors,
summary_VQA_txt_processors,
) = load_model_and_preprocess(
name="blip_vqa",
model_type="vqav2",
is_eval=True,
device=self.summary_device,
)
if len(list_of_questions) > 0:
path = self.subdict["filename"]
raw_image = Image.open(path).convert("RGB")
image = (
self.summary_VQA_vis_processors["eval"](raw_image)
summary_VQA_vis_processors["eval"](raw_image)
.unsqueeze(0)
.to(self.summary_device)
)
question_batch = []
for quest in list_of_questions:
question_batch.append(self.summary_VQA_txt_processors["eval"](quest))
question_batch.append(summary_VQA_txt_processors["eval"](quest))
batch_size = len(list_of_questions)
image_batch = image.repeat(batch_size, 1, 1, 1)
with no_grad():
answers_batch = self.summary_VQA_model.predict_answers(
answers_batch = summary_VQA_model.predict_answers(
samples={"image": image_batch, "text_input": question_batch},
inference_method="generate",
)
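
After this refactor the caption and VQA models are no longer instantiated in `__init__`, so a model is loaded once and then passed into `analyse_image` for every image, as the updated notebooks below also do. A minimal usage sketch (the file path is a placeholder):

```python
import misinformation.summary as sm

mydict = {"img1": {"filename": "img1.png"}}  # placeholder path
obj = sm.SummaryDetector(mydict)
# load the BLIP base captioning model once, on CPU or GPU
summary_model, summary_vis_processors = obj.load_model(model_type="base")
for key in mydict:
    mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(
        summary_model=summary_model, summary_vis_processors=summary_vis_processors
    )
```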

View file

@ -1 +1 @@
{"IMG_2746": {"filename": "./test/data/IMG_2809.png", "person": "yes", "bicycle": "no", "car": "yes", "motorcycle": "no", "airplane": "no", "bus": "yes", "train": "no", "truck": "no", "boat": "no", "traffic light": "no", "cell phone": "no"}}
{"person": "yes", "bicycle": "no", "car": "yes", "motorcycle": "no", "airplane": "no", "bus": "yes", "train": "no", "truck": "no", "boat": "no", "traffic light": "no", "cell phone": "no"}

View file

@ -1,5 +1,4 @@
{
"filename": "./test/data/IMG_2746.png",
"face": "Yes",
"multiple_faces": "Yes",
"no_faces": 11,

View file

@ -3,10 +3,10 @@ The Quantum Theory of
Nonrelativistic Collisions
JOHN R. TAYLOR
University of Colorado
postaldia Lanbidean
ostaliga Lanbidean
1 ilde
ballenger stor goin
gd OOL, STVÍ 23 TL 02
gdĐOL, SIVI 23 TL 02
de in obl
och yd badalang
a

View file

@ -3,12 +3,12 @@ The Quantum Theory of
Nonrelativistic Collisions
JOHN R. TAYLOR
University of Colorado
postaldia Lanbidean
ostaliga Lanbidean
1 ilde
ballenger stor goin
gd OOL, STVÍ 23 TL 02
de in obl
och yd badalang
balloons big goin
gdĐOL, SIVI 23 TL
there in obl
och yd change
a
Ber
ook Sy-RW enot go baldus
ook Sy-RW isn't going anywhere

View file

@ -1,3 +1,4 @@
[pytest]
markers =
gcv: mark google cloud vision tests - skip to save money.
gcv: mark google cloud vision tests - skip to save money.
long: mark long running tests - skip to save compute resources.
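
With both markers registered, expensive tests can be tagged and deselected from the command line; a minimal sketch using standard pytest marker mechanics (the test name is hypothetical):

```python
import pytest

@pytest.mark.long
def test_expensive_model_run():  # hypothetical example
    ...

# CI deselects google cloud vision tests as in the workflow above:
#   python -m pytest -m "not gcv" -svv
# locally, both expensive groups can be skipped:
#   pytest -m "not gcv and not long"
```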

View file

@ -2,20 +2,20 @@ import misinformation.cropposts as crpo
import numpy as np
from PIL import Image
TEST_IMAGE_1 = "./test/data/pic1.png"
TEST_IMAGE_2 = "./test/data/pic2.png"
TEST_IMAGE_1 = "pic1.png"
TEST_IMAGE_2 = "pic2.png"
def test_matching_points():
ref_view = np.array(Image.open(TEST_IMAGE_2))
view = np.array(Image.open(TEST_IMAGE_1))
filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view)
def test_matching_points(get_path):
ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
view = np.array(Image.open(get_path + TEST_IMAGE_1))
filtered_matches, _, _ = crpo.matching_points(ref_view, view)
assert len(filtered_matches) > 0
def test_kp_from_matches():
ref_view = np.array(Image.open(TEST_IMAGE_2))
view = np.array(Image.open(TEST_IMAGE_1))
def test_kp_from_matches(get_path):
ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
view = np.array(Image.open(get_path + TEST_IMAGE_1))
filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view)
kp1, kp2 = crpo.kp_from_matches(filtered_matches, kp1, kp2)
@ -25,9 +25,9 @@ def test_kp_from_matches():
assert kp2.shape[1] == 2
def test_compute_crop_corner():
ref_view = np.array(Image.open(TEST_IMAGE_2))
view = np.array(Image.open(TEST_IMAGE_1))
def test_compute_crop_corner(get_path):
ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
view = np.array(Image.open(get_path + TEST_IMAGE_1))
filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view)
corner = crpo.compute_crop_corner(filtered_matches, kp1, kp2)
print(view.shape)
@ -38,9 +38,9 @@ def test_compute_crop_corner():
assert 0 <= h < view.shape[0]
def test_crop_posts_image():
ref_view = np.array(Image.open(TEST_IMAGE_2))
view = np.array(Image.open(TEST_IMAGE_1))
def test_crop_posts_image(get_path):
ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
view = np.array(Image.open(get_path + TEST_IMAGE_1))
rte = crpo.crop_posts_image(ref_view, view)
assert rte is not None
crop_view, match_num = rte
@ -48,16 +48,15 @@ def test_crop_posts_image():
assert crop_view.shape[0] * crop_view.shape[1] <= view.shape[0] * view.shape[1]
def test_crop_posts_from_refs():
ref_view = np.array(Image.open(TEST_IMAGE_2))
view = np.array(Image.open(TEST_IMAGE_1))
def test_crop_posts_from_refs(get_path):
ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
view = np.array(Image.open(get_path + TEST_IMAGE_1))
ref_views = [ref_view]
crop_view = crpo.crop_posts_from_refs(ref_views, view)
assert crop_view.shape[0] * crop_view.shape[1] <= view.shape[0] * view.shape[1]
def test_get_file_list():
def test_get_file_list(get_path):
ref_list = []
ref_dir = "./test/data"
ref_list = crpo.get_file_list(ref_dir, ref_list, ext="png")
ref_list = crpo.get_file_list(get_path, ref_list, ext="png")
assert len(ref_list) > 0
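
These tests now resolve data files through a `get_path` fixture instead of hard-coded `./test/data/` prefixes. The fixture itself is not part of this diff; a minimal sketch of what a `conftest.py` definition could look like, given that the tests concatenate it directly with file names:

```python
# conftest.py (assumed, not shown in this diff)
import pytest

@pytest.fixture
def get_path():
    # tests build paths as get_path + "pic1.png", so the returned
    # string must end with a path separator
    return "./test/data/"
```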

View file

@ -1,27 +1,28 @@
import json
# import misinformation.display as misinf_display
import pytest
misinf_display = pytest.importorskip("misinformation.display")
import misinformation.display as misinf_display
def test_explore_analysis_faces():
mydict = {"IMG_2746": {"filename": "./test/data/IMG_2746.png"}}
misinf_display.explore_analysis(mydict, identify="faces")
with open("./test/data/example_faces.json", "r") as file:
def test_explore_analysis_faces(get_path):
mydict = {"IMG_2746": {"filename": get_path + "IMG_2746.png"}}
temp = misinf_display.explore_analysis(mydict, identify="faces") # noqa
temp = None # noqa
with open(get_path + "example_faces.json", "r") as file:
outs = json.load(file)
mydict["IMG_2746"].pop("filename", None)
for im_key in mydict.keys():
sub_dict = mydict[im_key]
for key in sub_dict.keys():
assert sub_dict[key] == outs[key]
def test_explore_analysis_objects():
mydict = {"IMG_2746": {"filename": "./test/data/IMG_2809.png"}}
misinf_display.explore_analysis(mydict, identify="objects")
with open("./test/data/example_analysis_objects.json", "r") as file:
def test_explore_analysis_objects(get_path):
mydict = {"IMG_2809": {"filename": get_path + "IMG_2809.png"}}
temp = misinf_display.explore_analysis(mydict, identify="objects") # noqa
temp = None # noqa
with open(get_path + "example_analysis_objects.json", "r") as file:
outs = json.load(file)
assert str(mydict) == str(outs)
mydict["IMG_2809"].pop("filename", None)
for im_key in mydict.keys():
sub_dict = mydict[im_key]
for key in sub_dict.keys():
assert sub_dict[key] == outs[key]

View file

@ -1,16 +1,17 @@
import misinformation.faces as fc
import json
from pytest import approx
import pytest
def test_analyse_faces():
def test_analyse_faces(get_path):
mydict = {
"filename": "./test/data/IMG_2746.png",
"filename": get_path + "IMG_2746.png",
}
mydict = fc.EmotionDetector(mydict).analyse_image()
mydict.update(fc.EmotionDetector(mydict).analyse_image())
with open("./test/data/example_faces.json", "r") as file:
with open(get_path + "example_faces.json", "r") as file:
out_dict = json.load(file)
# delete the filename key
mydict.pop("filename", None)
for key in mydict.keys():
assert mydict[key] == out_dict[key]

View file

@ -5,24 +5,13 @@ import numpy
from torch import device, cuda
import misinformation.multimodal_search as ms
testdict = {
"d755771b-225e-432f-802e-fb8dc850fff7": {
"filename": "./test/data/d755771b-225e-432f-802e-fb8dc850fff7.png"
},
"IMG_2746": {"filename": "./test/data/IMG_2746.png"},
"IMG_2750": {"filename": "./test/data/IMG_2750.png"},
"IMG_2805": {"filename": "./test/data/IMG_2805.png"},
"IMG_2806": {"filename": "./test/data/IMG_2806.png"},
"IMG_2807": {"filename": "./test/data/IMG_2807.png"},
"IMG_2808": {"filename": "./test/data/IMG_2808.png"},
"IMG_2809": {"filename": "./test/data/IMG_2809.png"},
"IMG_3755": {"filename": "./test/data/IMG_3755.jpg"},
"IMG_3756": {"filename": "./test/data/IMG_3756.jpg"},
"IMG_3757": {"filename": "./test/data/IMG_3757.jpg"},
"pic1": {"filename": "./test/data/pic1.png"},
}
related_error = 1e-3
related_error = 1e-2
gpu_is_not_available = not cuda.is_available()
@ -38,39 +27,15 @@ def test_read_img():
pre_proc_pic_blip2_blip_albef = [
-1.0039474964141846,
-1.0039474964141846,
-0.8433647751808167,
-0.6097899675369263,
-0.5951915383338928,
-0.6243883967399597,
-0.6827820539474487,
-0.6097899675369263,
-0.7119789123535156,
-1.0623412132263184,
]
pre_proc_pic_clip_vitl14 = [
-0.7995694875717163,
-0.7849710583686829,
-0.7849710583686829,
-0.7703726291656494,
-0.7703726291656494,
-0.7849710583686829,
-0.7849710583686829,
-0.7703726291656494,
-0.7703726291656494,
-0.7703726291656494,
]
pre_proc_pic_clip_vitl14_336 = [
-0.7995694875717163,
-0.7849710583686829,
-0.7849710583686829,
-0.7849710583686829,
-0.7849710583686829,
-0.7849710583686829,
-0.7849710583686829,
-0.9163569211959839,
-1.149931788444519,
-1.0039474964141846,
]
pre_proc_text_blip2_blip_albef = (
@ -84,293 +49,150 @@ pre_proc_text_clip_clip_vitl14_clip_vitl14_336 = (
pre_extracted_feature_img_blip2 = [
0.04566730558872223,
-0.042554520070552826,
-0.06970272958278656,
-0.009771779179573059,
0.01446065679192543,
0.10173682868480682,
0.007092420011758804,
-0.020045937970280647,
0.12923966348171234,
0.006452132016420364,
]
pre_extracted_feature_img_blip = [
-0.02480311505496502,
0.05037587881088257,
0.039517853409051895,
-0.06994109600782394,
-0.12886561453342438,
0.047039758414030075,
-0.11620642244815826,
-0.003398326924070716,
-0.07324369996786118,
0.06994668394327164,
]
pre_extracted_feature_img_albef = [
0.08971136063337326,
-0.10915573686361313,
-0.020636577159166336,
0.048121627420186996,
-0.05943416804075241,
-0.129856139421463,
-0.0034469354432076216,
0.017888527363538742,
-0.03284582123160362,
-0.1037328764796257,
]
pre_extracted_feature_img_clip = [
0.01621132344007492,
-0.004035486374050379,
-0.04304071143269539,
-0.03459808602929115,
0.016922621056437492,
-0.025056276470422745,
-0.04178355261683464,
0.02165347896516323,
-0.003224249929189682,
0.020485712215304375,
]
pre_extracted_feature_img_parsing_clip = [
0.01621132344007492,
-0.004035486374050379,
-0.04304071143269539,
-0.03459808602929115,
0.016922621056437492,
-0.025056276470422745,
-0.04178355261683464,
0.02165347896516323,
-0.003224249929189682,
0.020485712215304375,
]
pre_extracted_feature_img_clip_vitl14 = [
-0.023943455889821053,
-0.021703708916902542,
0.035043686628341675,
0.019495919346809387,
0.014351222664117813,
-0.008634116500616074,
0.01610446907579899,
-0.003426523646339774,
0.011931191198527813,
0.0008691544644534588,
]
pre_extracted_feature_img_clip_vitl14_336 = [
-0.009511193260550499,
-0.012618942186236382,
0.034754861146211624,
0.016356879845261574,
-0.0011549904011189938,
-0.008054453879594803,
0.0011990377679467201,
-0.010806051082909107,
0.00140204350464046,
0.0006861367146484554,
]
pre_extracted_feature_text_blip2 = [
-0.1384204626083374,
-0.008662976324558258,
0.006269007455557585,
0.03151319921016693,
0.060558050870895386,
-0.03230040520429611,
0.015861615538597107,
-0.11856459826231003,
-0.058296192437410355,
0.03699290752410889,
]
pre_extracted_feature_text_blip = [
0.0118643119931221,
-0.01291718054562807,
-0.0009687161073088646,
0.01428765058517456,
-0.05591396614909172,
0.07386433333158493,
-0.11475936323404312,
0.01620068959891796,
0.0062415082938969135,
0.0034833091776818037,
]
pre_extracted_feature_text_albef = [
-0.06229640915989876,
0.11278597265481949,
0.06628583371639252,
0.1649140566587448,
0.068987175822258,
0.006291372701525688,
0.03244050219655037,
-0.049556829035282135,
0.050752390176057816,
-0.0421440489590168,
]
pre_extracted_feature_text_clip = [
0.018169036135077477,
0.03634127229452133,
0.025660742074251175,
0.009149895049631596,
-0.035570453852415085,
0.033126577734947205,
-0.004808237310498953,
-0.0031453112605959177,
-0.02194291725754738,
0.024019461125135422,
]
pre_extracted_feature_text_clip_vitl14 = [
-0.0055463071912527084,
0.006908962037414312,
-0.019450219348073006,
-0.018097277730703354,
0.017567576840519905,
-0.03828490898013115,
-0.03781530633568764,
-0.023951737210154533,
0.01365653332322836,
-0.02341713197529316,
]
pre_extracted_feature_text_clip_vitl14_336 = [
-0.008720514364540577,
0.005284308455884457,
-0.021116750314831734,
-0.018112430348992348,
0.01685470901429653,
-0.03517491742968559,
-0.038612402975559235,
-0.021867064759135246,
0.01685977540910244,
-0.023832324892282486,
]
simularity_blip2 = [
[0.05826476216316223, -0.03215287625789642],
[0.12869958579540253, 0.005234059877693653],
[0.11073512583971024, 0.12327003479003906],
[0.08743024617433548, 0.05598106235265732],
[0.04591086134314537, 0.48981112241744995],
[0.06297147274017334, 0.4728018641471863],
[0.18486255407333374, 0.635167121887207],
[0.015356295742094517, 0.015282897278666496],
[-0.008485622704029083, 0.010882291942834854],
[-0.04328630864620209, -0.13117870688438416],
[-0.025470387190580368, 0.13175423443317413],
[-0.05090826004743576, 0.05902523919939995],
[0.05826476216316223, -0.02717375010251999],
[0.06297147274017334, 0.47339022159576416],
]
sorted_blip2 = [
[6, 1, 2, 3, 5, 0, 4, 7, 8, 10, 9, 11],
[6, 4, 5, 10, 2, 11, 3, 7, 8, 1, 0, 9],
[1, 0],
[1, 0],
]
simularity_blip = [
[0.15640679001808167, 0.752173662185669],
[0.15139800310134888, 0.7804810404777527],
[0.13010388612747192, 0.755257248878479],
[0.13746635615825653, 0.7618774175643921],
[0.1756758838891983, 0.8531903624534607],
[0.17233705520629883, 0.8448910117149353],
[0.1970970332622528, 0.8916105628013611],
[0.11693969368934631, 0.5833531618118286],
[0.12386563420295715, 0.5981853604316711],
[0.08427951484918594, 0.4962371587753296],
[0.14193706214427948, 0.7613846659660339],
[0.12051936239004135, 0.6492202281951904],
]
sorted_blip = [
[6, 4, 5, 0, 1, 10, 3, 2, 8, 11, 7, 9],
[6, 4, 5, 1, 3, 10, 2, 0, 11, 8, 7, 9],
[1, 0],
[1, 0],
]
simularity_albef = [
[0.12321824580430984, 0.35511350631713867],
[0.09512615948915482, 0.27168408036231995],
[0.09053325653076172, 0.20215675234794617],
[0.06335515528917313, 0.15055638551712036],
[0.09604836255311966, 0.4658776521682739],
[0.10870333760976791, 0.5143978595733643],
[0.11748822033405304, 0.6542638540267944],
[0.05688793584704399, 0.22170542180538177],
[0.05597608536481857, 0.11963296681642532],
[0.059643782675266266, 0.14969395101070404],
[0.06690303236246109, 0.3149859607219696],
[0.07909377664327621, 0.11911341547966003],
]
sorted_albef = [
[0, 6, 5, 4, 1, 2, 11, 10, 3, 9, 7, 8],
[6, 5, 4, 0, 10, 1, 7, 2, 3, 9, 8, 11],
[0, 1],
[1, 0],
]
simularity_clip = [
[0.23923014104366302, 0.5325412750244141],
[0.20101115107536316, 0.5112978219985962],
[0.17522737383842468, 0.49811851978302],
[0.20062290132045746, 0.5415266156196594],
[0.22865726053714752, 0.5762109756469727],
[0.2310466319322586, 0.5910375714302063],
[0.2644523084163666, 0.7851459383964539],
[0.21474510431289673, 0.4135811924934387],
[0.16407863795757294, 0.1474374681711197],
[0.19819433987140656, 0.26493316888809204],
[0.19545596837997437, 0.5007457137107849],
[0.1647854745388031, 0.45705708861351013],
]
sorted_clip = [
[6, 0, 5, 4, 7, 1, 3, 9, 10, 2, 11, 8],
[6, 5, 4, 3, 0, 1, 10, 2, 11, 7, 9, 8],
[1, 0],
[1, 0],
]
simularity_clip_vitl14 = [
[0.1051270067691803, 0.5184808373451233],
[0.09705893695354462, 0.49574509263038635],
[0.11964304000139236, 0.5424358248710632],
[0.13881900906562805, 0.5909714698791504],
[0.12728188931941986, 0.6758255362510681],
[0.1277746558189392, 0.6841973662376404],
[0.18026694655418396, 0.803142786026001],
[0.13977059721946716, 0.45957139134407043],
[0.11180847883224487, 0.24822194874286652],
[0.12296056002378464, 0.35143694281578064],
[0.11596094071865082, 0.5704031586647034],
[0.10174489766359329, 0.44422751665115356],
]
sorted_clip_vitl14 = [
[6, 7, 3, 5, 4, 9, 2, 10, 8, 0, 11, 1],
[6, 5, 4, 3, 10, 2, 0, 1, 7, 11, 9, 8],
[1, 0],
[1, 0],
]
simularity_clip_vitl14_336 = [
[0.09391091763973236, 0.49337542057037354],
[0.11103834211826324, 0.4881117343902588],
[0.12891019880771637, 0.5501476526260376],
[0.13288410007953644, 0.5498673915863037],
[0.12357455492019653, 0.6749162077903748],
[0.13700757920742035, 0.7003108263015747],
[0.1788637489080429, 0.7713702321052551],
[0.13260436058044434, 0.4300197660923004],
[0.11666625738143921, 0.2334875613451004],
[0.1316065937280655, 0.3291645646095276],
[0.12374477833509445, 0.5632147192955017],
[0.10333051532506943, 0.43023794889450073],
]
sorted_clip_vitl14_336 = [
[6, 5, 3, 7, 9, 2, 10, 4, 8, 1, 11, 0],
[6, 5, 4, 10, 2, 3, 0, 1, 11, 7, 9, 8],
[1, 0],
[1, 0],
]
dict_itm_scores_for_blib = {
"blip_base": [
0.07107225805521011,
0.004100032616406679,
],
"blip_large": [
0.07890705019235611,
0.00271016638725996,
],
"blip2_coco": [
0.0833505243062973,
0.004216152708977461,
],
}
dict_image_gradcam_with_itm_for_blip = {
"blip_base": [123.36285799741745, 132.31662154197693, 53.38280035299249],
"blip_large": [119.99512910842896, 128.7044593691826, 55.552959859540515],
}
@pytest.mark.long
@pytest.mark.parametrize(
(
"pre_multimodal_device",
@ -383,19 +205,29 @@ sorted_clip_vitl14_336 = [
"pre_sorted",
),
[
pytest.param(
device("cuda"),
"blip2",
pre_proc_pic_blip2_blip_albef,
pre_proc_text_blip2_blip_albef,
pre_extracted_feature_img_blip2,
pre_extracted_feature_text_blip2,
simularity_blip2,
sorted_blip2,
marks=pytest.mark.skipif(
gpu_is_not_available, reason="gpu_is_not_availible"
),
),
# (
# device("cpu"),
# "blip2",
# pre_proc_pic_blip2_blip_albef,
# pre_proc_text_blip2_blip_albef,
# pre_extracted_feature_img_blip2,
# pre_extracted_feature_text_blip2,
# simularity_blip2,
# sorted_blip2,
# ),
# pytest.param(
# device("cuda"),
# "blip2",
# pre_proc_pic_blip2_blip_albef,
# pre_proc_text_blip2_blip_albef,
# pre_extracted_feature_img_blip2,
# pre_extracted_feature_text_blip2,
# simularity_blip2,
# sorted_blip2,
# marks=pytest.mark.skipif(
# gpu_is_not_available, reason="gpu_is_not_availible"
# ),
# ),
(
device("cpu"),
"blip",
@ -530,11 +362,11 @@ def test_parsing_images(
vis_processor,
txt_processor,
image_keys,
image_names,
_,
features_image_stacked,
) = ms.MultimodalSearch.parsing_images(testdict, pre_model)
for i, num in zip(range(10), features_image_stacked[0, 10:20].tolist()):
for i, num in zip(range(10), features_image_stacked[0, 10:12].tolist()):
assert (
math.isclose(num, pre_extracted_feature_img[i], rel_tol=related_error)
is True
@ -549,7 +381,7 @@ def test_parsing_images(
)
processed_text = txt_processor["eval"](test_querry)
for i, num in zip(range(10), processed_pic[0, 0, 0, 25:35].tolist()):
for i, num in zip(range(10), processed_pic[0, 0, 0, 25:27].tolist()):
assert math.isclose(num, pre_proc_pic[i], rel_tol=related_error) is True
assert processed_text == pre_proc_text
@ -562,13 +394,13 @@ def test_parsing_images(
testdict, search_query, model, txt_processor, vis_processor, pre_model
)
for i, num in zip(range(10), multi_features_stacked[0, 10:20].tolist()):
for i, num in zip(range(10), multi_features_stacked[0, 10:12].tolist()):
assert (
math.isclose(num, pre_extracted_feature_text[i], rel_tol=related_error)
is True
)
for i, num in zip(range(10), multi_features_stacked[1, 10:20].tolist()):
for i, num in zip(range(10), multi_features_stacked[1, 10:12].tolist()):
assert (
math.isclose(num, pre_extracted_feature_img[i], rel_tol=related_error)
is True
@ -590,16 +422,24 @@ def test_parsing_images(
search_query2,
)
for i, num in zip(range(12), similarity.tolist()):
for i, num in zip(range(len(pre_simularity)), similarity.tolist()):
for j, num2 in zip(range(len(num)), num):
assert (
math.isclose(num2, pre_simularity[i][j], rel_tol=100 * related_error)
is True
)
for i, num in zip(range(2), sorted_list):
for i, num in zip(range(len(pre_sorted)), sorted_list):
for j, num2 in zip(range(2), num):
assert num2 == pre_sorted[i][j]
del model, vis_processor, txt_processor
del (
model,
vis_processor,
txt_processor,
similarity,
features_image_stacked,
processed_pic,
multi_features_stacked,
)
cuda.empty_cache()
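
The widened `del` plus `cuda.empty_cache()` keeps the suite within memory limits: `empty_cache()` can only return GPU memory whose tensors are no longer referenced, so every reference is dropped first. The general pattern, as a small self-contained sketch:

```python
import torch

model = torch.nn.Linear(4, 4)  # stand-ins for the real model/tensors
features = torch.randn(2, 4)
# drop the Python references first; cached blocks still referenced
# by live tensors cannot be released
del model, features
if torch.cuda.is_available():
    torch.cuda.empty_cache()
```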

View file

@ -31,6 +31,7 @@ def test_analyse_image_cvlib(get_path):
with open(get_path + JSON_1, "r") as file:
out_dict = json.load(file)
out_dict["filename"] = get_path + out_dict["filename"]
for key in mydict.keys():
assert mydict[key] == out_dict[key]
@ -56,10 +57,11 @@ def test_init_default_objects():
def test_analyse_image_from_file_cvlib(get_path):
file_path = get_path + TEST_IMAGE_1
objs = ob_cvlib.ObjectCVLib().analyse_image_from_file(get_path + file_path)
objs = ob_cvlib.ObjectCVLib().analyse_image_from_file(file_path)
with open(get_path + JSON_1, "r") as file:
out_dict = json.load(file)
out_dict["filename"] = get_path + out_dict["filename"]
for key in objs.keys():
assert objs[key] == out_dict[key]
@ -86,5 +88,6 @@ def test_analyse_image(get_path):
ob.ObjectDetector(mydict).analyse_image()
with open(get_path + JSON_1, "r") as file:
out_dict = json.load(file)
out_dict["filename"] = get_path + out_dict["filename"]
assert str(mydict) == str(out_dict)

View file

@ -1,166 +1,98 @@
import os
import pytest
from torch import device, cuda
from lavis.models import load_model_and_preprocess
import misinformation.summary as sm
images = [
"./test/data/d755771b-225e-432f-802e-fb8dc850fff7.png",
"./test/data/IMG_2746.png",
"./test/data/IMG_2750.png",
"./test/data/IMG_2805.png",
"./test/data/IMG_2806.png",
"./test/data/IMG_2807.png",
"./test/data/IMG_2808.png",
"./test/data/IMG_2809.png",
"./test/data/IMG_3755.jpg",
"./test/data/IMG_3756.jpg",
"./test/data/IMG_3757.jpg",
"./test/data/pic1.png",
]
IMAGES = ["d755771b-225e-432f-802e-fb8dc850fff7.png", "IMG_2746.png"]
SUMMARY_DEVICE = device("cuda" if cuda.is_available() else "cpu")
TEST_KWARGS = {
"run1": {
"name": "blip_caption",
"model_type": "base_coco",
"is_eval": True,
"device": SUMMARY_DEVICE,
},
"run2": {
"name": "blip_caption",
"model_type": "base_coco",
"is_eval": True,
"device": SUMMARY_DEVICE,
},
"run3": {
"name": "blip_caption",
"model_type": "large_coco",
"is_eval": True,
"device": SUMMARY_DEVICE,
},
}
def test_analyse_image():
@pytest.fixture
def get_dict(get_path):
mydict = {}
for img_path in images:
id_ = os.path.splitext(os.path.basename(img_path))[0]
mydict[id_] = {"filename": img_path}
for img in IMAGES:
id_ = os.path.splitext(os.path.basename(img))[0]
mydict[id_] = {"filename": get_path + img}
return mydict
for key in mydict:
mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image()
keys = list(mydict.keys())
assert len(mydict) == 12
for key in keys:
assert len(mydict[key]["3_non-deterministic summary"]) == 3
const_image_summary_list = [
"a river running through a city next to tall buildings",
"a crowd of people standing on top of a tennis court",
"a crowd of people standing on top of a field",
"a room with a desk and a chair",
"a table with plastic containers on top of it",
"a view of a city with mountains in the background",
"a view of a city street from a window",
"a busy city street with cars and pedestrians",
"a close up of an open book with writing on it",
"a book that is open on a table",
"a yellow book with green lettering on it",
"a person running on a beach near a rock formation",
]
for i in range(len(const_image_summary_list)):
assert mydict[keys[i]]["const_image_summary"] == const_image_summary_list[i]
del sm.SummaryDetector.summary_model, sm.SummaryDetector.summary_vis_processors
cuda.empty_cache()
summary_device = device("cuda" if cuda.is_available() else "cpu")
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption",
model_type="base_coco",
is_eval=True,
device=summary_device,
)
for key in mydict:
mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(
summary_model, summary_vis_processors
@pytest.mark.long
def test_analyse_image(get_dict):
reference_results = {
"run1": {
"d755771b-225e-432f-802e-fb8dc850fff7": "a river running through a city next to tall buildings",
"IMG_2746": "a crowd of people standing on top of a tennis court",
},
"run2": {
"d755771b-225e-432f-802e-fb8dc850fff7": "a river running through a city next to tall buildings",
"IMG_2746": "a crowd of people standing on top of a tennis court",
},
"run3": {
"d755771b-225e-432f-802e-fb8dc850fff7": "a river running through a town next to tall buildings",
"IMG_2746": "a crowd of people standing on top of a track",
},
}
# test three different models
for test_run in TEST_KWARGS.keys():
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
**TEST_KWARGS[test_run]
)
keys = list(mydict.keys())
assert len(mydict) == 12
for key in keys:
assert len(mydict[key]["3_non-deterministic summary"]) == 3
const_image_summary_list2 = [
"a river running through a city next to tall buildings",
"a crowd of people standing on top of a tennis court",
"a crowd of people standing on top of a field",
"a room with a desk and a chair",
"a table with plastic containers on top of it",
"a view of a city with mountains in the background",
"a view of a city street from a window",
"a busy city street with cars and pedestrians",
"a close up of an open book with writing on it",
"a book that is open on a table",
"a yellow book with green lettering on it",
"a person running on a beach near a rock formation",
]
for i in range(len(const_image_summary_list2)):
assert mydict[keys[i]]["const_image_summary"] == const_image_summary_list2[i]
del summary_model, summary_vis_processors
cuda.empty_cache()
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption",
model_type="large_coco",
is_eval=True,
device=summary_device,
)
for key in mydict:
mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(
summary_model, summary_vis_processors
)
keys = list(mydict.keys())
assert len(mydict) == 12
for key in keys:
assert len(mydict[key]["3_non-deterministic summary"]) == 3
const_image_summary_list3 = [
"a river running through a town next to tall buildings",
"a crowd of people standing on top of a track",
"a group of people standing on top of a track",
"a desk and chair in a small room",
"a table that has some chairs on top of it",
"a view of a city from a window of a building",
"a view of a city from a window",
"a city street filled with lots of traffic",
"an open book with german text on it",
"a close up of a book on a table",
"a book with a green cover on a table",
"a person running on a beach near the ocean",
]
for i in range(len(const_image_summary_list2)):
assert mydict[keys[i]]["const_image_summary"] == const_image_summary_list3[i]
# run two different images
for key in get_dict.keys():
get_dict[key] = sm.SummaryDetector(get_dict[key]).analyse_image(
summary_model, summary_vis_processors
)
assert len(get_dict) == 2
for key in get_dict.keys():
assert len(get_dict[key]["3_non-deterministic summary"]) == 3
assert (
get_dict[key]["const_image_summary"] == reference_results[test_run][key]
)
cuda.empty_cache()
summary_model = None
summary_vis_processors = None
def test_analyse_questions():
mydict = {}
for img_path in images:
id_ = os.path.splitext(os.path.basename(img_path))[0]
mydict[id_] = {"filename": img_path}
def test_analyse_questions(get_dict):
list_of_questions = [
"How many persons on the picture?",
"What happends on the picture?",
]
for key in mydict:
mydict[key] = sm.SummaryDetector(mydict[key]).analyse_questions(
for key in get_dict:
get_dict[key] = sm.SummaryDetector(get_dict[key]).analyse_questions(
list_of_questions
)
keys = list(mydict.keys())
assert len(mydict) == 12
list_of_questions_ans = [2, 100, "many", 0, 0, "none", "two", 5, 0, 0, 0, 1]
list_of_questions_ans2 = [
"flood",
"festival",
"people are flying kites",
"no one's home",
"chair is being moved",
"traffic jam",
"day time",
"traffic jam",
"nothing",
"nothing",
"nothing",
"running",
]
for i in range(len(list_of_questions_ans)):
assert mydict[keys[i]][list_of_questions[1]] == str(list_of_questions_ans2[i])
assert len(get_dict) == 2
list_of_questions_ans = ["2", "100"]
list_of_questions_ans2 = ["flood", "festival"]
test_answers = []
test_answers2 = []
for key in get_dict.keys():
test_answers.append(get_dict[key][list_of_questions[0]])
test_answers2.append(get_dict[key][list_of_questions[1]])
assert sorted(test_answers) == sorted(list_of_questions_ans)
assert sorted(test_answers2) == sorted(list_of_questions_ans2)

View file

@ -20,7 +20,7 @@ def set_testdict(get_path):
return testdict
LANGUAGES = ["de", "en", "en"]
LANGUAGES = ["de", "om", "en"]
def test_TextDetector(set_testdict):

View file

@ -2,8 +2,6 @@ import glob
import os
from pandas import DataFrame
import pooch
from torch import device, cuda
from lavis.models import load_model_and_preprocess
class DownloadResource:
@ -108,34 +106,3 @@ if __name__ == "__main__":
outdict = append_data_to_dict(mydict)
df = dump_df(outdict)
print(df.head(10))
def load_model_base():
summary_device = device("cuda" if cuda.is_available() else "cpu")
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption",
model_type="base_coco",
is_eval=True,
device=summary_device,
)
return summary_model, summary_vis_processors
def load_model_large():
summary_device = device("cuda" if cuda.is_available() else "cpu")
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption",
model_type="large_coco",
is_eval=True,
device=summary_device,
)
return summary_model, summary_vis_processors
def load_model(model_type):
select_model = {
"base": load_model_base,
"large": load_model_large,
}
summary_model, summary_vis_processors = select_model[model_type]()
return summary_model, summary_vis_processors

39
notebooks/image_summary.ipynb generated
View file

@ -17,7 +17,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import misinformation\n",
@ -36,7 +38,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"images = mutils.find_files(\n",
@ -48,7 +52,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"mydict = mutils.initialize_dict(images[0:10])"
@ -57,7 +63,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"mydict"
@ -80,22 +88,27 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"summary_model, summary_vis_processors = sm.SummaryDetector.load_model(mydict, \"base\")\n",
"obj = sm.SummaryDetector(mydict)\n",
"summary_model, summary_vis_processors = obj.load_model(model_type=\"base\")\n",
"# summary_model, summary_vis_processors = mutils.load_model(\"large\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"for key in mydict:\n",
" mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(\n",
" summary_model, summary_vis_processors\n",
" summary_model=summary_model, summary_vis_processors=summary_vis_processors\n",
" )"
]
},
@ -130,7 +143,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"df.head(10)"
@ -168,7 +183,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"mdisplay.explore_analysis(mydict, identify=\"summary\")"
@ -279,7 +296,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
"version": "3.9.16"
},
"vscode": {
"interpreter": {

View file

@ -24,12 +24,12 @@ classifiers = [
dependencies = [
"bertopic",
"cvlib",
"deepface @ git+https://github.com/iulusoy/deepface.git",
"deepface<=0.0.75",
"googletrans==3.1.0a0",
"grpcio",
"importlib_metadata",
"ipython",
"ipywidgets",
"ipywidgets<8.0.5",
"ipykernel",
"matplotlib",
"numpy<=1.23.4",
@ -39,9 +39,10 @@ dependencies = [
"protobuf",
"pytest",
"pytest-cov",
"pytest-xdist",
"requests",
"retina_face @ git+https://github.com/iulusoy/retinaface.git",
"salesforce-lavis @ git+https://github.com/iulusoy/LAVIS.git",
"retina_face",
"salesforce-lavis",
"spacy",
"spacytextblob",
"tensorflow",