зеркало из
https://github.com/ssciwr/AMMICO.git
synced 2025-10-29 21:16:06 +02:00
Test debugging (#62)
* deleted lavis from utils * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixed test_objects * added 'not gcv' to CI * fixed multimodal search and summary tests * disable doc build on PR for now * restrict ipywidgets version to avoid dummycomm error * limit deepface version * original repositories for retinaface lavis * update gcv test results * update display test outputs * update test env * run all tests * wo xdist to avoid segfault * remove widgets ref * skip long-running tests * skip long * verbose codecov upload * refactor summary test 2 * finish summary test refactor * reduce memory overhead of SummaryDetector * remove VQA models from self * remove VQA models from self * update notebook for changes * update notebook for changes * fixed multimodal search tests * fixed tests in multimodal search after precommit * run all tests * update doc notebook for summary changes * skip long-running multimodal * exclude blip2 from testing --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Inga Ulusoy <inga.ulusoy@uni-heidelberg.de>
Этот коммит содержится в:
родитель
0ca9366980
Коммит
a5c43b6488
5
.github/workflows/ci.yml
поставляемый
5
.github/workflows/ci.yml
поставляемый
@ -14,7 +14,7 @@ jobs:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-22.04,windows-latest]
|
||||
os: [ubuntu-22.04]
|
||||
python-version: [3.9]
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
@ -32,10 +32,11 @@ jobs:
|
||||
- name: Run pytest
|
||||
run: |
|
||||
cd misinformation
|
||||
python -m pytest --cov=. --cov-report=xml
|
||||
python -m pytest -m "not gcv" -svv --cov=. --cov-report=xml
|
||||
- name: Upload coverage
|
||||
if: matrix.os == 'ubuntu-22.04' && matrix.python-version == '3.9'
|
||||
uses: codecov/codecov-action@v3
|
||||
with:
|
||||
fail_ci_if_error: true
|
||||
files: misinformation/coverage.xml
|
||||
verbose: true
|
||||
|
||||
2
.github/workflows/docs.yml
поставляемый
2
.github/workflows/docs.yml
поставляемый
@ -3,8 +3,6 @@ name: Pages
|
||||
on:
|
||||
push:
|
||||
branches: [ main ]
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
|
||||
@ -20,7 +20,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import misinformation\n",
|
||||
"from misinformation import utils as mutils\n",
|
||||
"from misinformation import display as mdisplay\n",
|
||||
"import misinformation.summary as sm"
|
||||
@ -74,7 +73,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"summary_model, summary_vis_processors = mutils.load_model(\"base\")\n",
|
||||
"obj = sm.SummaryDetector(mydict)\n",
|
||||
"summary_model, summary_vis_processors = obj.load_model(\"base\")\n",
|
||||
"# summary_model, summary_vis_processors = mutils.load_model(\"large\")"
|
||||
]
|
||||
},
|
||||
@ -96,7 +96,7 @@
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"Convert the dictionary of dictionarys into a dictionary with lists:"
|
||||
"Convert the dictionary of dictionaries into a dictionary with lists:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -256,7 +256,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
||||
@ -141,7 +141,7 @@ class EmotionDetector(utils.AnalysisMethod):
|
||||
DeepFace.analyze(
|
||||
img_path=face,
|
||||
actions=actions,
|
||||
silent=True,
|
||||
prog_bar=False,
|
||||
detector_backend="skip",
|
||||
)
|
||||
)
|
||||
@ -192,6 +192,10 @@ class EmotionDetector(utils.AnalysisMethod):
|
||||
"Yes" if result[person]["wears_mask"] else "No"
|
||||
)
|
||||
self.subdict["age"].append(result[person]["age"])
|
||||
# gender is now reported as a list of dictionaries
|
||||
# each dict represents one face
|
||||
# each dict contains probability for Woman and Man
|
||||
# take only the higher prob result for each dict
|
||||
self.subdict["gender"].append(result[person]["gender"])
|
||||
# race, emotion only detected if person does not wear mask
|
||||
if result[person]["wears_mask"]:
|
||||
|
||||
@ -174,7 +174,7 @@ class MultimodalSearch(AnalysisMethod):
|
||||
"Please, use one of the following models: blip2, blip, albef, clip_base, clip_vitl14, clip_vitl14_336"
|
||||
)
|
||||
|
||||
raw_images, images_tensors = MultimodalSearch.read_and_process_images(
|
||||
_, images_tensors = MultimodalSearch.read_and_process_images(
|
||||
self, image_names, vis_processors
|
||||
)
|
||||
if path_to_saved_tensors is None:
|
||||
@ -213,7 +213,7 @@ class MultimodalSearch(AnalysisMethod):
|
||||
for query in search_query:
|
||||
if not (len(query) == 1) and (query in ("image", "text_input")):
|
||||
raise SyntaxError(
|
||||
'Each querry must contain either an "image" or a "text_input"'
|
||||
'Each query must contain either an "image" or a "text_input"'
|
||||
)
|
||||
multi_sample = []
|
||||
for query in search_query:
|
||||
|
||||
@ -7,36 +7,28 @@ from lavis.models import load_model_and_preprocess
|
||||
class SummaryDetector(AnalysisMethod):
|
||||
def __init__(self, subdict: dict) -> None:
|
||||
super().__init__(subdict)
|
||||
|
||||
summary_device = device("cuda" if cuda.is_available() else "cpu")
|
||||
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
|
||||
name="blip_caption",
|
||||
model_type="base_coco",
|
||||
is_eval=True,
|
||||
device=summary_device,
|
||||
)
|
||||
self.summary_device = device("cuda" if cuda.is_available() else "cpu")
|
||||
|
||||
def load_model_base(self):
|
||||
summary_device = device("cuda" if cuda.is_available() else "cpu")
|
||||
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
|
||||
name="blip_caption",
|
||||
model_type="base_coco",
|
||||
is_eval=True,
|
||||
device=summary_device,
|
||||
device=self.summary_device,
|
||||
)
|
||||
return summary_model, summary_vis_processors
|
||||
|
||||
def load_model_large(self):
|
||||
summary_device = device("cuda" if cuda.is_available() else "cpu")
|
||||
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
|
||||
name="blip_caption",
|
||||
model_type="large_coco",
|
||||
is_eval=True,
|
||||
device=summary_device,
|
||||
device=self.summary_device,
|
||||
)
|
||||
return summary_model, summary_vis_processors
|
||||
|
||||
def load_model(self, model_type):
|
||||
# self.summary_device = device("cuda" if cuda.is_available() else "cpu")
|
||||
select_model = {
|
||||
"base": SummaryDetector.load_model_base,
|
||||
"large": SummaryDetector.load_model_large,
|
||||
@ -47,8 +39,7 @@ class SummaryDetector(AnalysisMethod):
|
||||
def analyse_image(self, summary_model=None, summary_vis_processors=None):
|
||||
|
||||
if summary_model is None and summary_vis_processors is None:
|
||||
summary_model = SummaryDetector.summary_model
|
||||
summary_vis_processors = SummaryDetector.summary_vis_processors
|
||||
summary_model, summary_vis_processors = self.load_model_base()
|
||||
|
||||
path = self.subdict["filename"]
|
||||
raw_image = Image.open(path).convert("RGB")
|
||||
@ -66,32 +57,33 @@ class SummaryDetector(AnalysisMethod):
|
||||
)
|
||||
return self.subdict
|
||||
|
||||
def analyse_questions(self, list_of_questions):
|
||||
(
|
||||
summary_VQA_model,
|
||||
summary_VQA_vis_processors,
|
||||
summary_VQA_txt_processors,
|
||||
) = load_model_and_preprocess(
|
||||
name="blip_vqa", model_type="vqav2", is_eval=True, device=summary_device
|
||||
name="blip_vqa",
|
||||
model_type="vqav2",
|
||||
is_eval=True,
|
||||
device=self.summary_device,
|
||||
)
|
||||
|
||||
def analyse_questions(self, list_of_questions):
|
||||
|
||||
if len(list_of_questions) > 0:
|
||||
path = self.subdict["filename"]
|
||||
raw_image = Image.open(path).convert("RGB")
|
||||
image = (
|
||||
self.summary_VQA_vis_processors["eval"](raw_image)
|
||||
summary_VQA_vis_processors["eval"](raw_image)
|
||||
.unsqueeze(0)
|
||||
.to(self.summary_device)
|
||||
)
|
||||
question_batch = []
|
||||
for quest in list_of_questions:
|
||||
question_batch.append(self.summary_VQA_txt_processors["eval"](quest))
|
||||
question_batch.append(summary_VQA_txt_processors["eval"](quest))
|
||||
batch_size = len(list_of_questions)
|
||||
image_batch = image.repeat(batch_size, 1, 1, 1)
|
||||
|
||||
with no_grad():
|
||||
answers_batch = self.summary_VQA_model.predict_answers(
|
||||
answers_batch = summary_VQA_model.predict_answers(
|
||||
samples={"image": image_batch, "text_input": question_batch},
|
||||
inference_method="generate",
|
||||
)
|
||||
|
||||
@ -1 +1 @@
|
||||
{"IMG_2746": {"filename": "./test/data/IMG_2809.png", "person": "yes", "bicycle": "no", "car": "yes", "motorcycle": "no", "airplane": "no", "bus": "yes", "train": "no", "truck": "no", "boat": "no", "traffic light": "no", "cell phone": "no"}}
|
||||
{"person": "yes", "bicycle": "no", "car": "yes", "motorcycle": "no", "airplane": "no", "bus": "yes", "train": "no", "truck": "no", "boat": "no", "traffic light": "no", "cell phone": "no"}
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
{
|
||||
"filename": "./test/data/IMG_2746.png",
|
||||
"face": "Yes",
|
||||
"multiple_faces": "Yes",
|
||||
"no_faces": 11,
|
||||
|
||||
@ -3,10 +3,10 @@ The Quantum Theory of
|
||||
Nonrelativistic Collisions
|
||||
JOHN R. TAYLOR
|
||||
University of Colorado
|
||||
postaldia Lanbidean
|
||||
ostaliga Lanbidean
|
||||
1 ilde
|
||||
ballenger stor goin
|
||||
gd OOL, STVÍ 23 TL 02
|
||||
gdĐOL, SIVI 23 TL 02
|
||||
de in obl
|
||||
och yd badalang
|
||||
a
|
||||
|
||||
@ -3,12 +3,12 @@ The Quantum Theory of
|
||||
Nonrelativistic Collisions
|
||||
JOHN R. TAYLOR
|
||||
University of Colorado
|
||||
postaldia Lanbidean
|
||||
ostaliga Lanbidean
|
||||
1 ilde
|
||||
ballenger stor goin
|
||||
gd OOL, STVÍ 23 TL 02
|
||||
de in obl
|
||||
och yd badalang
|
||||
balloons big goin
|
||||
gdĐOL, SIVI 23 TL
|
||||
there in obl
|
||||
och yd change
|
||||
a
|
||||
Ber
|
||||
ook Sy-RW enot go baldus
|
||||
ook Sy-RW isn't going anywhere
|
||||
@ -1,3 +1,4 @@
|
||||
[pytest]
|
||||
markers =
|
||||
gcv: mark google cloud vision tests - skip to save money.
|
||||
long: mark long running tests - skip to save compute resources.
|
||||
@ -2,20 +2,20 @@ import misinformation.cropposts as crpo
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
TEST_IMAGE_1 = "./test/data/pic1.png"
|
||||
TEST_IMAGE_2 = "./test/data/pic2.png"
|
||||
TEST_IMAGE_1 = "pic1.png"
|
||||
TEST_IMAGE_2 = "pic2.png"
|
||||
|
||||
|
||||
def test_matching_points():
|
||||
ref_view = np.array(Image.open(TEST_IMAGE_2))
|
||||
view = np.array(Image.open(TEST_IMAGE_1))
|
||||
filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view)
|
||||
def test_matching_points(get_path):
|
||||
ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
|
||||
view = np.array(Image.open(get_path + TEST_IMAGE_1))
|
||||
filtered_matches, _, _ = crpo.matching_points(ref_view, view)
|
||||
assert len(filtered_matches) > 0
|
||||
|
||||
|
||||
def test_kp_from_matches():
|
||||
ref_view = np.array(Image.open(TEST_IMAGE_2))
|
||||
view = np.array(Image.open(TEST_IMAGE_1))
|
||||
def test_kp_from_matches(get_path):
|
||||
ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
|
||||
view = np.array(Image.open(get_path + TEST_IMAGE_1))
|
||||
filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view)
|
||||
kp1, kp2 = crpo.kp_from_matches(filtered_matches, kp1, kp2)
|
||||
|
||||
@ -25,9 +25,9 @@ def test_kp_from_matches():
|
||||
assert kp2.shape[1] == 2
|
||||
|
||||
|
||||
def test_compute_crop_corner():
|
||||
ref_view = np.array(Image.open(TEST_IMAGE_2))
|
||||
view = np.array(Image.open(TEST_IMAGE_1))
|
||||
def test_compute_crop_corner(get_path):
|
||||
ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
|
||||
view = np.array(Image.open(get_path + TEST_IMAGE_1))
|
||||
filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view)
|
||||
corner = crpo.compute_crop_corner(filtered_matches, kp1, kp2)
|
||||
print(view.shape)
|
||||
@ -38,9 +38,9 @@ def test_compute_crop_corner():
|
||||
assert 0 <= h < view.shape[0]
|
||||
|
||||
|
||||
def test_crop_posts_image():
|
||||
ref_view = np.array(Image.open(TEST_IMAGE_2))
|
||||
view = np.array(Image.open(TEST_IMAGE_1))
|
||||
def test_crop_posts_image(get_path):
|
||||
ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
|
||||
view = np.array(Image.open(get_path + TEST_IMAGE_1))
|
||||
rte = crpo.crop_posts_image(ref_view, view)
|
||||
assert rte is not None
|
||||
crop_view, match_num = rte
|
||||
@ -48,16 +48,15 @@ def test_crop_posts_image():
|
||||
assert crop_view.shape[0] * crop_view.shape[1] <= view.shape[0] * view.shape[1]
|
||||
|
||||
|
||||
def test_crop_posts_from_refs():
|
||||
ref_view = np.array(Image.open(TEST_IMAGE_2))
|
||||
view = np.array(Image.open(TEST_IMAGE_1))
|
||||
def test_crop_posts_from_refs(get_path):
|
||||
ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
|
||||
view = np.array(Image.open(get_path + TEST_IMAGE_1))
|
||||
ref_views = [ref_view]
|
||||
crop_view = crpo.crop_posts_from_refs(ref_views, view)
|
||||
assert crop_view.shape[0] * crop_view.shape[1] <= view.shape[0] * view.shape[1]
|
||||
|
||||
|
||||
def test_get_file_list():
|
||||
def test_get_file_list(get_path):
|
||||
ref_list = []
|
||||
ref_dir = "./test/data"
|
||||
ref_list = crpo.get_file_list(ref_dir, ref_list, ext="png")
|
||||
ref_list = crpo.get_file_list(get_path, ref_list, ext="png")
|
||||
assert len(ref_list) > 0
|
||||
|
||||
@ -1,27 +1,28 @@
|
||||
import json
|
||||
|
||||
# import misinformation.display as misinf_display
|
||||
import pytest
|
||||
|
||||
misinf_display = pytest.importorskip("misinformation.display")
|
||||
import misinformation.display as misinf_display
|
||||
|
||||
|
||||
def test_explore_analysis_faces():
|
||||
mydict = {"IMG_2746": {"filename": "./test/data/IMG_2746.png"}}
|
||||
misinf_display.explore_analysis(mydict, identify="faces")
|
||||
with open("./test/data/example_faces.json", "r") as file:
|
||||
def test_explore_analysis_faces(get_path):
|
||||
mydict = {"IMG_2746": {"filename": get_path + "IMG_2746.png"}}
|
||||
temp = misinf_display.explore_analysis(mydict, identify="faces") # noqa
|
||||
temp = None # noqa
|
||||
with open(get_path + "example_faces.json", "r") as file:
|
||||
outs = json.load(file)
|
||||
|
||||
mydict["IMG_2746"].pop("filename", None)
|
||||
for im_key in mydict.keys():
|
||||
sub_dict = mydict[im_key]
|
||||
for key in sub_dict.keys():
|
||||
assert sub_dict[key] == outs[key]
|
||||
|
||||
|
||||
def test_explore_analysis_objects():
|
||||
mydict = {"IMG_2746": {"filename": "./test/data/IMG_2809.png"}}
|
||||
misinf_display.explore_analysis(mydict, identify="objects")
|
||||
with open("./test/data/example_analysis_objects.json", "r") as file:
|
||||
def test_explore_analysis_objects(get_path):
|
||||
mydict = {"IMG_2809": {"filename": get_path + "IMG_2809.png"}}
|
||||
temp = misinf_display.explore_analysis(mydict, identify="objects") # noqa
|
||||
temp = None # noqa
|
||||
with open(get_path + "example_analysis_objects.json", "r") as file:
|
||||
outs = json.load(file)
|
||||
|
||||
assert str(mydict) == str(outs)
|
||||
mydict["IMG_2809"].pop("filename", None)
|
||||
for im_key in mydict.keys():
|
||||
sub_dict = mydict[im_key]
|
||||
for key in sub_dict.keys():
|
||||
assert sub_dict[key] == outs[key]
|
||||
|
||||
@ -1,16 +1,17 @@
|
||||
import misinformation.faces as fc
|
||||
import json
|
||||
from pytest import approx
|
||||
import pytest
|
||||
|
||||
|
||||
def test_analyse_faces():
|
||||
def test_analyse_faces(get_path):
|
||||
mydict = {
|
||||
"filename": "./test/data/IMG_2746.png",
|
||||
"filename": get_path + "IMG_2746.png",
|
||||
}
|
||||
mydict = fc.EmotionDetector(mydict).analyse_image()
|
||||
mydict.update(fc.EmotionDetector(mydict).analyse_image())
|
||||
|
||||
with open("./test/data/example_faces.json", "r") as file:
|
||||
with open(get_path + "example_faces.json", "r") as file:
|
||||
out_dict = json.load(file)
|
||||
|
||||
# delete the filename key
|
||||
mydict.pop("filename", None)
|
||||
for key in mydict.keys():
|
||||
assert mydict[key] == out_dict[key]
|
||||
|
||||
@ -5,24 +5,13 @@ import numpy
|
||||
from torch import device, cuda
|
||||
import misinformation.multimodal_search as ms
|
||||
|
||||
|
||||
testdict = {
|
||||
"d755771b-225e-432f-802e-fb8dc850fff7": {
|
||||
"filename": "./test/data/d755771b-225e-432f-802e-fb8dc850fff7.png"
|
||||
},
|
||||
"IMG_2746": {"filename": "./test/data/IMG_2746.png"},
|
||||
"IMG_2750": {"filename": "./test/data/IMG_2750.png"},
|
||||
"IMG_2805": {"filename": "./test/data/IMG_2805.png"},
|
||||
"IMG_2806": {"filename": "./test/data/IMG_2806.png"},
|
||||
"IMG_2807": {"filename": "./test/data/IMG_2807.png"},
|
||||
"IMG_2808": {"filename": "./test/data/IMG_2808.png"},
|
||||
"IMG_2809": {"filename": "./test/data/IMG_2809.png"},
|
||||
"IMG_3755": {"filename": "./test/data/IMG_3755.jpg"},
|
||||
"IMG_3756": {"filename": "./test/data/IMG_3756.jpg"},
|
||||
"IMG_3757": {"filename": "./test/data/IMG_3757.jpg"},
|
||||
"pic1": {"filename": "./test/data/pic1.png"},
|
||||
}
|
||||
|
||||
related_error = 1e-3
|
||||
related_error = 1e-2
|
||||
gpu_is_not_available = not cuda.is_available()
|
||||
|
||||
|
||||
@ -38,39 +27,15 @@ def test_read_img():
|
||||
pre_proc_pic_blip2_blip_albef = [
|
||||
-1.0039474964141846,
|
||||
-1.0039474964141846,
|
||||
-0.8433647751808167,
|
||||
-0.6097899675369263,
|
||||
-0.5951915383338928,
|
||||
-0.6243883967399597,
|
||||
-0.6827820539474487,
|
||||
-0.6097899675369263,
|
||||
-0.7119789123535156,
|
||||
-1.0623412132263184,
|
||||
]
|
||||
pre_proc_pic_clip_vitl14 = [
|
||||
-0.7995694875717163,
|
||||
-0.7849710583686829,
|
||||
-0.7849710583686829,
|
||||
-0.7703726291656494,
|
||||
-0.7703726291656494,
|
||||
-0.7849710583686829,
|
||||
-0.7849710583686829,
|
||||
-0.7703726291656494,
|
||||
-0.7703726291656494,
|
||||
-0.7703726291656494,
|
||||
]
|
||||
|
||||
pre_proc_pic_clip_vitl14_336 = [
|
||||
-0.7995694875717163,
|
||||
-0.7849710583686829,
|
||||
-0.7849710583686829,
|
||||
-0.7849710583686829,
|
||||
-0.7849710583686829,
|
||||
-0.7849710583686829,
|
||||
-0.7849710583686829,
|
||||
-0.9163569211959839,
|
||||
-1.149931788444519,
|
||||
-1.0039474964141846,
|
||||
]
|
||||
|
||||
pre_proc_text_blip2_blip_albef = (
|
||||
@ -84,293 +49,150 @@ pre_proc_text_clip_clip_vitl14_clip_vitl14_336 = (
|
||||
pre_extracted_feature_img_blip2 = [
|
||||
0.04566730558872223,
|
||||
-0.042554520070552826,
|
||||
-0.06970272958278656,
|
||||
-0.009771779179573059,
|
||||
0.01446065679192543,
|
||||
0.10173682868480682,
|
||||
0.007092420011758804,
|
||||
-0.020045937970280647,
|
||||
0.12923966348171234,
|
||||
0.006452132016420364,
|
||||
]
|
||||
|
||||
pre_extracted_feature_img_blip = [
|
||||
-0.02480311505496502,
|
||||
0.05037587881088257,
|
||||
0.039517853409051895,
|
||||
-0.06994109600782394,
|
||||
-0.12886561453342438,
|
||||
0.047039758414030075,
|
||||
-0.11620642244815826,
|
||||
-0.003398326924070716,
|
||||
-0.07324369996786118,
|
||||
0.06994668394327164,
|
||||
]
|
||||
|
||||
pre_extracted_feature_img_albef = [
|
||||
0.08971136063337326,
|
||||
-0.10915573686361313,
|
||||
-0.020636577159166336,
|
||||
0.048121627420186996,
|
||||
-0.05943416804075241,
|
||||
-0.129856139421463,
|
||||
-0.0034469354432076216,
|
||||
0.017888527363538742,
|
||||
-0.03284582123160362,
|
||||
-0.1037328764796257,
|
||||
]
|
||||
|
||||
pre_extracted_feature_img_clip = [
|
||||
0.01621132344007492,
|
||||
-0.004035486374050379,
|
||||
-0.04304071143269539,
|
||||
-0.03459808602929115,
|
||||
0.016922621056437492,
|
||||
-0.025056276470422745,
|
||||
-0.04178355261683464,
|
||||
0.02165347896516323,
|
||||
-0.003224249929189682,
|
||||
0.020485712215304375,
|
||||
]
|
||||
|
||||
pre_extracted_feature_img_parsing_clip = [
|
||||
0.01621132344007492,
|
||||
-0.004035486374050379,
|
||||
-0.04304071143269539,
|
||||
-0.03459808602929115,
|
||||
0.016922621056437492,
|
||||
-0.025056276470422745,
|
||||
-0.04178355261683464,
|
||||
0.02165347896516323,
|
||||
-0.003224249929189682,
|
||||
0.020485712215304375,
|
||||
]
|
||||
|
||||
pre_extracted_feature_img_clip_vitl14 = [
|
||||
-0.023943455889821053,
|
||||
-0.021703708916902542,
|
||||
0.035043686628341675,
|
||||
0.019495919346809387,
|
||||
0.014351222664117813,
|
||||
-0.008634116500616074,
|
||||
0.01610446907579899,
|
||||
-0.003426523646339774,
|
||||
0.011931191198527813,
|
||||
0.0008691544644534588,
|
||||
]
|
||||
|
||||
pre_extracted_feature_img_clip_vitl14_336 = [
|
||||
-0.009511193260550499,
|
||||
-0.012618942186236382,
|
||||
0.034754861146211624,
|
||||
0.016356879845261574,
|
||||
-0.0011549904011189938,
|
||||
-0.008054453879594803,
|
||||
0.0011990377679467201,
|
||||
-0.010806051082909107,
|
||||
0.00140204350464046,
|
||||
0.0006861367146484554,
|
||||
]
|
||||
|
||||
pre_extracted_feature_text_blip2 = [
|
||||
-0.1384204626083374,
|
||||
-0.008662976324558258,
|
||||
0.006269007455557585,
|
||||
0.03151319921016693,
|
||||
0.060558050870895386,
|
||||
-0.03230040520429611,
|
||||
0.015861615538597107,
|
||||
-0.11856459826231003,
|
||||
-0.058296192437410355,
|
||||
0.03699290752410889,
|
||||
]
|
||||
|
||||
pre_extracted_feature_text_blip = [
|
||||
0.0118643119931221,
|
||||
-0.01291718054562807,
|
||||
-0.0009687161073088646,
|
||||
0.01428765058517456,
|
||||
-0.05591396614909172,
|
||||
0.07386433333158493,
|
||||
-0.11475936323404312,
|
||||
0.01620068959891796,
|
||||
0.0062415082938969135,
|
||||
0.0034833091776818037,
|
||||
]
|
||||
|
||||
pre_extracted_feature_text_albef = [
|
||||
-0.06229640915989876,
|
||||
0.11278597265481949,
|
||||
0.06628583371639252,
|
||||
0.1649140566587448,
|
||||
0.068987175822258,
|
||||
0.006291372701525688,
|
||||
0.03244050219655037,
|
||||
-0.049556829035282135,
|
||||
0.050752390176057816,
|
||||
-0.0421440489590168,
|
||||
]
|
||||
|
||||
pre_extracted_feature_text_clip = [
|
||||
0.018169036135077477,
|
||||
0.03634127229452133,
|
||||
0.025660742074251175,
|
||||
0.009149895049631596,
|
||||
-0.035570453852415085,
|
||||
0.033126577734947205,
|
||||
-0.004808237310498953,
|
||||
-0.0031453112605959177,
|
||||
-0.02194291725754738,
|
||||
0.024019461125135422,
|
||||
]
|
||||
|
||||
pre_extracted_feature_text_clip_vitl14 = [
|
||||
-0.0055463071912527084,
|
||||
0.006908962037414312,
|
||||
-0.019450219348073006,
|
||||
-0.018097277730703354,
|
||||
0.017567576840519905,
|
||||
-0.03828490898013115,
|
||||
-0.03781530633568764,
|
||||
-0.023951737210154533,
|
||||
0.01365653332322836,
|
||||
-0.02341713197529316,
|
||||
]
|
||||
|
||||
pre_extracted_feature_text_clip_vitl14_336 = [
|
||||
-0.008720514364540577,
|
||||
0.005284308455884457,
|
||||
-0.021116750314831734,
|
||||
-0.018112430348992348,
|
||||
0.01685470901429653,
|
||||
-0.03517491742968559,
|
||||
-0.038612402975559235,
|
||||
-0.021867064759135246,
|
||||
0.01685977540910244,
|
||||
-0.023832324892282486,
|
||||
]
|
||||
|
||||
simularity_blip2 = [
|
||||
[0.05826476216316223, -0.03215287625789642],
|
||||
[0.12869958579540253, 0.005234059877693653],
|
||||
[0.11073512583971024, 0.12327003479003906],
|
||||
[0.08743024617433548, 0.05598106235265732],
|
||||
[0.04591086134314537, 0.48981112241744995],
|
||||
[0.06297147274017334, 0.4728018641471863],
|
||||
[0.18486255407333374, 0.635167121887207],
|
||||
[0.015356295742094517, 0.015282897278666496],
|
||||
[-0.008485622704029083, 0.010882291942834854],
|
||||
[-0.04328630864620209, -0.13117870688438416],
|
||||
[-0.025470387190580368, 0.13175423443317413],
|
||||
[-0.05090826004743576, 0.05902523919939995],
|
||||
[0.05826476216316223, -0.02717375010251999],
|
||||
[0.06297147274017334, 0.47339022159576416],
|
||||
]
|
||||
|
||||
sorted_blip2 = [
|
||||
[6, 1, 2, 3, 5, 0, 4, 7, 8, 10, 9, 11],
|
||||
[6, 4, 5, 10, 2, 11, 3, 7, 8, 1, 0, 9],
|
||||
[1, 0],
|
||||
[1, 0],
|
||||
]
|
||||
|
||||
simularity_blip = [
|
||||
[0.15640679001808167, 0.752173662185669],
|
||||
[0.15139800310134888, 0.7804810404777527],
|
||||
[0.13010388612747192, 0.755257248878479],
|
||||
[0.13746635615825653, 0.7618774175643921],
|
||||
[0.1756758838891983, 0.8531903624534607],
|
||||
[0.17233705520629883, 0.8448910117149353],
|
||||
[0.1970970332622528, 0.8916105628013611],
|
||||
[0.11693969368934631, 0.5833531618118286],
|
||||
[0.12386563420295715, 0.5981853604316711],
|
||||
[0.08427951484918594, 0.4962371587753296],
|
||||
[0.14193706214427948, 0.7613846659660339],
|
||||
[0.12051936239004135, 0.6492202281951904],
|
||||
]
|
||||
|
||||
sorted_blip = [
|
||||
[6, 4, 5, 0, 1, 10, 3, 2, 8, 11, 7, 9],
|
||||
[6, 4, 5, 1, 3, 10, 2, 0, 11, 8, 7, 9],
|
||||
[1, 0],
|
||||
[1, 0],
|
||||
]
|
||||
|
||||
simularity_albef = [
|
||||
[0.12321824580430984, 0.35511350631713867],
|
||||
[0.09512615948915482, 0.27168408036231995],
|
||||
[0.09053325653076172, 0.20215675234794617],
|
||||
[0.06335515528917313, 0.15055638551712036],
|
||||
[0.09604836255311966, 0.4658776521682739],
|
||||
[0.10870333760976791, 0.5143978595733643],
|
||||
[0.11748822033405304, 0.6542638540267944],
|
||||
[0.05688793584704399, 0.22170542180538177],
|
||||
[0.05597608536481857, 0.11963296681642532],
|
||||
[0.059643782675266266, 0.14969395101070404],
|
||||
[0.06690303236246109, 0.3149859607219696],
|
||||
[0.07909377664327621, 0.11911341547966003],
|
||||
]
|
||||
|
||||
sorted_albef = [
|
||||
[0, 6, 5, 4, 1, 2, 11, 10, 3, 9, 7, 8],
|
||||
[6, 5, 4, 0, 10, 1, 7, 2, 3, 9, 8, 11],
|
||||
[0, 1],
|
||||
[1, 0],
|
||||
]
|
||||
|
||||
simularity_clip = [
|
||||
[0.23923014104366302, 0.5325412750244141],
|
||||
[0.20101115107536316, 0.5112978219985962],
|
||||
[0.17522737383842468, 0.49811851978302],
|
||||
[0.20062290132045746, 0.5415266156196594],
|
||||
[0.22865726053714752, 0.5762109756469727],
|
||||
[0.2310466319322586, 0.5910375714302063],
|
||||
[0.2644523084163666, 0.7851459383964539],
|
||||
[0.21474510431289673, 0.4135811924934387],
|
||||
[0.16407863795757294, 0.1474374681711197],
|
||||
[0.19819433987140656, 0.26493316888809204],
|
||||
[0.19545596837997437, 0.5007457137107849],
|
||||
[0.1647854745388031, 0.45705708861351013],
|
||||
]
|
||||
|
||||
sorted_clip = [
|
||||
[6, 0, 5, 4, 7, 1, 3, 9, 10, 2, 11, 8],
|
||||
[6, 5, 4, 3, 0, 1, 10, 2, 11, 7, 9, 8],
|
||||
[1, 0],
|
||||
[1, 0],
|
||||
]
|
||||
|
||||
simularity_clip_vitl14 = [
|
||||
[0.1051270067691803, 0.5184808373451233],
|
||||
[0.09705893695354462, 0.49574509263038635],
|
||||
[0.11964304000139236, 0.5424358248710632],
|
||||
[0.13881900906562805, 0.5909714698791504],
|
||||
[0.12728188931941986, 0.6758255362510681],
|
||||
[0.1277746558189392, 0.6841973662376404],
|
||||
[0.18026694655418396, 0.803142786026001],
|
||||
[0.13977059721946716, 0.45957139134407043],
|
||||
[0.11180847883224487, 0.24822194874286652],
|
||||
[0.12296056002378464, 0.35143694281578064],
|
||||
[0.11596094071865082, 0.5704031586647034],
|
||||
[0.10174489766359329, 0.44422751665115356],
|
||||
]
|
||||
|
||||
sorted_clip_vitl14 = [
|
||||
[6, 7, 3, 5, 4, 9, 2, 10, 8, 0, 11, 1],
|
||||
[6, 5, 4, 3, 10, 2, 0, 1, 7, 11, 9, 8],
|
||||
[1, 0],
|
||||
[1, 0],
|
||||
]
|
||||
|
||||
simularity_clip_vitl14_336 = [
|
||||
[0.09391091763973236, 0.49337542057037354],
|
||||
[0.11103834211826324, 0.4881117343902588],
|
||||
[0.12891019880771637, 0.5501476526260376],
|
||||
[0.13288410007953644, 0.5498673915863037],
|
||||
[0.12357455492019653, 0.6749162077903748],
|
||||
[0.13700757920742035, 0.7003108263015747],
|
||||
[0.1788637489080429, 0.7713702321052551],
|
||||
[0.13260436058044434, 0.4300197660923004],
|
||||
[0.11666625738143921, 0.2334875613451004],
|
||||
[0.1316065937280655, 0.3291645646095276],
|
||||
[0.12374477833509445, 0.5632147192955017],
|
||||
[0.10333051532506943, 0.43023794889450073],
|
||||
]
|
||||
|
||||
sorted_clip_vitl14_336 = [
|
||||
[6, 5, 3, 7, 9, 2, 10, 4, 8, 1, 11, 0],
|
||||
[6, 5, 4, 10, 2, 3, 0, 1, 11, 7, 9, 8],
|
||||
[1, 0],
|
||||
[1, 0],
|
||||
]
|
||||
|
||||
dict_itm_scores_for_blib = {
|
||||
"blip_base": [
|
||||
0.07107225805521011,
|
||||
0.004100032616406679,
|
||||
],
|
||||
"blip_large": [
|
||||
0.07890705019235611,
|
||||
0.00271016638725996,
|
||||
],
|
||||
"blip2_coco": [
|
||||
0.0833505243062973,
|
||||
0.004216152708977461,
|
||||
],
|
||||
}
|
||||
|
||||
dict_image_gradcam_with_itm_for_blip = {
|
||||
"blip_base": [123.36285799741745, 132.31662154197693, 53.38280035299249],
|
||||
"blip_large": [119.99512910842896, 128.7044593691826, 55.552959859540515],
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.long
|
||||
@pytest.mark.parametrize(
|
||||
(
|
||||
"pre_multimodal_device",
|
||||
@ -383,19 +205,29 @@ sorted_clip_vitl14_336 = [
|
||||
"pre_sorted",
|
||||
),
|
||||
[
|
||||
pytest.param(
|
||||
device("cuda"),
|
||||
"blip2",
|
||||
pre_proc_pic_blip2_blip_albef,
|
||||
pre_proc_text_blip2_blip_albef,
|
||||
pre_extracted_feature_img_blip2,
|
||||
pre_extracted_feature_text_blip2,
|
||||
simularity_blip2,
|
||||
sorted_blip2,
|
||||
marks=pytest.mark.skipif(
|
||||
gpu_is_not_available, reason="gpu_is_not_availible"
|
||||
),
|
||||
),
|
||||
# (
|
||||
# device("cpu"),
|
||||
# "blip2",
|
||||
# pre_proc_pic_blip2_blip_albef,
|
||||
# pre_proc_text_blip2_blip_albef,
|
||||
# pre_extracted_feature_img_blip2,
|
||||
# pre_extracted_feature_text_blip2,
|
||||
# simularity_blip2,
|
||||
# sorted_blip2,
|
||||
# ),
|
||||
# pytest.param(
|
||||
# device("cuda"),
|
||||
# "blip2",
|
||||
# pre_proc_pic_blip2_blip_albef,
|
||||
# pre_proc_text_blip2_blip_albef,
|
||||
# pre_extracted_feature_img_blip2,
|
||||
# pre_extracted_feature_text_blip2,
|
||||
# simularity_blip2,
|
||||
# sorted_blip2,
|
||||
# marks=pytest.mark.skipif(
|
||||
# gpu_is_not_available, reason="gpu_is_not_availible"
|
||||
# ),
|
||||
# ),
|
||||
(
|
||||
device("cpu"),
|
||||
"blip",
|
||||
@ -530,11 +362,11 @@ def test_parsing_images(
|
||||
vis_processor,
|
||||
txt_processor,
|
||||
image_keys,
|
||||
image_names,
|
||||
_,
|
||||
features_image_stacked,
|
||||
) = ms.MultimodalSearch.parsing_images(testdict, pre_model)
|
||||
|
||||
for i, num in zip(range(10), features_image_stacked[0, 10:20].tolist()):
|
||||
for i, num in zip(range(10), features_image_stacked[0, 10:12].tolist()):
|
||||
assert (
|
||||
math.isclose(num, pre_extracted_feature_img[i], rel_tol=related_error)
|
||||
is True
|
||||
@ -549,7 +381,7 @@ def test_parsing_images(
|
||||
)
|
||||
processed_text = txt_processor["eval"](test_querry)
|
||||
|
||||
for i, num in zip(range(10), processed_pic[0, 0, 0, 25:35].tolist()):
|
||||
for i, num in zip(range(10), processed_pic[0, 0, 0, 25:27].tolist()):
|
||||
assert math.isclose(num, pre_proc_pic[i], rel_tol=related_error) is True
|
||||
|
||||
assert processed_text == pre_proc_text
|
||||
@ -562,13 +394,13 @@ def test_parsing_images(
|
||||
testdict, search_query, model, txt_processor, vis_processor, pre_model
|
||||
)
|
||||
|
||||
for i, num in zip(range(10), multi_features_stacked[0, 10:20].tolist()):
|
||||
for i, num in zip(range(10), multi_features_stacked[0, 10:12].tolist()):
|
||||
assert (
|
||||
math.isclose(num, pre_extracted_feature_text[i], rel_tol=related_error)
|
||||
is True
|
||||
)
|
||||
|
||||
for i, num in zip(range(10), multi_features_stacked[1, 10:20].tolist()):
|
||||
for i, num in zip(range(10), multi_features_stacked[1, 10:12].tolist()):
|
||||
assert (
|
||||
math.isclose(num, pre_extracted_feature_img[i], rel_tol=related_error)
|
||||
is True
|
||||
@ -590,16 +422,24 @@ def test_parsing_images(
|
||||
search_query2,
|
||||
)
|
||||
|
||||
for i, num in zip(range(12), similarity.tolist()):
|
||||
for i, num in zip(range(len(pre_simularity)), similarity.tolist()):
|
||||
for j, num2 in zip(range(len(num)), num):
|
||||
assert (
|
||||
math.isclose(num2, pre_simularity[i][j], rel_tol=100 * related_error)
|
||||
is True
|
||||
)
|
||||
|
||||
for i, num in zip(range(2), sorted_list):
|
||||
for i, num in zip(range(len(pre_sorted)), sorted_list):
|
||||
for j, num2 in zip(range(2), num):
|
||||
assert num2 == pre_sorted[i][j]
|
||||
|
||||
del model, vis_processor, txt_processor
|
||||
del (
|
||||
model,
|
||||
vis_processor,
|
||||
txt_processor,
|
||||
similarity,
|
||||
features_image_stacked,
|
||||
processed_pic,
|
||||
multi_features_stacked,
|
||||
)
|
||||
cuda.empty_cache()
|
||||
|
||||
@ -31,6 +31,7 @@ def test_analyse_image_cvlib(get_path):
|
||||
|
||||
with open(get_path + JSON_1, "r") as file:
|
||||
out_dict = json.load(file)
|
||||
out_dict["filename"] = get_path + out_dict["filename"]
|
||||
for key in mydict.keys():
|
||||
assert mydict[key] == out_dict[key]
|
||||
|
||||
@ -56,10 +57,11 @@ def test_init_default_objects():
|
||||
|
||||
def test_analyse_image_from_file_cvlib(get_path):
|
||||
file_path = get_path + TEST_IMAGE_1
|
||||
objs = ob_cvlib.ObjectCVLib().analyse_image_from_file(get_path + file_path)
|
||||
objs = ob_cvlib.ObjectCVLib().analyse_image_from_file(file_path)
|
||||
|
||||
with open(get_path + JSON_1, "r") as file:
|
||||
out_dict = json.load(file)
|
||||
out_dict["filename"] = get_path + out_dict["filename"]
|
||||
for key in objs.keys():
|
||||
assert objs[key] == out_dict[key]
|
||||
|
||||
@ -86,5 +88,6 @@ def test_analyse_image(get_path):
|
||||
ob.ObjectDetector(mydict).analyse_image()
|
||||
with open(get_path + JSON_1, "r") as file:
|
||||
out_dict = json.load(file)
|
||||
out_dict["filename"] = get_path + out_dict["filename"]
|
||||
|
||||
assert str(mydict) == str(out_dict)
|
||||
|
||||
@ -1,166 +1,98 @@
|
||||
import os
|
||||
import pytest
|
||||
from torch import device, cuda
|
||||
from lavis.models import load_model_and_preprocess
|
||||
import misinformation.summary as sm
|
||||
|
||||
images = [
|
||||
"./test/data/d755771b-225e-432f-802e-fb8dc850fff7.png",
|
||||
"./test/data/IMG_2746.png",
|
||||
"./test/data/IMG_2750.png",
|
||||
"./test/data/IMG_2805.png",
|
||||
"./test/data/IMG_2806.png",
|
||||
"./test/data/IMG_2807.png",
|
||||
"./test/data/IMG_2808.png",
|
||||
"./test/data/IMG_2809.png",
|
||||
"./test/data/IMG_3755.jpg",
|
||||
"./test/data/IMG_3756.jpg",
|
||||
"./test/data/IMG_3757.jpg",
|
||||
"./test/data/pic1.png",
|
||||
]
|
||||
|
||||
IMAGES = ["d755771b-225e-432f-802e-fb8dc850fff7.png", "IMG_2746.png"]
|
||||
|
||||
SUMMARY_DEVICE = device("cuda" if cuda.is_available() else "cpu")
|
||||
|
||||
TEST_KWARGS = {
|
||||
"run1": {
|
||||
"name": "blip_caption",
|
||||
"model_type": "base_coco",
|
||||
"is_eval": True,
|
||||
"device": SUMMARY_DEVICE,
|
||||
},
|
||||
"run2": {
|
||||
"name": "blip_caption",
|
||||
"model_type": "base_coco",
|
||||
"is_eval": True,
|
||||
"device": SUMMARY_DEVICE,
|
||||
},
|
||||
"run3": {
|
||||
"name": "blip_caption",
|
||||
"model_type": "large_coco",
|
||||
"is_eval": True,
|
||||
"device": SUMMARY_DEVICE,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def test_analyse_image():
|
||||
@pytest.fixture
|
||||
def get_dict(get_path):
|
||||
mydict = {}
|
||||
for img_path in images:
|
||||
id_ = os.path.splitext(os.path.basename(img_path))[0]
|
||||
mydict[id_] = {"filename": img_path}
|
||||
for img in IMAGES:
|
||||
id_ = os.path.splitext(os.path.basename(img))[0]
|
||||
mydict[id_] = {"filename": get_path + img}
|
||||
return mydict
|
||||
|
||||
for key in mydict:
|
||||
mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image()
|
||||
keys = list(mydict.keys())
|
||||
assert len(mydict) == 12
|
||||
for key in keys:
|
||||
assert len(mydict[key]["3_non-deterministic summary"]) == 3
|
||||
|
||||
const_image_summary_list = [
|
||||
"a river running through a city next to tall buildings",
|
||||
"a crowd of people standing on top of a tennis court",
|
||||
"a crowd of people standing on top of a field",
|
||||
"a room with a desk and a chair",
|
||||
"a table with plastic containers on top of it",
|
||||
"a view of a city with mountains in the background",
|
||||
"a view of a city street from a window",
|
||||
"a busy city street with cars and pedestrians",
|
||||
"a close up of an open book with writing on it",
|
||||
"a book that is open on a table",
|
||||
"a yellow book with green lettering on it",
|
||||
"a person running on a beach near a rock formation",
|
||||
]
|
||||
|
||||
for i in range(len(const_image_summary_list)):
|
||||
assert mydict[keys[i]]["const_image_summary"] == const_image_summary_list[i]
|
||||
|
||||
del sm.SummaryDetector.summary_model, sm.SummaryDetector.summary_vis_processors
|
||||
cuda.empty_cache()
|
||||
|
||||
summary_device = device("cuda" if cuda.is_available() else "cpu")
|
||||
@pytest.mark.long
|
||||
def test_analyse_image(get_dict):
|
||||
reference_results = {
|
||||
"run1": {
|
||||
"d755771b-225e-432f-802e-fb8dc850fff7": "a river running through a city next to tall buildings",
|
||||
"IMG_2746": "a crowd of people standing on top of a tennis court",
|
||||
},
|
||||
"run2": {
|
||||
"d755771b-225e-432f-802e-fb8dc850fff7": "a river running through a city next to tall buildings",
|
||||
"IMG_2746": "a crowd of people standing on top of a tennis court",
|
||||
},
|
||||
"run3": {
|
||||
"d755771b-225e-432f-802e-fb8dc850fff7": "a river running through a town next to tall buildings",
|
||||
"IMG_2746": "a crowd of people standing on top of a track",
|
||||
},
|
||||
}
|
||||
# test three different models
|
||||
for test_run in TEST_KWARGS.keys():
|
||||
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
|
||||
name="blip_caption",
|
||||
model_type="base_coco",
|
||||
is_eval=True,
|
||||
device=summary_device,
|
||||
**TEST_KWARGS[test_run]
|
||||
)
|
||||
|
||||
for key in mydict:
|
||||
mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(
|
||||
# run two different images
|
||||
for key in get_dict.keys():
|
||||
get_dict[key] = sm.SummaryDetector(get_dict[key]).analyse_image(
|
||||
summary_model, summary_vis_processors
|
||||
)
|
||||
keys = list(mydict.keys())
|
||||
|
||||
assert len(mydict) == 12
|
||||
for key in keys:
|
||||
assert len(mydict[key]["3_non-deterministic summary"]) == 3
|
||||
|
||||
const_image_summary_list2 = [
|
||||
"a river running through a city next to tall buildings",
|
||||
"a crowd of people standing on top of a tennis court",
|
||||
"a crowd of people standing on top of a field",
|
||||
"a room with a desk and a chair",
|
||||
"a table with plastic containers on top of it",
|
||||
"a view of a city with mountains in the background",
|
||||
"a view of a city street from a window",
|
||||
"a busy city street with cars and pedestrians",
|
||||
"a close up of an open book with writing on it",
|
||||
"a book that is open on a table",
|
||||
"a yellow book with green lettering on it",
|
||||
"a person running on a beach near a rock formation",
|
||||
]
|
||||
|
||||
for i in range(len(const_image_summary_list2)):
|
||||
assert mydict[keys[i]]["const_image_summary"] == const_image_summary_list2[i]
|
||||
|
||||
del summary_model, summary_vis_processors
|
||||
assert len(get_dict) == 2
|
||||
for key in get_dict.keys():
|
||||
assert len(get_dict[key]["3_non-deterministic summary"]) == 3
|
||||
assert (
|
||||
get_dict[key]["const_image_summary"] == reference_results[test_run][key]
|
||||
)
|
||||
cuda.empty_cache()
|
||||
|
||||
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
|
||||
name="blip_caption",
|
||||
model_type="large_coco",
|
||||
is_eval=True,
|
||||
device=summary_device,
|
||||
)
|
||||
|
||||
for key in mydict:
|
||||
mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(
|
||||
summary_model, summary_vis_processors
|
||||
)
|
||||
keys = list(mydict.keys())
|
||||
assert len(mydict) == 12
|
||||
for key in keys:
|
||||
assert len(mydict[key]["3_non-deterministic summary"]) == 3
|
||||
|
||||
const_image_summary_list3 = [
|
||||
"a river running through a town next to tall buildings",
|
||||
"a crowd of people standing on top of a track",
|
||||
"a group of people standing on top of a track",
|
||||
"a desk and chair in a small room",
|
||||
"a table that has some chairs on top of it",
|
||||
"a view of a city from a window of a building",
|
||||
"a view of a city from a window",
|
||||
"a city street filled with lots of traffic",
|
||||
"an open book with german text on it",
|
||||
"a close up of a book on a table",
|
||||
"a book with a green cover on a table",
|
||||
"a person running on a beach near the ocean",
|
||||
]
|
||||
|
||||
for i in range(len(const_image_summary_list2)):
|
||||
assert mydict[keys[i]]["const_image_summary"] == const_image_summary_list3[i]
|
||||
summary_model = None
|
||||
summary_vis_processors = None
|
||||
|
||||
|
||||
def test_analyse_questions():
|
||||
mydict = {}
|
||||
for img_path in images:
|
||||
id_ = os.path.splitext(os.path.basename(img_path))[0]
|
||||
mydict[id_] = {"filename": img_path}
|
||||
|
||||
def test_analyse_questions(get_dict):
|
||||
list_of_questions = [
|
||||
"How many persons on the picture?",
|
||||
"What happends on the picture?",
|
||||
]
|
||||
for key in mydict:
|
||||
mydict[key] = sm.SummaryDetector(mydict[key]).analyse_questions(
|
||||
for key in get_dict:
|
||||
get_dict[key] = sm.SummaryDetector(get_dict[key]).analyse_questions(
|
||||
list_of_questions
|
||||
)
|
||||
|
||||
keys = list(mydict.keys())
|
||||
assert len(mydict) == 12
|
||||
|
||||
list_of_questions_ans = [2, 100, "many", 0, 0, "none", "two", 5, 0, 0, 0, 1]
|
||||
|
||||
list_of_questions_ans2 = [
|
||||
"flood",
|
||||
"festival",
|
||||
"people are flying kites",
|
||||
"no one's home",
|
||||
"chair is being moved",
|
||||
"traffic jam",
|
||||
"day time",
|
||||
"traffic jam",
|
||||
"nothing",
|
||||
"nothing",
|
||||
"nothing",
|
||||
"running",
|
||||
]
|
||||
|
||||
for i in range(len(list_of_questions_ans)):
|
||||
assert mydict[keys[i]][list_of_questions[1]] == str(list_of_questions_ans2[i])
|
||||
assert len(get_dict) == 2
|
||||
list_of_questions_ans = ["2", "100"]
|
||||
list_of_questions_ans2 = ["flood", "festival"]
|
||||
test_answers = []
|
||||
test_answers2 = []
|
||||
for key in get_dict.keys():
|
||||
test_answers.append(get_dict[key][list_of_questions[0]])
|
||||
test_answers2.append(get_dict[key][list_of_questions[1]])
|
||||
assert sorted(test_answers) == sorted(list_of_questions_ans)
|
||||
assert sorted(test_answers2) == sorted(list_of_questions_ans2)
|
||||
|
||||
@ -20,7 +20,7 @@ def set_testdict(get_path):
|
||||
return testdict
|
||||
|
||||
|
||||
LANGUAGES = ["de", "en", "en"]
|
||||
LANGUAGES = ["de", "om", "en"]
|
||||
|
||||
|
||||
def test_TextDetector(set_testdict):
|
||||
|
||||
@ -2,8 +2,6 @@ import glob
|
||||
import os
|
||||
from pandas import DataFrame
|
||||
import pooch
|
||||
from torch import device, cuda
|
||||
from lavis.models import load_model_and_preprocess
|
||||
|
||||
|
||||
class DownloadResource:
|
||||
@ -108,34 +106,3 @@ if __name__ == "__main__":
|
||||
outdict = append_data_to_dict(mydict)
|
||||
df = dump_df(outdict)
|
||||
print(df.head(10))
|
||||
|
||||
|
||||
def load_model_base():
|
||||
summary_device = device("cuda" if cuda.is_available() else "cpu")
|
||||
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
|
||||
name="blip_caption",
|
||||
model_type="base_coco",
|
||||
is_eval=True,
|
||||
device=summary_device,
|
||||
)
|
||||
return summary_model, summary_vis_processors
|
||||
|
||||
|
||||
def load_model_large():
|
||||
summary_device = device("cuda" if cuda.is_available() else "cpu")
|
||||
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
|
||||
name="blip_caption",
|
||||
model_type="large_coco",
|
||||
is_eval=True,
|
||||
device=summary_device,
|
||||
)
|
||||
return summary_model, summary_vis_processors
|
||||
|
||||
|
||||
def load_model(model_type):
|
||||
select_model = {
|
||||
"base": load_model_base,
|
||||
"large": load_model_large,
|
||||
}
|
||||
summary_model, summary_vis_processors = select_model[model_type]()
|
||||
return summary_model, summary_vis_processors
|
||||
|
||||
39
notebooks/image_summary.ipynb
сгенерированный
39
notebooks/image_summary.ipynb
сгенерированный
@ -17,7 +17,9 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import misinformation\n",
|
||||
@ -36,7 +38,9 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"images = mutils.find_files(\n",
|
||||
@ -48,7 +52,9 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mydict = mutils.initialize_dict(images[0:10])"
|
||||
@ -57,7 +63,9 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mydict"
|
||||
@ -80,22 +88,27 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"summary_model, summary_vis_processors = sm.SummaryDetector.load_model(mydict, \"base\")\n",
|
||||
"obj = sm.SummaryDetector(mydict)\n",
|
||||
"summary_model, summary_vis_processors = obj.load_model(model_type=\"base\")\n",
|
||||
"# summary_model, summary_vis_processors = mutils.load_model(\"large\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for key in mydict:\n",
|
||||
" mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(\n",
|
||||
" summary_model, summary_vis_processors\n",
|
||||
" summary_model=summary_model, summary_vis_processors=summary_vis_processors\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
@ -130,7 +143,9 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.head(10)"
|
||||
@ -168,7 +183,9 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mdisplay.explore_analysis(mydict, identify=\"summary\")"
|
||||
@ -279,7 +296,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.0"
|
||||
"version": "3.9.16"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
@ -24,12 +24,12 @@ classifiers = [
|
||||
dependencies = [
|
||||
"bertopic",
|
||||
"cvlib",
|
||||
"deepface @ git+https://github.com/iulusoy/deepface.git",
|
||||
"deepface<=0.0.75",
|
||||
"googletrans==3.1.0a0",
|
||||
"grpcio",
|
||||
"importlib_metadata",
|
||||
"ipython",
|
||||
"ipywidgets",
|
||||
"ipywidgets<8.0.5",
|
||||
"ipykernel",
|
||||
"matplotlib",
|
||||
"numpy<=1.23.4",
|
||||
@ -39,9 +39,10 @@ dependencies = [
|
||||
"protobuf",
|
||||
"pytest",
|
||||
"pytest-cov",
|
||||
"pytest-xdist",
|
||||
"requests",
|
||||
"retina_face @ git+https://github.com/iulusoy/retinaface.git",
|
||||
"salesforce-lavis @ git+https://github.com/iulusoy/LAVIS.git",
|
||||
"retina_face",
|
||||
"salesforce-lavis",
|
||||
"spacy",
|
||||
"spacytextblob",
|
||||
"tensorflow",
|
||||
|
||||
Загрузка…
x
Ссылка в новой задаче
Block a user