Этот коммит содержится в:
Petr Andriushchenko 2023-03-30 13:43:06 +02:00
родитель a00ac65868 e33035bc68
Коммит 46d5d43f0b
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4C4A5DCF634115B6
21 изменённых файлов: 258 добавлений и 273 удалений

3
.github/workflows/ci.yml поставляемый
Просмотреть файл

@ -14,7 +14,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-22.04,windows-latest]
os: [ubuntu-22.04]
python-version: [3.9]
steps:
- name: Checkout repository
@ -39,3 +39,4 @@ jobs:
with:
fail_ci_if_error: true
files: misinformation/coverage.xml
verbose: true

2
.github/workflows/docs.yml поставляемый
Просмотреть файл

@ -3,8 +3,6 @@ name: Pages
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
workflow_dispatch:
jobs:

Просмотреть файл

@ -20,7 +20,6 @@
"metadata": {},
"outputs": [],
"source": [
"import misinformation\n",
"from misinformation import utils as mutils\n",
"from misinformation import display as mdisplay\n",
"import misinformation.summary as sm"
@ -74,7 +73,8 @@
"metadata": {},
"outputs": [],
"source": [
"summary_model, summary_vis_processors = mutils.load_model(\"base\")\n",
"obj = sm.SummaryDetector(mydict)\n",
"summary_model, summary_vis_processors = obj.load_model(\"base\")\n",
"# summary_model, summary_vis_processors = mutils.load_model(\"large\")"
]
},
@ -96,7 +96,7 @@
"tags": []
},
"source": [
"Convert the dictionary of dictionarys into a dictionary with lists:"
"Convert the dictionary of dictionaries into a dictionary with lists:"
]
},
{
@ -256,7 +256,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},

Просмотреть файл

@ -141,7 +141,7 @@ class EmotionDetector(utils.AnalysisMethod):
DeepFace.analyze(
img_path=face,
actions=actions,
silent=True,
prog_bar=False,
detector_backend="skip",
)
)
@ -192,6 +192,10 @@ class EmotionDetector(utils.AnalysisMethod):
"Yes" if result[person]["wears_mask"] else "No"
)
self.subdict["age"].append(result[person]["age"])
# gender is now reported as a list of dictionaries
# each dict represents one face
# each dict contains probability for Woman and Man
# TODO(review): take only the higher prob result for each dict;
# the append below still stores the reported value unchanged
self.subdict["gender"].append(result[person]["gender"])
# race, emotion only detected if person does not wear mask
if result[person]["wears_mask"]:

Просмотреть файл

@ -181,7 +181,7 @@ class MultimodalSearch(AnalysisMethod):
"Please, use one of the following models: blip2, blip, albef, clip_base, clip_vitl14, clip_vitl14_336"
)
raw_images, images_tensors = MultimodalSearch.read_and_process_images(
_, images_tensors = MultimodalSearch.read_and_process_images(
self, image_names, vis_processors
)
if path_to_saved_tensors is None:
@ -220,7 +220,7 @@ class MultimodalSearch(AnalysisMethod):
for query in search_query:
if not (len(query) == 1) and (query in ("image", "text_input")):
raise SyntaxError(
'Each querry must contain either an "image" or a "text_input"'
'Each query must contain either an "image" or a "text_input"'
)
multi_sample = []
for query in search_query:

Просмотреть файл

@ -7,36 +7,28 @@ from lavis.models import load_model_and_preprocess
class SummaryDetector(AnalysisMethod):
def __init__(self, subdict: dict) -> None:
super().__init__(subdict)
summary_device = device("cuda" if cuda.is_available() else "cpu")
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption",
model_type="base_coco",
is_eval=True,
device=summary_device,
)
self.summary_device = device("cuda" if cuda.is_available() else "cpu")
def load_model_base(self):
summary_device = device("cuda" if cuda.is_available() else "cpu")
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption",
model_type="base_coco",
is_eval=True,
device=summary_device,
device=self.summary_device,
)
return summary_model, summary_vis_processors
def load_model_large(self):
summary_device = device("cuda" if cuda.is_available() else "cpu")
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption",
model_type="large_coco",
is_eval=True,
device=summary_device,
device=self.summary_device,
)
return summary_model, summary_vis_processors
def load_model(self, model_type):
# self.summary_device = device("cuda" if cuda.is_available() else "cpu")
select_model = {
"base": SummaryDetector.load_model_base,
"large": SummaryDetector.load_model_large,
@ -47,8 +39,7 @@ class SummaryDetector(AnalysisMethod):
def analyse_image(self, summary_model=None, summary_vis_processors=None):
if summary_model is None and summary_vis_processors is None:
summary_model = SummaryDetector.summary_model
summary_vis_processors = SummaryDetector.summary_vis_processors
summary_model, summary_vis_processors = self.load_model_base()
path = self.subdict["filename"]
raw_image = Image.open(path).convert("RGB")
@ -66,32 +57,33 @@ class SummaryDetector(AnalysisMethod):
)
return self.subdict
(
summary_VQA_model,
summary_VQA_vis_processors,
summary_VQA_txt_processors,
) = load_model_and_preprocess(
name="blip_vqa", model_type="vqav2", is_eval=True, device=summary_device
)
def analyse_questions(self, list_of_questions):
(
summary_VQA_model,
summary_VQA_vis_processors,
summary_VQA_txt_processors,
) = load_model_and_preprocess(
name="blip_vqa",
model_type="vqav2",
is_eval=True,
device=self.summary_device,
)
if len(list_of_questions) > 0:
path = self.subdict["filename"]
raw_image = Image.open(path).convert("RGB")
image = (
self.summary_VQA_vis_processors["eval"](raw_image)
summary_VQA_vis_processors["eval"](raw_image)
.unsqueeze(0)
.to(self.summary_device)
)
question_batch = []
for quest in list_of_questions:
question_batch.append(self.summary_VQA_txt_processors["eval"](quest))
question_batch.append(summary_VQA_txt_processors["eval"](quest))
batch_size = len(list_of_questions)
image_batch = image.repeat(batch_size, 1, 1, 1)
with no_grad():
answers_batch = self.summary_VQA_model.predict_answers(
answers_batch = summary_VQA_model.predict_answers(
samples={"image": image_batch, "text_input": question_batch},
inference_method="generate",
)

Просмотреть файл

@ -1 +1 @@
{"IMG_2746": {"filename": "./test/data/IMG_2809.png", "person": "yes", "bicycle": "no", "car": "yes", "motorcycle": "no", "airplane": "no", "bus": "yes", "train": "no", "truck": "no", "boat": "no", "traffic light": "no", "cell phone": "no"}}
{"person": "yes", "bicycle": "no", "car": "yes", "motorcycle": "no", "airplane": "no", "bus": "yes", "train": "no", "truck": "no", "boat": "no", "traffic light": "no", "cell phone": "no"}

Просмотреть файл

@ -1,5 +1,4 @@
{
"filename": "./test/data/IMG_2746.png",
"face": "Yes",
"multiple_faces": "Yes",
"no_faces": 11,

Просмотреть файл

@ -0,0 +1,16 @@
Im sorry, but I dont want to be an emperor. Thats not my business. I dont want to rule or conquer anyone. I should like to help everyone - if possible - Jew, Gentile - black man - white. We all want to help one another. Human beings are like that. We want to live by each others happiness - not by each others misery. We dont want to hate and despise one another. In this world there is room for everyone. And the good earth is rich and can provide for everyone. The way of life can be free and beautiful, but we have lost the way.
Greed has poisoned mens souls, has barricaded the world with hate, has goose-stepped us into misery and bloodshed. We have developed speed, but we have shut ourselves in. Machinery that gives abundance has left us in want. Our knowledge has made us cynical. Our cleverness, hard and unkind. We think too much and feel too little. More than machinery we need humanity. More than cleverness we need kindness and gentleness. Without these qualities, life will be violent and all will be lost…
The aeroplane and the radio have brought us closer together. The very nature of these inventions cries out for the goodness in men - cries out for universal brotherhood - for the unity of us all. Even now my voice is reaching millions throughout the world - millions of despairing men, women, and little children - victims of a system that makes men torture and imprison innocent people.
To those who can hear me, I say - do not despair. The misery that is now upon us is but the passing of greed - the bitterness of men who fear the way of human progress. The hate of men will pass, and dictators die, and the power they took from the people will return to the people. And so long as men die, liberty will never perish…
Soldiers! dont give yourselves to brutes - men who despise you - enslave you - who regiment your lives - tell you what to do - what to think and what to feel! Who drill you - diet you - treat you like cattle, use you as cannon fodder. Dont give yourselves to these unnatural men - machine men with machine minds and machine hearts! You are not machines! You are not cattle! You are men! You have the love of humanity in your hearts! You dont hate! Only the unloved hate - the unloved and the unnatural! Soldiers! Dont fight for slavery! Fight for liberty!
In the 17th Chapter of St Luke it is written: “the Kingdom of God is within man” - not one man nor a group of men, but in all men! In you! You, the people have the power - the power to create machines. The power to create happiness! You, the people, have the power to make this life free and beautiful, to make this life a wonderful adventure.
Then - in the name of democracy - let us use that power - let us all unite. Let us fight for a new world - a decent world that will give men a chance to work - that will give youth a future and old age a security. By the promise of these things, brutes have risen to power. But they lie! They do not fulfil that promise. They never will!
Dictators free themselves but they enslave the people! Now let us fight to fulfil that promise! Let us fight to free the world - to do away with national barriers - to do away with greed, with hate and intolerance. Let us fight for a world of reason, a world where science and progress will lead to all mens happiness. Soldiers! in the name of democracy, let us all unite!

Просмотреть файл

@ -3,10 +3,10 @@ The Quantum Theory of
Nonrelativistic Collisions
JOHN R. TAYLOR
University of Colorado
postaldia Lanbidean
ostaliga Lanbidean
1 ilde
ballenger stor goin
gd OOL, STVÍ 23 TL 02
gdĐOL, SIVI 23 TL 02
de in obl
och yd badalang
a

Просмотреть файл

@ -3,12 +3,12 @@ The Quantum Theory of
Nonrelativistic Collisions
JOHN R. TAYLOR
University of Colorado
postaldia Lanbidean
ostaliga Lanbidean
1 ilde
ballenger stor goin
gd OOL, STVÍ 23 TL 02
de in obl
och yd badalang
balloons big goin
gdĐOL, SIVI 23 TL
there in obl
och yd change
a
Ber
ook Sy-RW enot go baldus
ook Sy-RW isn't going anywhere

Просмотреть файл

@ -1,3 +1,4 @@
[pytest]
markers =
gcv: mark google cloud vision tests - skip to save money.
gcv: mark google cloud vision tests - skip to save money.
long: mark long running tests - skip to save compute resources.

Просмотреть файл

@ -2,20 +2,20 @@ import misinformation.cropposts as crpo
import numpy as np
from PIL import Image
TEST_IMAGE_1 = "./test/data/pic1.png"
TEST_IMAGE_2 = "./test/data/pic2.png"
TEST_IMAGE_1 = "pic1.png"
TEST_IMAGE_2 = "pic2.png"
def test_matching_points():
ref_view = np.array(Image.open(TEST_IMAGE_2))
view = np.array(Image.open(TEST_IMAGE_1))
filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view)
def test_matching_points(get_path):
ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
view = np.array(Image.open(get_path + TEST_IMAGE_1))
filtered_matches, _, _ = crpo.matching_points(ref_view, view)
assert len(filtered_matches) > 0
def test_kp_from_matches():
ref_view = np.array(Image.open(TEST_IMAGE_2))
view = np.array(Image.open(TEST_IMAGE_1))
def test_kp_from_matches(get_path):
ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
view = np.array(Image.open(get_path + TEST_IMAGE_1))
filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view)
kp1, kp2 = crpo.kp_from_matches(filtered_matches, kp1, kp2)
@ -25,9 +25,9 @@ def test_kp_from_matches():
assert kp2.shape[1] == 2
def test_compute_crop_corner():
ref_view = np.array(Image.open(TEST_IMAGE_2))
view = np.array(Image.open(TEST_IMAGE_1))
def test_compute_crop_corner(get_path):
ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
view = np.array(Image.open(get_path + TEST_IMAGE_1))
filtered_matches, kp1, kp2 = crpo.matching_points(ref_view, view)
corner = crpo.compute_crop_corner(filtered_matches, kp1, kp2)
print(view.shape)
@ -38,9 +38,9 @@ def test_compute_crop_corner():
assert 0 <= h < view.shape[0]
def test_crop_posts_image():
ref_view = np.array(Image.open(TEST_IMAGE_2))
view = np.array(Image.open(TEST_IMAGE_1))
def test_crop_posts_image(get_path):
ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
view = np.array(Image.open(get_path + TEST_IMAGE_1))
rte = crpo.crop_posts_image(ref_view, view)
assert rte is not None
crop_view, match_num = rte
@ -48,16 +48,15 @@ def test_crop_posts_image():
assert crop_view.shape[0] * crop_view.shape[1] <= view.shape[0] * view.shape[1]
def test_crop_posts_from_refs():
ref_view = np.array(Image.open(TEST_IMAGE_2))
view = np.array(Image.open(TEST_IMAGE_1))
def test_crop_posts_from_refs(get_path):
ref_view = np.array(Image.open(get_path + TEST_IMAGE_2))
view = np.array(Image.open(get_path + TEST_IMAGE_1))
ref_views = [ref_view]
crop_view = crpo.crop_posts_from_refs(ref_views, view)
assert crop_view.shape[0] * crop_view.shape[1] <= view.shape[0] * view.shape[1]
def test_get_file_list():
def test_get_file_list(get_path):
ref_list = []
ref_dir = "./test/data"
ref_list = crpo.get_file_list(ref_dir, ref_list, ext="png")
ref_list = crpo.get_file_list(get_path, ref_list, ext="png")
assert len(ref_list) > 0

Просмотреть файл

@ -1,27 +1,28 @@
import json
# import misinformation.display as misinf_display
import pytest
misinf_display = pytest.importorskip("misinformation.display")
import misinformation.display as misinf_display
def test_explore_analysis_faces():
mydict = {"IMG_2746": {"filename": "./test/data/IMG_2746.png"}}
misinf_display.explore_analysis(mydict, identify="faces")
with open("./test/data/example_faces.json", "r") as file:
def test_explore_analysis_faces(get_path):
mydict = {"IMG_2746": {"filename": get_path + "IMG_2746.png"}}
temp = misinf_display.explore_analysis(mydict, identify="faces") # noqa
temp = None # noqa
with open(get_path + "example_faces.json", "r") as file:
outs = json.load(file)
mydict["IMG_2746"].pop("filename", None)
for im_key in mydict.keys():
sub_dict = mydict[im_key]
for key in sub_dict.keys():
assert sub_dict[key] == outs[key]
def test_explore_analysis_objects():
mydict = {"IMG_2746": {"filename": "./test/data/IMG_2809.png"}}
misinf_display.explore_analysis(mydict, identify="objects")
with open("./test/data/example_analysis_objects.json", "r") as file:
def test_explore_analysis_objects(get_path):
mydict = {"IMG_2809": {"filename": get_path + "IMG_2809.png"}}
temp = misinf_display.explore_analysis(mydict, identify="objects") # noqa
temp = None # noqa
with open(get_path + "example_analysis_objects.json", "r") as file:
outs = json.load(file)
assert str(mydict) == str(outs)
mydict["IMG_2809"].pop("filename", None)
for im_key in mydict.keys():
sub_dict = mydict[im_key]
for key in sub_dict.keys():
assert sub_dict[key] == outs[key]

Просмотреть файл

@ -1,16 +1,17 @@
import misinformation.faces as fc
import json
from pytest import approx
import pytest
def test_analyse_faces():
def test_analyse_faces(get_path):
mydict = {
"filename": "./test/data/IMG_2746.png",
"filename": get_path + "IMG_2746.png",
}
mydict = fc.EmotionDetector(mydict).analyse_image()
mydict.update(fc.EmotionDetector(mydict).analyse_image())
with open("./test/data/example_faces.json", "r") as file:
with open(get_path + "example_faces.json", "r") as file:
out_dict = json.load(file)
# delete the filename key
mydict.pop("filename", None)
for key in mydict.keys():
assert mydict[key] == out_dict[key]

Просмотреть файл

@ -5,6 +5,7 @@ import numpy
from torch import device, cuda
import misinformation.multimodal_search as ms
testdict = {
"IMG_2746": {"filename": "./test/data/IMG_2746.png"},
"IMG_2809": {"filename": "./test/data/IMG_2809.png"},
@ -191,6 +192,7 @@ dict_image_gradcam_with_itm_for_blip = {
}
@pytest.mark.long
@pytest.mark.parametrize(
(
"pre_multimodal_device",
@ -203,29 +205,29 @@ dict_image_gradcam_with_itm_for_blip = {
"pre_sorted",
),
[
(
device("cpu"),
"blip2",
pre_proc_pic_blip2_blip_albef,
pre_proc_text_blip2_blip_albef,
pre_extracted_feature_img_blip2,
pre_extracted_feature_text_blip2,
simularity_blip2,
sorted_blip2,
),
pytest.param(
device("cuda"),
"blip2",
pre_proc_pic_blip2_blip_albef,
pre_proc_text_blip2_blip_albef,
pre_extracted_feature_img_blip2,
pre_extracted_feature_text_blip2,
simularity_blip2,
sorted_blip2,
marks=pytest.mark.skipif(
gpu_is_not_available, reason="gpu_is_not_availible"
),
),
# (
# device("cpu"),
# "blip2",
# pre_proc_pic_blip2_blip_albef,
# pre_proc_text_blip2_blip_albef,
# pre_extracted_feature_img_blip2,
# pre_extracted_feature_text_blip2,
# simularity_blip2,
# sorted_blip2,
# ),
# pytest.param(
# device("cuda"),
# "blip2",
# pre_proc_pic_blip2_blip_albef,
# pre_proc_text_blip2_blip_albef,
# pre_extracted_feature_img_blip2,
# pre_extracted_feature_text_blip2,
# simularity_blip2,
# sorted_blip2,
# marks=pytest.mark.skipif(
# gpu_is_not_available, reason="gpu_is_not_availible"
# ),
# ),
(
device("cpu"),
"blip",
@ -360,7 +362,7 @@ def test_parsing_images(
vis_processor,
txt_processor,
image_keys,
image_names,
_,
features_image_stacked,
) = ms.MultimodalSearch.parsing_images(testdict, pre_model)

Просмотреть файл

@ -1,166 +1,98 @@
import os
import pytest
from torch import device, cuda
from lavis.models import load_model_and_preprocess
import misinformation.summary as sm
images = [
"./test/data/d755771b-225e-432f-802e-fb8dc850fff7.png",
"./test/data/IMG_2746.png",
"./test/data/IMG_2750.png",
"./test/data/IMG_2805.png",
"./test/data/IMG_2806.png",
"./test/data/IMG_2807.png",
"./test/data/IMG_2808.png",
"./test/data/IMG_2809.png",
"./test/data/IMG_3755.jpg",
"./test/data/IMG_3756.jpg",
"./test/data/IMG_3757.jpg",
"./test/data/pic1.png",
]
IMAGES = ["d755771b-225e-432f-802e-fb8dc850fff7.png", "IMG_2746.png"]
SUMMARY_DEVICE = device("cuda" if cuda.is_available() else "cpu")
TEST_KWARGS = {
"run1": {
"name": "blip_caption",
"model_type": "base_coco",
"is_eval": True,
"device": SUMMARY_DEVICE,
},
"run2": {
"name": "blip_caption",
"model_type": "base_coco",
"is_eval": True,
"device": SUMMARY_DEVICE,
},
"run3": {
"name": "blip_caption",
"model_type": "large_coco",
"is_eval": True,
"device": SUMMARY_DEVICE,
},
}
def test_analyse_image():
@pytest.fixture
def get_dict(get_path):
mydict = {}
for img_path in images:
id_ = os.path.splitext(os.path.basename(img_path))[0]
mydict[id_] = {"filename": img_path}
for img in IMAGES:
id_ = os.path.splitext(os.path.basename(img))[0]
mydict[id_] = {"filename": get_path + img}
return mydict
for key in mydict:
mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image()
keys = list(mydict.keys())
assert len(mydict) == 12
for key in keys:
assert len(mydict[key]["3_non-deterministic summary"]) == 3
const_image_summary_list = [
"a river running through a city next to tall buildings",
"a crowd of people standing on top of a tennis court",
"a crowd of people standing on top of a field",
"a room with a desk and a chair",
"a table with plastic containers on top of it",
"a view of a city with mountains in the background",
"a view of a city street from a window",
"a busy city street with cars and pedestrians",
"a close up of an open book with writing on it",
"a book that is open on a table",
"a yellow book with green lettering on it",
"a person running on a beach near a rock formation",
]
for i in range(len(const_image_summary_list)):
assert mydict[keys[i]]["const_image_summary"] == const_image_summary_list[i]
del sm.SummaryDetector.summary_model, sm.SummaryDetector.summary_vis_processors
cuda.empty_cache()
summary_device = device("cuda" if cuda.is_available() else "cpu")
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption",
model_type="base_coco",
is_eval=True,
device=summary_device,
)
for key in mydict:
mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(
summary_model, summary_vis_processors
@pytest.mark.long
def test_analyse_image(get_dict):
reference_results = {
"run1": {
"d755771b-225e-432f-802e-fb8dc850fff7": "a river running through a city next to tall buildings",
"IMG_2746": "a crowd of people standing on top of a tennis court",
},
"run2": {
"d755771b-225e-432f-802e-fb8dc850fff7": "a river running through a city next to tall buildings",
"IMG_2746": "a crowd of people standing on top of a tennis court",
},
"run3": {
"d755771b-225e-432f-802e-fb8dc850fff7": "a river running through a town next to tall buildings",
"IMG_2746": "a crowd of people standing on top of a track",
},
}
# three test runs over two model types: base_coco twice, then large_coco
for test_run in TEST_KWARGS.keys():
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
**TEST_KWARGS[test_run]
)
keys = list(mydict.keys())
assert len(mydict) == 12
for key in keys:
assert len(mydict[key]["3_non-deterministic summary"]) == 3
const_image_summary_list2 = [
"a river running through a city next to tall buildings",
"a crowd of people standing on top of a tennis court",
"a crowd of people standing on top of a field",
"a room with a desk and a chair",
"a table with plastic containers on top of it",
"a view of a city with mountains in the background",
"a view of a city street from a window",
"a busy city street with cars and pedestrians",
"a close up of an open book with writing on it",
"a book that is open on a table",
"a yellow book with green lettering on it",
"a person running on a beach near a rock formation",
]
for i in range(len(const_image_summary_list2)):
assert mydict[keys[i]]["const_image_summary"] == const_image_summary_list2[i]
del summary_model, summary_vis_processors
cuda.empty_cache()
summary_model, summary_vis_processors, _ = load_model_and_preprocess(
name="blip_caption",
model_type="large_coco",
is_eval=True,
device=summary_device,
)
for key in mydict:
mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(
summary_model, summary_vis_processors
)
keys = list(mydict.keys())
assert len(mydict) == 12
for key in keys:
assert len(mydict[key]["3_non-deterministic summary"]) == 3
const_image_summary_list3 = [
"a river running through a town next to tall buildings",
"a crowd of people standing on top of a track",
"a group of people standing on top of a track",
"a desk and chair in a small room",
"a table that has some chairs on top of it",
"a view of a city from a window of a building",
"a view of a city from a window",
"a city street with cars and people on it",
"an open book with german text on it",
"a close up of a book on a table",
"a book with a green cover on a table",
"a person running on a beach near the ocean",
]
for i in range(len(const_image_summary_list2)):
assert mydict[keys[i]]["const_image_summary"] == const_image_summary_list3[i]
# run two different images
for key in get_dict.keys():
get_dict[key] = sm.SummaryDetector(get_dict[key]).analyse_image(
summary_model, summary_vis_processors
)
assert len(get_dict) == 2
for key in get_dict.keys():
assert len(get_dict[key]["3_non-deterministic summary"]) == 3
assert (
get_dict[key]["const_image_summary"] == reference_results[test_run][key]
)
cuda.empty_cache()
summary_model = None
summary_vis_processors = None
def test_analyse_questions():
mydict = {}
for img_path in images:
id_ = os.path.splitext(os.path.basename(img_path))[0]
mydict[id_] = {"filename": img_path}
def test_analyse_questions(get_dict):
list_of_questions = [
"How many persons on the picture?",
"What happends on the picture?",
]
for key in mydict:
mydict[key] = sm.SummaryDetector(mydict[key]).analyse_questions(
for key in get_dict:
get_dict[key] = sm.SummaryDetector(get_dict[key]).analyse_questions(
list_of_questions
)
keys = list(mydict.keys())
assert len(mydict) == 12
list_of_questions_ans = [2, 100, "many", 0, 0, "none", "two", 5, 0, 0, 0, 1]
list_of_questions_ans2 = [
"flood",
"festival",
"people are flying kites",
"no one's home",
"chair is being moved",
"traffic jam",
"day time",
"traffic jam",
"nothing",
"nothing",
"nothing",
"running",
]
for i in range(len(list_of_questions_ans)):
assert mydict[keys[i]][list_of_questions[1]] == str(list_of_questions_ans2[i])
assert len(get_dict) == 2
list_of_questions_ans = ["2", "100"]
list_of_questions_ans2 = ["flood", "festival"]
test_answers = []
test_answers2 = []
for key in get_dict.keys():
test_answers.append(get_dict[key][list_of_questions[0]])
test_answers2.append(get_dict[key][list_of_questions[1]])
assert sorted(test_answers) == sorted(list_of_questions_ans)
assert sorted(test_answers2) == sorted(list_of_questions_ans2)

Просмотреть файл

@ -20,7 +20,7 @@ def set_testdict(get_path):
return testdict
LANGUAGES = ["de", "en", "en"]
LANGUAGES = ["de", "om", "en"]
def test_TextDetector(set_testdict):
@ -116,6 +116,18 @@ def test_sentiment_analysis():
assert test_obj.subdict["subjectivity"] == 0.6
def test_text_summary(get_path):
    """Check that ``TextDetector.text_summary`` yields the reference summary.

    The text in ``example_summary.txt`` (located via the ``get_path``
    fixture) is injected as the detector's English text, summarized, and
    the stored ``"summary_text"`` entry is compared with a fixed string.
    """
    mydict = {}
    detector = tt.TextDetector(mydict, analyse_text=True)
    with open(get_path + "example_summary.txt", "r", encoding="utf8") as file:
        detector.subdict["text_english"] = file.read()
    detector.text_summary()
    expected_summary = " Im sorry, but I dont want to be an emperor. Thats not my business. I should like to help everyone - if possible - Jew, Gentile - black man - white . We all want to help one another. In this world there is room for everyone. The way of life can be free and beautiful, but we have lost the way ."
    # the lookup goes through mydict, so this relies on the detector's
    # subdict being the very dict that was passed to the constructor
    assert mydict["summary_text"] == expected_summary
def test_PostprocessText(set_testdict, get_path):
reference_dict = "THE\nALGEBRAIC\nEIGENVALUE\nPROBLEM\nDOM\nNVS TIO\nMINA\nMonographs\non Numerical Analysis\nJ.. H. WILKINSON"
reference_df = "Mathematische Formelsammlung\nfür Ingenieure und Naturwissenschaftler\nMit zahlreichen Abbildungen und Rechenbeispielen\nund einer ausführlichen Integraltafel\n3., verbesserte Auflage"

Просмотреть файл

@ -9,6 +9,7 @@ from misinformation import utils
import grpc
import pandas as pd
from bertopic import BERTopic
from transformers import pipeline
# make widgets work again
# clean text has weird spaces and separation of "do n't"
@ -119,6 +120,14 @@ class TextDetector(utils.AnalysisMethod):
# where 0.0 is very objective and 1.0 is very subjective
self.subdict["subjectivity"] = self.doc._.blob.subjectivity
def text_summary(self):
    """Summarize the English text and merge the result into ``self.subdict``.

    Reads ``self.subdict["text_english"]``, runs it through a
    ``transformers`` summarization pipeline and updates ``self.subdict``
    with the first element of the pipeline output (presumably a dict with
    a ``"summary_text"`` key -- confirm against the transformers docs).
    """
    # a fresh pipeline is built on every call; no model is named here, so
    # the choice of model is left to transformers
    summarizer = pipeline("summarization")
    english_text = self.subdict["text_english"]
    first_result = summarizer(english_text)[0]
    self.subdict.update(first_result)
# def text_sentiment_transformers(self):
# pipe = pipeline("text-classification")
class PostprocessText:
def __init__(

39
notebooks/image_summary.ipynb сгенерированный
Просмотреть файл

@ -17,7 +17,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import misinformation\n",
@ -36,7 +38,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"images = mutils.find_files(\n",
@ -48,7 +52,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"mydict = mutils.initialize_dict(images[0:10])"
@ -57,7 +63,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"mydict"
@ -80,22 +88,27 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"summary_model, summary_vis_processors = sm.SummaryDetector.load_model(mydict, \"base\")\n",
"obj = sm.SummaryDetector(mydict)\n",
"summary_model, summary_vis_processors = obj.load_model(model_type=\"base\")\n",
"# summary_model, summary_vis_processors = mutils.load_model(\"large\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"for key in mydict:\n",
" mydict[key] = sm.SummaryDetector(mydict[key]).analyse_image(\n",
" summary_model, summary_vis_processors\n",
" summary_model=summary_model, summary_vis_processors=summary_vis_processors\n",
" )"
]
},
@ -130,7 +143,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"df.head(10)"
@ -168,7 +183,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"mdisplay.explore_analysis(mydict, identify=\"summary\")"
@ -279,7 +296,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.0"
"version": "3.9.16"
},
"vscode": {
"interpreter": {

Просмотреть файл

@ -48,6 +48,7 @@ dependencies = [
"tensorflow",
"textblob",
"torch",
"transformers",
"google-cloud-vision",
"setuptools",
"opencv-contrib-python",