зеркало из
https://github.com/ssciwr/AMMICO.git
synced 2025-10-29 21:16:06 +02:00
* add image summary notebook * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * pin deepface version to avoid bug with progress bar after update * update actions version for checkout and python * test ci without lavis * no lavis for ci test * merging * return lavis * change lavis to salesforce-lavis * change pycocotools install method * change pycocotools install method * fix_pycocotools * Downgrade Python * back to 3.9 and remove pycocotools dependance * instrucctions for windows * missing comma after merge * lavis only for ubuntu * use lavis package name in install instead of git * adding multimodal searching py and notebook * exclude lavis on windows * skip import on windows * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * reactivate lavis * Revert "reactivate lavis" This reverts commit ecdaf9d316e4b08816ba62da5e0482c8ff15b14e. * Change input format for multimodal search * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix clip models * account for new interface in init imports * changed imports bec of lavis/windows * fix if-else, added clip ViT-L-14=336 model * fix code smells * add model change function to summary * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixed new model in summary.py * fixed summary windget * moved some function to utils * fixed imort torch in utils * added test_summary.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixed opencv version * added first test of multimodal_search.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixed test * removed windows in CI and added test in multimodal search * change lavis from dependencies from pip ro git * fixed blip2 model in test_multimodal_search.py * 
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixed test multimodal search on cpu and gpu machines * added test, fixed dependencies * add -vv to pytest command in CI * added test_multimodal_search tests * fixed tests in test_multimodal_search.py * fixed tests in test_summary * changed CI and fixed test_multimodel search * fixed ci * fixed error in test multimodal search, changed ci * added multimodal search test, added windows CI, added picture in test data * CI debuging * fixing tests in CI * fixing test in CI 2 * fixing CI 3 * fixing CI * added filtering function * Brought back all tests after CI fixing * changed CI one pytest by individual tests * fixed opencv problem * fix path for text, adjust result for new gcv * remove opencv * fixing cv2 error * added opencv-contrib, change objects_cvlib * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixing tests in CI * fixing CI testing * cleanup objects * fixing codecov in CI * fixing codecov in CI * run tests together; install opencv last * update requirements for opencv dependencies * moved lavis functions from utils to summary * Remove lavis from utils.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add missing jupyter --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: iulusoy <inga.ulusoy@uni-heidelberg.de>
153 строки
5.6 KiB
Python
153 строки
5.6 KiB
Python
import os
|
|
import pytest
|
|
import spacy
|
|
import misinformation.text as tt
|
|
|
|
|
|
@pytest.fixture
def set_testdict(get_path):
    """Build the per-image input dictionary shared by the text tests.

    Args:
        get_path: Fixture value (defined outside this file) that is prefixed
            to every image file name by string concatenation.

    Returns:
        dict: One entry per image id, each holding only its ``"filename"``.
    """
    image_ids = ("IMG_3755", "IMG_3756", "IMG_3757")
    return {
        image_id: {"filename": get_path + f"{image_id}.jpg"}
        for image_id in image_ids
    }
|
|
|
|
|
|
# Expected language code per image, listed in the insertion order of the
# set_testdict fixture (IMG_3755, IMG_3756, IMG_3757). test_translate_text
# zips this list with that dict and compares against "text_language".
LANGUAGES = ["de", "en", "en"]
|
|
|
|
|
|
def test_TextDetector(set_testdict):
    """Check the initial state of a freshly constructed ``TextDetector``.

    For every image entry, the three text keys of ``subdict`` must be
    ``None`` and ``analyse_text`` must be falsy when it was not requested.
    """
    text_keys = ("text", "text_language", "text_english")
    for image_entry in set_testdict.values():
        detector = tt.TextDetector(image_entry)
        for key in text_keys:
            assert detector.subdict[key] is None
        assert not detector.analyse_text
|
|
|
|
|
|
@pytest.mark.gcv
def test_analyse_image(set_testdict, set_environ):
    """Smoke-test ``analyse_image`` with and without text analysis.

    No result is asserted; the test only verifies that both calls complete
    for every image entry. ``set_environ`` is a fixture defined outside this
    file (presumably it provides the credentials needed by the ``gcv``-marked
    tests -- TODO confirm).
    """
    for image_entry in set_testdict.values():
        # Default detector first, then one with text analysis enabled; both
        # operate on the same entry, in this order.
        tt.TextDetector(image_entry).analyse_image()
        tt.TextDetector(image_entry, analyse_text=True).analyse_image()
|
|
|
|
|
|
@pytest.mark.gcv
def test_get_text_from_image(set_testdict, get_path, set_environ):
    """Compare the text extracted from each image with its reference file.

    The reference for image ``<id>`` is read from ``text_<id>.txt`` under
    ``get_path`` and must equal ``subdict["text"]`` exactly.
    """
    for image_id, image_entry in set_testdict.items():
        detector = tt.TextDetector(image_entry)
        detector.get_text_from_image()
        expected_path = get_path + "text_" + image_id + ".txt"
        with open(expected_path, "r", encoding="utf8") as handle:
            expected_text = handle.read()
        assert detector.subdict["text"] == expected_text
|
|
|
|
|
|
def test_translate_text(set_testdict, get_path):
    """Check language detection and English translation for each image text.

    The source text is loaded from ``text_<id>.txt`` and injected into the
    detector, so no image processing is involved. The detected language is
    compared with the matching entry of ``LANGUAGES`` and the translation
    with ``text_translated_<id>.txt``.
    """
    entries_with_language = zip(set_testdict.items(), LANGUAGES)
    for (image_id, image_entry), expected_language in entries_with_language:
        detector = tt.TextDetector(image_entry)
        source_path = get_path + "text_" + image_id + ".txt"
        translation_path = get_path + "text_translated_" + image_id + ".txt"
        with open(source_path, "r", encoding="utf8") as handle:
            source_text = handle.read()
        with open(translation_path, "r", encoding="utf8") as handle:
            expected_translation = handle.read()
        detector.subdict["text"] = source_text
        detector.translate_text()
        assert detector.subdict["text_language"] == expected_language
        assert detector.subdict["text_english"] == expected_translation
|
|
|
|
|
|
def test_remove_linebreaks():
    """Check that ``remove_linebreaks`` strips newlines from both text keys."""
    detector = tt.TextDetector({})
    detector.subdict["text"] = "This is \n a test."
    detector.subdict["text_english"] = "This is \n another\n test."
    detector.remove_linebreaks()
    expected_by_key = {
        "text": "This is a test.",
        "text_english": "This is another test.",
    }
    for key, expected in expected_by_key.items():
        assert detector.subdict[key] == expected
|
|
|
|
|
|
def test_run_spacy(set_testdict, get_path):
    """Check that ``_run_spacy`` stores a spaCy ``Doc`` on the detector.

    The reference text of ``IMG_3755`` is injected as ``text_english`` so the
    test does not depend on text extraction or translation.
    """
    test_obj = tt.TextDetector(set_testdict["IMG_3755"], analyse_text=True)
    ref_file = get_path + "text_IMG_3755.txt"
    # Read as UTF-8 explicitly, like the other tests that open this file.
    # Without it the platform default encoding is used, which can mis-decode
    # non-ASCII characters (e.g. on Windows).
    with open(ref_file, "r", encoding="utf8") as file:
        reference_text = file.read()
    test_obj.subdict["text_english"] = reference_text
    test_obj._run_spacy()
    assert isinstance(test_obj.doc, spacy.tokens.doc.Doc)
|
|
|
|
|
|
def test_clean_text(set_testdict):
    """Check that ``clean_text`` writes the expected ``text_clean`` entry.

    A spaCy document is built directly from a short sentence ending in the
    made-up token ``fjejg``; the expected cleaned text no longer contains it.
    """
    parsed_doc = spacy.load("en_core_web_md")("I like cats and fjejg")
    detector = tt.TextDetector(set_testdict["IMG_3755"])
    detector.doc = parsed_doc
    detector.clean_text()
    assert detector.subdict["text_clean"] == "I like cats and"
|
|
|
|
|
|
def test_correct_spelling():
    """Check the spelling-corrected text stored by ``correct_spelling``.

    In the expected string ``lik`` becomes ``like`` while ``ad`` is left
    unchanged.
    """
    detector = tt.TextDetector({}, analyse_text=True)
    detector.subdict["text_english"] = "I lik cats ad dogs."
    detector.correct_spelling()
    expected = "I like cats ad dogs."
    assert detector.subdict["text_english_correct"] == expected
|
|
|
|
|
|
def test_sentiment_analysis():
    """Check polarity and subjectivity computed for a simple sentence.

    The detector is fed the English text directly, then run through the
    spaCy, spelling-correction and sentiment steps in that order.
    """
    mydict = {}
    test_obj = tt.TextDetector(mydict, analyse_text=True)
    test_obj.subdict["text_english"] = "I love cats and dogs."
    test_obj._run_spacy()
    test_obj.correct_spelling()
    test_obj.sentiment_analysis()
    # The scores are floats, so compare with a tolerance rather than with
    # exact equality to stay robust against rounding differences.
    assert test_obj.subdict["polarity"] == pytest.approx(0.5)
    assert test_obj.subdict["subjectivity"] == pytest.approx(0.6)
|
|
|
|
|
|
def test_PostprocessText(set_testdict, get_path):
    """Exercise ``PostprocessText`` construction from a dict and from a CSV.

    Covers, in order:
      * a dict whose entries carry ``text_english`` (third text is compared);
      * the same dict without ``text_english`` -> ``ValueError``;
      * a CSV input (first text is compared);
      * a second CSV, ``test_data_out_nokey.csv`` -> ``ValueError``;
      * no input at all -> ``ValueError``.
    """
    reference_dict = "THE\nALGEBRAIC\nEIGENVALUE\nPROBLEM\nDOM\nNVS TIO\nMINA\nMonographs\non Numerical Analysis\nJ.. H. WILKINSON"
    reference_df = "Mathematische Formelsammlung\nfür Ingenieure und Naturwissenschaftler\nMit zahlreichen Abbildungen und Rechenbeispielen\nund einer ausführlichen Integraltafel\n3., verbesserte Auflage"
    img_numbers = ["IMG_3755", "IMG_3756", "IMG_3757"]
    for image_ref in img_numbers:
        ref_file = get_path + "text_" + image_ref + ".txt"
        # Read as UTF-8 explicitly, like the other tests that open these
        # files; the platform default encoding can mis-decode non-ASCII text.
        with open(ref_file, "r", encoding="utf8") as file:
            reference_text = file.read()
        set_testdict[image_ref]["text_english"] = reference_text
    obj = tt.PostprocessText(mydict=set_testdict)
    # make sure test works on windows where end-of-line character is \r\n
    test_dict = obj.list_text_english[2].replace("\r", "")
    assert test_dict == reference_dict
    # Without the "text_english" key the dict input must be rejected.
    for image_entry in set_testdict.values():
        image_entry.pop("text_english")
    with pytest.raises(ValueError):
        tt.PostprocessText(mydict=set_testdict)
    obj = tt.PostprocessText(use_csv=True, csv_path=get_path + "test_data_out.csv")
    # make sure test works on windows where end-of-line character is \r\n
    test_df = obj.list_text_english[0].replace("\r", "")
    assert test_df == reference_df
    with pytest.raises(ValueError):
        tt.PostprocessText(use_csv=True, csv_path=get_path + "test_data_out_nokey.csv")
    with pytest.raises(ValueError):
        tt.PostprocessText()
|
|
|
|
|
|
def test_analyse_topic(get_path):
    """Run topic analysis on the CSV test data and check the leading topic.

    NOTE(review): the original comment states the analysis is not
    deterministic, yet the assertions below pin exact values -- confirm they
    are stable across runs.
    """
    postprocessor = tt.PostprocessText(
        use_csv=True, csv_path=get_path + "topic_analysis_test.csv"
    )
    _, topic_df, most_frequent_topics = postprocessor.analyse_topic()
    # since this is not deterministic we cannot be sure we get the same result twice
    first_topic_name = topic_df["Name"].iloc[0]
    first_topic_word = most_frequent_topics[0][0][0]
    assert len(topic_df) == 2
    assert first_topic_name == "0_the_feat_of_is"
    assert first_topic_word == "the"
|