зеркало из
https://github.com/ssciwr/AMMICO.git
synced 2025-10-30 13:36:04 +02:00
* start with translate * translate and clean - notebook * spacy model in requirements * translate in module * clean in module * upload coverage only for ubuntu * update ubuntu version on runner * update dependencies * start tests for text * skip gcv test * fix age * more text tests * more text tests * add comment * test translation * fix numpy version; add reference data for trans * use utf-8 for windows
78 строки
2.4 KiB
Python
78 строки
2.4 KiB
Python
import os
|
|
import pytest
|
|
import spacy
|
|
import misinformation.text as tt
|
|
|
|
# Per-image input dictionaries handed to ``tt.TextDetector``; the key is the
# image stem and "filename" points at the corresponding JPEG test fixture.
TESTDICT = {
    stem: {"filename": f"./test/data/{stem}.jpg"}
    for stem in ("IMG_3755", "IMG_3756", "IMG_3757")
}

# Expected "text_language" value per image, in the same order as TESTDICT
# (the two are paired with ``zip`` in ``test_translate_text``).
LANGUAGES = ["de", "om", "en"]

# Set at import time so the credentials path is in the environment before any
# test in this module runs.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = (
    "../data/seismic-bonfire-329406-412821a70264.json"
)
|
|
|
|
|
|
def test_TextDetector():
    """Check that a fresh ``TextDetector`` starts with all text fields unset.

    For every entry in ``TESTDICT`` a detector is constructed and each of the
    text-related keys in its ``subdict`` is expected to be ``None``.
    """
    unset_keys = ("text", "text_language", "text_english", "text_cleaned")
    for entry in TESTDICT.values():
        detector = tt.TextDetector(entry)
        for key in unset_keys:
            assert detector.subdict[key] is None
|
|
|
|
|
|
@pytest.mark.gcv
def test_get_text_from_image():
    """Compare text extracted from each test image with its reference file.

    The expected text for image ``<name>`` is read from
    ``./test/data/text_<name>.txt``. The test carries the ``gcv`` marker so
    it can be selected or deselected as a group.
    NOTE(review): presumably this needs working Google Cloud Vision
    credentials -- confirm against ``TextDetector.get_text_from_image``.
    """
    for name, entry in TESTDICT.items():
        detector = tt.TextDetector(entry)
        detector.get_text_from_image()
        expected_path = f"./test/data/text_{name}.txt"
        with open(expected_path, "r", encoding="utf8") as handle:
            expected = handle.read()
        assert detector.subdict["text"] == expected
|
|
|
|
|
|
def test_translate_text():
    """Check language detection and English translation of the reference texts.

    Each image in ``TESTDICT`` is paired positionally with an entry of
    ``LANGUAGES``. The stored reference text is injected as the detector's
    ``"text"`` value, ``translate_text`` is called, and the resulting
    ``"text_language"`` and ``"text_english"`` values are compared with the
    expected language code and the stored translation.
    """
    for name, expected_lang in zip(TESTDICT, LANGUAGES):
        detector = tt.TextDetector(TESTDICT[name])
        source_path = f"./test/data/text_{name}.txt"
        translation_path = f"./test/data/text_translated_{name}.txt"
        with open(source_path, "r", encoding="utf8") as handle:
            source_text = handle.read()
        with open(translation_path, "r", encoding="utf8") as handle:
            expected_translation = handle.read()
        # Bypass text extraction: feed the reference text in directly.
        detector.subdict["text"] = source_text
        detector.translate_text()
        assert detector.subdict["text_language"] == expected_lang
        assert detector.subdict["text_english"] == expected_translation
|
|
|
|
|
|
def test_init_spacy():
    """Check that ``_init_spacy`` produces a spaCy ``Doc`` on the detector.

    The reference text for ``IMG_3755`` is stored as the detector's
    ``"text_english"`` value before ``_init_spacy`` is called; afterwards
    ``test_obj.doc`` must be a ``spacy.tokens.doc.Doc`` instance.
    """
    test_obj = tt.TextDetector(TESTDICT["IMG_3755"])
    ref_file = "./test/data/text_IMG_3755.txt"
    # Read with an explicit UTF-8 encoding, like the other tests that load
    # the text_*.txt fixtures; relying on the platform default encoding can
    # fail or mis-decode the file on systems whose default is not UTF-8.
    with open(ref_file, "r", encoding="utf8") as file:
        reference_text = file.read()
    test_obj.subdict["text_english"] = reference_text
    test_obj._init_spacy()
    assert isinstance(test_obj.doc, spacy.tokens.doc.Doc)
|
|
|
|
|
|
def test_clean_text():
    """Check that ``clean_text`` drops the nonsense token from a sentence.

    A spaCy ``Doc`` for "I like cats and fjejg" is built with the
    ``en_core_web_md`` model and assigned to the detector directly; after
    ``clean_text`` the stored result must be "I like cats and".
    """
    parsed = spacy.load("en_core_web_md")("I like cats and fjejg")
    detector = tt.TextDetector(TESTDICT["IMG_3755"])
    # Set the Doc by hand instead of going through _init_spacy.
    detector.doc = parsed
    detector.clean_text()
    expected = "I like cats and"
    # NOTE(review): this reads "text_clean" while test_TextDetector checks
    # "text_cleaned" -- confirm which key TextDetector is meant to use.
    assert detector.subdict["text_clean"] == expected
|