зеркало из
https://github.com/ssciwr/AMMICO.git
synced 2025-10-29 05:04:14 +02:00
keep original text length (#247)
* keep original text length, skip analysis if not translated
* bump version, remove debug
Этот коммит содержится в:
родитель
09d009c464
Коммит
14ae93063e
@ -159,9 +159,11 @@ def test_truncate_text(accepted):
|
||||
test_obj.subdict["text"] = "I like cats and dogs."
|
||||
test_obj._truncate_text()
|
||||
assert test_obj.subdict["text"] == "I like cats and dogs."
|
||||
assert "text_truncated" not in test_obj.subdict
|
||||
test_obj.subdict["text"] = 20000 * "m"
|
||||
test_obj._truncate_text()
|
||||
assert test_obj.subdict["text"] == 5000 * "m"
|
||||
assert test_obj.subdict["text_truncated"] == 5000 * "m"
|
||||
assert test_obj.subdict["text"] == 20000 * "m"
|
||||
|
||||
|
||||
@pytest.mark.gcv
|
||||
@ -173,6 +175,14 @@ def test_analyse_image(set_testdict, set_environ, accepted):
|
||||
set_testdict[item], analyse_text=True, accept_privacy=accepted
|
||||
)
|
||||
test_obj.analyse_image()
|
||||
testdict = {}
|
||||
testdict["text"] = 20000 * "m"
|
||||
test_obj = tt.TextDetector(
|
||||
testdict, skip_extraction=True, analyse_text=True, accept_privacy=accepted
|
||||
)
|
||||
test_obj.analyse_image()
|
||||
assert test_obj.subdict["text_truncated"] == 5000 * "m"
|
||||
assert test_obj.subdict["text"] == 20000 * "m"
|
||||
|
||||
|
||||
@pytest.mark.gcv
|
||||
|
||||
@ -263,7 +263,7 @@ class TextDetector(AnalysisMethod):
|
||||
"""Truncate the text if it is too long for googletrans."""
|
||||
if self.subdict["text"] and len(self.subdict["text"]) > max_length:
|
||||
print("Text is too long - truncating to {} characters.".format(max_length))
|
||||
self.subdict["text"] = self.subdict["text"][:max_length]
|
||||
self.subdict["text_truncated"] = self.subdict["text"][:max_length]
|
||||
|
||||
def analyse_image(self) -> dict:
|
||||
"""Perform text extraction and analysis of the text.
|
||||
@ -283,7 +283,7 @@ class TextDetector(AnalysisMethod):
|
||||
self._truncate_text()
|
||||
self.translate_text()
|
||||
self.remove_linebreaks()
|
||||
if self.analyse_text:
|
||||
if self.analyse_text and self.subdict["text_english"]:
|
||||
self._run_spacy()
|
||||
self.clean_text()
|
||||
self.text_summary()
|
||||
@ -336,8 +336,13 @@ class TextDetector(AnalysisMethod):
|
||||
raise ValueError(
|
||||
"Privacy disclosure not accepted - skipping text translation."
|
||||
)
|
||||
text_to_translate = (
|
||||
self.subdict["text_truncated"]
|
||||
if "text_truncated" in self.subdict
|
||||
else self.subdict["text"]
|
||||
)
|
||||
try:
|
||||
translated = self.translator.translate(self.subdict["text"])
|
||||
translated = self.translator.translate(text_to_translate)
|
||||
except Exception:
|
||||
print("Could not translate the text with error {}.".format(Exception))
|
||||
translated = None
|
||||
|
||||
@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
||||
|
||||
[project]
|
||||
name = "ammico"
|
||||
version = "0.2.5"
|
||||
version = "0.2.6"
|
||||
description = "AI Media and Misinformation Content Analysis Tool"
|
||||
readme = "README.md"
|
||||
maintainers = [
|
||||
|
||||
Загрузка…
x
Ссылка в новой задаче
Block a user