* keep original text length, skip analysis if not translated

* bump version, remove debug
Этот коммит содержится в:
Inga Ulusoy 2025-02-19 11:23:27 +01:00 коммит произвёл GitHub
родитель 09d009c464
Коммит 14ae93063e
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
3 изменённых файла: 20 добавлений и 5 удалений

Просмотреть файл

@@ -159,9 +159,11 @@ def test_truncate_text(accepted):
test_obj.subdict["text"] = "I like cats and dogs."
test_obj._truncate_text()
assert test_obj.subdict["text"] == "I like cats and dogs."
assert "text_truncated" not in test_obj.subdict
test_obj.subdict["text"] = 20000 * "m"
test_obj._truncate_text()
assert test_obj.subdict["text"] == 5000 * "m"
assert test_obj.subdict["text_truncated"] == 5000 * "m"
assert test_obj.subdict["text"] == 20000 * "m"
@pytest.mark.gcv
@@ -173,6 +175,14 @@ def test_analyse_image(set_testdict, set_environ, accepted):
set_testdict[item], analyse_text=True, accept_privacy=accepted
)
test_obj.analyse_image()
testdict = {}
testdict["text"] = 20000 * "m"
test_obj = tt.TextDetector(
testdict, skip_extraction=True, analyse_text=True, accept_privacy=accepted
)
test_obj.analyse_image()
assert test_obj.subdict["text_truncated"] == 5000 * "m"
assert test_obj.subdict["text"] == 20000 * "m"
@pytest.mark.gcv

Просмотреть файл

@@ -263,7 +263,7 @@ class TextDetector(AnalysisMethod):
"""Truncate the text if it is too long for googletrans."""
if self.subdict["text"] and len(self.subdict["text"]) > max_length:
print("Text is too long - truncating to {} characters.".format(max_length))
self.subdict["text"] = self.subdict["text"][:max_length]
self.subdict["text_truncated"] = self.subdict["text"][:max_length]
def analyse_image(self) -> dict:
"""Perform text extraction and analysis of the text.
@@ -283,7 +283,7 @@ class TextDetector(AnalysisMethod):
self._truncate_text()
self.translate_text()
self.remove_linebreaks()
if self.analyse_text:
if self.analyse_text and self.subdict["text_english"]:
self._run_spacy()
self.clean_text()
self.text_summary()
@@ -336,8 +336,13 @@ class TextDetector(AnalysisMethod):
raise ValueError(
"Privacy disclosure not accepted - skipping text translation."
)
text_to_translate = (
self.subdict["text_truncated"]
if "text_truncated" in self.subdict
else self.subdict["text"]
)
try:
translated = self.translator.translate(self.subdict["text"])
translated = self.translator.translate(text_to_translate)
except Exception:
print("Could not translate the text with error {}.".format(Exception))
translated = None

Просмотреть файл

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
[project]
name = "ammico"
version = "0.2.5"
version = "0.2.6"
description = "AI Media and Misinformation Content Analysis Tool"
readme = "README.md"
maintainers = [