зеркало из
https://github.com/ssciwr/AMMICO.git
synced 2025-10-29 13:06:04 +02:00
keep original text length (#247)
* keep original text length, skip analysis if not translated
* bump version, remove debug
Этот коммит содержится в:
родитель
09d009c464
Коммит
14ae93063e
@ -159,9 +159,11 @@ def test_truncate_text(accepted):
|
|||||||
test_obj.subdict["text"] = "I like cats and dogs."
|
test_obj.subdict["text"] = "I like cats and dogs."
|
||||||
test_obj._truncate_text()
|
test_obj._truncate_text()
|
||||||
assert test_obj.subdict["text"] == "I like cats and dogs."
|
assert test_obj.subdict["text"] == "I like cats and dogs."
|
||||||
|
assert "text_truncated" not in test_obj.subdict
|
||||||
test_obj.subdict["text"] = 20000 * "m"
|
test_obj.subdict["text"] = 20000 * "m"
|
||||||
test_obj._truncate_text()
|
test_obj._truncate_text()
|
||||||
assert test_obj.subdict["text"] == 5000 * "m"
|
assert test_obj.subdict["text_truncated"] == 5000 * "m"
|
||||||
|
assert test_obj.subdict["text"] == 20000 * "m"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.gcv
|
@pytest.mark.gcv
|
||||||
@ -173,6 +175,14 @@ def test_analyse_image(set_testdict, set_environ, accepted):
|
|||||||
set_testdict[item], analyse_text=True, accept_privacy=accepted
|
set_testdict[item], analyse_text=True, accept_privacy=accepted
|
||||||
)
|
)
|
||||||
test_obj.analyse_image()
|
test_obj.analyse_image()
|
||||||
|
testdict = {}
|
||||||
|
testdict["text"] = 20000 * "m"
|
||||||
|
test_obj = tt.TextDetector(
|
||||||
|
testdict, skip_extraction=True, analyse_text=True, accept_privacy=accepted
|
||||||
|
)
|
||||||
|
test_obj.analyse_image()
|
||||||
|
assert test_obj.subdict["text_truncated"] == 5000 * "m"
|
||||||
|
assert test_obj.subdict["text"] == 20000 * "m"
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.gcv
|
@pytest.mark.gcv
|
||||||
|
|||||||
@ -263,7 +263,7 @@ class TextDetector(AnalysisMethod):
|
|||||||
"""Truncate the text if it is too long for googletrans."""
|
"""Truncate the text if it is too long for googletrans."""
|
||||||
if self.subdict["text"] and len(self.subdict["text"]) > max_length:
|
if self.subdict["text"] and len(self.subdict["text"]) > max_length:
|
||||||
print("Text is too long - truncating to {} characters.".format(max_length))
|
print("Text is too long - truncating to {} characters.".format(max_length))
|
||||||
self.subdict["text"] = self.subdict["text"][:max_length]
|
self.subdict["text_truncated"] = self.subdict["text"][:max_length]
|
||||||
|
|
||||||
def analyse_image(self) -> dict:
|
def analyse_image(self) -> dict:
|
||||||
"""Perform text extraction and analysis of the text.
|
"""Perform text extraction and analysis of the text.
|
||||||
@ -283,7 +283,7 @@ class TextDetector(AnalysisMethod):
|
|||||||
self._truncate_text()
|
self._truncate_text()
|
||||||
self.translate_text()
|
self.translate_text()
|
||||||
self.remove_linebreaks()
|
self.remove_linebreaks()
|
||||||
if self.analyse_text:
|
if self.analyse_text and self.subdict["text_english"]:
|
||||||
self._run_spacy()
|
self._run_spacy()
|
||||||
self.clean_text()
|
self.clean_text()
|
||||||
self.text_summary()
|
self.text_summary()
|
||||||
@ -336,8 +336,13 @@ class TextDetector(AnalysisMethod):
|
|||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Privacy disclosure not accepted - skipping text translation."
|
"Privacy disclosure not accepted - skipping text translation."
|
||||||
)
|
)
|
||||||
|
text_to_translate = (
|
||||||
|
self.subdict["text_truncated"]
|
||||||
|
if "text_truncated" in self.subdict
|
||||||
|
else self.subdict["text"]
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
translated = self.translator.translate(self.subdict["text"])
|
translated = self.translator.translate(text_to_translate)
|
||||||
except Exception:
|
except Exception:
|
||||||
print("Could not translate the text with error {}.".format(Exception))
|
print("Could not translate the text with error {}.".format(Exception))
|
||||||
translated = None
|
translated = None
|
||||||
|
|||||||
@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "ammico"
|
name = "ammico"
|
||||||
version = "0.2.5"
|
version = "0.2.6"
|
||||||
description = "AI Media and Misinformation Content Analysis Tool"
|
description = "AI Media and Misinformation Content Analysis Tool"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
maintainers = [
|
maintainers = [
|
||||||
|
|||||||
Загрузка…
x
Ссылка в новой задаче
Block a user