diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 952d33e..2f0fba4 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/kynan/nbstripout
-    rev: 0.7.1
+    rev: 0.8.1
     hooks:
       - id: nbstripout
         files: ".ipynb"
diff --git a/ammico/notebooks/DemoNotebook_ammico.ipynb b/ammico/notebooks/DemoNotebook_ammico.ipynb
index fd24b09..aaf992f 100644
--- a/ammico/notebooks/DemoNotebook_ammico.ipynb
+++ b/ammico/notebooks/DemoNotebook_ammico.ipynb
@@ -173,8 +173,8 @@
    "source": [
     "%pwd\n",
     "image_dict = ammico.find_files(\n",
-    "    path=\"data/errors_Nov24/Error1\",\n",
-    "    # path=str(data_path),\n",
+    "    # path = \"/content/drive/MyDrive/misinformation-data/\",\n",
+    "    path=str(data_path),\n",
     "    limit=15,\n",
     ")"
    ]
diff --git a/ammico/text.py b/ammico/text.py
index 8d79e32..6149902 100644
--- a/ammico/text.py
+++ b/ammico/text.py
@@ -237,6 +237,17 @@ class TextDetector(AnalysisMethod):
         if not self.subdict["text"]:
             print("No text found - skipping analysis.")
         else:
+            # make sure all full stops are followed by whitespace
+            # otherwise googletrans breaks; a full stop that ends the
+            # text has no successor and is left untouched
+            text = self.subdict["text"]
+            last = len(text) - 1
+            self.subdict["text"] = "".join(
+                char + " "
+                if char == "." and pos < last and text[pos + 1] != " "
+                else char
+                for pos, char in enumerate(text)
+            )
             self.translate_text()
             self.remove_linebreaks()
             if self.analyse_text: