From dc6c904ec8c98c185f3403db683bdf1e2225bce1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 22 Nov 2024 14:58:10 +0100 Subject: [PATCH 1/2] [pre-commit.ci] pre-commit autoupdate (#223) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/kynan/nbstripout: 0.7.1 → 0.8.1](https://github.com/kynan/nbstripout/compare/0.7.1...0.8.1) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 952d33e..2f0fba4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/kynan/nbstripout - rev: 0.7.1 + rev: 0.8.1 hooks: - id: nbstripout files: ".ipynb" From 8057153e6050e89ddf5739cbaf7aaf2a6c445fc0 Mon Sep 17 00:00:00 2001 From: Inga Ulusoy Date: Fri, 22 Nov 2024 20:03:00 +0100 Subject: [PATCH 2/2] fix missing whitespace for links and names (#225) * fix missing whitespace for links and names * remove comments in notebook * cleanup --- ammico/notebooks/DemoNotebook_ammico.ipynb | 2 +- ammico/text.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/ammico/notebooks/DemoNotebook_ammico.ipynb b/ammico/notebooks/DemoNotebook_ammico.ipynb index 0f2b2e2..275e4eb 100644 --- a/ammico/notebooks/DemoNotebook_ammico.ipynb +++ b/ammico/notebooks/DemoNotebook_ammico.ipynb @@ -172,7 +172,7 @@ "outputs": [], "source": [ "image_dict = ammico.find_files(\n", - " # path=\"/content/drive/MyDrive/misinformation-data/\",\n", + " # path = \"/content/drive/MyDrive/misinformation-data/\",\n", " path=str(data_path),\n", " limit=15,\n", ")" diff --git a/ammico/text.py b/ammico/text.py index 8d79e32..6149902 100644 --- a/ammico/text.py +++ b/ammico/text.py @@ -237,6 +237,15 @@ class TextDetector(AnalysisMethod): if not self.subdict["text"]: print("No text found - skipping analysis.") else: + # make sure all full stops are followed by whitespace + # otherwise googletrans breaks + index_stop = self.subdict["text"].find(".") + if self.subdict["text"][index_stop + 1] != " ": + self.subdict["text"] = ( + self.subdict["text"][: index_stop + 1] + + " " + + self.subdict["text"][index_stop + 1 :] + ) self.translate_text() self.remove_linebreaks() if self.analyse_text: