From 8057153e6050e89ddf5739cbaf7aaf2a6c445fc0 Mon Sep 17 00:00:00 2001
From: Inga Ulusoy
Date: Fri, 22 Nov 2024 20:03:00 +0100
Subject: [PATCH] fix missing whitespace for links and names (#225)

* fix missing whitespace for links and names

* remove comments in notebook

* cleanup
---
 ammico/notebooks/DemoNotebook_ammico.ipynb | 2 +-
 ammico/text.py                             | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/ammico/notebooks/DemoNotebook_ammico.ipynb b/ammico/notebooks/DemoNotebook_ammico.ipynb
index 0f2b2e2e..275e4eba 100644
--- a/ammico/notebooks/DemoNotebook_ammico.ipynb
+++ b/ammico/notebooks/DemoNotebook_ammico.ipynb
@@ -172,7 +172,7 @@
    "outputs": [],
    "source": [
     "image_dict = ammico.find_files(\n",
-    " # path=\"/content/drive/MyDrive/misinformation-data/\",\n",
+    " # path = \"/content/drive/MyDrive/misinformation-data/\",\n",
     " path=str(data_path),\n",
     " limit=15,\n",
     ")"
diff --git a/ammico/text.py b/ammico/text.py
index 8d79e322..61499022 100644
--- a/ammico/text.py
+++ b/ammico/text.py
@@ -237,6 +237,15 @@ def analyse_image(self) -> dict:
         if not self.subdict["text"]:
             print("No text found - skipping analysis.")
         else:
+            # make sure all full stops are followed by whitespace
+            # otherwise googletrans breaks
+            index_stop = self.subdict["text"].find(".")
+            if self.subdict["text"][index_stop + 1] != " ":
+                self.subdict["text"] = (
+                    self.subdict["text"][: index_stop + 1]
+                    + " "
+                    + self.subdict["text"][index_stop + 1 :]
+                )
             self.translate_text()
             self.remove_linebreaks()
             if self.analyse_text: