Merge pull request #349 from heinpa/update-python-mt-tests

Update test for created SPARQL INSERT queries in Python MT components
WDAqua · May 10, 2024 · 9ff3151 · 9ff3151
2 parents ca9b646 + 7ad8db3
commit 9ff3151
Show file tree

Hide file tree

Showing 12 changed files with 205 additions and 136 deletions.
diff --git a/qanary-component-MT-Python-HelsinkiNLP/pytest.ini b/qanary-component-MT-Python-HelsinkiNLP/pytest.ini
@@ -1,6 +1,8 @@
 [pytest]
-log_cli = True
+log_cli = 1
 log_cli_level = INFO
+log_cli_format = %(asctime)s [%(levelname)8s] [%(filename)s:%(lineno)s] %(message)s 
+log_cli_date_format=%Y-%m-%d %H:%M:%S
 env = 
     SPRING_BOOT_ADMIN_URL=https://localhost:43740
     SPRING_BOOT_ADMIN_USERNAME=admin

diff --git a/qanary-component-MT-Python-HelsinkiNLP/tests/test_mt_helsinky_nlp.py b/qanary-component-MT-Python-HelsinkiNLP/tests/test_mt_helsinky_nlp.py
@@ -3,7 +3,6 @@
 from unittest.mock import patch
 import re
 from unittest import TestCase
-#import pytest
 
 class TestComponent(TestCase):
 
@@ -15,16 +14,16 @@ class TestComponent(TestCase):
     out_graph = "urn:qanary#test-outGraph"
 
     source_language = "de"
-    question_translation = "what is a test?"
+    target_language = "en"
 
     request_data = '''{
         "values": {
-            "urn:qanary#endpoint": "urn:qanary#test-endpoint", 
-            "urn:qanary#inGraph": "urn:qanary#test-inGraph", 
+            "urn:qanary#endpoint": "urn:qanary#test-endpoint",
+            "urn:qanary#inGraph": "urn:qanary#test-inGraph",
             "urn:qanary#outGraph": "urn:qanary#test-outGraph"
         },
-        "endpoint": "urn:qanary#test-endpoint", 
-        "inGraph": "urn:qanary#test-inGraph", 
+        "endpoint": "urn:qanary#test-endpoint",
+        "inGraph": "urn:qanary#test-inGraph",
         "outGrpah": "urn:qanary#test-outGraph"
     }'''
 
@@ -49,21 +48,32 @@ def test_qanary_service(self):
             # when a call to /annotatequestion is made
             response_json = client.post("/annotatequestion", headers = self.headers, data = self.request_data)
 
-            # then
-            # the text question is retrieved from the triplestore
+            # then the text question is retrieved from the triplestore
             mocked_get_text_question_in_graph.assert_called_with(triplestore_endpoint=self.endpoint, graph=self.in_graph)
 
-            # new information is pushed to the triplestore 
-            mocked_insert_into_triplestore.assert_called()
+            # get arguments of the (2) separate insert calls 
+            arg_list = mocked_insert_into_triplestore.call_args_list
+            # get the call arguments for question translation
+            call_args_translation = [a.args for a in arg_list if "AnnotationOfQuestionTranslation" in a.args[1]]
+            assert len(call_args_translation) == 1
+            # get the call arguments for question language
+            call_args_language = [a.args for a in arg_list if "AnnotationOfQuestionLanguage" in a.args[1]]
+            assert len(call_args_language) == 1
 
-            args = mocked_insert_into_triplestore.call_args.args
-            query_stored = re.sub(r"(\\n\W*|\n\W*)", " ", args[1])
+            # clean query strings
+            query_translation = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_translation[0][1])
+            query_language = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_language[0][1])
 
-            # the source language is correctly identified and annotated
-            self.assertRegex(query_stored, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")
-            # the question is translated and the result is annotated
-            assert self.question_translation in query_stored
+            # then the triplestore is updated twice 
+            # (question language and translation)
+            assert mocked_insert_into_triplestore.call_count == 2
 
-            # the response is not empty
-            assert response_json != None
+            # then the source language is correctly identified and annotated
+            self.assertRegex(query_language, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")
+
+            # then the question is translated and the result is annotated
+            self.assertRegex(query_translation, r".*AnnotationOfQuestionTranslation(.*;\W?)*oa:hasBody \".*\"@" + self.target_language + r".*\.")
+            assert "@"+self.target_language in query_translation.lower()
 
+            # then the response is not empty
+            assert response_json != None
diff --git a/qanary-component-MT-Python-LibreTranslate/component/mt_libretranslate.py b/qanary-component-MT-Python-LibreTranslate/component/mt_libretranslate.py
@@ -12,13 +12,13 @@
 
 SERVICE_NAME_COMPONENT = os.environ["SERVICE_NAME_COMPONENT"]
 
-
-target_lang = 'en'
+SOURCE_LANG = os.environ["SOURCE_LANGUAGE"]
+#TARGET_LANG = os.environ["TARGET_LANGUAGE"]
+TARGET_LANG = "en" # currently only supports English
 TRANSLATE_ENDPOINT = os.environ["TRANSLATE_ENDPOINT"]
 LANGUAGES_ENDPOINT = os.environ["LANGUAGES_ENDPOINT"]
 
 
-
 @mt_libretranslate_bp.route("/annotatequestion", methods=["POST"])
 def qanary_service():
     """the POST endpoint required for a Qanary service"""
@@ -29,18 +29,26 @@ def qanary_service():
     logging.info("endpoint: %s, inGraph: %s, outGraph: %s" % \
                  (triplestore_endpoint, triplestore_ingraph, triplestore_outgraph))
 
-    text = get_text_question_in_graph(triplestore_endpoint=triplestore_endpoint, 
+    text = get_text_question_in_graph(triplestore_endpoint=triplestore_endpoint,
                                       graph=triplestore_ingraph)[0]["text"]
     question_uri = get_text_question_in_graph(triplestore_endpoint=triplestore_endpoint,
                                               graph=triplestore_ingraph)[0]["uri"]
     logging.info(f"Question text: {text}")
 
+    if SOURCE_LANG != None and len(SOURCE_LANG.strip()) > 0:
+        lang = SOURCE_LANG
+        logging.info("Using custom SOURCE_LANGUAGE")
+    else:
+        lang = detect(text)
+        logging.info("No SOURCE_LANGUAGE specified, using langdetect!")
+    logging.info(f"source language: {lang}")
+
     #lang, prob = langid.classify(text)
     lang = detect(text)
     logging.info(f"source language: {lang}")
 
     ## TODO: MAIN FUNCTIONALITY
-    result, _ = translate_input(text, lang)
+    result, _ = translate_input(text, lang, TARGET_LANG)
 
     # building SPARQL query TODO: verify this annotation AnnotationOfQuestionTranslation ??
     SPARQLqueryAnnotationOfQuestionTranslation = """
@@ -112,7 +120,7 @@ def index():
     return "Python MT LibreTranslate Qanary component"
 
 
-def translate_input(text, source_lang):
+def translate_input(text, source_lang, target_lang):
 
     req_json = {
         'q': text,
@@ -135,11 +143,11 @@ def check_connection():
     success = "The test translation was successful"
     try:
         # TODO: test with supported language? 
-        t, error = translate_input("eingabe zum testen", "de")
+        t, error = translate_input("eingabe zum testen", "de", "en")
         logging.info(f"got translation: {t}")
         assert len(t) > 0
         return True, success
-    except Exception: 
+    except Exception:
         logging.info(f"test failed with {error}")
         return False, error
 

diff --git a/qanary-component-MT-Python-LibreTranslate/pytest.ini b/qanary-component-MT-Python-LibreTranslate/pytest.ini
@@ -1,3 +1,15 @@
 [pytest]
-log_cli = True
+log_cli = 1
 log_cli_level = INFO
+log_cli_format = %(asctime)s [%(levelname)8s] [%(filename)s:%(lineno)s] %(message)s 
+log_cli_date_format=%Y-%m-%d %H:%M:%S
+env = 
+  SERVER_PORT=40120
+  SPRING_BOOT_ADMIN_URL=http://qanary-pipeline-host:40111
+  SERVER_HOST=http://public-component-host
+  SPRING_BOOT_ADMIN_USERNAME=admin
+  SPRING_BOOT_ADMIN_PASSWORD=admin
+  SERVICE_NAME_COMPONENT=LibreTranslate
+  TRANSLATE_ENDPOINT=http://localhost:5000/translate
+  LANGUAGES_ENDPOINT=http://localhost:5000/languages
+  SOURCE_LANGUAGE=
diff --git a/qanary-component-MT-Python-LibreTranslate/requirements.txt b/qanary-component-MT-Python-LibreTranslate/requirements.txt
@@ -1,8 +1,9 @@
-Flask==2.2.2
+Flask
 langdetect==1.0.9
 langid==1.1.6
 mock==3.0.5
 python-dotenv==0.21.1
 qanary_helpers==0.2.2
 gunicorn==20.1.0
 pytest
+pytest-env
diff --git a/qanary-component-MT-Python-LibreTranslate/tests/test_mt_libretranslate.py b/qanary-component-MT-Python-LibreTranslate/tests/test_mt_libretranslate.py
@@ -9,12 +9,13 @@ class TestComponent(TestCase):
 
     logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)
 
-    questions = list([{"uri": "urn:test-uri", "text": "test_input"}])
+    questions = list([{"uri": "urn:test-uri", "text": "was ist ein Test?"}])
     endpoint = "urn:qanary#test-endpoint"
     in_graph = "urn:qanary#test-inGraph"
     out_graph = "urn:qanary#test-outGraph"
 
-    source_language = "en"
+    source_language = "de"
+    target_language = "en"
 
     test_translation_placeholder = "test_translation"
 
@@ -49,21 +50,32 @@ def test_qanary_service(self):
             # when a call to /annotatequestion is made
             response_json = client.post("/annotatequestion", headers = self.headers, data = self.request_data)
 
-            # then
-            # the text question is retrieved from the triplestore
+            # then the text question is retrieved from the triplestore
             mocked_get_text_question_in_graph.assert_called_with(triplestore_endpoint=self.endpoint, graph=self.in_graph)
 
-            # new information is pushed to the triplestore 
-            mocked_insert_into_triplestore.assert_called()
+            # get arguments of the (2) separate insert calls 
+            arg_list = mocked_insert_into_triplestore.call_args_list
+            # get the call arguments for question translation
+            call_args_translation = [a.args for a in arg_list if "AnnotationOfQuestionTranslation" in a.args[1]]
+            assert len(call_args_translation) == 1
+            # get the call arguments for question language
+            call_args_language = [a.args for a in arg_list if "AnnotationOfQuestionLanguage" in a.args[1]]
+            assert len(call_args_language) == 1
 
-            args = mocked_insert_into_triplestore.call_args.args
-            query_stored = re.sub(r"(\\n\W*|\n\W*)", " ", args[1])
+            # clean query strings
+            query_translation = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_translation[0][1])
+            query_language = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_language[0][1])
 
-            # the source language is correctly identified and annotated
-            self.assertRegex(query_stored, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")
-            # the question is translated and the result is annotated
-            assert self.test_translation_placeholder in query_stored.lower()
+            # then the triplestore is updated twice 
+            # (question language and translation)
+            assert mocked_insert_into_triplestore.call_count == 2
 
-            # the response is not empty
-            assert response_json != None
+            # then the source language is correctly identified and annotated
+            self.assertRegex(query_language, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")
+
+            # then the question is translated and the result is annotated
+            self.assertRegex(query_translation, r".*AnnotationOfQuestionTranslation(.*;\W?)*oa:hasBody \".*\"@" + self.target_language + r".*\.")
+            assert "@"+self.target_language in query_translation.lower()
 
+            # then the response is not empty
+            assert response_json != None
diff --git a/qanary-component-MT-Python-MBart/pytest.ini b/qanary-component-MT-Python-MBart/pytest.ini
@@ -1,6 +1,8 @@
 [pytest]
-log_cli = True
+log_cli = 1
 log_cli_level = INFO
+log_cli_format = %(asctime)s [%(levelname)8s] [%(filename)s:%(lineno)s] %(message)s 
+log_cli_date_format=%Y-%m-%d %H:%M:%S
 env =
   SERVER_PORT=40120
   SPRING_BOOT_ADMIN_URL=http://qanary-pipeline-host:40111

diff --git a/qanary-component-MT-Python-MBart/tests/test_mt_mbart_nlp.py b/qanary-component-MT-Python-MBart/tests/test_mt_mbart_nlp.py
@@ -1,7 +1,6 @@
 from component.mt_mbart_nlp import *
 from component import app
 from unittest.mock import patch
-import mock 
 import re
 from unittest import TestCase
 
@@ -16,16 +15,16 @@ class TestComponent(TestCase):
     out_graph = "urn:qanary#test-outGraph"
 
     source_language = "de"
-    question_translation = "what is a test?"
+    target_language = "en"
 
     request_data = '''{
         "values": {
-            "urn:qanary#endpoint": "urn:qanary#test-endpoint", 
-            "urn:qanary#inGraph": "urn:qanary#test-inGraph", 
+            "urn:qanary#endpoint": "urn:qanary#test-endpoint",
+            "urn:qanary#inGraph": "urn:qanary#test-inGraph",
             "urn:qanary#outGraph": "urn:qanary#test-outGraph"
         },
-        "endpoint": "urn:qanary#test-endpoint", 
-        "inGraph": "urn:qanary#test-inGraph", 
+        "endpoint": "urn:qanary#test-endpoint",
+        "inGraph": "urn:qanary#test-inGraph",
         "outGrpah": "urn:qanary#test-outGraph"
     }'''
 
@@ -47,20 +46,32 @@ def test_qanary_service(self):
             # when a call to /annotatequestion is made
             response_json = client.post("/annotatequestion", headers = self.headers, data = self.request_data)
 
-            # then
-            # the text question is retrieved from the triplestore
+            # then the text question is retrieved from the triplestore
             mocked_get_text_question_in_graph.assert_called_with(triplestore_endpoint=self.endpoint, graph=self.in_graph)
 
-            # new information is pushed to the triplestore 
-            mocked_insert_into_triplestore.assert_called()
+            # get arguments of the (2) separate insert calls 
+            arg_list = mocked_insert_into_triplestore.call_args_list
+            # get the call arguments for question translation
+            call_args_translation = [a.args for a in arg_list if "AnnotationOfQuestionTranslation" in a.args[1]]
+            assert len(call_args_translation) == 1
+            # get the call arguments for question language
+            call_args_language = [a.args for a in arg_list if "AnnotationOfQuestionLanguage" in a.args[1]]
+            assert len(call_args_language) == 1
 
-            args = mocked_insert_into_triplestore.call_args.args
-            query_stored = re.sub(r"(\\n\W*|\n\W*)", " ", args[1])
+            # clean query strings
+            query_translation = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_translation[0][1])
+            query_language = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_language[0][1])
 
-            # the source language is correctly identified and annotated
-            self.assertRegex(query_stored, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")
-            # the question is translated and the result is annotated
-            assert self.question_translation in query_stored.lower()
+            # then the triplestore is updated twice 
+            # (question language and translation)
+            assert mocked_insert_into_triplestore.call_count == 2
 
-            # the response is not empty
+            # then the source language is correctly identified and annotated
+            self.assertRegex(query_language, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")
+
+            # then the question is translated and the result is annotated
+            self.assertRegex(query_translation, r".*AnnotationOfQuestionTranslation(.*;\W?)*oa:hasBody \".*\"@" + self.target_language + r".*\.")
+            assert "@"+self.target_language in query_translation.lower()
+
+            # then the response is not empty
             assert response_json != None
diff --git a/qanary-component-MT-Python-NLLB/component/__init__.py b/qanary-component-MT-Python-NLLB/component/__init__.py
@@ -1,7 +1,7 @@
 from component.mt_nllb import mt_nllb_bp
 from flask import Flask
 
-version = "0.1.2"
+version = "0.1.3"
 
 # default config file
 configfile = "app.conf"