Skip to content

Commit

Permalink
Merge pull request #349 from heinpa/update-python-mt-tests
Browse files Browse the repository at this point in the history
Update test for created SPARQL INSERT queries in Python MT components
  • Loading branch information
Perevalov authored May 10, 2024
2 parents ca9b646 + 7ad8db3 commit 9ff3151
Show file tree
Hide file tree
Showing 12 changed files with 205 additions and 136 deletions.
4 changes: 3 additions & 1 deletion qanary-component-MT-Python-HelsinkiNLP/pytest.ini
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
[pytest]
log_cli = True
log_cli = 1
log_cli_level = INFO
log_cli_format = %(asctime)s [%(levelname)8s] [%(filename)s:%(lineno)s] %(message)s
log_cli_date_format=%Y-%m-%d %H:%M:%S
env =
SPRING_BOOT_ADMIN_URL=https://localhost:43740
SPRING_BOOT_ADMIN_USERNAME=admin
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from unittest.mock import patch
import re
from unittest import TestCase
#import pytest

class TestComponent(TestCase):

Expand All @@ -15,16 +14,16 @@ class TestComponent(TestCase):
out_graph = "urn:qanary#test-outGraph"

source_language = "de"
question_translation = "what is a test?"
target_language = "en"

request_data = '''{
"values": {
"urn:qanary#endpoint": "urn:qanary#test-endpoint",
"urn:qanary#inGraph": "urn:qanary#test-inGraph",
"urn:qanary#endpoint": "urn:qanary#test-endpoint",
"urn:qanary#inGraph": "urn:qanary#test-inGraph",
"urn:qanary#outGraph": "urn:qanary#test-outGraph"
},
"endpoint": "urn:qanary#test-endpoint",
"inGraph": "urn:qanary#test-inGraph",
"endpoint": "urn:qanary#test-endpoint",
"inGraph": "urn:qanary#test-inGraph",
"outGrpah": "urn:qanary#test-outGraph"
}'''

Expand All @@ -49,21 +48,32 @@ def test_qanary_service(self):
# when a call to /annotatequestion is made
response_json = client.post("/annotatequestion", headers = self.headers, data = self.request_data)

# then
# the text question is retrieved from the triplestore
# then the text question is retrieved from the triplestore
mocked_get_text_question_in_graph.assert_called_with(triplestore_endpoint=self.endpoint, graph=self.in_graph)

# new information is pushed to the triplestore
mocked_insert_into_triplestore.assert_called()
# get arguments of the (2) separate insert calls
arg_list = mocked_insert_into_triplestore.call_args_list
# get the call arguments for question translation
call_args_translation = [a.args for a in arg_list if "AnnotationOfQuestionTranslation" in a.args[1]]
assert len(call_args_translation) == 1
# get the call arguments for question language
call_args_language = [a.args for a in arg_list if "AnnotationOfQuestionLanguage" in a.args[1]]
assert len(call_args_language) == 1

args = mocked_insert_into_triplestore.call_args.args
query_stored = re.sub(r"(\\n\W*|\n\W*)", " ", args[1])
# clean query strings
query_translation = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_translation[0][1])
query_language = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_language[0][1])

# the source language is correctly identified and annotated
self.assertRegex(query_stored, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")
# the question is translated and the result is annotated
assert self.question_translation in query_stored
# then the triplestore is updated twice
# (question language and translation)
assert mocked_insert_into_triplestore.call_count == 2

# the response is not empty
assert response_json != None
# then the source language is correctly identified and annotated
self.assertRegex(query_language, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")

# then the question is translated and the result is annotated
self.assertRegex(query_translation, r".*AnnotationOfQuestionTranslation(.*;\W?)*oa:hasBody \".*\"@" + self.target_language + r".*\.")
assert "@"+self.target_language in query_translation.lower()

# then the response is not empty
assert response_json != None
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@

SERVICE_NAME_COMPONENT = os.environ["SERVICE_NAME_COMPONENT"]


target_lang = 'en'
SOURCE_LANG = os.environ["SOURCE_LANGUAGE"]
#TARGET_LANG = os.environ["TARGET_LANGUAGE"]
TARGET_LANG = "en" # currently only supports English
TRANSLATE_ENDPOINT = os.environ["TRANSLATE_ENDPOINT"]
LANGUAGES_ENDPOINT = os.environ["LANGUAGES_ENDPOINT"]



@mt_libretranslate_bp.route("/annotatequestion", methods=["POST"])
def qanary_service():
"""the POST endpoint required for a Qanary service"""
Expand All @@ -29,18 +29,26 @@ def qanary_service():
logging.info("endpoint: %s, inGraph: %s, outGraph: %s" % \
(triplestore_endpoint, triplestore_ingraph, triplestore_outgraph))

text = get_text_question_in_graph(triplestore_endpoint=triplestore_endpoint,
text = get_text_question_in_graph(triplestore_endpoint=triplestore_endpoint,
graph=triplestore_ingraph)[0]["text"]
question_uri = get_text_question_in_graph(triplestore_endpoint=triplestore_endpoint,
graph=triplestore_ingraph)[0]["uri"]
logging.info(f"Question text: {text}")

if SOURCE_LANG != None and len(SOURCE_LANG.strip()) > 0:
lang = SOURCE_LANG
logging.info("Using custom SOURCE_LANGUAGE")
else:
lang = detect(text)
logging.info("No SOURCE_LANGUAGE specified, using langdetect!")
logging.info(f"source language: {lang}")

#lang, prob = langid.classify(text)
lang = detect(text)
logging.info(f"source language: {lang}")

## TODO: MAIN FUNCTIONALITY
result, _ = translate_input(text, lang)
result, _ = translate_input(text, lang, TARGET_LANG)

# building SPARQL query TODO: verify this annotation AnnotationOfQuestionTranslation ??
SPARQLqueryAnnotationOfQuestionTranslation = """
Expand Down Expand Up @@ -112,7 +120,7 @@ def index():
return "Python MT LibreTranslate Qanary component"


def translate_input(text, source_lang):
def translate_input(text, source_lang, target_lang):

req_json = {
'q': text,
Expand All @@ -135,11 +143,11 @@ def check_connection():
success = "The test translation was successful"
try:
# TODO: test with supported language?
t, error = translate_input("eingabe zum testen", "de")
t, error = translate_input("eingabe zum testen", "de", "en")
logging.info(f"got translation: {t}")
assert len(t) > 0
return True, success
except Exception:
except Exception:
logging.info(f"test failed with {error}")
return False, error

Expand Down
14 changes: 13 additions & 1 deletion qanary-component-MT-Python-LibreTranslate/pytest.ini
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
[pytest]
log_cli = True
log_cli = 1
log_cli_level = INFO
log_cli_format = %(asctime)s [%(levelname)8s] [%(filename)s:%(lineno)s] %(message)s
log_cli_date_format=%Y-%m-%d %H:%M:%S
env =
SERVER_PORT=40120
SPRING_BOOT_ADMIN_URL=http://qanary-pipeline-host:40111
SERVER_HOST=http://public-component-host
SPRING_BOOT_ADMIN_USERNAME=admin
SPRING_BOOT_ADMIN_PASSWORD=admin
SERVICE_NAME_COMPONENT=LibreTranslate
TRANSLATE_ENDPOINT=http://localhost:5000/translate
LANGUAGES_ENDPOINT=http://localhost:5000/languages
SOURCE_LANGUAGE=
3 changes: 2 additions & 1 deletion qanary-component-MT-Python-LibreTranslate/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
Flask==2.2.2
Flask
langdetect==1.0.9
langid==1.1.6
mock==3.0.5
python-dotenv==0.21.1
qanary_helpers==0.2.2
gunicorn==20.1.0
pytest
pytest-env
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@ class TestComponent(TestCase):

logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)

questions = list([{"uri": "urn:test-uri", "text": "test_input"}])
questions = list([{"uri": "urn:test-uri", "text": "was ist ein Test?"}])
endpoint = "urn:qanary#test-endpoint"
in_graph = "urn:qanary#test-inGraph"
out_graph = "urn:qanary#test-outGraph"

source_language = "en"
source_language = "de"
target_language = "en"

test_translation_placeholder = "test_translation"

Expand Down Expand Up @@ -49,21 +50,32 @@ def test_qanary_service(self):
# when a call to /annotatequestion is made
response_json = client.post("/annotatequestion", headers = self.headers, data = self.request_data)

# then
# the text question is retrieved from the triplestore
# then the text question is retrieved from the triplestore
mocked_get_text_question_in_graph.assert_called_with(triplestore_endpoint=self.endpoint, graph=self.in_graph)

# new information is pushed to the triplestore
mocked_insert_into_triplestore.assert_called()
# get arguments of the (2) separate insert calls
arg_list = mocked_insert_into_triplestore.call_args_list
# get the call arguments for question translation
call_args_translation = [a.args for a in arg_list if "AnnotationOfQuestionTranslation" in a.args[1]]
assert len(call_args_translation) == 1
# get the call arguments for question language
call_args_language = [a.args for a in arg_list if "AnnotationOfQuestionLanguage" in a.args[1]]
assert len(call_args_language) == 1

args = mocked_insert_into_triplestore.call_args.args
query_stored = re.sub(r"(\\n\W*|\n\W*)", " ", args[1])
# clean query strings
query_translation = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_translation[0][1])
query_language = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_language[0][1])

# the source language is correctly identified and annotated
self.assertRegex(query_stored, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")
# the question is translated and the result is annotated
assert self.test_translation_placeholder in query_stored.lower()
# then the triplestore is updated twice
# (question language and translation)
assert mocked_insert_into_triplestore.call_count == 2

# the response is not empty
assert response_json != None
# then the source language is correctly identified and annotated
self.assertRegex(query_language, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")

# then the question is translated and the result is annotated
self.assertRegex(query_translation, r".*AnnotationOfQuestionTranslation(.*;\W?)*oa:hasBody \".*\"@" + self.target_language + r".*\.")
assert "@"+self.target_language in query_translation.lower()

# then the response is not empty
assert response_json != None
4 changes: 3 additions & 1 deletion qanary-component-MT-Python-MBart/pytest.ini
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
[pytest]
log_cli = True
log_cli = 1
log_cli_level = INFO
log_cli_format = %(asctime)s [%(levelname)8s] [%(filename)s:%(lineno)s] %(message)s
log_cli_date_format=%Y-%m-%d %H:%M:%S
env =
SERVER_PORT=40120
SPRING_BOOT_ADMIN_URL=http://qanary-pipeline-host:40111
Expand Down
45 changes: 28 additions & 17 deletions qanary-component-MT-Python-MBart/tests/test_mt_mbart_nlp.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from component.mt_mbart_nlp import *
from component import app
from unittest.mock import patch
import mock
import re
from unittest import TestCase

Expand All @@ -16,16 +15,16 @@ class TestComponent(TestCase):
out_graph = "urn:qanary#test-outGraph"

source_language = "de"
question_translation = "what is a test?"
target_language = "en"

request_data = '''{
"values": {
"urn:qanary#endpoint": "urn:qanary#test-endpoint",
"urn:qanary#inGraph": "urn:qanary#test-inGraph",
"urn:qanary#endpoint": "urn:qanary#test-endpoint",
"urn:qanary#inGraph": "urn:qanary#test-inGraph",
"urn:qanary#outGraph": "urn:qanary#test-outGraph"
},
"endpoint": "urn:qanary#test-endpoint",
"inGraph": "urn:qanary#test-inGraph",
"endpoint": "urn:qanary#test-endpoint",
"inGraph": "urn:qanary#test-inGraph",
"outGrpah": "urn:qanary#test-outGraph"
}'''

Expand All @@ -47,20 +46,32 @@ def test_qanary_service(self):
# when a call to /annotatequestion is made
response_json = client.post("/annotatequestion", headers = self.headers, data = self.request_data)

# then
# the text question is retrieved from the triplestore
# then the text question is retrieved from the triplestore
mocked_get_text_question_in_graph.assert_called_with(triplestore_endpoint=self.endpoint, graph=self.in_graph)

# new information is pushed to the triplestore
mocked_insert_into_triplestore.assert_called()
# get arguments of the (2) separate insert calls
arg_list = mocked_insert_into_triplestore.call_args_list
# get the call arguments for question translation
call_args_translation = [a.args for a in arg_list if "AnnotationOfQuestionTranslation" in a.args[1]]
assert len(call_args_translation) == 1
# get the call arguments for question language
call_args_language = [a.args for a in arg_list if "AnnotationOfQuestionLanguage" in a.args[1]]
assert len(call_args_language) == 1

args = mocked_insert_into_triplestore.call_args.args
query_stored = re.sub(r"(\\n\W*|\n\W*)", " ", args[1])
# clean query strings
query_translation = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_translation[0][1])
query_language = re.sub(r"(\\n\W*|\n\W*)", " ", call_args_language[0][1])

# the source language is correctly identified and annotated
self.assertRegex(query_stored, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")
# the question is translated and the result is annotated
assert self.question_translation in query_stored.lower()
# then the triplestore is updated twice
# (question language and translation)
assert mocked_insert_into_triplestore.call_count == 2

# the response is not empty
# then the source language is correctly identified and annotated
self.assertRegex(query_language, r".*AnnotationOfQuestionLanguage(.*;\W?)*oa:hasBody \""+self.source_language+r"\".*\.")

# then the question is translated and the result is annotated
self.assertRegex(query_translation, r".*AnnotationOfQuestionTranslation(.*;\W?)*oa:hasBody \".*\"@" + self.target_language + r".*\.")
assert "@"+self.target_language in query_translation.lower()

# then the response is not empty
assert response_json != None
2 changes: 1 addition & 1 deletion qanary-component-MT-Python-NLLB/component/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from component.mt_nllb import mt_nllb_bp
from flask import Flask

version = "0.1.2"
version = "0.1.3"

# default config file
configfile = "app.conf"
Expand Down
Loading

0 comments on commit 9ff3151

Please sign in to comment.