From 07a95d191ba13fedc1fd3e36cb9db4111b28e13e Mon Sep 17 00:00:00 2001 From: BaiBlanc <1458491606@qq.com> Date: Fri, 28 Aug 2020 23:20:31 +0200 Subject: [PATCH 1/3] Benchmark modifications --- gsoc/zheyuan/utility/benchmark/benchmark.py | 6 +++--- .../utility/benchmark/extract_questions.py | 5 ++--- gsoc/zheyuan/utility/benchmark/interpreter.py | 12 +++++++----- .../utility/benchmark/retrieve_answers.py | 16 +++++++++++++--- 4 files changed, 25 insertions(+), 14 deletions(-) diff --git a/gsoc/zheyuan/utility/benchmark/benchmark.py b/gsoc/zheyuan/utility/benchmark/benchmark.py index 30d0d4b..acd6e4d 100644 --- a/gsoc/zheyuan/utility/benchmark/benchmark.py +++ b/gsoc/zheyuan/utility/benchmark/benchmark.py @@ -29,11 +29,11 @@ def benchmark(trained_model, test_set, answer_file="answers.json"): answer_group = [] answers.append(answer_group) - json_file = construct_json("qald-9-train-multilingual", questions_info, questions, sparqls, answers) + json_file = construct_json(test_set.replace(".qald.json",""), questions_info, questions, sparqls, answers) path = "../gsoc/zheyuan/utility/benchmark/" - with open(path+"answers.qald.json", "w") as f: + with open(path+"answers-"+test_set, "w") as f: # js = json.dumps(json_file, indent=4, separators=(',', ':')) - json.dump(json_file, f) + json.dump(json_file, f, indent=4, separators=(', ', ': ')) diff --git a/gsoc/zheyuan/utility/benchmark/extract_questions.py b/gsoc/zheyuan/utility/benchmark/extract_questions.py index f8f3060..c4ad069 100644 --- a/gsoc/zheyuan/utility/benchmark/extract_questions.py +++ b/gsoc/zheyuan/utility/benchmark/extract_questions.py @@ -29,11 +29,10 @@ def read_json(file): return questions_info, questions def write_to_ask(questions): - with open('to_ask.txt', 'w') as write_f: + with open('to_ask1.txt', 'w') as write_f: for key in questions: question = questions[key] - write_f.write(question+"\n") - + write_f.write(question.lower().replace("?"," ?")+"\n") if __name__ == "__main__": """ diff --git a/gsoc/zheyuan/utility/benchmark/interpreter.py b/gsoc/zheyuan/utility/benchmark/interpreter.py index 6a3b630..95b5597 100644 --- a/gsoc/zheyuan/utility/benchmark/interpreter.py +++ b/gsoc/zheyuan/utility/benchmark/interpreter.py @@ -5,16 +5,17 @@ def interprete(trained_model_folder): os.system('pwd') print('start') folder_name = 'data/'+trained_model_folder + print('python -m nmt.nmt --vocab_prefix=../' + folder_name + '/vocab --out_dir=../' + folder_name + '_model --inference_input_file=../gsoc/zheyuan/utility/benchmark/to_ask1.txt --inference_output_file=../gsoc/zheyuan/utility/benchmark/output1.txt --src=en --tgt=sparql | tail -n4') os.system( - 'python -m nmt.nmt --vocab_prefix=../' + folder_name + '/vocab --model_dir=../' + folder_name + '_model --inference_input_file=../gsoc/zheyuan/utility/benchmark/to_ask.txt --inference_output_file=../gsoc/zheyuan/utility/benchmark/output.txt --out_dir=../' + folder_name + '_model --src=en --tgt=sparql | tail -n4') + 'python -m nmt.nmt --vocab_prefix=../' + folder_name + '/vocab --out_dir=../' + folder_name + '_model --inference_input_file=../gsoc/zheyuan/utility/benchmark/to_ask1.txt --inference_output_file=../gsoc/zheyuan/utility/benchmark/output1.txt --src=en --tgt=sparql | tail -n4') os.system('''if [ $? -eq 0 ] then echo "" echo "ANSWER IN SPARQL SEQUENCE:" - ENCODED="$(cat ../gsoc/zheyuan/utility/benchmark/output.txt)" - python ../interpreter.py "${ENCODED}" > ../gsoc/zheyuan/utility/benchmark/output_decoded.txt - cat ../gsoc/zheyuan/utility/benchmark/output_decoded.txt + ENCODED="$(cat ../gsoc/zheyuan/utility/benchmark/output1.txt)" + python ../interpreter.py "${ENCODED}" > ../gsoc/zheyuan/utility/benchmark/output_decoded1.txt + cat ../gsoc/zheyuan/utility/benchmark/output_decoded1.txt echo "" fi''') print('end') @@ -24,4 +25,5 @@ def interprete(trained_model_folder): Section to test the Interpreter. """ interprete('monument_300') - pass \ No newline at end of file + pass + diff --git a/gsoc/zheyuan/utility/benchmark/retrieve_answers.py b/gsoc/zheyuan/utility/benchmark/retrieve_answers.py index f29b4f9..f5f6181 100644 --- a/gsoc/zheyuan/utility/benchmark/retrieve_answers.py +++ b/gsoc/zheyuan/utility/benchmark/retrieve_answers.py @@ -8,7 +8,7 @@ def read_sparqls(): os.system("pwd") sparqls = [] - file_path = "../gsoc/zheyuan/utility/benchmark/output_decoded.txt" + file_path = "../gsoc/zheyuan/utility/benchmark/output_decoded1.txt" with open(file_path, 'r') as lines: for line in lines: sparqls.append(line) @@ -42,6 +42,7 @@ def retrieve(query): } } + answer_dict["results"]["bindings"].append(uri) for td in rows.find_all("pre"): @@ -54,9 +55,18 @@ def retrieve(query): "value": a } } + answer_dict["results"]["bindings"].append(uri) - answers.append(answer_dict) + if answer_dict["results"]["bindings"]: + answers.append(answer_dict) + if not answers: + return [{ + "head" : { + "vars" : [ "date" ] + }, + "results" : { } + }] return answers @@ -73,7 +83,7 @@ def retrieve(query): # query = args.query answer_groups = [] i = 1 - with open("./output_decoded.txt", 'r') as lines: + with open("../output_decoded.txt", 'r') as lines: for line in lines: i+=1 try: From c9fa5f1c53ffbe3f18a9a4ebb04bc945036ce115 Mon Sep 17 00:00:00 2001 From: BaiBlanc <1458491606@qq.com> Date: Fri, 28 Aug 2020 23:20:50 +0200 Subject: [PATCH 2/3] Benchmark modifications --- gsoc/zheyuan/utility/benchmark/reconstruct_json.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gsoc/zheyuan/utility/benchmark/reconstruct_json.py b/gsoc/zheyuan/utility/benchmark/reconstruct_json.py index ccc8505..b17f157 100644 --- a/gsoc/zheyuan/utility/benchmark/reconstruct_json.py +++ b/gsoc/zheyuan/utility/benchmark/reconstruct_json.py @@ -2,9 +2,11 @@ def construct_json(dataset_id,infos, questions, sparqls, answers): qald_test_answers_dict = {} qald_test_answers_dict["dataset"] = {'id':dataset_id} qald_test_answers_dict['questions'] = [] + print(len(answers)) for index,info in enumerate(infos): question_dict = info + id = info["id"] question = questions[id] question_dict["question"] = [{ @@ -13,6 +15,7 @@ def construct_json(dataset_id,infos, questions, sparqls, answers): }] question_dict["query"] = {"sparql" : sparqls[index]} question_dict["answers"] = answers[index] + print(answers[index]) qald_test_answers_dict['questions'].append(question_dict) return qald_test_answers_dict From 903cfc74954b4d10bc10f74d049d13c28c2529fd Mon Sep 17 00:00:00 2001 From: BaiBlanc <1458491606@qq.com> Date: Sat, 29 Aug 2020 01:19:15 +0200 Subject: [PATCH 3/3] requirements.txt added: Mittens, tensorflow_hub, requests, torch, transformers, constant, NLTK, xmltodict, sklearn --- requirements.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/requirements.txt b/requirements.txt index df651fb..d055e01 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,3 +25,12 @@ tensorflow==1.15.2 termcolor==1.1.0 tqdm==4.43.0 Werkzeug==1.0.0 +Mittens +tensorflow_hub +requests +torch +transformers +constant +NLTK +xmltodict +sklearn