Merge pull request #34 from BaiBlanc/master
Benchmark
BaiBlanc authored Aug 28, 2020
2 parents e22e36f + 903cfc7 commit d795771
Showing 6 changed files with 37 additions and 14 deletions.
6 changes: 3 additions & 3 deletions gsoc/zheyuan/utility/benchmark/benchmark.py
@@ -29,11 +29,11 @@ def benchmark(trained_model, test_set, answer_file="answers.json"):
answer_group = []
answers.append(answer_group)

-json_file = construct_json("qald-9-train-multilingual", questions_info, questions, sparqls, answers)
+json_file = construct_json(test_set.replace(".qald.json",""), questions_info, questions, sparqls, answers)
path = "../gsoc/zheyuan/utility/benchmark/"
-with open(path+"answers.qald.json", "w") as f:
+with open(path+"answers-"+test_set, "w") as f:
# js = json.dumps(json_file, indent=4, separators=(',', ':'))
-json.dump(json_file, f)
+json.dump(json_file, f, indent=4, separators=(', ', ': '))

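Note on the benchmark.py change, as a minimal sketch: the answers file is now named after the test set that was evaluated, and the JSON is pretty-printed. The test_set value and the toy json_file below are placeholders, not values from the repository.

import json

# Hypothetical example values, for illustration only.
test_set = "qald-9-test-multilingual.qald.json"
path = "./"  # the repository writes to "../gsoc/zheyuan/utility/benchmark/"

# Dataset id is the test-set file name without its ".qald.json" suffix.
dataset_id = test_set.replace(".qald.json", "")   # -> "qald-9-test-multilingual"
json_file = {"dataset": {"id": dataset_id}, "questions": []}

# Output file is named after the test set and written with indentation.
with open(path + "answers-" + test_set, "w") as f:
    json.dump(json_file, f, indent=4, separators=(', ', ': '))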
5 changes: 2 additions & 3 deletions gsoc/zheyuan/utility/benchmark/extract_questions.py
@@ -29,11 +29,10 @@ def read_json(file):

return questions_info, questions
def write_to_ask(questions):
-with open('to_ask.txt', 'w') as write_f:
+with open('to_ask1.txt', 'w') as write_f:
for key in questions:
question = questions[key]
-write_f.write(question+"\n")
-
+write_f.write(question.lower().replace("?"," ?")+"\n")

if __name__ == "__main__":
"""
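A quick sketch of the preprocessing the new write line applies before each question goes into to_ask1.txt: lowercasing and detaching the trailing question mark so it becomes its own token (presumably to match the space-tokenized NMT vocabulary; the sample question below is made up).

question = "Where was Angela Merkel born?"

# Lowercase the question and split off the "?" as a separate token.
prepared = question.lower().replace("?", " ?")
print(prepared)  # where was angela merkel born ?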
12 changes: 7 additions & 5 deletions gsoc/zheyuan/utility/benchmark/interpreter.py
@@ -5,16 +5,17 @@ def interprete(trained_model_folder):
os.system('pwd')
print('start')
folder_name = 'data/'+trained_model_folder
+print('python -m nmt.nmt --vocab_prefix=../' + folder_name + '/vocab --out_dir=../' + folder_name + '_model --inference_input_file=../gsoc/zheyuan/utility/benchmark/to_ask1.txt --inference_output_file=../gsoc/zheyuan/utility/benchmark/output1.txt --src=en --tgt=sparql | tail -n4')
os.system(
-'python -m nmt.nmt --vocab_prefix=../' + folder_name + '/vocab --model_dir=../' + folder_name + '_model --inference_input_file=../gsoc/zheyuan/utility/benchmark/to_ask.txt --inference_output_file=../gsoc/zheyuan/utility/benchmark/output.txt --out_dir=../' + folder_name + '_model --src=en --tgt=sparql | tail -n4')
+'python -m nmt.nmt --vocab_prefix=../' + folder_name + '/vocab --out_dir=../' + folder_name + '_model --inference_input_file=../gsoc/zheyuan/utility/benchmark/to_ask1.txt --inference_output_file=../gsoc/zheyuan/utility/benchmark/output1.txt --src=en --tgt=sparql | tail -n4')

os.system('''if [ $? -eq 0 ]
then
echo ""
echo "ANSWER IN SPARQL SEQUENCE:"
-ENCODED="$(cat ../gsoc/zheyuan/utility/benchmark/output.txt)"
-python ../interpreter.py "${ENCODED}" > ../gsoc/zheyuan/utility/benchmark/output_decoded.txt
-cat ../gsoc/zheyuan/utility/benchmark/output_decoded.txt
+ENCODED="$(cat ../gsoc/zheyuan/utility/benchmark/output1.txt)"
+python ../interpreter.py "${ENCODED}" > ../gsoc/zheyuan/utility/benchmark/output_decoded1.txt
+cat ../gsoc/zheyuan/utility/benchmark/output_decoded1.txt
echo ""
fi''')
print('end')
@@ -24,4 +25,5 @@ def interprete(trained_model_folder):
Section to test the Interpreter.
"""
interprete('monument_300')
-pass
+pass
+
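For readability, a sketch of the inference command that interprete() now builds and passes to os.system(); the trained_model_folder value is a hypothetical example, and the flags are copied from the diff (the call now relies on --out_dir alone instead of also passing --model_dir).

trained_model_folder = "monument_300"   # example value, as in the test section above
folder_name = "data/" + trained_model_folder
benchmark_dir = "../gsoc/zheyuan/utility/benchmark/"

cmd = (
    "python -m nmt.nmt"
    " --vocab_prefix=../" + folder_name + "/vocab"
    " --out_dir=../" + folder_name + "_model"
    " --inference_input_file=" + benchmark_dir + "to_ask1.txt"
    " --inference_output_file=" + benchmark_dir + "output1.txt"
    " --src=en --tgt=sparql | tail -n4"
)
print(cmd)   # the added print statement echoes this command before it is executed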
3 changes: 3 additions & 0 deletions gsoc/zheyuan/utility/benchmark/reconstruct_json.py
@@ -2,9 +2,11 @@ def construct_json(dataset_id,infos, questions, sparqls, answers):
qald_test_answers_dict = {}
qald_test_answers_dict["dataset"] = {'id':dataset_id}
qald_test_answers_dict['questions'] = []
+print(len(answers))
for index,info in enumerate(infos):

question_dict = info
+
id = info["id"]
question = questions[id]
question_dict["question"] = [{
@@ -13,6 +15,7 @@ def construct_json(dataset_id,infos, questions, sparqls, answers):
}]
question_dict["query"] = {"sparql" : sparqls[index]}
question_dict["answers"] = answers[index]
+print(answers[index])
qald_test_answers_dict['questions'].append(question_dict)
return qald_test_answers_dict

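For reference, a rough sketch of the QALD-style dictionary that construct_json assembles; the top-level keys mirror the code above, while the inner "question" fields elided by the diff and all sample values are assumptions, not repository data.

qald_test_answers_dict = {
    "dataset": {"id": "qald-9-test-multilingual"},                 # placeholder id
    "questions": [
        {
            "id": "1",                                             # carried over from the question info
            "question": [{"language": "en", "string": "..."}],     # inner fields assumed; elided in the diff
            "query": {"sparql": "SELECT ?uri WHERE { ... }"},      # placeholder query
            "answers": [{"head": {"vars": ["uri"]}, "results": {"bindings": []}}],
        }
    ],
}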
16 changes: 13 additions & 3 deletions gsoc/zheyuan/utility/benchmark/retrieve_answers.py
@@ -8,7 +8,7 @@
def read_sparqls():
os.system("pwd")
sparqls = []
-file_path = "../gsoc/zheyuan/utility/benchmark/output_decoded.txt"
+file_path = "../gsoc/zheyuan/utility/benchmark/output_decoded1.txt"
with open(file_path, 'r') as lines:
for line in lines:
sparqls.append(line)
@@ -42,6 +42,7 @@ def retrieve(query):
}
}

+
answer_dict["results"]["bindings"].append(uri)

for td in rows.find_all("pre"):
@@ -54,9 +55,18 @@ def retrieve(query):
"value": a
}
}

answer_dict["results"]["bindings"].append(uri)
-answers.append(answer_dict)
+if answer_dict["results"]["bindings"]:
+answers.append(answer_dict)
+
+if not answers:
+return [{
+"head" : {
+"vars" : [ "date" ]
+},
+"results" : { }
+}]
return answers

@@ -73,7 +83,7 @@ def retrieve(query):
# query = args.query
answer_groups = []
i = 1
-with open("./output_decoded.txt", 'r') as lines:
+with open("../output_decoded.txt", 'r') as lines:
for line in lines:
i+=1
try:
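A condensed sketch of the behaviour the retrieve_answers.py change introduces: an answer dict is only kept when it actually contains bindings, and when nothing was retrieved a single placeholder group (the same shape the diff returns) is produced instead. The helper name collect_answers is hypothetical, used here only to isolate that logic.

def collect_answers(answer_dicts):
    # Keep only answer dicts whose result bindings are non-empty.
    answers = [d for d in answer_dicts if d["results"].get("bindings")]
    if not answers:
        # Placeholder group returned when no query produced bindings.
        return [{"head": {"vars": ["date"]}, "results": {}}]
    return answers

print(collect_answers([]))  # -> [{'head': {'vars': ['date']}, 'results': {}}]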
9 changes: 9 additions & 0 deletions requirements.txt
@@ -25,3 +25,12 @@ tensorflow==1.15.2
termcolor==1.1.0
tqdm==4.43.0
Werkzeug==1.0.0
+Mittens
+tensorflow_hub
+requests
+torch
+transformers
+constant
+NLTK
+xmltodict
+sklearn
