Merge pull request #34 from BaiBlanc/master
Benchmark
BaiBlanc authored Aug 28, 2020
2 parents e22e36f + 903cfc7 commit d795771
Showing 6 changed files with 37 additions and 14 deletions.
6 changes: 3 additions & 3 deletions gsoc/zheyuan/utility/benchmark/benchmark.py
@@ -29,11 +29,11 @@ def benchmark(trained_model, test_set, answer_file="answers.json"):
answer_group = []
answers.append(answer_group)

-json_file = construct_json("qald-9-train-multilingual", questions_info, questions, sparqls, answers)
+json_file = construct_json(test_set.replace(".qald.json",""), questions_info, questions, sparqls, answers)
path = "../gsoc/zheyuan/utility/benchmark/"
-with open(path+"answers.qald.json", "w") as f:
+with open(path+"answers-"+test_set, "w") as f:
# js = json.dumps(json_file, indent=4, separators=(',', ':'))
-json.dump(json_file, f)
+json.dump(json_file, f, indent=4, separators=(', ', ': '))

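Note on the benchmark.py change, as a minimal sketch: the answers file is now named after the test set that was evaluated, and the JSON is pretty-printed. The test_set value and the toy json_file below are placeholders, not values from the repository.

import json

# Hypothetical example values, for illustration only.
test_set = "qald-9-test-multilingual.qald.json"
path = "./"  # the repository writes to "../gsoc/zheyuan/utility/benchmark/"

# Dataset id is the test-set file name without its ".qald.json" suffix.
dataset_id = test_set.replace(".qald.json", "")   # -> "qald-9-test-multilingual"
json_file = {"dataset": {"id": dataset_id}, "questions": []}

# Output file is named after the test set and written with indentation.
with open(path + "answers-" + test_set, "w") as f:
    json.dump(json_file, f, indent=4, separators=(', ', ': '))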
5 changes: 2 additions & 3 deletions gsoc/zheyuan/utility/benchmark/extract_questions.py
@@ -29,11 +29,10 @@ def read_json(file):

return questions_info, questions
def write_to_ask(questions):
-with open('to_ask.txt', 'w') as write_f:
+with open('to_ask1.txt', 'w') as write_f:
for key in questions:
question = questions[key]
-write_f.write(question+"\n")
-
+write_f.write(question.lower().replace("?"," ?")+"\n")

if __name__ == "__main__":
"""
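A quick sketch of the preprocessing the new write line applies before each question goes into to_ask1.txt: lowercasing and detaching the trailing question mark so it becomes its own token (presumably to match the space-tokenized NMT vocabulary; the sample question below is made up).

question = "Where was Angela Merkel born?"

# Lowercase the question and split off the "?" as a separate token.
prepared = question.lower().replace("?", " ?")
print(prepared)  # where was angela merkel born ?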
12 changes: 7 additions & 5 deletions gsoc/zheyuan/utility/benchmark/interpreter.py
@@ -5,16 +5,17 @@ def interprete(trained_model_folder):
os.system('pwd')
print('start')
folder_name = 'data/'+trained_model_folder
+print('python -m nmt.nmt --vocab_prefix=../' + folder_name + '/vocab --out_dir=../' + folder_name + '_model --inference_input_file=../gsoc/zheyuan/utility/benchmark/to_ask1.txt --inference_output_file=../gsoc/zheyuan/utility/benchmark/output1.txt --src=en --tgt=sparql | tail -n4')
os.system(
-'python -m nmt.nmt --vocab_prefix=../' + folder_name + '/vocab --model_dir=../' + folder_name + '_model --inference_input_file=../gsoc/zheyuan/utility/benchmark/to_ask.txt --inference_output_file=../gsoc/zheyuan/utility/benchmark/output.txt --out_dir=../' + folder_name + '_model --src=en --tgt=sparql | tail -n4')
+'python -m nmt.nmt --vocab_prefix=../' + folder_name + '/vocab --out_dir=../' + folder_name + '_model --inference_input_file=../gsoc/zheyuan/utility/benchmark/to_ask1.txt --inference_output_file=../gsoc/zheyuan/utility/benchmark/output1.txt --src=en --tgt=sparql | tail -n4')

os.system('''if [ $? -eq 0 ]
then
echo ""
echo "ANSWER IN SPARQL SEQUENCE:"
-ENCODED="$(cat ../gsoc/zheyuan/utility/benchmark/output.txt)"
-python ../interpreter.py "${ENCODED}" > ../gsoc/zheyuan/utility/benchmark/output_decoded.txt
-cat ../gsoc/zheyuan/utility/benchmark/output_decoded.txt
+ENCODED="$(cat ../gsoc/zheyuan/utility/benchmark/output1.txt)"
+python ../interpreter.py "${ENCODED}" > ../gsoc/zheyuan/utility/benchmark/output_decoded1.txt
+cat ../gsoc/zheyuan/utility/benchmark/output_decoded1.txt
echo ""
fi''')
print('end')
@@ -24,4 +25,5 @@ def interprete(trained_model_folder):
Section to test the Interpreter.
"""
interprete('monument_300')
-pass
+pass
+
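For readability, a sketch of the inference command that interprete() now builds and passes to os.system(); the trained_model_folder value is a hypothetical example, and the flags are copied from the diff (the call now relies on --out_dir alone instead of also passing --model_dir).

trained_model_folder = "monument_300"   # example value, as in the test section above
folder_name = "data/" + trained_model_folder
benchmark_dir = "../gsoc/zheyuan/utility/benchmark/"

cmd = (
    "python -m nmt.nmt"
    " --vocab_prefix=../" + folder_name + "/vocab"
    " --out_dir=../" + folder_name + "_model"
    " --inference_input_file=" + benchmark_dir + "to_ask1.txt"
    " --inference_output_file=" + benchmark_dir + "output1.txt"
    " --src=en --tgt=sparql | tail -n4"
)
print(cmd)   # the added print statement echoes this command before it is executed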
3 changes: 3 additions & 0 deletions gsoc/zheyuan/utility/benchmark/reconstruct_json.py
@@ -2,9 +2,11 @@ def construct_json(dataset_id,infos, questions, sparqls, answers):
qald_test_answers_dict = {}
qald_test_answers_dict["dataset"] = {'id':dataset_id}
qald_test_answers_dict['questions'] = []
+print(len(answers))
for index,info in enumerate(infos):

question_dict = info
+
id = info["id"]
question = questions[id]
question_dict["question"] = [{
@@ -13,6 +15,7 @@ def construct_json(dataset_id,infos, questions, sparqls, answers):
}]
question_dict["query"] = {"sparql" : sparqls[index]}
question_dict["answers"] = answers[index]
+print(answers[index])
qald_test_answers_dict['questions'].append(question_dict)
return qald_test_answers_dict

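For reference, a rough sketch of the QALD-style dictionary that construct_json assembles; the top-level keys mirror the code above, while the inner "question" fields elided by the diff and all sample values are assumptions, not repository data.

qald_test_answers_dict = {
    "dataset": {"id": "qald-9-test-multilingual"},                 # placeholder id
    "questions": [
        {
            "id": "1",                                             # carried over from the question info
            "question": [{"language": "en", "string": "..."}],     # inner fields assumed; elided in the diff
            "query": {"sparql": "SELECT ?uri WHERE { ... }"},      # placeholder query
            "answers": [{"head": {"vars": ["uri"]}, "results": {"bindings": []}}],
        }
    ],
}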
16 changes: 13 additions & 3 deletions gsoc/zheyuan/utility/benchmark/retrieve_answers.py
@@ -8,7 +8,7 @@
def read_sparqls():
os.system("pwd")
sparqls = []
-file_path = "../gsoc/zheyuan/utility/benchmark/output_decoded.txt"
+file_path = "../gsoc/zheyuan/utility/benchmark/output_decoded1.txt"
with open(file_path, 'r') as lines:
for line in lines:
sparqls.append(line)
@@ -42,6 +42,7 @@ def retrieve(query):
}
}

+
answer_dict["results"]["bindings"].append(uri)

for td in rows.find_all("pre"):
@@ -54,9 +55,18 @@ def retrieve(query):
"value": a
}
}

answer_dict["results"]["bindings"].append(uri)
-answers.append(answer_dict)
+if answer_dict["results"]["bindings"]:
+answers.append(answer_dict)
+
+if not answers:
+return [{
+"head" : {
+"vars" : [ "date" ]
+},
+"results" : { }
+}]
return answers

@@ -73,7 +83,7 @@ def retrieve(query):
# query = args.query
answer_groups = []
i = 1
-with open("./output_decoded.txt", 'r') as lines:
+with open("../output_decoded.txt", 'r') as lines:
for line in lines:
i+=1
try:
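A condensed sketch of the behaviour the retrieve_answers.py change introduces: an answer dict is only kept when it actually contains bindings, and when nothing was retrieved a single placeholder group (the same shape the diff returns) is produced instead. The helper name collect_answers is hypothetical, used here only to isolate that logic.

def collect_answers(answer_dicts):
    # Keep only answer dicts whose result bindings are non-empty.
    answers = [d for d in answer_dicts if d["results"].get("bindings")]
    if not answers:
        # Placeholder group returned when no query produced bindings.
        return [{"head": {"vars": ["date"]}, "results": {}}]
    return answers

print(collect_answers([]))  # -> [{'head': {'vars': ['date']}, 'results': {}}]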
9 changes: 9 additions & 0 deletions requirements.txt
@@ -25,3 +25,12 @@ tensorflow==1.15.2
termcolor==1.1.0
tqdm==4.43.0
Werkzeug==1.0.0
+Mittens
+tensorflow_hub
+requests
+torch
+transformers
+constant
+NLTK
+xmltodict
+sklearn
