Skip to content

Commit

Permalink
Fixing bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
BaiBlanc committed Aug 30, 2020
1 parent 9f4dcad commit a5d48db
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 6 deletions.
10 changes: 8 additions & 2 deletions gsoc/zheyuan/pipeline/batch_paraphrase.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
import os
import tensorflow as tf
tf.compat.v1.enable_eager_execution()
from paraphrase_questions import paraphrase_questions,get_pretrained_model,prepare_model,set_seed,pick_final_sentence, pick_final_sentence_advanced
Expand All @@ -13,15 +14,17 @@ def batch_paraphrase(templates_path, model_dir):
folder_path = get_pretrained_model(const.URL)
set_seed(42)
tokenizer, device, model = prepare_model(folder_path)
with open(templates_path, "r") as lines:
with open(templates_path + "_paraphrased", "w") as w:
dir = os.path.realpath(templates_path)
with open(dir, "r") as lines:
with open(dir + "_paraphrased", "w") as w:
for line in lines:
prop = line.strip("\n").split(seperator)
question = prop[3]
paraphrased_candidates = paraphrase_questions(tokenizer, device, model, question)
paraphrased = pick_final_sentence(question, paraphrased_candidates)
advanced = pick_final_sentence_advanced(device, question, paraphrased_candidates, model_dir)
w.write(line)
print("Original", line)
# for i, candidate in enumerate(paraphrased_candidates):
# new_prop = prop[:-1]
# new_prop[3] = candidate
Expand All @@ -35,11 +38,14 @@ def batch_paraphrase(templates_path, model_dir):
new_prop.append("Paraphrased \n")
new_line = seperator.join(new_prop)
w.write(new_line)
print("Paraphrase", new_line)

new_prop = prop[:-1]
new_prop[3] = advanced
new_prop.append("Paraphrased advanced\n")
new_line = seperator.join(new_prop)
w.write(new_line)
print("Advanced", new_line)


if __name__=="__main__":
Expand Down
9 changes: 7 additions & 2 deletions gsoc/zheyuan/pipeline/pipeline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,14 @@ fi


# 1. Generate templates
python multi_generate_templates.py --label '['Agent', 'Place', 'Work', 'Species', 'TopicalConcept', 'MeanOfTransportation', 'Event', 'Algorithm', 'Altitude', 'AnatomicalStructure', 'Area', 'Award', 'Biomolecule', 'Blazon', 'Browser', 'ChartsPlacements', 'ChemicalSubstance', 'Cipher', 'Colour', 'Currency', 'Demographics', 'Depth', 'Device', 'Diploma', 'Disease', 'ElectionDiagram', 'ElectricalSubstation', 'EthnicGroup', 'FileSystem', 'Flag', 'Food', 'GeneLocation', 'GrossDomesticProduct', 'Holiday', 'Identifier', 'Language', 'List', 'Media', 'MedicalSpecialty', 'Medicine', 'Name', 'PersonFunction', 'Population', 'Protocol', 'PublicService', 'Relationship', 'PersonFunction', 'SportsSeason', 'Spreadsheet', 'StarCluster', 'Statistic', 'Tank', 'TimePeriod', 'UnitOfWork', 'Unknown']' --project_name $1 --depth 1 --multi True
# Path at which the part-r-00000 data file is looked for before downloading.
partr="../utility/part-r-00000"

# Fetch and unpack the data file only when it is not already present.
# part-r-00000 is a regular file (the output of `gzip -d`), so the check must
# use -f; a -d (directory) test never matches it, which forced a fresh
# download on every run. The expansion is quoted to be safe against
# whitespace in the path.
if [ ! -f "$partr" ]; then
    # NOTE(review): wget/gzip leave part-r-00000 in the current directory,
    # not at $partr -- confirm which location the template generator reads.
    wget https://s3.amazonaws.com/subjectiveEye/0.9/subjectiveEye3D/part-r-00000.gz
    gzip -d part-r-00000.gz
fi
python multi_generate_templates.py --label '['Agent', 'Place', 'Work', 'Species', 'TopicalConcept', 'MeanOfTransportation', 'Event', 'AnatomicalStructure', 'Device', 'TimePeriod', 'Activity']' --project_name $1 --depth 1 --multi True
#'['Agent', 'Place', 'Work', 'Species', 'TopicalConcept', 'MeanOfTransportation', 'Event', 'Algorithm', 'Altitude', 'AnatomicalStructure', 'Area', 'Award', 'Biomolecule', 'Blazon', 'Browser', 'ChartsPlacements', 'ChemicalSubstance', 'Cipher', 'Colour', 'Currency', 'Demographics', 'Depth', 'Device', 'Diploma', 'Disease', 'ElectionDiagram', 'ElectricalSubstation', 'EthnicGroup', 'FileSystem', 'Flag', 'Food', 'GeneLocation', 'GrossDomesticProduct', 'Holiday', 'Identifier', 'Language', 'List', 'Media', 'MedicalSpecialty', 'Medicine', 'Name', 'PersonFunction', 'Population', 'Protocol', 'PublicService', 'Relationship', 'PersonFunction', 'SportsSeason', 'Spreadsheet', 'StarCluster', 'Statistic', 'Tank', 'TimePeriod', 'UnitOfWork', 'Unknown']'
# 2. Batch Paraphrasing
# 2.1 Download BERT-Classifier

Expand Down Expand Up @@ -132,4 +138,3 @@ fi
cd ..


fi
2 changes: 1 addition & 1 deletion gsoc/zheyuan/utility/GloVe/glove_finetune.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def finetune_glove(project_path, glove_path="glove.6B.300d.txt", dimension=300):
sentence = sentence.strip("\n")
sentence = "<s> " + sentence + " </s>"
for word in sentence.split():
word_en.append(word.strip(":").strip("\"").strip("»").strip("+").strip("?").replace("i̇", ""))
word_en.append(word.strip(":").strip("\"").strip("»").strip("+").strip("?"))
print(len(word_en), word_en[:20])

vocab_en = list(set(word_en) - set(["<s>", "</s>"]))
Expand Down
2 changes: 1 addition & 1 deletion gsoc/zheyuan/utility/vocab_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def sparql_vocab(project_path):

def add_s_tokens(path):
with open(path+"/data.sparql", "r") as lines:
with open("./GloVe/GloVe-master/data_s.sparql", "w") as w:
with open(path+"/../../GloVe/data_s.sparql", "w") as w:
for line in lines:
new_line = "<s> " + line.strip() + " </s>\n"
w.write(new_line)
Expand Down

0 comments on commit a5d48db

Please sign in to comment.