diff --git a/build_medicalgraph.py b/build_medicalgraph.py index 8c5bc5a..e7088cc 100644 --- a/build_medicalgraph.py +++ b/build_medicalgraph.py @@ -47,7 +47,7 @@ def read_nodes(self): count = 0 - for data in open(self.data_path): + for data in open(self.data_path, encoding='utf-8'): disease_dict = {} count += 1 print(count) @@ -237,13 +237,13 @@ def create_relationship(self, start_node, end_node, edges, rel_type, rel_name): '''导出数据''' def export_data(self): Drugs, Foods, Checks, Departments, Producers, Symptoms, Diseases, disease_infos, rels_check, rels_recommandeat, rels_noteat, rels_doeat, rels_department, rels_commonddrug, rels_drug_producer, rels_recommanddrug, rels_symptom, rels_acompany, rels_category = self.read_nodes() - f_drug = open('drug.txt', 'w+') - f_food = open('food.txt', 'w+') - f_check = open('check.txt', 'w+') - f_department = open('department.txt', 'w+') - f_producer = open('producer.txt', 'w+') - f_symptom = open('symptoms.txt', 'w+') - f_disease = open('disease.txt', 'w+') + f_drug = open('drug.txt', 'w+', encoding='utf-8') + f_food = open('food.txt', 'w+', encoding='utf-8') + f_check = open('check.txt', 'w+', encoding='utf-8') + f_department = open('department.txt', 'w+', encoding='utf-8') + f_producer = open('producer.txt', 'w+', encoding='utf-8') + f_symptom = open('symptoms.txt', 'w+', encoding='utf-8') + f_disease = open('disease.txt', 'w+', encoding='utf-8') f_drug.write('\n'.join(list(Drugs))) f_food.write('\n'.join(list(Foods))) diff --git a/question_classifier.py b/question_classifier.py index 2ee72b8..727a58b 100644 --- a/question_classifier.py +++ b/question_classifier.py @@ -20,15 +20,15 @@ def __init__(self): self.symptom_path = os.path.join(cur_dir, 'dict/symptom.txt') self.deny_path = os.path.join(cur_dir, 'dict/deny.txt') # 加载特征词 - self.disease_wds= [i.strip() for i in open(self.disease_path) if i.strip()] - self.department_wds= [i.strip() for i in open(self.department_path) if i.strip()] - self.check_wds= [i.strip() for i in open(self.check_path) if i.strip()] - self.drug_wds= [i.strip() for i in open(self.drug_path) if i.strip()] - self.food_wds= [i.strip() for i in open(self.food_path) if i.strip()] - self.producer_wds= [i.strip() for i in open(self.producer_path) if i.strip()] - self.symptom_wds= [i.strip() for i in open(self.symptom_path) if i.strip()] + self.disease_wds= [i.strip() for i in open(self.disease_path, encoding='utf-8') if i.strip()] + self.department_wds= [i.strip() for i in open(self.department_path, encoding='utf-8') if i.strip()] + self.check_wds= [i.strip() for i in open(self.check_path, encoding='utf-8') if i.strip()] + self.drug_wds= [i.strip() for i in open(self.drug_path, encoding='utf-8') if i.strip()] + self.food_wds= [i.strip() for i in open(self.food_path, encoding='utf-8') if i.strip()] + self.producer_wds= [i.strip() for i in open(self.producer_path, encoding='utf-8') if i.strip()] + self.symptom_wds= [i.strip() for i in open(self.symptom_path, encoding='utf-8') if i.strip()] self.region_words = set(self.department_wds + self.disease_wds + self.check_wds + self.drug_wds + self.food_wds + self.producer_wds + self.symptom_wds) - self.deny_words = [i.strip() for i in open(self.deny_path) if i.strip()] + self.deny_words = [i.strip() for i in open(self.deny_path, encoding='utf-8') if i.strip()] # 构造领域actree self.region_tree = self.build_actree(list(self.region_words)) # 构建词典