-
Notifications
You must be signed in to change notification settings - Fork 1
/
buildModels.py
40 lines (31 loc) · 1.54 KB
/
buildModels.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import kindred
import argparse
import pickle
import os
if __name__ == '__main__':
    # Train one kindred relation classifier per relation type listed below
    # and pickle each of them to <outDir>/<relationType>.model.
    parser = argparse.ArgumentParser(description='Build and save a classifier')
    parser.add_argument('--inTrain',type=str,required=True,help='Directory with corpus in standoff format')
    parser.add_argument('--outDir',type=str,required=True,help='Directory to store output model files')
    args = parser.parse_args()

    # Make sure the output directory exists before any training starts, so
    # a missing directory cannot make the save step fail after a model has
    # already been trained. An empty path means "current directory" for
    # os.path.join below, so there is nothing to create in that case.
    if args.outDir:
        os.makedirs(args.outDir, exist_ok=True)

    # Each entry: (relation type, per-relation threshold, entity types that
    # take part in the relation).
    relationInfo = [
        ('AssociatedVariant',0.6,('gene','variant')),
        ('Diagnostic',0.7,('cancer','gene')),
        ('Predictive',0.92,('cancer','drug','gene')),
        ('Prognostic',0.7,('cancer','gene')),
        ('Predisposing',0.96,('cancer','gene')),
    ]

    # NOTE(review): every classifier is built with this fixed threshold; the
    # per-relation thresholds stored in relationInfo are not used for
    # training. Confirm that this override is intended before relying on
    # (or removing) the values in the table above.
    threshold = 0.5

    for relationType,_tableThreshold,entityTypes in relationInfo:
        print("Building %s model" % relationType)
        print(" Loading training")
        # The corpus is reloaded on every iteration because the relation
        # lists of its documents are filtered in place below.
        trainCorpus = kindred.load('standoff',args.inTrain)

        # Keep only relations of the current type that involve exactly as
        # many entities as this relation type expects.
        entityCount = len(entityTypes)
        for doc in trainCorpus.documents:
            doc.relations = [
                r for r in doc.relations
                if r.relationType == relationType and len(r.entities) == entityCount
            ]

        print(" Doing training")
        classifier = kindred.RelationClassifier(
            classifierType='LogisticRegression',
            threshold=threshold,
            entityCount=entityCount,
            acceptedEntityTypes=[entityTypes],
            model='en_core_sci_sm',
        )
        classifier.train(trainCorpus)

        print(" Saving classifer")
        outModel = os.path.join(args.outDir,"%s.model" % relationType)
        with open(outModel,'wb') as f:
            pickle.dump(classifier,f)

        print(" Output done!")