// liberal-parser.bsh
import java.io.File;
import java.util.Calendar;
import java.util.logging.Level;
import java.util.logging.Logger;
import rpi.nlp.arl.amrTranslator.AMRTranslator;
import rpi.nlp.arl.amrTranslator.SystemAMRTranslator;
import rpi.nlp.arl.clustering.JointCluster;
import rpi.nlp.arl.identifier.TriggerArgIdentifier;
import rpi.nlp.arl.naming.ArgumentRoleNaming;
import rpi.nlp.arl.naming.TriggerNaming;
import rpi.nlp.arl.representation.GeneralRepresentation;
import rpi.nlp.arl.representation.TAEOutput;
import rpi.nlp.arl.util.EntityIndex;
import rpi.nlp.arl.util.PutEntityArguments;
import rpi.nlp.word2vec.VectorModel;
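// Two pipeline entry points: perfectAMR() runs the event-discovery pipeline on gold (human) AMR
// annotations and alignments; systemAMR() runs the same steps on system-produced AMR parses.
// Only systemAMR() is invoked by the script body at the bottom of this file.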
void perfectAMR(String io_dir){
// ************ INPUT ********************************
// INPUT: the directory of the gold AMR annotation files
String parsedFilePath = io_dir + File.separator + "AMRParsingHuman/";
// INPUT: the directory of the gold AMR alignment files
String alignmentFilePath = io_dir + File.separator + "AMRAlignment/";
// MEDIATE: nodes and edges extracted from AMR parsing
String nodeEdgeFilePath = io_dir + File.separator + "AMRNodeEdge/";
// MEDIATE: triggers and corresponding arguments
String triggerArgFile = io_dir + File.separator + "triggerArgFile.txt";
// MEDIATE: word sense vectors for triggers
String triggerGeneralVec = io_dir + File.separator + "triggerGeneralVec.txt";
// MEDIATE: general vectors for arguments
String argumentVec = io_dir + File.separator + "argumentVec.txt";
// MEDIATE: event structure representations
String triggerMultilayerStrucRep = io_dir + File.separator + "triggerMultilayerStructureRep.txt";
//extract nodes and edges from AMR parsing results
AMRTranslator.translate(parsedFilePath, alignmentFilePath, nodeEdgeFilePath);
System.out.println("finished extracting nodes and edges ..");
//Identify trigger and argument candidates
// INPUT: annotations for AMR relations: whether each relation is used to compose the event structure or to specify arguments
String annotatedRelationFile = "Data/Resources/amrRelationsAnnotated.txt";
TriggerArgIdentifier.identifyTriggerArgs(nodeEdgeFilePath, triggerArgFile, annotatedRelationFile);
System.out.println("finished extracting triggers and arguments ..");
// MEDIATE: Entities from AMR parsing
String amrParsedEntities = "Data/Resources/amrParsedEntities.txt";
//EntityIndex.readEntitys(nodeEdgeFilePath, amrParsedEntities);
// MEDIATE: trigger/argument file augmented with entity arguments
String newTriggerArgFile = io_dir + File.separator + "triggerArgFile1.txt";
PutEntityArguments.put(amrParsedEntities, triggerArgFile, newTriggerArgFile);
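// Note: the steps below keep reading triggerArgFile; newTriggerArgFile is not consumed further in this script.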
//train Tensor/Matrix representations for each relation
// INPUT: pre-trained word sense embeddings
String modelFile = "Data/model/wsd.model";
VectorModel vm = VectorModel.loadFromFile(modelFile);
// TreeTensorAE ttae = new TreeTensorAE();
// ttae.pipeline(nodeEdgeFilePath, vm);
// System.out.println("finished training relation representations ..");
// generate tree structure representation
TAEOutput.generateResults(vm, nodeEdgeFilePath, triggerMultilayerStrucRep);
System.out.println("finished generating structure representations ..");
// generate trigger and argument representations, and update structure representations
// MEDIATE: will generate a new structure representation file: triggerMultilayerStrucRep+".processed"
GeneralRepresentation.generalRepresentation(modelFile, triggerGeneralVec, argumentVec, triggerArgFile);
GeneralRepresentation.processStructureRep1(triggerGeneralVec, triggerMultilayerStrucRep);
System.out.println("finished generating trigger and arg representations ..");
int min = 100;
int maxNum = 120;
double balance = 0.6;
String signal = "test";
System.out.println("start clustering ..");
// MEDIATE: will generate many clustering results
String triggerResultPath = io_dir + File.separator + "Cluster/triggerOverallCluster";
String argResultPath = io_dir + File.separator + "Cluster/argOverallCluster";
double minOptimalValue = 100000000;
int minNumOfClusterTrigger=0, minNumOfClusterArgument=0;
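// Grid search over the number of trigger clusters (step 2) and argument clusters (step 5),
// keeping the pair of cluster counts that yields the lowest joint clustering objective.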
for(int numOfClusterTrigger = min; numOfClusterTrigger<maxNum; numOfClusterTrigger=numOfClusterTrigger+2){
    for(int numOfClusterArgument = min; numOfClusterArgument<maxNum; numOfClusterArgument=numOfClusterArgument+5){
        JointCluster jc = new JointCluster();
        double optimalValue = jc.jointOptimizedClusteringOverall(triggerGeneralVec, triggerMultilayerStrucRep+".processed", argumentVec, numOfClusterTrigger, numOfClusterArgument, triggerResultPath, argResultPath, triggerArgFile, balance, signal);
        if(optimalValue<minOptimalValue){
            minOptimalValue = optimalValue;
            minNumOfClusterTrigger = numOfClusterTrigger;
            minNumOfClusterArgument = numOfClusterArgument;
        }
    }
}
System.out.println("finish Clustering...");
String optimalTriggerClusterFile = triggerResultPath + "."+minNumOfClusterTrigger + "." + minNumOfClusterArgument;
String optimalArgumentClusterFile = argResultPath + "."+minNumOfClusterTrigger + "." + minNumOfClusterArgument;
String namingMethod = "majority"; // majority or centroid
String namedClusterFile = optimalTriggerClusterFile+"."+namingMethod;
TriggerNaming.naming1(optimalTriggerClusterFile, namedClusterFile, triggerGeneralVec, namingMethod);
// INPUT: arg role mapping results extracted from FrameNet, VerbNet, and PropBank
String roleMapFile = "Data/Resources/argRoleMap1.txt";
// MEDIATE: the output, including triggers, arguments, and their roles.
String roleNamingResult = io_dir + File.separator + "argRoleNamingResult.txt";
EntityIndex.readEntitys(nodeEdgeFilePath, amrParsedEntities);
// MEDIATE: remove some noisy args
String notArgFile = "Data/Resources/notArgs.txt";
ArgumentRoleNaming.argumentRoleNaming(modelFile, notArgFile, namedClusterFile, triggerArgFile, roleMapFile, amrParsedEntities, roleNamingResult);
// OUTPUT: the final output, including triggers, arguments, and their roles.
// OUTPUT FILE: finalOutput
String finalOutput = io_dir + File.separator + "output.human.txt";
ArgumentRoleNaming.processDoc(roleNamingResult, finalOutput);
}
void systemAMR(String io_dir, int min_cluster, int max_cluster){
// ************ INPUT **********************************
// INPUT: the directory of the system AMR annotation files
String parsedFilePath = io_dir + File.separator + "AMRParsingSystem/";
// MEDIATE: nodes and edges extracted from AMR parsing
String nodeEdgeFilePath = io_dir + File.separator + "AMRNodeEdgeSystem/";
// MEDIATE: triggers and corresponding arguments
String triggerArgFile = io_dir + File.separator + "triggerArgFile.System.txt";
// MEDIATE: word sense vectors for triggers
String triggerGeneralVec = io_dir + File.separator + "triggerGeneralVec.System.txt";
// MEDIATE: general vectors for arguments
String argumentVec = io_dir + File.separator + "argumentVec.System.txt";
// MEDIATE: event structure representations
String triggerMultilayerStrucRep = io_dir + File.separator + "triggerMultilayerStrucRep.System.txt";
// extract nodes and edges from AMR parsing results
SystemAMRTranslator.translate(parsedFilePath, nodeEdgeFilePath);
System.out.println("finish extracting nodes and edges ..");
// Identify trigger and argument candidates
// INPUT: annotations for AMR relations: whether each relation is used to compose the event structure or to specify arguments
String annotatedRelationFile = "Data/Resources/amrRelationsAnnotated.txt";
TriggerArgIdentifier.identifyTriggerArgs(nodeEdgeFilePath, triggerArgFile, annotatedRelationFile);
System.out.println("finish extracting triggers and arguments ..");
// train Tensor/Matrix representations for each relation
// INPUT: pre-trained word sense embeddings
String modelFile = "Data/model/wsd.model";
VectorModel vm = VectorModel.loadFromFile(modelFile);
// TreeTensorAE ttae = new TreeTensorAE();
// ttae.pipeline(nodeEdgeFilePath, vm);
// System.out.println("finished training relation representations ..");
// generate tree structure representation
TAEOutput.generateResults(vm, nodeEdgeFilePath, triggerMultilayerStrucRep);
System.out.println("finish generating structure representations ..");
// generate trigger and argument representations
// MEDIATE: will generate a new structure representation file: triggerMultilayerStrucRep+".processed"
GeneralRepresentation.generalRepresentation(modelFile, triggerGeneralVec, argumentVec, triggerArgFile);
GeneralRepresentation.processStructureRep1(triggerGeneralVec, triggerMultilayerStrucRep);
System.out.println("finish generating trigger and arg representations ..");
int min = min_cluster;
int maxNum = max_cluster;
double balance = 0.7;
String signal = "System";
System.out.println("start clustering ..");
// MEDIATE: will generate many clustering results
String triggerResultPath = io_dir + File.separator + "Cluster/triggerOverallCluster";
String argResultPath = io_dir + File.separator + "Cluster/argOverallCluster";
double minOptimalValue = 100000000;
int minNumOfClusterTrigger=min, minNumOfClusterArgument=min;
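// Grid search over trigger/argument cluster counts, as in perfectAMR(), using the caller-supplied bounds.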
for(int numOfClusterTrigger = min; numOfClusterTrigger<maxNum; numOfClusterTrigger=numOfClusterTrigger+2){
    for(int numOfClusterArgument = min; numOfClusterArgument<maxNum; numOfClusterArgument=numOfClusterArgument+5){
        JointCluster jc = new JointCluster();
        double optimalValue = jc.jointOptimizedClusteringOverall(triggerGeneralVec, triggerMultilayerStrucRep+".processed", argumentVec, numOfClusterTrigger, numOfClusterArgument, triggerResultPath, argResultPath, triggerArgFile, balance, signal);
        if(optimalValue<minOptimalValue){
            minOptimalValue = optimalValue;
            minNumOfClusterTrigger = numOfClusterTrigger;
            minNumOfClusterArgument = numOfClusterArgument;
        }
    }
}
System.out.println("finish Clustering...");
String optimalTriggerClusterFile = triggerResultPath + "."+minNumOfClusterTrigger + "." + minNumOfClusterArgument;
String optimalArgumentClusterFile = argResultPath + "."+minNumOfClusterTrigger + "." + minNumOfClusterArgument;
String namingMethod = "majority"; // majority or centroid
String namedClusterFile = optimalTriggerClusterFile+"."+namingMethod;
TriggerNaming.naming1(optimalTriggerClusterFile, namedClusterFile, triggerGeneralVec, namingMethod);
// INPUT: arg role mapping results extracted from FrameNet, VerbNet, and PropBank
String roleMapFile = "Data/Resources/argRoleMap1.txt";
// MEDIATE: the output, including triggers, arguments, and their roles.
String roleNamingResult = io_dir + File.separator + "argRoleNamingResult.System.txt";
// MEDIATE: Entities from AMR parsing
String amrParsedEntityFile = io_dir + File.separator + "amrParsedEntities.System.txt";
EntityIndex.readEntitys(nodeEdgeFilePath, amrParsedEntityFile);
// MEDIATE: remove some noisy args
String notArgFile = "Data/Resources/notArgs.txt";
ArgumentRoleNaming.argumentRoleNaming(modelFile, notArgFile, namedClusterFile, triggerArgFile, roleMapFile, amrParsedEntityFile, roleNamingResult);
// OUTPUT: the final output, including triggers, arguments, and their roles.
// OUTPUT FILE: finalOutput
String finalOutput = io_dir + File.separator + "output.System.txt";
ArgumentRoleNaming.processDoc(roleNamingResult, finalOutput);
}
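// Script body: expects the I/O directory as bsh.args[0]; optional bsh.args[1] and bsh.args[2]
// override the minimum and maximum cluster counts for the grid search.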
Logger logger = Logger.getLogger("liberal-event");
Calendar start = Calendar.getInstance();
System.out.println("systemAMR()");
File f = new File(bsh.args[0]);
int min_cluster = 10;
int max_cluster = 60;
if(bsh.args.length > 2) {
    min_cluster = Integer.parseInt(bsh.args[1]);
    max_cluster = Integer.parseInt(bsh.args[2]);
}
if(f.exists()) {
    System.out.println("Min Cluster: " + min_cluster);
    System.out.println("Max Cluster: " + max_cluster);
    systemAMR(bsh.args[0], min_cluster, max_cluster);
} else {
    logger.log(Level.WARNING, "No input directory found: {0}", bsh.args[0]);
}
Calendar end = Calendar.getInstance();
long consumed = end.getTimeInMillis() - start.getTimeInMillis();
logger.log(Level.INFO, "Time Consumed: {0} ms", new Object[]{consumed});