lexicon;
/** The inverted map of integer keys to their features. */
protected FVector lexiconInv;
/** The encoding to use for new features added to this lexicon. */
@@ -182,7 +183,7 @@ public Lexicon(String e) {
/** Clears the data structures associated with this instance. */
public void clear() {
- lexicon = new HashMap();
+ lexicon = new THashMap();
lexiconInv = new FVector();
lexiconChildren = null;
pruneCutoff = -1;
@@ -709,7 +710,7 @@ public Object clone() {
}
if (lexicon != null) {
- clone.lexicon = new HashMap();
+ clone.lexicon = new THashMap();
clone.lexicon.putAll(lexicon);
}
clone.lexiconInv = (FVector) lexiconInv.clone();
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java
index 822fc1fd..d89bd59f 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/learn/SparseNetworkLearner.java
@@ -8,9 +8,9 @@
package edu.illinois.cs.cogcomp.lbjava.learn;
import java.io.PrintStream;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
-import java.util.Map.Entry;
import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream;
import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream;
@@ -688,6 +688,44 @@ public void read(ExceptionlessInputStream in) {
for (int i = 0; i < N; ++i)
network.add(Learner.readLearner(in));
}
+
+    /**
+     * Discards the learners whose labels are not listed in {@code keepers} by setting their
+     * slots in the network to {@code null}. For labels that are not needed at runtime, this
+     * improves performance as well as memory footprint. For example, imagine you have a 4 class
+     * model, PER, ORG, LOC and OTHER, but you couldn't care less about OTHER. In this case, you
+     * could eliminate that label and improve the performance of the model proportionally.
+     *
+     * Use of this feature may cause terms previously classified by a discarded classifier to be
+     * labeled as one of the remaining classes.
+     *
+     * A leading B-, I-, L- or U- chunk prefix is removed before a label is compared against
+     * {@code keepers}; labels of one or two characters (such as "O") are always kept.
+     *
+     * @param keepers A list of the only labels to keep, written without any chunk prefix.
+     */
+    public void pruneUnusedLabels(ArrayList<String> keepers) {
+        int N = network.size();
+        for (int i = 0; i < N; ++i) {
+            // Slots that are already empty have nothing to prune.
+            if (network.get(i) == null)
+                continue;
+
+            String label = labelLexicon.lookupKey(i).getStringValue();
+
+            // Keep other ("O"), this is like a non-label to begin with.
+            if (label.length() <= 2)
+                continue;
+
+            // Take off the B-, I-, L- or U- when present; a label without such a prefix (e.g. a
+            // plain "PER") is compared whole instead of losing its first two characters.
+            if (label.charAt(1) == '-')
+                label = label.substring(2);
+
+            if (!keepers.contains(label))
+                network.set(i, null);
+        }
+    }
/** Returns a deep clone of this learning algorithm. */
public Object clone() {
diff --git a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/ArrayFileParser.java b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/ArrayFileParser.java
index cbae0aa4..ea163d86 100644
--- a/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/ArrayFileParser.java
+++ b/lbjava/src/main/java/edu/illinois/cs/cogcomp/lbjava/parse/ArrayFileParser.java
@@ -36,6 +36,8 @@
public class ArrayFileParser implements Parser {
/** Reader for file currently being parsed. */
protected DataInputStream in;
+ /** The zip file, which must also be closed if this is a compressed file. */
+ protected ZipFile zipFile=null;
/** The name of the file to parse. */
protected String exampleFileName;
/** A single array from which all examples can be parsed. */
@@ -190,13 +192,11 @@ public void reset() {
try {
if (exampleFileName != null) {
if (zipped) {
- ZipFile zip = new ZipFile(exampleFileName);
- in =
- new DataInputStream(new BufferedInputStream(zip.getInputStream(zip
+ zipFile = new ZipFile(exampleFileName);
+ in = new DataInputStream(new BufferedInputStream(zipFile.getInputStream(zipFile
.getEntry(ExceptionlessInputStream.zipEntryName))));
} else
- in =
- new DataInputStream(new BufferedInputStream(new FileInputStream(
+ in = new DataInputStream(new BufferedInputStream(new FileInputStream(
exampleFileName)));
} else if (zipped) {
ZipInputStream zip = new ZipInputStream(new ByteArrayInputStream(exampleData));
@@ -218,6 +218,9 @@ public void close() {
return;
try {
in.close();
+ if (zipFile != null) {
+ zipFile.close();
+ }
} catch (Exception e) {
System.err.println("Can't close '" + exampleFileName + "':");
e.printStackTrace();
diff --git a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java
index e5202d51..e6e4aaf1 100644
--- a/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java
+++ b/lbjava/src/test/java/edu/illinois/cs/cogcomp/lbjava/SparseNetworkLearningPruneTest.java
@@ -16,5 +16,4 @@ public class SparseNetworkLearningPruneTest {
@Test
public void test() {
}
-
}
diff --git a/pom.xml b/pom.xml
index db0a0881..8fcd6b74 100644
--- a/pom.xml
+++ b/pom.xml
@@ -7,7 +7,7 @@
edu.illinois.cs.cogcomp
lbjava-project
pom
- 1.3.1
+ 1.3.3
lbjava