Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

made the stand alone version nicer to use as a lib #59

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 84 additions & 17 deletions src/de/unihd/dbs/heideltime/standalone/HeidelTimeStandalone.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,15 @@

package de.unihd.dbs.heideltime.standalone;

import java.io.FileInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.RandomAccessFile;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.text.DateFormat;
Expand All @@ -38,8 +40,8 @@
import org.apache.uima.util.XMLInputSource;

import de.unihd.dbs.heideltime.standalone.components.JCasFactory;
import de.unihd.dbs.heideltime.standalone.components.ResultFormatter;
import de.unihd.dbs.heideltime.standalone.components.PartOfSpeechTagger;
import de.unihd.dbs.heideltime.standalone.components.ResultFormatter;
import de.unihd.dbs.heideltime.standalone.components.impl.AllLanguagesTokenizerWrapper;
import de.unihd.dbs.heideltime.standalone.components.impl.HunPosTaggerWrapper;
import de.unihd.dbs.heideltime.standalone.components.impl.IntervalTaggerWrapper;
Expand Down Expand Up @@ -125,7 +127,21 @@ public HeidelTimeStandalone() {
* @param outputType
*/
public HeidelTimeStandalone(Language language, DocumentType typeToProcess, OutputType outputType) {
this(language, typeToProcess, outputType, null);
this(language, typeToProcess, outputType, (URL)null);
}

/**
 * Legacy constructor that takes the configuration file as a filesystem path.
 * The path is converted to a file URL and handed to the URL-based initialize.
 *
 * @param language
 * @param typeToProcess
 * @param outputType
 * @param configPath filesystem path of the configuration file; may be null,
 *                   in which case null is forwarded so that initialize can
 *                   fall back to its default configuration location
 * @deprecated use the constructor that takes the configuration as a URL
 */
@Deprecated
public HeidelTimeStandalone(Language language, DocumentType typeToProcess, OutputType outputType, String configPath) {
	this.language = language;
	this.documentType = typeToProcess;
	this.outputType = outputType;

	try {
		// new File((String) null) throws NullPointerException; callers of the old
		// String-based API were allowed to pass null, so forward it untouched.
		URL configUrl = (configPath == null) ? null : new File(configPath).toURI().toURL();
		this.initialize(language, typeToProcess, outputType, configUrl);
	} catch(MalformedURLException e) {
		logger.log(Level.WARNING,"Malformed URL means HeidelTime cannot be initialized");
		throw new RuntimeException(e);
	}
}

/**
Expand All @@ -136,7 +152,7 @@ public HeidelTimeStandalone(Language language, DocumentType typeToProcess, Outpu
* @param outputType
* @param configPath
*/
public HeidelTimeStandalone(Language language, DocumentType typeToProcess, OutputType outputType, String configPath) {
public HeidelTimeStandalone(Language language, DocumentType typeToProcess, OutputType outputType, URL configPath) {
this.language = language;
this.documentType = typeToProcess;
this.outputType = outputType;
Expand All @@ -153,14 +169,29 @@ public HeidelTimeStandalone(Language language, DocumentType typeToProcess, Outpu
* @param configPath
* @param posTagger
*/
public HeidelTimeStandalone(Language language, DocumentType typeToProcess, OutputType outputType, String configPath, POSTagger posTagger) {
public HeidelTimeStandalone(Language language, DocumentType typeToProcess, OutputType outputType, URL configPath, POSTagger posTagger) {
this.language = language;
this.documentType = typeToProcess;
this.outputType = outputType;

this.initialize(language, typeToProcess, outputType, configPath, posTagger);
}

/**
 * Legacy constructor that takes the configuration file as a filesystem path,
 * together with the POS tagger and the interval-tagging switch.
 * The path is converted to a file URL and handed to the URL-based initialize.
 *
 * @param language
 * @param typeToProcess
 * @param outputType
 * @param configPath filesystem path of the configuration file; may be null,
 *                   in which case null is forwarded so that initialize can
 *                   fall back to its default configuration location
 * @param posTagger
 * @param doIntervalTagging
 * @deprecated use the constructor that takes the configuration as a URL
 */
@Deprecated
public HeidelTimeStandalone(Language language, DocumentType typeToProcess, OutputType outputType, String configPath, POSTagger posTagger, Boolean doIntervalTagging) {
	this.language = language;
	this.documentType = typeToProcess;
	this.outputType = outputType;
	this.doIntervalTagging = doIntervalTagging;

	try {
		// new File((String) null) throws NullPointerException; callers of the old
		// String-based API were allowed to pass null, so forward it untouched.
		URL configUrl = (configPath == null) ? null : new File(configPath).toURI().toURL();
		this.initialize(language, typeToProcess, outputType, configUrl, posTagger, doIntervalTagging);
	} catch(MalformedURLException e) {
		logger.log(Level.WARNING,"Malformed URL means HeidelTime cannot be initialized");
		throw new RuntimeException(e);
	}
}

/**
* Constructor with configPath
*
Expand All @@ -170,7 +201,7 @@ public HeidelTimeStandalone(Language language, DocumentType typeToProcess, Outpu
* @param configPath
* @param posTagger
*/
public HeidelTimeStandalone(Language language, DocumentType typeToProcess, OutputType outputType, String configPath, POSTagger posTagger, Boolean doIntervalTagging) {
public HeidelTimeStandalone(Language language, DocumentType typeToProcess, OutputType outputType, URL configPath, POSTagger posTagger, Boolean doIntervalTagging) {
this.language = language;
this.documentType = typeToProcess;
this.outputType = outputType;
Expand All @@ -179,6 +210,16 @@ public HeidelTimeStandalone(Language language, DocumentType typeToProcess, Outpu
this.initialize(language, typeToProcess, outputType, configPath, posTagger, doIntervalTagging);
}

/**
 * Legacy initializer that takes the configuration file as a filesystem path.
 * Delegates to the URL-based initialize with POSTagger.TREETAGGER and
 * interval tagging switched off.
 *
 * @param language
 * @param typeToProcess
 * @param outputType
 * @param configPath filesystem path of the configuration file; may be null,
 *                   in which case null is forwarded so that the URL-based
 *                   initialize can fall back to its default location
 * @deprecated use the overload that takes the configuration as a URL
 */
@Deprecated
public void initialize(Language language, DocumentType typeToProcess, OutputType outputType, String configPath) {
	try {
		// new File((String) null) throws NullPointerException; keep the old
		// "null means default config" contract by forwarding null as-is.
		URL configUrl = (configPath == null) ? null : new File(configPath).toURI().toURL();
		initialize(language, typeToProcess, outputType, configUrl, POSTagger.TREETAGGER, false);
	} catch(MalformedURLException e) {
		logger.log(Level.WARNING,"Malformed URL means HeidelTime cannot be initialized");
		throw new RuntimeException(e);
	}
}

/**
* Method that initializes all vital prerequisites
*
Expand All @@ -187,10 +228,20 @@ public HeidelTimeStandalone(Language language, DocumentType typeToProcess, Outpu
* @param outputType Output type
* @param configPath URL of the configuration file for HeidelTimeStandalone; if null, the location given by CLISwitch.CONFIGFILE is read instead
*/
public void initialize(Language language, DocumentType typeToProcess, OutputType outputType, String configPath) {
public void initialize(Language language, DocumentType typeToProcess, OutputType outputType, URL configPath) {
initialize(language, typeToProcess, outputType, configPath, POSTagger.TREETAGGER);
}

/**
 * Legacy initializer that takes the configuration file as a filesystem path
 * plus the POS tagger to use. Delegates to the URL-based initialize with
 * interval tagging switched off.
 *
 * @param language
 * @param typeToProcess
 * @param outputType
 * @param configPath filesystem path of the configuration file; may be null,
 *                   in which case null is forwarded so that the URL-based
 *                   initialize can fall back to its default location
 * @param posTagger
 * @deprecated use the overload that takes the configuration as a URL
 */
@Deprecated
public void initialize(Language language, DocumentType typeToProcess, OutputType outputType, String configPath, POSTagger posTagger) {
	try {
		// new File((String) null) throws NullPointerException; keep the old
		// "null means default config" contract by forwarding null as-is.
		URL configUrl = (configPath == null) ? null : new File(configPath).toURI().toURL();
		initialize(language, typeToProcess, outputType, configUrl, posTagger, false);
	} catch(MalformedURLException e) {
		logger.log(Level.WARNING,"Malformed URL means HeidelTime cannot be initialized");
		throw new RuntimeException(e);
	}
}

/**
* Method that initializes all vital prerequisites, including POS Tagger
*
Expand All @@ -200,10 +251,20 @@ public void initialize(Language language, DocumentType typeToProcess, OutputType
* @param configPath URL of the configuration file for HeidelTimeStandalone; if null, the location given by CLISwitch.CONFIGFILE is read instead
* @param posTagger POS Tagger to use for preprocessing
*/
public void initialize(Language language, DocumentType typeToProcess, OutputType outputType, String configPath, POSTagger posTagger) {
public void initialize(Language language, DocumentType typeToProcess, OutputType outputType, URL configPath, POSTagger posTagger) {
initialize(language, typeToProcess, outputType, configPath, posTagger, false);
}

/**
 * Legacy initializer that takes the configuration file as a filesystem path
 * plus the POS tagger and the interval-tagging switch. Converts the path to
 * a file URL and delegates to the URL-based initialize.
 *
 * @param language
 * @param typeToProcess
 * @param outputType
 * @param configPath filesystem path of the configuration file; may be null,
 *                   in which case null is forwarded so that the URL-based
 *                   initialize can fall back to its default location
 * @param posTagger
 * @param doIntervalTagging
 * @deprecated use the overload that takes the configuration as a URL
 */
@Deprecated
public void initialize(Language language, DocumentType typeToProcess, OutputType outputType, String configPath, POSTagger posTagger, Boolean doIntervalTagging) {
	try {
		// new File((String) null) throws NullPointerException; keep the old
		// "null means default config" contract by forwarding null as-is.
		URL configUrl = (configPath == null) ? null : new File(configPath).toURI().toURL();
		initialize(language, typeToProcess, outputType, configUrl, posTagger, doIntervalTagging);
	} catch(MalformedURLException e) {
		logger.log(Level.WARNING,"Malformed URL means HeidelTime cannot be initialized");
		throw new RuntimeException(e);
	}
}

/**
* Method that initializes all vital prerequisites, including POS Tagger
*
Expand All @@ -214,7 +275,7 @@ public void initialize(Language language, DocumentType typeToProcess, OutputType
* @param posTagger POS Tagger to use for preprocessing
* @param doIntervalTagging Whether or not to invoke the IntervalTagger
*/
public void initialize(Language language, DocumentType typeToProcess, OutputType outputType, String configPath, POSTagger posTagger, Boolean doIntervalTagging) {
public void initialize(Language language, DocumentType typeToProcess, OutputType outputType, URL configPath, POSTagger posTagger, Boolean doIntervalTagging) {
logger.log(Level.INFO, "HeidelTimeStandalone initialized with language " + this.language.getName());

// set the POS tagger
Expand All @@ -225,19 +286,25 @@ public void initialize(Language language, DocumentType typeToProcess, OutputType

// read in configuration in case it's not yet initialized
if(!Config.isInitialized()) {
try {
if(configPath == null)
readConfigFile(CLISwitch.CONFIGFILE.getValue().toString());
readConfigFile((new File(CLISwitch.CONFIGFILE.getValue().toString())).toURI().toURL());
else
readConfigFile(configPath);
}
catch (MalformedURLException e) {
logger.log(Level.WARNING,"Malformed URL means HeidelTime cannot be initialized");
throw new RuntimeException(e);
}
}

try {
heidelTime = new HeidelTime();
heidelTime.initialize(new UimaContextImpl(language, typeToProcess, CLISwitch.VERBOSITY2.getIsActive()));
logger.log(Level.INFO, "HeidelTime initialized");
} catch (Exception e) {
e.printStackTrace();
logger.log(Level.WARNING, "HeidelTime could not be initialized");
throw new RuntimeException(e);
}

// Initialize JCas factory -------------
Expand All @@ -255,8 +322,8 @@ public void initialize(Language language, DocumentType typeToProcess, OutputType
jcasFactory = new JCasFactoryImpl(descriptions);
logger.log(Level.INFO, "JCas factory initialized");
} catch (Exception e) {
e.printStackTrace();
logger.log(Level.WARNING, "JCas factory could not be initialized");
throw new RuntimeException(e);
}
}

Expand Down Expand Up @@ -711,7 +778,7 @@ public static void main(String[] args) {
try {
logger.log(Level.INFO, "Configuration path '-c': "+configPath);

readConfigFile(configPath);
readConfigFile((new File(configPath)).toURI().toURL());

logger.log(Level.FINE, "Config initialized");
} catch (Exception e) {
Expand Down Expand Up @@ -778,7 +845,7 @@ public static void main(String[] args) {
// double-newstring should not be necessary, but without this, it's not running on Windows (?)
String input = new String(new String(inArr, encodingType).getBytes("UTF-8"), "UTF-8");

HeidelTimeStandalone standalone = new HeidelTimeStandalone(language, type, outputType, null, posTagger, doIntervalTagging);
HeidelTimeStandalone standalone = new HeidelTimeStandalone(language, type, outputType, (URL)null, posTagger, doIntervalTagging);
String out = standalone.process(input, dct);

// Print output always as UTF-8
Expand Down Expand Up @@ -806,11 +873,11 @@ public static void main(String[] args) {
}
}

public static void readConfigFile(String configPath) {
public static void readConfigFile(URL configPath) {
InputStream configStream = null;
try {
logger.log(Level.INFO, "trying to read in file "+configPath);
configStream = new FileInputStream(configPath);
configStream = configPath.openStream();

Properties props = new Properties();
props.load(configStream);
Expand All @@ -820,7 +887,7 @@ public static void readConfigFile(String configPath) {
configStream.close();
} catch (FileNotFoundException e) {
logger.log(Level.WARNING, "couldn't open configuration file \""+configPath+"\". quitting.");
System.exit(-1);
throw new RuntimeException("Config file is missing:",e);
} catch (IOException e) {
logger.log(Level.WARNING, "couldn't close config file handle");
e.printStackTrace();
Expand Down