Skip to content

Commit

Permalink
OPENNLP-385: Add unit tests for OpenNLP UIMA component (#748)
Browse files Browse the repository at this point in the history
- adapts and reactivates AnnotatorsInitializationTest by T. Teofili, originally provided in JIRA issue, to execute for all xml descriptors
- converts AnnotatorsInitializationTest to several integration tests executed via Maven failsafe plugin
- adds Parser.xml to 'test-descriptors'
- adds more test classes
- configures Maven resource filtering for xml 'test-descriptors'
- moves 'ci' profile to upper-level pom.xml for re-use in opennlp-uima component
- adds simplelogger.properties to test resources to avoid log spam during build caused by internal UIMA logger config
  • Loading branch information
mawiesne authored Feb 14, 2025
1 parent ac73a5c commit 9d1dfa9
Show file tree
Hide file tree
Showing 60 changed files with 2,165 additions and 235 deletions.
15 changes: 1 addition & 14 deletions opennlp-tools/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -136,25 +136,12 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-failsafe-plugin</artifactId>
<version>${maven.failsafe.plugin}</version>
<configuration>
<argLine>-DOPENNLP_DOWNLOAD_HOME=${opennlp.download.home}</argLine>
</configuration>
</plugin>

</plugins>
</build>

<properties>
<opennlp.download.home>${user.home}</opennlp.download.home>
</properties>

<profiles>
<profile>
<id>ci</id>
<properties>
<opennlp.download.home>${project.build.directory}</opennlp.download.home>
</properties>
</profile>
<profile>
<id>jmh</id>
<dependencies>
Expand Down
34 changes: 32 additions & 2 deletions opennlp-uima/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-params</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
Expand All @@ -87,11 +93,35 @@
</dependencies>

<build>
<testResources>
<testResource>
<directory>src/test/resources</directory>
<filtering>true</filtering>
</testResource>
</testResources>

<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<argLine>-Xmx2048m -DOPENNLP_DOWNLOAD_HOME=${opennlp.download.home}</argLine>
<forkCount>${opennlp.forkCount}</forkCount>
<failIfNoSpecifiedTests>false</failIfNoSpecifiedTests>
<excludes>
<exclude>**/*IT.java</exclude>
</excludes>
</configuration>
</plugin>

<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-failsafe-plugin</artifactId>
</plugin>

<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>3.8.1</version>
<executions>
<execution>
<id>copy-dependencies</id>
Expand All @@ -100,7 +130,7 @@
<goal>copy-dependencies</goal>
</goals>
<configuration>
<excludeScope>provided</excludeScope>
<includeScope>runtime</includeScope>
<stripVersion>true</stripVersion>
</configuration>
</execution>
Expand Down
7 changes: 3 additions & 4 deletions opennlp-uima/src/main/java/opennlp/uima/chunker/Chunker.java
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ public void initialize(UimaContext context)

Logger mLogger = context.getLogger();

if (mLogger.isLoggable(Level.INFO)) {
mLogger.log(Level.INFO, "Initializing the OpenNLP Chunker annotator.");
if (mLogger.isLoggable(Level.DEBUG)) {
mLogger.log(Level.DEBUG, "Initializing the OpenNLP Chunker annotator.");
}

ChunkerModel model;
Expand Down Expand Up @@ -172,8 +172,7 @@ public void process(CAS tcas) {

String[] tokens = new String[tokenAnnotationIndex.size()];
String[] pos = new String[tokenAnnotationIndex.size()];
AnnotationFS[] tokenAnnotations = new AnnotationFS[tokenAnnotationIndex
.size()];
AnnotationFS[] tokenAnnotations = new AnnotationFS[tokenAnnotationIndex.size()];

int index = 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ abstract class AbstractDocumentCategorizer extends CasAnnotator_ImplBase {

private Type mTokenType;

@Override
public void initialize(UimaContext context)
throws ResourceInitializationException {

Expand All @@ -59,8 +60,8 @@ public void initialize(UimaContext context)

Logger mLogger = context.getLogger();

if (mLogger.isLoggable(Level.INFO)) {
mLogger.log(Level.INFO, "Initializing the OpenNLP Categorizer.");
if (mLogger.isLoggable(Level.DEBUG)) {
mLogger.log(Level.DEBUG, "Initializing the OpenNLP Categorizer.");
}

DoccatModel model;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ public class DocumentCategorizer extends AbstractDocumentCategorizer {

private Feature mCategoryFeature;


@Override
public void typeSystemInit(TypeSystem typeSystem)
throws AnalysisEngineProcessException {

Expand All @@ -55,7 +55,7 @@ protected void setBestCategory(CAS tcas, String bestCategory) {

AnnotationFS categoryAnnotation;

if (categoryIndex.size() > 0) {
if (!categoryIndex.isEmpty()) {
categoryAnnotation = categoryIndex.iterator().next();
} else {
categoryAnnotation = tcas.createAnnotation(mCategoryType, 0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ public final void initialize(UimaContext context) throws ResourceInitializationE

mLogger = context.getLogger();

if (mLogger.isLoggable(Level.INFO)) {
mLogger.log(Level.INFO, "Initializing the " + name + ".");
if (mLogger.isLoggable(Level.DEBUG)) {
mLogger.log(Level.DEBUG, "Initializing the " + name + ".");
}

isRemoveExistingAnnotations = AnnotatorUtil.getOptionalBooleanParameter(
Expand Down Expand Up @@ -133,21 +133,19 @@ public void typeSystemInit(TypeSystem typeSystem)
mNameTypeMapping = Collections.unmodifiableMap(nameTypeMap);
}

if (mNameType == null && mNameTypeMapping.size() == 0) {
if (mNameType == null && mNameTypeMapping.isEmpty()) {
throw new AnalysisEngineProcessException(
new Exception("No name type or valid name type mapping configured!"));
}
}

protected void postProcessAnnotations(Span[] detectedNames,
AnnotationFS[] nameAnnotations) {
}
protected abstract void postProcessAnnotations(Span[] detectedNames,
AnnotationFS[] nameAnnotations);

/**
* Called if the current document is completely processed.
*/
protected void documentDone(CAS cas) {
}
protected abstract void documentDone(CAS cas);

protected abstract Span[] find(CAS cas, String[] tokens);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
package opennlp.uima.namefind;

import java.io.IOException;
import java.io.InputStream;

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.resource.ResourceAccessException;
import org.apache.uima.resource.ResourceInitializationException;

Expand Down Expand Up @@ -66,24 +66,31 @@ public void initialize() throws ResourceInitializationException {
String modelName = AnnotatorUtil.getRequiredStringParameter(context,
UimaUtil.DICTIONARY_PARAMETER);

InputStream inModel = AnnotatorUtil.getResourceAsStream(context,
modelName);

nameFinderDictionary = new Dictionary(inModel);
nameFinderDictionary = new Dictionary(
AnnotatorUtil.getResourceAsStream(context, modelName));

} catch (IOException ie) {
throw new ResourceInitializationException(
ExceptionMessages.MESSAGE_CATALOG,
ExceptionMessages.IO_ERROR_DICTIONARY_READING,
new Object[] {ie.getMessage()});
}

}

mNameFinder = new opennlp.tools.namefind.DictionaryNameFinder(
nameFinderDictionary);
}

/**
 * Post-processing hook for the annotations created for the detected names.
 * <p>
 * This dictionary-based name finder has no additional work to perform here,
 * so the implementation is intentionally empty.
 *
 * @param detectedNames the spans of the detected names
 * @param nameAnnotations the annotations belonging to the detected names
 */
@Override
protected void postProcessAnnotations(Span[] detectedNames, AnnotationFS[] nameAnnotations) {
// nothing to do
}

/**
 * Called if the current document is completely processed.
 * <p>
 * This implementation is intentionally empty.
 *
 * @param cas the {@link CAS} of the processed document
 */
@Override
protected void documentDone(CAS cas) {
// nothing to do
}

@Override
protected Span[] find(CAS cas, String[] tokens) {
return mNameFinder.find(tokens);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,7 @@ public NameFinder() {
* Note: Do all initialization in this method, do not use the constructor.
*/
@Override
public void initialize()
throws ResourceInitializationException {
public void initialize() throws ResourceInitializationException {

super.initialize();

Expand All @@ -136,13 +135,12 @@ public void initialize()
* Initializes the type system.
*/
@Override
public void typeSystemInit(TypeSystem typeSystem)
throws AnalysisEngineProcessException {
public void typeSystemInit(TypeSystem typeSystem) throws AnalysisEngineProcessException {

super.typeSystemInit(typeSystem);

probabilityFeature = AnnotatorUtil.getOptionalFeatureParameter(context, mNameType,
UimaUtil.PROBABILITY_FEATURE_PARAMETER, CAS.TYPE_NAME_DOUBLE);
UimaUtil.PROBABILITY_FEATURE_PARAMETER, CAS.TYPE_NAME_DOUBLE);

documentConfidenceType = AnnotatorUtil.getOptionalTypeParameter(context, typeSystem,
"opennlp.uima.DocumentConfidenceType");
Expand All @@ -160,7 +158,6 @@ protected Span[] find(CAS cas, String[] tokens) {
Span[] names = mNameFinder.find(tokens);

double[] probs = mNameFinder.probs();

for (double prob : probs) {
documentConfidence.add(prob);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@

/**
* The Normalizer tries to structure annotations. The structured value
* is than assigned to a field of the annotation.
* is then assigned to a field of the annotation.
* <p>
* The process depends on the
* <p>
Expand Down Expand Up @@ -108,6 +108,7 @@ private Normalizer() {
*
* @implNote Do all initialization in this method, do not use the constructor.
*/
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {

super.initialize(context);
Expand All @@ -116,8 +117,8 @@ public void initialize(UimaContext context) throws ResourceInitializationExcepti

mLogger = context.getLogger();

if (mLogger.isLoggable(Level.INFO)) {
mLogger.log(Level.INFO, "Initializing the OpenNLP Normalizer annotator.");
if (mLogger.isLoggable(Level.DEBUG)) {
mLogger.log(Level.DEBUG, "Initializing the OpenNLP Normalizer annotator.");
}

try {
Expand All @@ -141,6 +142,7 @@ public void initialize(UimaContext context) throws ResourceInitializationExcepti
* Initializes the type system.
* @param typeSystem type system to initialize
*/
@Override
public void typeSystemInit(TypeSystem typeSystem)
throws AnalysisEngineProcessException {

Expand All @@ -165,6 +167,7 @@ public void typeSystemInit(TypeSystem typeSystem)
}
}

@Override
public void process(CAS tcas) {

FSIndex<AnnotationFS> sentenceIndex = tcas.getAnnotationIndex(mNameType);
Expand Down Expand Up @@ -203,8 +206,8 @@ public void process(CAS tcas) {
try {
number = NumberUtil.parse(text, language);
} catch (ParseException e) {
if (mLogger.isLoggable(Level.INFO)) {
mLogger.log(Level.INFO, "Invalid number format: " + text);
if (mLogger.isLoggable(Level.WARN)) {
mLogger.log(Level.WARN, "Invalid number format: " + text);
}
continue;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ public StringDictionary() {
* @throws IOException Thrown if IO errors occurred.
*/
public StringDictionary(InputStream in) throws IOException {
this();
DictionaryEntryPersistor.create(in, entry -> {
String valueString = entry.attributes().getValue("value");
put(entry.tokens(), valueString);
Expand Down
1 change: 1 addition & 0 deletions opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ public void typeSystemInit(TypeSystem typeSystem)
/**
* Performs parsing on the given {@link CAS} object.
*/
@Override
public void process(CAS cas) {
FSIndex<AnnotationFS> sentences = cas.getAnnotationIndex(mSentenceType);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ public void initialize(UimaContext context)

logger = context.getLogger();

if (logger.isLoggable(Level.INFO)) {
logger.log(Level.INFO, "Initializing the OpenNLP Sentence annotator.");
if (logger.isLoggable(Level.DEBUG)) {
logger.log(Level.DEBUG, "Initializing the OpenNLP Sentence annotator.");
}

isRemoveExistingAnnotations = AnnotatorUtil.getOptionalBooleanParameter(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ public SentenceDetector() {
* <p>
* Note: Do all initialization in this method, do not use the constructor.
*/
@Override
public void initialize(UimaContext context)
throws ResourceInitializationException {

Expand All @@ -98,14 +99,14 @@ public void initialize(UimaContext context)
/**
* Initializes the type system.
*/
@Override
public void typeSystemInit(TypeSystem typeSystem)
throws AnalysisEngineProcessException {

super.typeSystemInit(typeSystem);

probabilityFeature = AnnotatorUtil.getOptionalFeatureParameter(context,
sentenceType, UimaUtil.PROBABILITY_FEATURE_PARAMETER,
CAS.TYPE_NAME_DOUBLE);
sentenceType, UimaUtil.PROBABILITY_FEATURE_PARAMETER, CAS.TYPE_NAME_DOUBLE);
}

@Override
Expand All @@ -128,6 +129,7 @@ protected void postProcessAnnotations(AnnotationFS[] sentences) {
/**
* Releases allocated resources.
*/
@Override
public void destroy() {
// dereference model to allow garbage collection
sentenceDetector = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
public class SentenceModelResourceImpl extends AbstractModelResource<SentenceModel>
implements SentenceModelResource {

/**
 * Returns the {@link SentenceModel} stored in the {@code model} field.
 *
 * @return the value of {@code model}
 */
@Override
public SentenceModel getModel() {
return model;
}
Expand Down
Loading

0 comments on commit 9d1dfa9

Please sign in to comment.