Skip to content

Commit

Permalink
Migrate OpenNLP 'ant train-test-models' to Gradle (#14198)
Browse files Browse the repository at this point in the history
  • Loading branch information
msfroh authored Feb 7, 2025
1 parent 5ec2f61 commit 30d18df
Show file tree
Hide file tree
Showing 8 changed files with 30 additions and 1 deletion.
29 changes: 29 additions & 0 deletions lucene/analysis/opennlp/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,32 @@ dependencies {

moduleTestImplementation project(':lucene:test-framework')
}

// Directories used by the trainTestModels task, exposed as project extra properties:
//   testModelDataDir - working directory for the trainer runs (relative '-data' files resolve here)
//   testsUserDir     - root of the module's test resource files
//   testModelDir     - where the trained model files are written
ext.testModelDataDir = file('src/tools/test-model-data')
ext.testsUserDir = file('src/test-files')
ext.testModelDir = file("${testsUserDir}/org/apache/lucene/analysis/opennlp")

// Retrains every small test model by invoking trainModel once per model, in a fixed order.
tasks.register('trainTestModels') {
  description = 'Train all small test models for unit tests'
  doLast {
    // Make sure the output directory exists before any trainer writes into it.
    mkdir testModelDir

    // One row per model: [trainer command, language, data file, model file, extra CLI args].
    def modelSpecs = [
      ['SentenceDetectorTrainer', 'en', 'sentences.txt', 'en-test-sent.bin', []],
      ['TokenizerTrainer', 'en', 'tokenizer.txt', 'en-test-tokenizer.bin', []],
      ['POSTaggerTrainer', 'en', 'pos.txt', 'en-test-pos-maxent.bin', []],
      ['ChunkerTrainerME', 'en', 'chunks.txt', 'en-test-chunker.bin', []],
      ['TokenNameFinderTrainer', 'en', 'ner.txt', 'en-test-ner.bin', ['-params', 'ner_TrainerParams.txt']],
      ['LemmatizerTrainerME', 'en', 'lemmas.txt', 'en-test-lemmatizer.bin', []]
    ]

    modelSpecs.each { spec ->
      def (trainer, language, dataFile, modelFile, extra) = spec
      trainModel(trainer, language, dataFile, modelFile, extra)
    }
  }
}

/**
 * Runs the 'opennlp.tools.cmdline.CLI' main class in a separate JVM to train one model.
 *
 * @param command   name of the CLI trainer command, passed as the first argument
 * @param lang      value passed after '-lang'
 * @param data      value passed after '-data'; the process runs with testModelDataDir as its
 *                  working directory
 * @param model     file name of the model, passed after '-model' prefixed with testModelDir
 * @param extraArgs additional command-line arguments appended after the standard ones
 * @return the result of the javaexec call
 */
def trainModel(String command, String lang, String data, String model, List extraArgs = []) {
  // Assemble the full CLI argument list up front: standard options first, extras last.
  def modelPath = "${testModelDir}/${model}"
  def cliArgs = [command, '-lang', lang, '-data', data, '-model', modelPath] + extraArgs

  javaexec {
    classpath = sourceSets.main.compileClasspath
    mainClass = 'opennlp.tools.cmdline.CLI'
    workingDir = testModelDataDir
    args = cliArgs
  }
}

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public class TestOpenNLPChunkerFilterFactory extends BaseTokenStreamTestCase {
8, 15, 17, 21, 23, 29, 30, 39, 46, 48, 49, 51, 57, 58
};
private static final String[] SENTENCES_chunks = {
- "B-NP", "I-NP", "I-NP", "I-NP", "I-NP", "I-NP", "O", "B-NP", "I-NP", "I-NP", "O", "B-NP",
+ "B-NP", "I-NP", "I-NP", "B-VP", "B-NP", "I-NP", "O", "B-NP", "I-NP", "I-NP", "O", "B-NP",
"I-NP", "O"
};

Expand Down

0 comments on commit 30d18df

Please sign in to comment.