Skip to content

Commit

Permalink
feat: implement spell checker dictionaries
Browse files Browse the repository at this point in the history
Signed-off-by: Hiroshi Miura <[email protected]>
  • Loading branch information
miurahr committed Nov 23, 2024
1 parent fc61687 commit 213cfed
Show file tree
Hide file tree
Showing 153 changed files with 1,173,870 additions and 539 deletions.
3 changes: 2 additions & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,6 @@ dependencies {
testImplementation(libs.assertj)
testImplementation(libs.bundles.xmlunit)

testImplementation(project(":language-modules"))
testImplementation(libs.languagetool.server) {
exclude module: "logback-classic"
}
Expand Down Expand Up @@ -1592,6 +1591,8 @@ test {
systemProperty 'java.awt.headless', 'true'
}
systemProperty 'java.util.logging.config.file', "${rootDir}/config/test/logger.properties"
// some test cases depend on modules from subprojects
dependsOn subprojects.collect { it.tasks.named('jar') }
}

tasks.register('testIntegration', JavaExec) {
Expand Down
9 changes: 8 additions & 1 deletion language-modules/ar/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,14 @@ dependencies {
testImplementation(libs.junit4)
testImplementation(libs.assertj)
testImplementation(testFixtures(project.rootProject))
testImplementation(libs.commons.io)
testImplementation(libs.languagetool.core)
testImplementation(project(":spellchecker:hunspell"))
testRuntimeOnly(libs.commons.io)
}

test {
dependsOn jar
dependsOn project(":spellchecker:hunspell").tasks.jar
}

jar {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* OmegaT - Computer Assisted Translation (CAT) tool
* with fuzzy matching, translation memory, keyword search,
* glossaries, and translation leveraging into updated projects.
*
* Copyright (C) 2023-2024 Hiroshi Miura
* Home page: https://www.omegat.org/
* Support center: https://omegat.org/support
*
* This file is part of OmegaT.
*
* OmegaT is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OmegaT is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.omegat.languages.ar;

import java.io.InputStream;

import org.languagetool.JLanguageTool;

import org.omegat.core.spellchecker.AbstractHunspellDictionary;

/**
 * Hunspell dictionary provider for Arabic.
 *
 * <p>Dictionary resources are looked up below a fixed resource directory
 * through the data broker configured on {@link JLanguageTool}.
 */
public class ArabicHunspellDictionary extends AbstractHunspellDictionary {

    /** Directory prefix prepended to every requested resource name. */
    private static final String RESOURCE_ROOT = "/org/languagetool/resource/ar/hunspell/";

    /** Dictionary names reported by {@link #getDictionaries()}. */
    private static final String[] DICTIONARY_NAMES = {"ar"};

    /**
     * @return the dictionary names handled by this provider
     */
    @Override
    protected String[] getDictionaries() {
        return DICTIONARY_NAMES;
    }

    /**
     * Resolves a resource name against the Arabic hunspell resource directory
     * and asks the LanguageTool data broker for it.
     *
     * @param resource
     *            resource name relative to the resource directory
     * @return the stream handed back by the data broker for the full path
     */
    @Override
    protected InputStream getResourceAsStream(final String resource) {
        final String fullPath = RESOURCE_ROOT + resource;
        return JLanguageTool.getDataBroker().getAsStream(fullPath);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,15 @@
public final class ArabicPlugin {

private static final String ARABIC_LT = "org.languagetool.language.Arabic";
private static final String ARABIC_SCD = "org.omegat.languages.ar.ArabicSpellCheckerDictionary";
private static final String ARABIC_SCD = "org.omegat.languages.ar.ArabicHunspellDictionary";

private ArabicPlugin() {
}

public static void loadPlugins() {
LanguageManager.registerLTLanguage("ar-AR", ARABIC_LT);
SpellCheckerManager.registerSpellCheckerDictionaryProvider("ar", SpellCheckDictionaryType.HUNSPELL, ARABIC_SCD);
SpellCheckerManager.registerSpellCheckerDictionaryProvider("ar", SpellCheckDictionaryType.HUNSPELL,
ARABIC_SCD);
}

public static void unloadPlugins() {
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*******************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2024 Hiroshi Miura
Home page: https://www.omegat.org/
Support center: https://omegat.org/support
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
******************************************************************************/
package org.omegat.languages.ar;

import static org.assertj.core.api.Assertions.assertThat;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;

import org.apache.commons.io.FileUtils;
import org.junit.BeforeClass;
import org.junit.Test;
import org.languagetool.JLanguageTool;

import org.omegat.core.Core;
import org.omegat.core.data.NotLoadedProject;
import org.omegat.core.data.ProjectProperties;
import org.omegat.core.spellchecker.ISpellChecker;
import org.omegat.filters2.master.PluginUtils;
import org.omegat.languagetools.LanguageDataBroker;
import org.omegat.spellchecker.hunspell.HunSpellChecker;
import org.omegat.util.Language;
import org.omegat.util.TestPreferencesInitializer;


/**
 * Checks that {@link HunSpellChecker} initializes for an Arabic target
 * language and accepts a sample word.
 */
public class HunspellTest {

    private static final String LANGUAGE = "ar";
    private static final String GOOD = "مرحبا.";

    /** Temporary directory used both as project root and as parent of the config dir. */
    private static Path workDir;

    /**
     * Installs the data broker, loads plugins and prepares a throw-away
     * configuration directory containing a "spelling" sub-directory.
     */
    @BeforeClass
    public static void setUpClass() throws IOException {
        JLanguageTool.setDataBroker(new LanguageDataBroker());
        PluginUtils.loadPlugins(Collections.emptyMap());

        workDir = Files.createTempDirectory("omegat");
        assertThat(workDir.toFile()).isDirectory();

        final Path prefsDir = workDir.resolve(".omegat");
        Files.createDirectory(prefsDir);
        TestPreferencesInitializer.init(prefsDir.toString());
        Files.createDirectory(prefsDir.resolve("spelling"));

        // Registered last, as before: the whole tree is removed when the JVM exits.
        FileUtils.forceDeleteOnExit(workDir.toFile());
    }

    /**
     * Sets a project whose target language is Arabic, then verifies that the
     * checker initializes and reports the sample word as correct.
     */
    @Test
    public void testDictionary() throws Exception {
        final ProjectProperties properties = new ProjectProperties(workDir.toFile());
        properties.setTargetLanguage(new Language(LANGUAGE));
        Core.setProject(projectReturning(properties));

        final ISpellChecker spellChecker = new HunSpellChecker();
        final boolean initialized = spellChecker.initialize();
        assertThat(initialized).as("Success initialize").isTrue();

        final boolean accepted = spellChecker.isCorrect(GOOD);
        assertThat(accepted).as("Spell check for correct word").isTrue();
    }

    /** Builds a not-loaded project stub that only exposes the given properties. */
    private static NotLoadedProject projectReturning(final ProjectProperties properties) {
        return new NotLoadedProject() {
            @Override
            public ProjectProperties getProjectProperties() {
                return properties;
            }
        };
    }

}
7 changes: 7 additions & 0 deletions language-modules/ast/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,16 @@ dependencies {
testImplementation(libs.junit4)
testImplementation(libs.assertj)
testImplementation(testFixtures(project.rootProject))
testImplementation(libs.languagetool.core)
testImplementation project(":spellchecker:morfologik")
testImplementation(libs.commons.io)
}

test {
dependsOn jar
dependsOn project(":spellchecker:morfologik").tasks.jar
}

jar {
archiveFileName.set("omegat-language-ast.${archiveExtension.get()}")
destinationDirectory.set(rootProject.layout.buildDirectory.dir("modules").get())
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* OmegaT - Computer Assisted Translation (CAT) tool
* with fuzzy matching, translation memory, keyword search,
* glossaries, and translation leveraging into updated projects.
*
* Copyright (C) 2023-2024 Hiroshi Miura
* Home page: https://www.omegat.org/
* Support center: https://omegat.org/support
*
* This file is part of OmegaT.
*
* OmegaT is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OmegaT is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package org.omegat.languages.ast;

import java.io.IOException;
import java.io.InputStream;

import morfologik.stemming.Dictionary;
import org.languagetool.JLanguageTool;

import org.omegat.core.spellchecker.ISpellCheckerDictionary;
import org.omegat.core.spellchecker.SpellCheckDictionaryType;

/**
 * Morfologik spell-checker dictionary provider for Asturian.
 *
 * <p>The dictionary and its metadata are obtained as streams from the data
 * broker configured on {@link JLanguageTool}. The streams are kept in fields
 * so that {@link #close()} can release them.
 */
public class AsturianMorfologikDictionary implements ISpellCheckerDictionary, AutoCloseable {

    private static final String DICTIONARY_PATH = "/org/languagetool/resource/ast/hunspell/";

    private InputStream infoInputStream;
    private InputStream dictInputStream;

    /**
     * Loads the Asturian Morfologik dictionary.
     *
     * @param language
     *            requested language code; accepted when it is a non-empty
     *            prefix of {@code ast_ES} (for example {@code ast})
     * @return the dictionary, or {@code null} when the language is
     *         {@code null}, empty, not a prefix of {@code ast_ES}, or the
     *         dictionary data cannot be read
     */
    @Override
    public Dictionary getMorfologikDictionary(String language) {
        // Guard null (would throw) and "" (a prefix of every string, so it
        // would wrongly select this dictionary).
        if (language == null || language.isEmpty() || !"ast_ES".startsWith(language)) {
            return null;
        }
        // A repeated call must not leak the streams opened by the previous one.
        releaseStreams();
        infoInputStream = JLanguageTool.getDataBroker().getAsStream(DICTIONARY_PATH + "ast_ES.info");
        dictInputStream = JLanguageTool.getDataBroker().getAsStream(DICTIONARY_PATH + "ast_ES.dict");
        try {
            return Dictionary.read(dictInputStream, infoInputStream);
        } catch (IOException e) {
            // Keep the "null on failure" contract, but leave a trace and do
            // not hold on to streams nobody can use.
            java.util.logging.Logger.getLogger(AsturianMorfologikDictionary.class.getName()).log(
                    java.util.logging.Level.WARNING, "Failed to read the Asturian Morfologik dictionary", e);
            releaseStreams();
            return null;
        }
    }

    /**
     * @return always {@link SpellCheckDictionaryType#MORFOLOGIK}
     */
    @Override
    public SpellCheckDictionaryType getDictionaryType() {
        return SpellCheckDictionaryType.MORFOLOGIK;
    }

    /**
     * Closes the streams opened by {@link #getMorfologikDictionary(String)}.
     * Safe to call when no dictionary was ever requested and safe to call
     * more than once.
     */
    @Override
    public void close() {
        releaseStreams();
    }

    /** Closes both streams independently and clears the fields. */
    private void releaseStreams() {
        closeQuietly(infoInputStream);
        infoInputStream = null;
        closeQuietly(dictInputStream);
        dictInputStream = null;
    }

    /** Closes one stream if present; a failure here must not prevent closing the other. */
    private static void closeQuietly(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Best-effort cleanup: nothing useful can be done if close fails.
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,15 @@ public final class AsturianPlugin {

private static final String ASTURIAN = "org.languagetool.language.Asturian";
private static final String SPELLCHECK_DICITONARY = "org.omegat.languages.ast"
+ ".AstruianSpellCheckerDictionary";
+ ".AsturianMorfologikDictionary";

private AsturianPlugin() {
}

public static void loadPlugins() {
LanguageManager.registerLTLanguage("ast-ES", ASTURIAN);
SpellCheckerManager.registerSpellCheckerDictionaryProvider("ast_ES",
SpellCheckDictionaryType.MORFOLOGIK, SPELLCHECK_DICITONARY);
SpellCheckerManager.registerSpellCheckerDictionaryProvider("ast", SpellCheckDictionaryType.MORFOLOGIK,
SPELLCHECK_DICITONARY);
}
Expand Down
Loading

0 comments on commit 213cfed

Please sign in to comment.