From fc616872554dd3ff3c27ae79469f98166f34baeb Mon Sep 17 00:00:00 2001
From: Hiroshi Miura
Date: Sat, 23 Nov 2024 11:16:33 +0900
Subject: [PATCH] feat: add abstract impl classes for spell dictionary

Signed-off-by: Hiroshi Miura
---
 .../AbstractHunspellDictionary.java   | 105 ++++++++++++++++++
 .../AbstractMorfologikDictionary.java |  74 ++++++++++++
 2 files changed, 179 insertions(+)
 create mode 100644 src/org/omegat/core/spellchecker/AbstractHunspellDictionary.java
 create mode 100644 src/org/omegat/core/spellchecker/AbstractMorfologikDictionary.java

diff --git a/src/org/omegat/core/spellchecker/AbstractHunspellDictionary.java b/src/org/omegat/core/spellchecker/AbstractHunspellDictionary.java
new file mode 100644
index 0000000000..e26e73343f
--- /dev/null
+++ b/src/org/omegat/core/spellchecker/AbstractHunspellDictionary.java
@@ -0,0 +1,105 @@
+/**************************************************************************
+ OmegaT - Computer Assisted Translation (CAT) tool
+          with fuzzy matching, translation memory, keyword search,
+          glossaries, and translation leveraging into updated projects.
+
+ Copyright (C) 2024 Hiroshi Miura
+               Home page: https://www.omegat.org/
+               Support center: https://omegat.org/support
+
+ This file is part of OmegaT.
+
+ OmegaT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ OmegaT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ **************************************************************************/
+package org.omegat.core.spellchecker;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Path;
+import java.text.ParseException;
+import java.util.Arrays;
+import java.util.Collections;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.lucene.analysis.hunspell.Dictionary;
+
+/**
+ * Base class for spell checker dictionaries backed by a Hunspell
+ * {@code .dic}/{@code .aff} resource pair.
+ * <p>
+ * Subclasses list the dictionary base names they provide and open the stream
+ * for each resource. This class picks the entry matching a language code and
+ * either loads it or copies it into a directory.
+ */
+public abstract class AbstractHunspellDictionary implements ISpellCheckerDictionary, AutoCloseable {
+
+    private static final String DICT_EXT = ".dic";
+    private static final String AFFIX_EXT = ".aff";
+
+    /**
+     * Returns the base names (resource names without extension) of the
+     * dictionaries this implementation provides.
+     *
+     * @return dictionary base names
+     */
+    protected abstract String[] getDictionaries();
+
+    /**
+     * Finds the first provided dictionary whose name is a prefix of the given
+     * language code.
+     *
+     * @param language
+     *            language code to look up; may be {@code null}
+     * @return the matching dictionary base name, or {@code null} when
+     *         {@code language} is {@code null} or nothing matches
+     */
+    protected String getDictionary(String language) {
+        if (language == null) {
+            return null;
+        }
+        return Arrays.stream(getDictionaries()).filter(language::startsWith).findFirst().orElse(null);
+    }
+
+    /**
+     * Opens the named resource.
+     *
+     * @param resource
+     *            dictionary base name followed by a file extension
+     * @return a stream over the resource; this class treats a {@code null}
+     *         result as "resource not available"
+     */
+    protected abstract InputStream getResourceAsStream(String resource);
+
+    /**
+     * Loads the Hunspell dictionary that matches the given language.
+     * <p>
+     * Both resource streams are closed before this method returns, so nothing
+     * is held open between calls.
+     *
+     * @param language
+     *            language code to look up
+     * @return the loaded dictionary, or {@code null} when no dictionary
+     *         matches, a resource is missing, or the data cannot be read or
+     *         parsed
+     */
+    @Override
+    public Dictionary getHunspellDictionary(String language) {
+        String target = getDictionary(language);
+        if (target == null) {
+            return null;
+        }
+        // The Dictionary constructor reads both streams while it runs and
+        // leaves closing them to the caller, so they are released right here.
+        try (InputStream affixStream = getResourceAsStream(target + AFFIX_EXT);
+                InputStream dicStream = getResourceAsStream(target + DICT_EXT)) {
+            if (affixStream == null || dicStream == null) {
+                return null;
+            }
+            return new Dictionary(affixStream, Collections.singletonList(dicStream), true);
+        } catch (IOException | ParseException ignored) {
+            // Best effort: unreadable or malformed data is reported to the
+            // caller as "no dictionary available".
+            return null;
+        }
+    }
+
+    /**
+     * Copies the {@code .dic} and {@code .aff} resources of the dictionary
+     * matching the given language into a directory.
+     * <p>
+     * Both resources are opened before anything is written, so a missing
+     * resource does not leave an empty file behind.
+     *
+     * @param dictionaryDir
+     *            directory that receives the two files
+     * @param language
+     *            language code to look up
+     * @return path of the written {@code .dic} file, or {@code null} when no
+     *         dictionary matches, a resource is missing, or copying fails
+     */
+    @Override
+    public Path installHunspellDictionary(Path dictionaryDir, String language) {
+        String target = getDictionary(language);
+        if (target == null) {
+            return null;
+        }
+        try (InputStream dicStream = getResourceAsStream(target + DICT_EXT);
+                InputStream affStream = getResourceAsStream(target + AFFIX_EXT)) {
+            if (dicStream == null || affStream == null) {
+                return null;
+            }
+            Path dictionaryPath = dictionaryDir.resolve(target + DICT_EXT);
+            copyToFile(dicStream, dictionaryPath.toFile());
+            copyToFile(affStream, dictionaryDir.resolve(target + AFFIX_EXT).toFile());
+            return dictionaryPath;
+        } catch (IOException | RuntimeException ignored) {
+            // Best effort, as before: any failure (I/O error, invalid
+            // directory argument, ...) means "nothing installed".
+            return null;
+        }
+    }
+
+    /** Writes the whole stream to the given file, replacing existing content. */
+    private static void copyToFile(InputStream source, File destination) throws IOException {
+        try (FileOutputStream out = new FileOutputStream(destination)) {
+            IOUtils.copy(source, out);
+        }
+    }
+
+    @Override
+    public SpellCheckDictionaryType getDictionaryType() {
+        return SpellCheckDictionaryType.HUNSPELL;
+    }
+
+    /**
+     * Does nothing: every stream opened by this class is closed by the method
+     * that opened it, so there is no state to release.
+     */
+    @Override
+    public void close() {
+        // intentionally empty
+    }
+}
diff --git a/src/org/omegat/core/spellchecker/AbstractMorfologikDictionary.java b/src/org/omegat/core/spellchecker/AbstractMorfologikDictionary.java
new file mode 100644
index 0000000000..fde1f476f1
--- /dev/null
+++ b/src/org/omegat/core/spellchecker/AbstractMorfologikDictionary.java
@@ -0,0 +1,74 @@
+/**************************************************************************
+ OmegaT - Computer Assisted Translation (CAT) tool
+          with fuzzy matching, translation memory, keyword search,
+          glossaries, and translation leveraging into updated projects.
+
+ Copyright (C) 2024 Hiroshi Miura
+               Home page: https://www.omegat.org/
+               Support center: https://omegat.org/support
+
+ This file is part of OmegaT.
+
+ OmegaT is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ OmegaT is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ **************************************************************************/
+package org.omegat.core.spellchecker;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+
+import morfologik.stemming.Dictionary;
+
+/**
+ * Base class for spell checker dictionaries backed by a Morfologik
+ * {@code .dict}/{@code .info} resource pair.
+ * <p>
+ * Subclasses list the dictionary base names they provide and open the stream
+ * for each resource. This class picks the entry matching a language code and
+ * loads it.
+ */
+public abstract class AbstractMorfologikDictionary implements ISpellCheckerDictionary, AutoCloseable {
+    private static final String DICT_EXT = ".dict";
+    private static final String META_EXT = ".info";
+
+    /**
+     * Returns the base names (resource names without extension) of the
+     * dictionaries this implementation provides.
+     *
+     * @return dictionary base names
+     */
+    protected abstract String[] getDictionaries();
+
+    /**
+     * Finds the first provided dictionary whose name is a prefix of the given
+     * language code.
+     *
+     * @param language
+     *            language code to look up; may be {@code null}
+     * @return the matching dictionary base name, or {@code null} when
+     *         {@code language} is {@code null} or nothing matches
+     */
+    protected String getDictionary(String language) {
+        if (language == null) {
+            return null;
+        }
+        return Arrays.stream(getDictionaries()).filter(language::startsWith).findFirst().orElse(null);
+    }
+
+    /**
+     * Opens the named resource.
+     *
+     * @param resource
+     *            dictionary base name followed by a file extension
+     * @return a stream over the resource; this class treats a {@code null}
+     *         result as "resource not available"
+     */
+    protected abstract InputStream getResourceAsStream(String resource);
+
+    /**
+     * Loads the Morfologik dictionary that matches the given language.
+     * <p>
+     * Both resource streams are closed before this method returns, so nothing
+     * is held open between calls.
+     *
+     * @param language
+     *            language code to look up
+     * @return the loaded dictionary, or {@code null} when no dictionary
+     *         matches, a resource is missing, or the data cannot be read
+     */
+    @Override
+    public morfologik.stemming.Dictionary getMorfologikDictionary(String language) {
+        String target = getDictionary(language);
+        if (target == null) {
+            return null;
+        }
+        // Dictionary.read does not close the streams it is given, so they are
+        // released here once loading has finished.
+        try (InputStream infoStream = getResourceAsStream(target + META_EXT);
+                InputStream dictStream = getResourceAsStream(target + DICT_EXT)) {
+            if (infoStream == null || dictStream == null) {
+                return null;
+            }
+            return Dictionary.read(dictStream, infoStream);
+        } catch (IOException ignored) {
+            // Best effort: unreadable data is reported to the caller as
+            // "no dictionary available".
+            return null;
+        }
+    }
+
+    @Override
+    public SpellCheckDictionaryType getDictionaryType() {
+        return SpellCheckDictionaryType.MORFOLOGIK;
+    }
+
+    /**
+     * Does nothing: every stream opened by this class is closed by the method
+     * that opened it, so there is no state to release.
+     */
+    @Override
+    public void close() {
+        // intentionally empty
+    }
+}