From 094622952495ac8f3c4bb7e0110e1c4f9a16e85a Mon Sep 17 00:00:00 2001 From: Thomas CORDONNIER Date: Fri, 27 Sep 2024 15:56:51 +0200 Subject: [PATCH 1/7] Implement language-dependent sort order in glossaries --- .../omegat/gui/glossary/GlossarySearcher.java | 34 +++++++++++++++---- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/src/org/omegat/gui/glossary/GlossarySearcher.java b/src/org/omegat/gui/glossary/GlossarySearcher.java index f806c22082..7efd53cc3c 100644 --- a/src/org/omegat/gui/glossary/GlossarySearcher.java +++ b/src/org/omegat/gui/glossary/GlossarySearcher.java @@ -26,6 +26,7 @@ package org.omegat.gui.glossary; +import java.text.Collator; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; @@ -256,23 +257,42 @@ static void sortGlossaryEntries(List entries) { // longer is better if one contains another c = o2.getSrcText().length() - o1.getSrcText().length(); } - // sort source text alphabetically, first ignore a case, then - // consider a case + // sort source text alphabetically. 
+ // Notion of alphabetical order is language-dependent if (c == 0) { - c = o1.getSrcText().compareToIgnoreCase(o2.getSrcText()); - } - if (c == 0) { - c = o1.getSrcText().compareTo(o2.getSrcText()); + c = compareLanguageDependent(Core.getProject().getProjectProperties().getSourceLanguage(), o1.getSrcText(), o2.getSrcText()); } if (c == 0 && Preferences.isPreferenceDefault(Preferences.GLOSSARY_SORT_BY_LENGTH, false)) { c = o2.getLocText().length() - o1.getLocText().length(); } if (c == 0) { - c = o1.getLocText().compareToIgnoreCase(o2.getLocText()); + c = compareLanguageDependent(Core.getProject().getProjectProperties().getTargetLanguage(), o1.getSrcText(), o2.getSrcText()); } return c; }); } + + private static int compareLanguageDependent(Language lang, String s1, String s2) { + Collator langCollator = Collator.getInstance(lang.getLocale()); + // Use primary criteria - for most languages written with latin alphabet, PRIMARY means case-insensitive + // (see https://docs.oracle.com/javase/8/docs/api/java/text/Collator.html#PRIMARY) + langCollator.setStrength(Collator.PRIMARY); + int c = langCollator.compare(s1, s2); + if (c != 0) { + return c; + } + // Use secondary criteria - for most languages written with latin alphabet, SECONDARY means ignore accents + // (see https://docs.oracle.com/javase/8/docs/api/java/text/Collator.html#PRIMARY) + langCollator.setStrength(Collator.SECONDARY); + c = langCollator.compare(s1, s2); + if (c != 0) { + return c; + } + // Use tertiary criteria - language-dependent + // (see https://docs.oracle.com/javase/8/docs/api/java/text/Collator.html#TERTIARY) + langCollator.setStrength(Collator.TERTIARY); + return langCollator.compare(s1, s2); + } private static List filterGlossary(List result, boolean mergeAltDefinitions) { From cfa3e52a6740196c4e54dc491e16e638395bcc45 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Thu, 3 Oct 2024 08:54:40 +0900 Subject: [PATCH 2/7] refactor: glossary sorter not static - Add sort test cases with locales 
- GlossarySearcher ctor accept src and target language - use startsWith instead of contains for src length sort detection - fix target term length sort argument. Signed-off-by: Hiroshi Miura --- .../omegat/gui/glossary/GlossarySearcher.java | 64 ++++++++++++----- .../gui/glossary/FindGlossaryThreadTest.java | 69 +++++++++++++++---- 2 files changed, 101 insertions(+), 32 deletions(-) diff --git a/src/org/omegat/gui/glossary/GlossarySearcher.java b/src/org/omegat/gui/glossary/GlossarySearcher.java index 7efd53cc3c..8c4329baad 100644 --- a/src/org/omegat/gui/glossary/GlossarySearcher.java +++ b/src/org/omegat/gui/glossary/GlossarySearcher.java @@ -55,12 +55,19 @@ */ public class GlossarySearcher { private final ITokenizer tok; - private final Language lang; + private final Language srcLang; + private final Language targetLang; private final boolean mergeAltDefinitions; - public GlossarySearcher(ITokenizer tok, Language lang, boolean mergeAltDefinitions) { + public GlossarySearcher(ITokenizer tok, Language srcLang, boolean mergeAltDefinitions) { + this(tok, srcLang, Core.getProject().getProjectProperties().getTargetLanguage(), mergeAltDefinitions); + } + + public GlossarySearcher(ITokenizer tok, Language srcLang, Language targetLang, + boolean mergeAltDefinitions) { this.tok = tok; - this.lang = lang; + this.srcLang = srcLang; + this.targetLang = targetLang; this.mergeAltDefinitions = mergeAltDefinitions; } @@ -89,6 +96,9 @@ public List searchSourceMatches(SourceTextEntry ste, List getCjkMatchingTokens(String fullText, String term) private Token[] tokenize(String str) { // Make comparison case-insensitive - String strLower = str.toLowerCase(lang.getLocale()); + String strLower = str.toLowerCase(srcLang.getLocale()); if (Preferences.isPreferenceDefault(Preferences.GLOSSARY_STEMMING, Preferences.GLOSSARY_STEMMING_DEFAULT)) { return tok.tokenizeWords(strLower, StemmingMode.GLOSSARY); @@ -246,52 +256,68 @@ private static boolean tokenInTag(Token tok, List tags) { return 
false; } - static void sortGlossaryEntries(List entries) { + /** + * Sorts glossary entries in place using source- and target-language collators; package-private so tests can call it. + * + * @param entries the glossary entries to sort (the list itself is reordered) + */ + void sortGlossaryEntries(List entries) { + final Collator srcLangCollator = Collator.getInstance(srcLang.getLocale()); + final Collator targetLangCollator = Collator.getInstance(targetLang.getLocale()); + sortGlossaryEntries(srcLangCollator, targetLangCollator, entries); + } + + private void sortGlossaryEntries(Collator srcLangCollator, Collator targetLangCollator, + List entries) { entries.sort((o1, o2) -> { int p1 = o1.getPriority() ? 1 : 2; int p2 = o2.getPriority() ? 1 : 2; int c = p1 - p2; if (c == 0 && Preferences.isPreferenceDefault(Preferences.GLOSSARY_SORT_BY_SRC_LENGTH, true) - && (o2.getSrcText().contains(o1.getSrcText()) - || o1.getSrcText().contains(o2.getSrcText()))) { + && (o2.getSrcText().startsWith(o1.getSrcText()) + || o1.getSrcText().startsWith(o2.getSrcText()))) { // longer is better if one contains another c = o2.getSrcText().length() - o1.getSrcText().length(); } // sort source text alphabetically. 
- // Notion of alphabetical order is language-dependent + // Notion of alphabetical order is language-dependent if (c == 0) { - c = compareLanguageDependent(Core.getProject().getProjectProperties().getSourceLanguage(), o1.getSrcText(), o2.getSrcText()); + c = compareLanguageDependent(srcLangCollator, o1.getSrcText(), o2.getSrcText()); } if (c == 0 && Preferences.isPreferenceDefault(Preferences.GLOSSARY_SORT_BY_LENGTH, false)) { c = o2.getLocText().length() - o1.getLocText().length(); } if (c == 0) { - c = compareLanguageDependent(Core.getProject().getProjectProperties().getTargetLanguage(), o1.getSrcText(), o2.getSrcText()); + c = compareLanguageDependent(targetLangCollator, o1.getLocText(), o2.getLocText()); } return c; }); } - - private static int compareLanguageDependent(Language lang, String s1, String s2) { - Collator langCollator = Collator.getInstance(lang.getLocale()); - // Use primary criteria - for most languages written with latin alphabet, PRIMARY means case-insensitive - // (see https://docs.oracle.com/javase/8/docs/api/java/text/Collator.html#PRIMARY) + + private int compareLanguageDependent(Collator langCollator, String s1, String s2) { + // Use primary criteria - for most languages written with latin + // alphabet, PRIMARY compares base letters only (ignores case and accents) + // (see + // https://docs.oracle.com/javase/8/docs/api/java/text/Collator.html#PRIMARY) langCollator.setStrength(Collator.PRIMARY); int c = langCollator.compare(s1, s2); if (c != 0) { return c; } - // Use secondary criteria - for most languages written with latin alphabet, SECONDARY means ignore accents - // (see https://docs.oracle.com/javase/8/docs/api/java/text/Collator.html#PRIMARY) + // Use secondary criteria - for most languages written with latin + // alphabet, SECONDARY also distinguishes accents (case is still ignored) + // (see + // https://docs.oracle.com/javase/8/docs/api/java/text/Collator.html#SECONDARY) langCollator.setStrength(Collator.SECONDARY); c = langCollator.compare(s1, s2); if (c != 0) { return c; } // Use tertiary 
criteria - language-dependent - // (see https://docs.oracle.com/javase/8/docs/api/java/text/Collator.html#TERTIARY) + // (see + // https://docs.oracle.com/javase/8/docs/api/java/text/Collator.html#TERTIARY) langCollator.setStrength(Collator.TERTIARY); - return langCollator.compare(s1, s2); + return langCollator.compare(s1, s2); } private static List filterGlossary(List result, diff --git a/test/src/org/omegat/gui/glossary/FindGlossaryThreadTest.java b/test/src/org/omegat/gui/glossary/FindGlossaryThreadTest.java index 7947afc022..1b5e7cd695 100644 --- a/test/src/org/omegat/gui/glossary/FindGlossaryThreadTest.java +++ b/test/src/org/omegat/gui/glossary/FindGlossaryThreadTest.java @@ -31,24 +31,30 @@ import java.util.List; import org.junit.Test; + import org.omegat.core.TestCore; +import org.omegat.tokenizer.DefaultTokenizer; +import org.omegat.tokenizer.ITokenizer; +import org.omegat.util.Language; import org.omegat.util.Preferences; public class FindGlossaryThreadTest extends TestCore { + @Test - public void testEntriesSort() { + public void testEntriesSortEn() { + Language srcLang = new Language("en_US"); + Language targetLang = new Language("en_GB"); + ITokenizer tok = new DefaultTokenizer(); + GlossarySearcher searcher = new GlossarySearcher(tok, srcLang, targetLang, false); List entries = new ArrayList<>(); entries.add(new GlossaryEntry("dog", "doggy", "cdog", false, null)); entries.add(new GlossaryEntry("cat", "catty", "ccat", false, null)); entries.add(new GlossaryEntry("cat", "mikeneko", "ccat", false, null)); entries.add(new GlossaryEntry("zzz", "zzz", "czzz", true, null)); entries.add(new GlossaryEntry("horse", "catty", "chorse", false, null)); - entries.add(new GlossaryEntry("向上", "enhance", "", false, null)); - entries.add(new GlossaryEntry("向", "direct", "", false, null)); - entries.add(new GlossaryEntry("上", "up", "", false, null)); Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_LENGTH, true); 
Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_SRC_LENGTH, false); - GlossarySearcher.sortGlossaryEntries(entries); + searcher.sortGlossaryEntries(entries); assertEquals("zzz", entries.get(0).getSrcText()); assertEquals("cat", entries.get(1).getSrcText()); assertEquals("mikeneko", entries.get(1).getLocText()); @@ -57,7 +63,7 @@ public void testEntriesSort() { assertEquals("dog", entries.get(3).getSrcText()); assertEquals("horse", entries.get(4).getSrcText()); Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_LENGTH, false); - GlossarySearcher.sortGlossaryEntries(entries); + searcher.sortGlossaryEntries(entries); assertEquals("zzz", entries.get(0).getSrcText()); assertEquals("cat", entries.get(1).getSrcText()); assertEquals("catty", entries.get(1).getLocText()); @@ -65,11 +71,8 @@ public void testEntriesSort() { assertEquals("mikeneko", entries.get(2).getLocText()); assertEquals("dog", entries.get(3).getSrcText()); assertEquals("horse", entries.get(4).getSrcText()); - assertEquals("up", entries.get(5).getLocText()); - assertEquals("direct", entries.get(6).getLocText()); - assertEquals("enhance", entries.get(7).getLocText()); Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_SRC_LENGTH, true); - GlossarySearcher.sortGlossaryEntries(entries); + searcher.sortGlossaryEntries(entries); assertEquals("zzz", entries.get(0).getSrcText()); assertEquals("cat", entries.get(1).getSrcText()); assertEquals("catty", entries.get(1).getLocText()); @@ -77,8 +80,48 @@ public void testEntriesSort() { assertEquals("mikeneko", entries.get(2).getLocText()); assertEquals("dog", entries.get(3).getSrcText()); assertEquals("horse", entries.get(4).getSrcText()); - assertEquals("enhance", entries.get(5).getLocText()); - assertEquals("up", entries.get(6).getLocText()); - assertEquals("direct", entries.get(7).getLocText()); + } + + @Test + public void testEntriesSortJA() { + Language lang = new Language("ja_JP"); + Language targetLang = new Language("en_GB"); + ITokenizer tok = 
new DefaultTokenizer(); + GlossarySearcher searcher = new GlossarySearcher(tok, lang, targetLang, false); + List entries = new ArrayList<>(); + entries.add(new GlossaryEntry("向上", "enhance", "", false, null)); + entries.add(new GlossaryEntry("向", "direct", "", false, null)); + entries.add(new GlossaryEntry("上", "on", "", false, null)); + entries.add(new GlossaryEntry("上", "up to", "", false, null)); + entries.add(new GlossaryEntry("トヨタ自動車", "toyota motors", "", false, null)); + entries.add(new GlossaryEntry("トヨタ", "toyota", "", false, null)); + entries.add(new GlossaryEntry("さくら", "cherry blossom", "", false, null)); + Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_LENGTH, true); + Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_SRC_LENGTH, false); + searcher.sortGlossaryEntries(entries); + assertEquals("さくら", entries.get(0).getSrcText()); + assertEquals("トヨタ", entries.get(1).getSrcText()); + assertEquals("トヨタ自動車", entries.get(2).getSrcText()); + assertEquals("向", entries.get(3).getSrcText()); + assertEquals("向上", entries.get(4).getSrcText()); + assertEquals("up to", entries.get(5).getLocText()); + assertEquals("on", entries.get(6).getLocText()); + Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_LENGTH, false); + searcher.sortGlossaryEntries(entries); + assertEquals("cherry blossom", entries.get(0).getLocText()); + assertEquals("toyota", entries.get(1).getLocText()); + assertEquals("toyota motors", entries.get(2).getLocText()); + assertEquals("direct", entries.get(3).getLocText()); + assertEquals("enhance", entries.get(4).getLocText()); + assertEquals("on", entries.get(5).getLocText()); + assertEquals("up to", entries.get(6).getLocText()); + Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_SRC_LENGTH, true); + searcher.sortGlossaryEntries(entries); + assertEquals("toyota motors", entries.get(1).getLocText()); + assertEquals("toyota", entries.get(2).getLocText()); + assertEquals("enhance", entries.get(3).getLocText()); + 
assertEquals("direct", entries.get(4).getLocText()); + assertEquals("on", entries.get(5).getLocText()); + assertEquals("up to", entries.get(6).getLocText()); } } From 111724c689adb6844a5d2037a154e282a58b484a Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Thu, 3 Oct 2024 17:59:30 +0900 Subject: [PATCH 3/7] refactor: FindGlossaryThread - Use GlossarySearcher#search - Drop duplicated search in GlossarySearcher Signed-off-by: Hiroshi Miura --- src/org/omegat/gui/glossary/FindGlossaryThread.java | 6 ++++-- src/org/omegat/gui/glossary/GlossarySearcher.java | 1 - 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/org/omegat/gui/glossary/FindGlossaryThread.java b/src/org/omegat/gui/glossary/FindGlossaryThread.java index 6329a5c298..197c6cf064 100644 --- a/src/org/omegat/gui/glossary/FindGlossaryThread.java +++ b/src/org/omegat/gui/glossary/FindGlossaryThread.java @@ -86,11 +86,13 @@ protected List search() { return Collections.emptyList(); } - Language language = Core.getProject().getProjectProperties().getSourceLanguage(); + Language srcLang = Core.getProject().getProjectProperties().getSourceLanguage(); + Language trLang = Core.getProject().getProjectProperties().getTargetLanguage(); + boolean merge = Preferences.isPreferenceDefault(Preferences.GLOSSARY_MERGE_ALTERNATE_DEFINITIONS, Preferences.GLOSSARY_MERGE_ALTERNATE_DEFINITIONS_DEFAULT); - GlossarySearcher searcher = new GlossarySearcher(tok, language, merge) { + GlossarySearcher searcher = new GlossarySearcher(tok, srcLang, trLang, merge) { @Override protected void checkCancelled() { checkEntryChanged(); diff --git a/src/org/omegat/gui/glossary/GlossarySearcher.java b/src/org/omegat/gui/glossary/GlossarySearcher.java index 8c4329baad..753501b893 100644 --- a/src/org/omegat/gui/glossary/GlossarySearcher.java +++ b/src/org/omegat/gui/glossary/GlossarySearcher.java @@ -95,7 +95,6 @@ public List searchSourceMatches(SourceTextEntry ste, List Date: Thu, 3 Oct 2024 18:02:55 +0900 Subject: [PATCH 4/7] 
refactor: FindGlossaryThreadTest - Move test cases to GlossarySearcherTest.java Signed-off-by: Hiroshi Miura --- .../gui/glossary/FindGlossaryThreadTest.java | 95 --------------- .../gui/glossary/GlossarySearcherTest.java | 113 ++++++++++++++++-- 2 files changed, 103 insertions(+), 105 deletions(-) diff --git a/test/src/org/omegat/gui/glossary/FindGlossaryThreadTest.java b/test/src/org/omegat/gui/glossary/FindGlossaryThreadTest.java index 1b5e7cd695..acb28916ed 100644 --- a/test/src/org/omegat/gui/glossary/FindGlossaryThreadTest.java +++ b/test/src/org/omegat/gui/glossary/FindGlossaryThreadTest.java @@ -25,103 +25,8 @@ package org.omegat.gui.glossary; -import static org.junit.Assert.assertEquals; - -import java.util.ArrayList; -import java.util.List; - -import org.junit.Test; - import org.omegat.core.TestCore; -import org.omegat.tokenizer.DefaultTokenizer; -import org.omegat.tokenizer.ITokenizer; -import org.omegat.util.Language; -import org.omegat.util.Preferences; public class FindGlossaryThreadTest extends TestCore { - @Test - public void testEntriesSortEn() { - Language srcLang = new Language("en_US"); - Language targetLang = new Language("en_GB"); - ITokenizer tok = new DefaultTokenizer(); - GlossarySearcher searcher = new GlossarySearcher(tok, srcLang, targetLang, false); - List entries = new ArrayList<>(); - entries.add(new GlossaryEntry("dog", "doggy", "cdog", false, null)); - entries.add(new GlossaryEntry("cat", "catty", "ccat", false, null)); - entries.add(new GlossaryEntry("cat", "mikeneko", "ccat", false, null)); - entries.add(new GlossaryEntry("zzz", "zzz", "czzz", true, null)); - entries.add(new GlossaryEntry("horse", "catty", "chorse", false, null)); - Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_LENGTH, true); - Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_SRC_LENGTH, false); - searcher.sortGlossaryEntries(entries); - assertEquals("zzz", entries.get(0).getSrcText()); - assertEquals("cat", entries.get(1).getSrcText()); - 
assertEquals("mikeneko", entries.get(1).getLocText()); - assertEquals("cat", entries.get(2).getSrcText()); - assertEquals("catty", entries.get(2).getLocText()); - assertEquals("dog", entries.get(3).getSrcText()); - assertEquals("horse", entries.get(4).getSrcText()); - Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_LENGTH, false); - searcher.sortGlossaryEntries(entries); - assertEquals("zzz", entries.get(0).getSrcText()); - assertEquals("cat", entries.get(1).getSrcText()); - assertEquals("catty", entries.get(1).getLocText()); - assertEquals("cat", entries.get(2).getSrcText()); - assertEquals("mikeneko", entries.get(2).getLocText()); - assertEquals("dog", entries.get(3).getSrcText()); - assertEquals("horse", entries.get(4).getSrcText()); - Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_SRC_LENGTH, true); - searcher.sortGlossaryEntries(entries); - assertEquals("zzz", entries.get(0).getSrcText()); - assertEquals("cat", entries.get(1).getSrcText()); - assertEquals("catty", entries.get(1).getLocText()); - assertEquals("cat", entries.get(2).getSrcText()); - assertEquals("mikeneko", entries.get(2).getLocText()); - assertEquals("dog", entries.get(3).getSrcText()); - assertEquals("horse", entries.get(4).getSrcText()); - } - - @Test - public void testEntriesSortJA() { - Language lang = new Language("ja_JP"); - Language targetLang = new Language("en_GB"); - ITokenizer tok = new DefaultTokenizer(); - GlossarySearcher searcher = new GlossarySearcher(tok, lang, targetLang, false); - List entries = new ArrayList<>(); - entries.add(new GlossaryEntry("向上", "enhance", "", false, null)); - entries.add(new GlossaryEntry("向", "direct", "", false, null)); - entries.add(new GlossaryEntry("上", "on", "", false, null)); - entries.add(new GlossaryEntry("上", "up to", "", false, null)); - entries.add(new GlossaryEntry("トヨタ自動車", "toyota motors", "", false, null)); - entries.add(new GlossaryEntry("トヨタ", "toyota", "", false, null)); - entries.add(new GlossaryEntry("さくら", "cherry 
blossom", "", false, null)); - Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_LENGTH, true); - Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_SRC_LENGTH, false); - searcher.sortGlossaryEntries(entries); - assertEquals("さくら", entries.get(0).getSrcText()); - assertEquals("トヨタ", entries.get(1).getSrcText()); - assertEquals("トヨタ自動車", entries.get(2).getSrcText()); - assertEquals("向", entries.get(3).getSrcText()); - assertEquals("向上", entries.get(4).getSrcText()); - assertEquals("up to", entries.get(5).getLocText()); - assertEquals("on", entries.get(6).getLocText()); - Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_LENGTH, false); - searcher.sortGlossaryEntries(entries); - assertEquals("cherry blossom", entries.get(0).getLocText()); - assertEquals("toyota", entries.get(1).getLocText()); - assertEquals("toyota motors", entries.get(2).getLocText()); - assertEquals("direct", entries.get(3).getLocText()); - assertEquals("enhance", entries.get(4).getLocText()); - assertEquals("on", entries.get(5).getLocText()); - assertEquals("up to", entries.get(6).getLocText()); - Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_SRC_LENGTH, true); - searcher.sortGlossaryEntries(entries); - assertEquals("toyota motors", entries.get(1).getLocText()); - assertEquals("toyota", entries.get(2).getLocText()); - assertEquals("enhance", entries.get(3).getLocText()); - assertEquals("direct", entries.get(4).getLocText()); - assertEquals("on", entries.get(5).getLocText()); - assertEquals("up to", entries.get(6).getLocText()); - } } diff --git a/test/src/org/omegat/gui/glossary/GlossarySearcherTest.java b/test/src/org/omegat/gui/glossary/GlossarySearcherTest.java index 5924655b77..84b2b85771 100644 --- a/test/src/org/omegat/gui/glossary/GlossarySearcherTest.java +++ b/test/src/org/omegat/gui/glossary/GlossarySearcherTest.java @@ -29,6 +29,7 @@ import static org.junit.Assert.assertTrue; import java.io.File; +import java.util.ArrayList; import java.util.Arrays; import 
java.util.Collections; import java.util.List; @@ -40,11 +41,13 @@ import org.omegat.core.data.NotLoadedProject; import org.omegat.core.data.ProjectProperties; import org.omegat.core.data.SourceTextEntry; +import org.omegat.tokenizer.DefaultTokenizer; import org.omegat.tokenizer.ITokenizer; import org.omegat.tokenizer.LuceneCJKTokenizer; import org.omegat.tokenizer.LuceneEnglishTokenizer; import org.omegat.tokenizer.LuceneJapaneseTokenizer; import org.omegat.util.Language; +import org.omegat.util.Preferences; /** * @author Hiroshi Miura @@ -56,10 +59,11 @@ public void testGlossarySearcherEnglish() { String translationText = "translation"; String commentText = "comment"; ITokenizer tok = new LuceneEnglishTokenizer(); - Language language = new Language("en"); - setupProject(language); + Language srcLang = new Language("en"); + Language trLang = new Language("de"); + setupProject(srcLang); List entries = Arrays.asList(new GlossaryEntry(sourceText, translationText, commentText, true, "origin")); - List result = glossarySearcherCommon(sourceText, tok, language, entries); + List result = glossarySearcherCommon(sourceText, tok, srcLang, trLang, entries); assertEquals(1, result.size()); assertEquals(sourceText, result.get(0).getSrcText()); assertEquals(commentText, result.get(0).getCommentText()); @@ -85,10 +89,11 @@ public void testGlossarySearcherKorean() { String commentText = "comment"; ITokenizer tok = new LuceneCJKTokenizer(); Language language = new Language("ko"); + Language trLang = new Language("en"); setupProject(language); List entries = Collections.singletonList(new GlossaryEntry(sourceText, translationText, commentText, true, "origin")); - List result = glossarySearcherCommon(segmentText, tok, language, entries); + List result = glossarySearcherCommon(segmentText, tok, language, trLang, entries); assertEquals(1, result.size()); } @@ -99,9 +104,10 @@ public void testGlossarySearcherJapanese1() { String commentText = "comment"; ITokenizer tok = new 
LuceneJapaneseTokenizer(); Language language = new Language("ja"); + Language trLang = new Language("en"); setupProject(language); List entries = Arrays.asList(new GlossaryEntry(sourceText, translationText, commentText, true, "origin")); - List result = glossarySearcherCommon(sourceText, tok, language, entries); + List result = glossarySearcherCommon(sourceText, tok, language, trLang, entries); assertEquals(1, result.size()); assertEquals(sourceText, result.get(0).getSrcText()); assertEquals(commentText, result.get(0).getCommentText()); @@ -112,16 +118,18 @@ public void testGlossarySearcherJapanese1() { public void testGlossarySearcherJapanese2() { String sourceText = "\u5834\u6240"; Language language = new Language("ja"); + Language trLang = new Language("en"); setupProject(language); ITokenizer tok = new LuceneJapaneseTokenizer(); List entries = Arrays.asList(new GlossaryEntry("\u5857\u5E03", "wrong", "", true, "origin")); - List result = glossarySearcherCommon(sourceText, tok, language, entries); + List result = glossarySearcherCommon(sourceText, tok, language, trLang, entries); assertEquals(0, result.size()); } @Test public void testGlossarySearcherJapaneseLongText() { Language language = new Language("ja"); + Language trLang = new Language("en"); setupProject(language); ITokenizer tok = new LuceneJapaneseTokenizer(); List entries = Arrays.asList( @@ -131,10 +139,95 @@ public void testGlossarySearcherJapaneseLongText() { new GlossaryEntry("\u5730\u57DF\u5316", "localization", "", true, "") ); String sourceText = 
"OmegaT\u306E\u30E6\u30FC\u30B6\u30FC\u30A4\u30F3\u30BF\u30FC\u30D5\u30A7\u30FC\u30B9\u3084\u30D8\u30EB\u30D7\u30C6\u30AD\u30B9\u30C8\u3092\u3001\u3055\u307E\u3056\u307E\u306A\u8A00\u8A9E\u3078\u7FFB\u8A33\u3057\u3066\u304F\u3060\u3055\u3063\u305F\u65B9\u3005\u306B\u611F\u8B1D\u3057\u307E\u3059\u3002\u305D\u3057\u3066\u3001\u7FFB\u8A33\u304C\u306A\u3055\u308C\u3066\u3044\u306A\u3044\u8A00\u8A9E\u304C\u307E\u3060\u6570\u5343\u6B8B\u3063\u3066\u3044\u307E\u3059\uFF01OmegaT \u306E\u591A\u8A00\u8A9E\u3078\u306E\u5730\u57DF\u5316\u306F\u3001\u6301\u7D9A\u7684\u306A\u4F5C\u696D\u3067\u3082\u3042\u308A\u307E\u3059\u3002\u306A\u305C\u306A\u3089\u3001\u65B0\u3057\u3044\u6A5F\u80FD\u304C\u7D76\u3048\u305A\u8FFD\u52A0\u3055\u308C\u3066\u3044\u308B\u304B\u3089\u3067\u3059\u3002OmegaT\u306E\u30ED\u30FC\u30AB\u30E9\u30A4\u30BA/\u7FFB\u8A33\u306B\u95A2\u3059\u308B\u8A73\u7D30\u306B\u3064\u3044\u3066\u306F\u3001OmegaT\u30ED\u30FC\u30AB\u30EA\u30BC\u30FC\u30B7\u30E7\u30F3\u30B3\u30FC\u30C7\u30A3\u30CD\u30FC\u30BF\u30FC\u306B\u304A\u554F\u3044\u5408\u308F\u305B\u304F\u3060\u3055\u3044\u3002"; - List result = glossarySearcherCommon(sourceText, tok, language, entries); + List result = glossarySearcherCommon(sourceText, tok, language, trLang, entries); assertEquals(3, result.size()); } + @Test + public void testEntriesSortEn() { + Language srcLang = new Language("en_US"); + Language targetLang = new Language("en_GB"); + ITokenizer tok = new DefaultTokenizer(); + GlossarySearcher searcher = new GlossarySearcher(tok, srcLang, targetLang, false); + List entries = new ArrayList<>(); + entries.add(new GlossaryEntry("dog", "doggy", "cdog", false, null)); + entries.add(new GlossaryEntry("cat", "catty", "ccat", false, null)); + entries.add(new GlossaryEntry("cat", "mikeneko", "ccat", false, null)); + entries.add(new GlossaryEntry("zzz", "zzz", "czzz", true, null)); + entries.add(new GlossaryEntry("horse", "catty", "chorse", false, null)); + 
Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_LENGTH, true); + Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_SRC_LENGTH, false); + searcher.sortGlossaryEntries(entries); + assertEquals("zzz", entries.get(0).getSrcText()); + assertEquals("cat", entries.get(1).getSrcText()); + assertEquals("mikeneko", entries.get(1).getLocText()); + assertEquals("cat", entries.get(2).getSrcText()); + assertEquals("catty", entries.get(2).getLocText()); + assertEquals("dog", entries.get(3).getSrcText()); + assertEquals("horse", entries.get(4).getSrcText()); + Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_LENGTH, false); + searcher.sortGlossaryEntries(entries); + assertEquals("zzz", entries.get(0).getSrcText()); + assertEquals("cat", entries.get(1).getSrcText()); + assertEquals("catty", entries.get(1).getLocText()); + assertEquals("cat", entries.get(2).getSrcText()); + assertEquals("mikeneko", entries.get(2).getLocText()); + assertEquals("dog", entries.get(3).getSrcText()); + assertEquals("horse", entries.get(4).getSrcText()); + Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_SRC_LENGTH, true); + searcher.sortGlossaryEntries(entries); + assertEquals("zzz", entries.get(0).getSrcText()); + assertEquals("cat", entries.get(1).getSrcText()); + assertEquals("catty", entries.get(1).getLocText()); + assertEquals("cat", entries.get(2).getSrcText()); + assertEquals("mikeneko", entries.get(2).getLocText()); + assertEquals("dog", entries.get(3).getSrcText()); + assertEquals("horse", entries.get(4).getSrcText()); + } + + @Test + public void testEntriesSortJA() { + Language lang = new Language("ja_JP"); + Language targetLang = new Language("en_GB"); + ITokenizer tok = new DefaultTokenizer(); + GlossarySearcher searcher = new GlossarySearcher(tok, lang, targetLang, false); + List entries = new ArrayList<>(); + entries.add(new GlossaryEntry("向上", "enhance", "", false, null)); + entries.add(new GlossaryEntry("向", "direct", "", false, null)); + entries.add(new 
GlossaryEntry("上", "on", "", false, null)); + entries.add(new GlossaryEntry("上", "up to", "", false, null)); + entries.add(new GlossaryEntry("トヨタ自動車", "toyota motors", "", false, null)); + entries.add(new GlossaryEntry("トヨタ", "toyota", "", false, null)); + entries.add(new GlossaryEntry("さくら", "cherry blossom", "", false, null)); + Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_LENGTH, true); + Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_SRC_LENGTH, false); + searcher.sortGlossaryEntries(entries); + assertEquals("さくら", entries.get(0).getSrcText()); + assertEquals("トヨタ", entries.get(1).getSrcText()); + assertEquals("トヨタ自動車", entries.get(2).getSrcText()); + assertEquals("向", entries.get(3).getSrcText()); + assertEquals("向上", entries.get(4).getSrcText()); + assertEquals("up to", entries.get(5).getLocText()); + assertEquals("on", entries.get(6).getLocText()); + Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_LENGTH, false); + searcher.sortGlossaryEntries(entries); + assertEquals("cherry blossom", entries.get(0).getLocText()); + assertEquals("toyota", entries.get(1).getLocText()); + assertEquals("toyota motors", entries.get(2).getLocText()); + assertEquals("direct", entries.get(3).getLocText()); + assertEquals("enhance", entries.get(4).getLocText()); + assertEquals("on", entries.get(5).getLocText()); + assertEquals("up to", entries.get(6).getLocText()); + Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_SRC_LENGTH, true); + searcher.sortGlossaryEntries(entries); + assertEquals("toyota motors", entries.get(1).getLocText()); + assertEquals("toyota", entries.get(2).getLocText()); + assertEquals("enhance", entries.get(3).getLocText()); + assertEquals("direct", entries.get(4).getLocText()); + assertEquals("on", entries.get(5).getLocText()); + assertEquals("up to", entries.get(6).getLocText()); + } + private void setupProject(Language language) { Core.setProject(new NotLoadedProject() { @Override @@ -162,11 +255,11 @@ public Language 
getTargetLanguage() { }); } - private List glossarySearcherCommon(String sourceText, ITokenizer tok, Language language, - List entries) { + private List glossarySearcherCommon(String sourceText, ITokenizer tok, Language srcLang, + Language trLang, List entries) { EntryKey key = new EntryKey("file", sourceText, "id", "prev", "next", "path"); SourceTextEntry ste = new SourceTextEntry(key, 1, new String[0], sourceText, Collections.emptyList()); - GlossarySearcher searcher = new GlossarySearcher(tok, language, false); + GlossarySearcher searcher = new GlossarySearcher(tok, srcLang, trLang,false); return searcher.searchSourceMatches(ste, entries); } } From 3096a6fb046b5acce320bd9ac0c2794aafbebfb0 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Thu, 3 Oct 2024 19:32:57 +0900 Subject: [PATCH 5/7] style: apply spotless Signed-off-by: Hiroshi Miura --- .../gui/glossary/GlossarySearcherTest.java | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/test/src/org/omegat/gui/glossary/GlossarySearcherTest.java b/test/src/org/omegat/gui/glossary/GlossarySearcherTest.java index 84b2b85771..a0175aa42c 100644 --- a/test/src/org/omegat/gui/glossary/GlossarySearcherTest.java +++ b/test/src/org/omegat/gui/glossary/GlossarySearcherTest.java @@ -62,7 +62,8 @@ public void testGlossarySearcherEnglish() { Language srcLang = new Language("en"); Language trLang = new Language("de"); setupProject(srcLang); - List entries = Arrays.asList(new GlossaryEntry(sourceText, translationText, commentText, true, "origin")); + List entries = Arrays + .asList(new GlossaryEntry(sourceText, translationText, commentText, true, "origin")); List result = glossarySearcherCommon(sourceText, tok, srcLang, trLang, entries); assertEquals(1, result.size()); assertEquals(sourceText, result.get(0).getSrcText()); @@ -91,8 +92,8 @@ public void testGlossarySearcherKorean() { Language language = new Language("ko"); Language trLang = new Language("en"); setupProject(language); - List 
entries = Collections.singletonList(new GlossaryEntry(sourceText, - translationText, commentText, true, "origin")); + List entries = Collections + .singletonList(new GlossaryEntry(sourceText, translationText, commentText, true, "origin")); List result = glossarySearcherCommon(segmentText, tok, language, trLang, entries); assertEquals(1, result.size()); } @@ -106,7 +107,8 @@ public void testGlossarySearcherJapanese1() { Language language = new Language("ja"); Language trLang = new Language("en"); setupProject(language); - List entries = Arrays.asList(new GlossaryEntry(sourceText, translationText, commentText, true, "origin")); + List entries = Arrays + .asList(new GlossaryEntry(sourceText, translationText, commentText, true, "origin")); List result = glossarySearcherCommon(sourceText, tok, language, trLang, entries); assertEquals(1, result.size()); assertEquals(sourceText, result.get(0).getSrcText()); @@ -121,7 +123,8 @@ public void testGlossarySearcherJapanese2() { Language trLang = new Language("en"); setupProject(language); ITokenizer tok = new LuceneJapaneseTokenizer(); - List entries = Arrays.asList(new GlossaryEntry("\u5857\u5E03", "wrong", "", true, "origin")); + List entries = Arrays + .asList(new GlossaryEntry("\u5857\u5E03", "wrong", "", true, "origin")); List result = glossarySearcherCommon(sourceText, tok, language, trLang, entries); assertEquals(0, result.size()); } @@ -136,8 +139,7 @@ public void testGlossarySearcherJapaneseLongText() { new GlossaryEntry("\u307E\u3050\u308D", "tuna", "", true, ""), new GlossaryEntry("\u7FFB\u8A33", "translation", "", true, ""), new GlossaryEntry("\u591A\u8A00\u8A9E", "multi-languages", "", true, ""), - new GlossaryEntry("\u5730\u57DF\u5316", "localization", "", true, "") - ); + new GlossaryEntry("\u5730\u57DF\u5316", "localization", "", true, "")); String sourceText = 
"OmegaT\u306E\u30E6\u30FC\u30B6\u30FC\u30A4\u30F3\u30BF\u30FC\u30D5\u30A7\u30FC\u30B9\u3084\u30D8\u30EB\u30D7\u30C6\u30AD\u30B9\u30C8\u3092\u3001\u3055\u307E\u3056\u307E\u306A\u8A00\u8A9E\u3078\u7FFB\u8A33\u3057\u3066\u304F\u3060\u3055\u3063\u305F\u65B9\u3005\u306B\u611F\u8B1D\u3057\u307E\u3059\u3002\u305D\u3057\u3066\u3001\u7FFB\u8A33\u304C\u306A\u3055\u308C\u3066\u3044\u306A\u3044\u8A00\u8A9E\u304C\u307E\u3060\u6570\u5343\u6B8B\u3063\u3066\u3044\u307E\u3059\uFF01OmegaT \u306E\u591A\u8A00\u8A9E\u3078\u306E\u5730\u57DF\u5316\u306F\u3001\u6301\u7D9A\u7684\u306A\u4F5C\u696D\u3067\u3082\u3042\u308A\u307E\u3059\u3002\u306A\u305C\u306A\u3089\u3001\u65B0\u3057\u3044\u6A5F\u80FD\u304C\u7D76\u3048\u305A\u8FFD\u52A0\u3055\u308C\u3066\u3044\u308B\u304B\u3089\u3067\u3059\u3002OmegaT\u306E\u30ED\u30FC\u30AB\u30E9\u30A4\u30BA/\u7FFB\u8A33\u306B\u95A2\u3059\u308B\u8A73\u7D30\u306B\u3064\u3044\u3066\u306F\u3001OmegaT\u30ED\u30FC\u30AB\u30EA\u30BC\u30FC\u30B7\u30E7\u30F3\u30B3\u30FC\u30C7\u30A3\u30CD\u30FC\u30BF\u30FC\u306B\u304A\u554F\u3044\u5408\u308F\u305B\u304F\u3060\u3055\u3044\u3002"; List result = glossarySearcherCommon(sourceText, tok, language, trLang, entries); assertEquals(3, result.size()); @@ -234,6 +236,7 @@ private void setupProject(Language language) { public boolean isProjectLoaded() { return true; } + @Override public ProjectProperties getProjectProperties() { try { @@ -242,6 +245,7 @@ public ProjectProperties getProjectProperties() { public Language getSourceLanguage() { return language; } + @Override public Language getTargetLanguage() { return new Language("pl"); @@ -256,10 +260,10 @@ public Language getTargetLanguage() { } private List glossarySearcherCommon(String sourceText, ITokenizer tok, Language srcLang, - Language trLang, List entries) { + Language trLang, List entries) { EntryKey key = new EntryKey("file", sourceText, "id", "prev", "next", "path"); SourceTextEntry ste = new SourceTextEntry(key, 1, new String[0], sourceText, Collections.emptyList()); - 
GlossarySearcher searcher = new GlossarySearcher(tok, srcLang, trLang,false); + GlossarySearcher searcher = new GlossarySearcher(tok, srcLang, trLang, false); return searcher.searchSourceMatches(ste, entries); } } From ccb8a88e13fb1cac3a2d450debc34aa618cd8090 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Fri, 25 Oct 2024 13:06:23 +0900 Subject: [PATCH 6/7] fix: the default of src term length sort of glossary and update preference explanation --- src/org/omegat/Bundle.properties | 2 +- src/org/omegat/gui/glossary/GlossarySearcher.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/org/omegat/Bundle.properties b/src/org/omegat/Bundle.properties index e4fffd5370..19be2f82b4 100644 --- a/src/org/omegat/Bundle.properties +++ b/src/org/omegat/Bundle.properties @@ -2768,7 +2768,7 @@ PREFS_GLOSSARY_STEMMING=Use &stemming PREFS_GLOSSARY_REPLACE_ON_INSERT=Replace matches when inserting source text PREFS_GLOSSARY_REQUIRE_SIMILAR_CASE=&Ignore matches with very different case (e.g. FOO vs foo) PREFS_GLOSSARY_MERGE_ALTERNATE_DEFINITIONS=Merge alternate definitions of the same term -PREFS_GLOSSARY_SORT_BY_SRC_LENGTH=Sort by Source Term Length +PREFS_GLOSSARY_SORT_BY_SRC_LENGTH=Sort by Source Term Length when a term starts with another PREFS_GLOSSARY_SORT_BY_LENGTH=Sort by Target Term Length PREFS_GLOSSARY_LAYOUT=&Layout: diff --git a/src/org/omegat/gui/glossary/GlossarySearcher.java b/src/org/omegat/gui/glossary/GlossarySearcher.java index 753501b893..c2fffd41ec 100644 --- a/src/org/omegat/gui/glossary/GlossarySearcher.java +++ b/src/org/omegat/gui/glossary/GlossarySearcher.java @@ -272,10 +272,10 @@ private void sortGlossaryEntries(Collator srcLangCollator, Collator targetLangCo int p1 = o1.getPriority() ? 1 : 2; int p2 = o2.getPriority() ? 
1 : 2; int c = p1 - p2; - if (c == 0 && Preferences.isPreferenceDefault(Preferences.GLOSSARY_SORT_BY_SRC_LENGTH, true) + if (c == 0 && Preferences.isPreferenceDefault(Preferences.GLOSSARY_SORT_BY_SRC_LENGTH, false) && (o2.getSrcText().startsWith(o1.getSrcText()) || o1.getSrcText().startsWith(o2.getSrcText()))) { - // longer is better if one contains another + // longer is better if one source term starts with another c = o2.getSrcText().length() - o1.getSrcText().length(); } // sort source text alphabetically. From 03644346115d0f8c5042829f99e56cd7ab536626 Mon Sep 17 00:00:00 2001 From: kazephil Date: Sun, 27 Oct 2024 11:36:58 +0900 Subject: [PATCH 7/7] Alternate wording for new key I think this is a clearer description. --- src/org/omegat/Bundle.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/org/omegat/Bundle.properties b/src/org/omegat/Bundle.properties index 19be2f82b4..12a9b17808 100644 --- a/src/org/omegat/Bundle.properties +++ b/src/org/omegat/Bundle.properties @@ -2768,7 +2768,7 @@ PREFS_GLOSSARY_STEMMING=Use &stemming PREFS_GLOSSARY_REPLACE_ON_INSERT=Replace matches when inserting source text PREFS_GLOSSARY_REQUIRE_SIMILAR_CASE=&Ignore matches with very different case (e.g. FOO vs foo) PREFS_GLOSSARY_MERGE_ALTERNATE_DEFINITIONS=Merge alternate definitions of the same term -PREFS_GLOSSARY_SORT_BY_SRC_LENGTH=Sort by Source Term Length when a term starts with another +PREFS_GLOSSARY_SORT_BY_SRC_LENGTH=Sort by Source Term Length when two or more terms start with the same characters PREFS_GLOSSARY_SORT_BY_LENGTH=Sort by Target Term Length PREFS_GLOSSARY_LAYOUT=&Layout: