Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Glossary sort order: Change src length sort condition and use Language Dependent sorter #1154

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/org/omegat/Bundle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2768,7 +2768,7 @@ PREFS_GLOSSARY_STEMMING=Use &stemming
PREFS_GLOSSARY_REPLACE_ON_INSERT=Replace matches when inserting source text
PREFS_GLOSSARY_REQUIRE_SIMILAR_CASE=&Ignore matches with very different case (e.g. FOO vs foo)
PREFS_GLOSSARY_MERGE_ALTERNATE_DEFINITIONS=Merge alternate definitions of the same term
PREFS_GLOSSARY_SORT_BY_SRC_LENGTH=Sort by Source Term Length
PREFS_GLOSSARY_SORT_BY_SRC_LENGTH=Sort by Source Term Length when two or more terms start with the same characters
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to reword the description a little.

PREFS_GLOSSARY_SORT_BY_LENGTH=Sort by Target Term Length

PREFS_GLOSSARY_LAYOUT=&Layout:
Expand Down
6 changes: 4 additions & 2 deletions src/org/omegat/gui/glossary/FindGlossaryThread.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,13 @@ protected List<GlossaryEntry> search() {
return Collections.emptyList();
}

Language language = Core.getProject().getProjectProperties().getSourceLanguage();
Language srcLang = Core.getProject().getProjectProperties().getSourceLanguage();
Language trLang = Core.getProject().getProjectProperties().getTargetLanguage();

boolean merge = Preferences.isPreferenceDefault(Preferences.GLOSSARY_MERGE_ALTERNATE_DEFINITIONS,
Preferences.GLOSSARY_MERGE_ALTERNATE_DEFINITIONS_DEFAULT);

GlossarySearcher searcher = new GlossarySearcher(tok, language, merge) {
GlossarySearcher searcher = new GlossarySearcher(tok, srcLang, trLang, merge) {
@Override
protected void checkCancelled() {
checkEntryChanged();
Expand Down
79 changes: 62 additions & 17 deletions src/org/omegat/gui/glossary/GlossarySearcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

package org.omegat.gui.glossary;

import java.text.Collator;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
Expand Down Expand Up @@ -54,12 +55,19 @@
*/
public class GlossarySearcher {
private final ITokenizer tok;
private final Language lang;
private final Language srcLang;
private final Language targetLang;
private final boolean mergeAltDefinitions;

public GlossarySearcher(ITokenizer tok, Language lang, boolean mergeAltDefinitions) {
public GlossarySearcher(ITokenizer tok, Language srcLang, boolean mergeAltDefinitions) {
this(tok, srcLang, Core.getProject().getProjectProperties().getTargetLanguage(), mergeAltDefinitions);
}

public GlossarySearcher(ITokenizer tok, Language srcLang, Language targetLang,
boolean mergeAltDefinitions) {
this.tok = tok;
this.lang = lang;
this.srcLang = srcLang;
this.targetLang = targetLang;
this.mergeAltDefinitions = mergeAltDefinitions;
}

Expand Down Expand Up @@ -87,7 +95,9 @@ public List<GlossaryEntry> searchSourceMatches(SourceTextEntry ste, List<Glossar
// 4) by length of localized term (optional)
// 5) by alphabet of localized term
// Then remove the duplicates and combine the synonyms.
sortGlossaryEntries(result);
final Collator srcLangCollator = Collator.getInstance(srcLang.getLocale());
final Collator targetLangCollator = Collator.getInstance(targetLang.getLocale());
sortGlossaryEntries(srcLangCollator, targetLangCollator, result);
return filterGlossary(result, mergeAltDefinitions);
}

Expand Down Expand Up @@ -212,7 +222,7 @@ private static List<Token[]> getCjkMatchingTokens(String fullText, String term)

private Token[] tokenize(String str) {
// Make comparison case-insensitive
String strLower = str.toLowerCase(lang.getLocale());
String strLower = str.toLowerCase(srcLang.getLocale());
if (Preferences.isPreferenceDefault(Preferences.GLOSSARY_STEMMING,
Preferences.GLOSSARY_STEMMING_DEFAULT)) {
return tok.tokenizeWords(strLower, StemmingMode.GLOSSARY);
Expand Down Expand Up @@ -245,35 +255,70 @@ private static boolean tokenInTag(Token tok, List<Tag> tags) {
return false;
}

static void sortGlossaryEntries(List<GlossaryEntry> entries) {
/**
* sort glossary entries for test.
*
* @param entries
*/
void sortGlossaryEntries(List<GlossaryEntry> entries) {
final Collator srcLangCollator = Collator.getInstance(srcLang.getLocale());
final Collator targetLangCollator = Collator.getInstance(targetLang.getLocale());
sortGlossaryEntries(srcLangCollator, targetLangCollator, entries);
}

private void sortGlossaryEntries(Collator srcLangCollator, Collator targetLangCollator,
List<GlossaryEntry> entries) {
entries.sort((o1, o2) -> {
int p1 = o1.getPriority() ? 1 : 2;
int p2 = o2.getPriority() ? 1 : 2;
int c = p1 - p2;
if (c == 0 && Preferences.isPreferenceDefault(Preferences.GLOSSARY_SORT_BY_SRC_LENGTH, true)
&& (o2.getSrcText().contains(o1.getSrcText())
|| o1.getSrcText().contains(o2.getSrcText()))) {
// longer is better if one contains another
if (c == 0 && Preferences.isPreferenceDefault(Preferences.GLOSSARY_SORT_BY_SRC_LENGTH, false)
&& (o2.getSrcText().startsWith(o1.getSrcText())
|| o1.getSrcText().startsWith(o2.getSrcText()))) {
// longer is better if one source term starts with another
c = o2.getSrcText().length() - o1.getSrcText().length();
}
// sort source text alphabetically, first ignore a case, then
// consider a case
// sort source text alphabetically.
// Notion of alphabetical order is language-dependent
if (c == 0) {
c = o1.getSrcText().compareToIgnoreCase(o2.getSrcText());
}
if (c == 0) {
c = o1.getSrcText().compareTo(o2.getSrcText());
c = compareLanguageDependent(srcLangCollator, o1.getSrcText(), o2.getSrcText());
}
if (c == 0 && Preferences.isPreferenceDefault(Preferences.GLOSSARY_SORT_BY_LENGTH, false)) {
c = o2.getLocText().length() - o1.getLocText().length();
}
if (c == 0) {
c = o1.getLocText().compareToIgnoreCase(o2.getLocText());
c = compareLanguageDependent(targetLangCollator, o1.getLocText(), o2.getLocText());
}
return c;
});
}

private int compareLanguageDependent(Collator langCollator, String s1, String s2) {
// Use primary criteria - for most languages written with latin
// alphabet, PRIMARY means case-insensitive
// (see
// https://docs.oracle.com/javase/8/docs/api/java/text/Collator.html#PRIMARY)
langCollator.setStrength(Collator.PRIMARY);
int c = langCollator.compare(s1, s2);
if (c != 0) {
return c;
}
// Use secondary criteria - for most languages written with latin
// alphabet, SECONDARY means ignore accents
// (see
// https://docs.oracle.com/javase/8/docs/api/java/text/Collator.html#PRIMARY)
langCollator.setStrength(Collator.SECONDARY);
c = langCollator.compare(s1, s2);
if (c != 0) {
return c;
}
// Use tertiary criteria - language-dependent
// (see
// https://docs.oracle.com/javase/8/docs/api/java/text/Collator.html#TERTIARY)
langCollator.setStrength(Collator.TERTIARY);
return langCollator.compare(s1, s2);
}

private static List<GlossaryEntry> filterGlossary(List<GlossaryEntry> result,
boolean mergeAltDefinitions) {
// First check that entries exist in the list.
Expand Down
54 changes: 1 addition & 53 deletions test/src/org/omegat/gui/glossary/FindGlossaryThreadTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,60 +25,8 @@

package org.omegat.gui.glossary;

import static org.junit.Assert.assertEquals;

import java.util.ArrayList;
import java.util.List;

import org.junit.Test;
import org.omegat.core.TestCore;
import org.omegat.util.Preferences;

public class FindGlossaryThreadTest extends TestCore {
@Test
public void testEntriesSort() {
List<GlossaryEntry> entries = new ArrayList<>();
entries.add(new GlossaryEntry("dog", "doggy", "cdog", false, null));
entries.add(new GlossaryEntry("cat", "catty", "ccat", false, null));
entries.add(new GlossaryEntry("cat", "mikeneko", "ccat", false, null));
entries.add(new GlossaryEntry("zzz", "zzz", "czzz", true, null));
entries.add(new GlossaryEntry("horse", "catty", "chorse", false, null));
entries.add(new GlossaryEntry("向上", "enhance", "", false, null));
entries.add(new GlossaryEntry("向", "direct", "", false, null));
entries.add(new GlossaryEntry("上", "up", "", false, null));
Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_LENGTH, true);
Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_SRC_LENGTH, false);
GlossarySearcher.sortGlossaryEntries(entries);
assertEquals("zzz", entries.get(0).getSrcText());
assertEquals("cat", entries.get(1).getSrcText());
assertEquals("mikeneko", entries.get(1).getLocText());
assertEquals("cat", entries.get(2).getSrcText());
assertEquals("catty", entries.get(2).getLocText());
assertEquals("dog", entries.get(3).getSrcText());
assertEquals("horse", entries.get(4).getSrcText());
Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_LENGTH, false);
GlossarySearcher.sortGlossaryEntries(entries);
assertEquals("zzz", entries.get(0).getSrcText());
assertEquals("cat", entries.get(1).getSrcText());
assertEquals("catty", entries.get(1).getLocText());
assertEquals("cat", entries.get(2).getSrcText());
assertEquals("mikeneko", entries.get(2).getLocText());
assertEquals("dog", entries.get(3).getSrcText());
assertEquals("horse", entries.get(4).getSrcText());
assertEquals("up", entries.get(5).getLocText());
assertEquals("direct", entries.get(6).getLocText());
assertEquals("enhance", entries.get(7).getLocText());
Preferences.setPreference(Preferences.GLOSSARY_SORT_BY_SRC_LENGTH, true);
GlossarySearcher.sortGlossaryEntries(entries);
assertEquals("zzz", entries.get(0).getSrcText());
assertEquals("cat", entries.get(1).getSrcText());
assertEquals("catty", entries.get(1).getLocText());
assertEquals("cat", entries.get(2).getSrcText());
assertEquals("mikeneko", entries.get(2).getLocText());
assertEquals("dog", entries.get(3).getSrcText());
assertEquals("horse", entries.get(4).getSrcText());
assertEquals("enhance", entries.get(5).getLocText());
assertEquals("up", entries.get(6).getLocText());
assertEquals("direct", entries.get(7).getLocText());
}

}
Loading
Loading