From 4f429dca5022a4f6b8890a4a95a4887a43b9f56b Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Wed, 6 Nov 2024 22:37:12 +0900 Subject: [PATCH 01/12] feat: EditorUtils - Update javadoc for getWordEnd and getWordStart utility functions - Remove "TODO: change to use document's locale" - grab word with source and target locale of the project Signed-off-by: Hiroshi Miura --- src/org/omegat/gui/editor/EditorUtils.java | 76 ++++++++++++++++++---- 1 file changed, 64 insertions(+), 12 deletions(-) diff --git a/src/org/omegat/gui/editor/EditorUtils.java b/src/org/omegat/gui/editor/EditorUtils.java index 8e5f49da1c..1c11d3d11b 100644 --- a/src/org/omegat/gui/editor/EditorUtils.java +++ b/src/org/omegat/gui/editor/EditorUtils.java @@ -27,10 +27,13 @@ package org.omegat.gui.editor; +import java.text.BreakIterator; import java.util.List; import java.util.Locale; import javax.swing.text.BadLocationException; +import javax.swing.text.Document; +import javax.swing.text.Element; import javax.swing.text.JTextComponent; import javax.swing.text.Utilities; @@ -61,15 +64,40 @@ private EditorUtils() { * Determines the start of a word for the given model location. This method * skips direction char. * - * TODO: change to use document's locale - * - * @param c - * @param offs - * @return + * @param c TextComponent of the editor area. + * @param offs offset of the text. + * @return position of word start on the text component. * @throws BadLocationException + * when there is no line found in the text component. 
*/ public static int getWordStart(JTextComponent c, int offs) throws BadLocationException { - int result = Utilities.getWordStart(c, offs); + int result = offs; + Element line = Utilities.getParagraphElement(c, offs); + if (line == null) { + throw new BadLocationException("No word at " + offs, offs); + } + int lineStart = line.getStartOffset(); + Document doc = c.getDocument(); + int lineEnd = Math.min(line.getEndOffset(), doc.getLength()); + if (lineEnd - lineStart > 0) { + String lineString = doc.getText(lineStart, lineEnd - lineStart); + Locale locale = c.getLocale(); + if (c instanceof EditorTextArea3 && Core.getProject().isProjectLoaded()) { + if (((EditorTextArea3) c).isInActiveTranslation(offs)) { + locale = Core.getProject().getProjectProperties().getTargetLanguage().getLocale(); + } else { + locale = Core.getProject().getProjectProperties().getSourceLanguage().getLocale(); + } + } + BreakIterator words = BreakIterator.getWordInstance(locale); + words.setText(lineString); + int wordPosition = offs - lineStart; + if (wordPosition >= words.last()) { + wordPosition = words.last() - 1; + } + words.following(wordPosition); + result = lineStart + words.previous(); + } char ch = c.getDocument().getText(result, 1).charAt(0); if (isDirectionChar(ch)) { result++; @@ -81,15 +109,39 @@ public static int getWordStart(JTextComponent c, int offs) throws BadLocationExc * Determines the end of a word for the given model location. This method * skips direction char. * - * TODO: change to use document's locale - * - * @param c - * @param offs - * @return + * @param c TextComponent of the editor area. + * @param offs offset of the text. + * @return position of the word end on the text component. * @throws BadLocationException + * when there is no line found in the text component. 
*/ public static int getWordEnd(JTextComponent c, int offs) throws BadLocationException { - int result = Utilities.getWordEnd(c, offs); + int result = offs; + Element line = Utilities.getParagraphElement(c, offs); + if (line == null) { + throw new BadLocationException("No word at " + offs, offs); + } + int lineStart = line.getStartOffset(); + Document doc = c.getDocument(); + int lineEnd = Math.min(line.getEndOffset(), doc.getLength()); + if (lineEnd - lineStart > 0) { + String lineString = doc.getText(lineStart, lineEnd - lineStart); + Locale locale = c.getLocale(); + if (c instanceof EditorTextArea3 && Core.getProject().isProjectLoaded()) { + if (((EditorTextArea3) c).isInActiveTranslation(offs)) { + locale = Core.getProject().getProjectProperties().getTargetLanguage().getLocale(); + } else { + locale = Core.getProject().getProjectProperties().getSourceLanguage().getLocale(); + } + } + BreakIterator words = BreakIterator.getWordInstance(locale); + words.setText(lineString); + int wordPosition = offs - lineStart; + if (wordPosition >= words.last()) { + wordPosition = words.last() - 1; + } + result = lineStart + words.following(wordPosition); + } if (result > 0) { char ch = c.getDocument().getText(result - 1, 1).charAt(0); if (isDirectionChar(ch)) { From 3f537e7485c7459d5497703d4c237aef2a5a0ed5 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Thu, 7 Nov 2024 16:35:28 +0900 Subject: [PATCH 02/12] chore: add test case in English first steps pane Signed-off-by: Hiroshi Miura --- .../omegat/gui/editor/EditorController.java | 2 + .../omegat/gui/editor/EditorTextAreaTest.java | 26 +++++++ .../omegat/gui/editor/EditorUtilsTest.java | 68 +++++++++++++++++++ 3 files changed, 96 insertions(+) create mode 100644 test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java diff --git a/src/org/omegat/gui/editor/EditorController.java b/src/org/omegat/gui/editor/EditorController.java index a0792c480d..35c7c82a75 100644 --- a/src/org/omegat/gui/editor/EditorController.java 
+++ b/src/org/omegat/gui/editor/EditorController.java @@ -1947,6 +1947,7 @@ private void createAdditionalPanes() { .setComponentOrientation(BiDiUtils.isRtl(language) ? ComponentOrientation.RIGHT_TO_LEFT : ComponentOrientation.LEFT_TO_RIGHT); introPane.setEditable(false); + introPane.setName("IntroPane"); DragTargetOverlay.apply(introPane, dropInfo); URI uri = Help.getHelpFileURI(OConsts.HELP_FIRST_STEPS_PREFIX, language, OConsts.HELP_FIRST_STEPS); if (uri != null) { @@ -1958,6 +1959,7 @@ private void createAdditionalPanes() { emptyProjectPaneTitle = OStrings.getString("TF_INTRO_EMPTYPROJECT_FILENAME"); emptyProjectPane = new JTextPane(); emptyProjectPane.setEditable(false); + emptyProjectPane.setName("EmptyProjectPane"); emptyProjectPane.setText(OStrings.getString("TF_INTRO_EMPTYPROJECT")); emptyProjectPane.setFont(mw.getApplicationFont()); DragTargetOverlay.apply(emptyProjectPane, dropInfo); diff --git a/test-acceptance/src/org/omegat/gui/editor/EditorTextAreaTest.java b/test-acceptance/src/org/omegat/gui/editor/EditorTextAreaTest.java index c9eba5bcb2..f0a7fe5c24 100644 --- a/test-acceptance/src/org/omegat/gui/editor/EditorTextAreaTest.java +++ b/test-acceptance/src/org/omegat/gui/editor/EditorTextAreaTest.java @@ -1,3 +1,29 @@ + +/************************************************************************** + OmegaT - Computer Assisted Translation (CAT) tool + with fuzzy matching, translation memory, keyword search, + glossaries, and translation leveraging into updated projects. + + Copyright (C) 2024 Hiroshi Miura + Home page: https://www.omegat.org/ + Support center: https://omegat.org/support + + This file is part of OmegaT. + + OmegaT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + OmegaT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + **************************************************************************/ + package org.omegat.gui.editor; import org.junit.Test; diff --git a/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java b/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java new file mode 100644 index 0000000000..b873c8d89e --- /dev/null +++ b/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java @@ -0,0 +1,68 @@ +/************************************************************************** + OmegaT - Computer Assisted Translation (CAT) tool + with fuzzy matching, translation memory, keyword search, + glossaries, and translation leveraging into updated projects. + + Copyright (C) 2024 Hiroshi Miura + Home page: https://www.omegat.org/ + Support center: https://omegat.org/support + + This file is part of OmegaT. + + OmegaT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OmegaT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ **************************************************************************/ + +package org.omegat.gui.editor; + +import static org.junit.Assert.assertEquals; + +import java.util.Locale; + +import javax.swing.text.BadLocationException; +import javax.swing.text.JTextComponent; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.runners.Enclosed; +import org.junit.runner.RunWith; + +import org.omegat.gui.main.TestCoreGUI; +import org.omegat.util.LocaleRule; + +/** + * @author Hiroshi Miura + */ +@RunWith(Enclosed.class) +public class EditorUtilsTest { + + public static class EditorUtilsEnTest extends TestCoreGUI { + + @Rule + public final LocaleRule localeRule = new LocaleRule(new Locale("en")); + + @Test + public void testEditorUtilsGetWordEn() throws BadLocationException { + int offs = 518; + JTextComponent editPane = window.panel("First Steps").textBox("IntroPane").target(); + String text = editPane.getText(); + int posStart = EditorUtils.getWordStart(editPane, offs); + int posEnd = EditorUtils.getWordEnd(editPane, offs); + String word = editPane.getText(posStart, posEnd - posStart); + assertEquals("translation", word); + assertEquals(508, posStart); + assertEquals(519, posEnd); + } + } + +} From 5f7052457aff85ecbdf74d7ac7a12b04d7d4695e Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Thu, 7 Nov 2024 16:46:00 +0900 Subject: [PATCH 03/12] chore: add task dependency for acceptance test Signed-off-by: Hiroshi Miura --- build.gradle | 1 + test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index f3d5a5a025..b9a864a146 100644 --- a/build.gradle +++ b/build.gradle @@ -1699,6 +1699,7 @@ tasks.register('testAcceptance', Test) { classpath = sourceSets.testAcceptance.runtimeClasspath systemProperties = System.properties systemProperty 'java.util.logging.config.file', "${rootDir}/config/test/logger.properties" + dependsOn firstStepsEn } 
ext.mavenStyleVersion = version.replace('_', '-') diff --git a/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java b/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java index b873c8d89e..5da153e28f 100644 --- a/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java +++ b/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java @@ -55,7 +55,6 @@ public static class EditorUtilsEnTest extends TestCoreGUI { public void testEditorUtilsGetWordEn() throws BadLocationException { int offs = 518; JTextComponent editPane = window.panel("First Steps").textBox("IntroPane").target(); - String text = editPane.getText(); int posStart = EditorUtils.getWordStart(editPane, offs); int posEnd = EditorUtils.getWordEnd(editPane, offs); String word = editPane.getText(posStart, posEnd - posStart); From e5bdae1b305c6de3c6ef441bffc239daa844e2fb Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Thu, 7 Nov 2024 21:48:52 +0900 Subject: [PATCH 04/12] test: add the case - test EditorUtils.getWord* with loaded project from Chinese to Japanese in English environment Signed-off-by: Hiroshi Miura --- test-acceptance/data/project_CN_JP/.gitignore | 3 + .../data/project_CN_JP/dictionary/.keep | 0 .../data/project_CN_JP/glossary/glossary.txt | 3 + .../data/project_CN_JP/omegat.project | 33 ++++++++++ .../project_CN_JP/omegat/ignored_words.txt | 0 .../omegat/last_entry.properties | 4 ++ .../project_CN_JP/omegat/learned_words.txt | 0 .../project_CN_JP/omegat/project_save.tmx | 17 +++++ .../data/project_CN_JP/source/source.txt | 5 ++ .../data/project_CN_JP/target/.keep | 0 test-acceptance/data/project_CN_JP/tm/.keep | 0 .../omegat/gui/editor/EditorUtilsTest.java | 62 ++++++++++++++++++- 12 files changed, 124 insertions(+), 3 deletions(-) create mode 100644 test-acceptance/data/project_CN_JP/.gitignore create mode 100644 test-acceptance/data/project_CN_JP/dictionary/.keep create mode 100644 test-acceptance/data/project_CN_JP/glossary/glossary.txt create mode 100644 
test-acceptance/data/project_CN_JP/omegat.project create mode 100644 test-acceptance/data/project_CN_JP/omegat/ignored_words.txt create mode 100644 test-acceptance/data/project_CN_JP/omegat/last_entry.properties create mode 100644 test-acceptance/data/project_CN_JP/omegat/learned_words.txt create mode 100644 test-acceptance/data/project_CN_JP/omegat/project_save.tmx create mode 100644 test-acceptance/data/project_CN_JP/source/source.txt create mode 100644 test-acceptance/data/project_CN_JP/target/.keep create mode 100644 test-acceptance/data/project_CN_JP/tm/.keep diff --git a/test-acceptance/data/project_CN_JP/.gitignore b/test-acceptance/data/project_CN_JP/.gitignore new file mode 100644 index 0000000000..2f3bf92e42 --- /dev/null +++ b/test-acceptance/data/project_CN_JP/.gitignore @@ -0,0 +1,3 @@ +project_stats.txt +project_stats.json +*.bak diff --git a/test-acceptance/data/project_CN_JP/dictionary/.keep b/test-acceptance/data/project_CN_JP/dictionary/.keep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test-acceptance/data/project_CN_JP/glossary/glossary.txt b/test-acceptance/data/project_CN_JP/glossary/glossary.txt new file mode 100644 index 0000000000..b53c81f5a7 --- /dev/null +++ b/test-acceptance/data/project_CN_JP/glossary/glossary.txt @@ -0,0 +1,3 @@ +# Glossary in tab-separated format -*- coding: utf-8 -*- +介绍 紹介 +中的 中心的な diff --git a/test-acceptance/data/project_CN_JP/omegat.project b/test-acceptance/data/project_CN_JP/omegat.project new file mode 100644 index 0000000000..1f961b9d19 --- /dev/null +++ b/test-acceptance/data/project_CN_JP/omegat.project @@ -0,0 +1,33 @@ + + + + source + + **/.svn/** + **/CVS/** + **/.cvs/** + **/.git/** + **/.hg/** + **/.repositories/** + **/desktop.ini + **/Thumbs.db + **/.DS_Store + **/~$* + + target + tm + glossary + .-glossary.txt + dictionary + + + zh-CN + ja-JP + org.omegat.tokenizer.LuceneSmartChineseTokenizer + org.omegat.tokenizer.LuceneJapaneseTokenizer + true + true + true + + + diff --git 
a/test-acceptance/data/project_CN_JP/omegat/ignored_words.txt b/test-acceptance/data/project_CN_JP/omegat/ignored_words.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test-acceptance/data/project_CN_JP/omegat/last_entry.properties b/test-acceptance/data/project_CN_JP/omegat/last_entry.properties new file mode 100644 index 0000000000..f21203a178 --- /dev/null +++ b/test-acceptance/data/project_CN_JP/omegat/last_entry.properties @@ -0,0 +1,4 @@ +#Thu Nov 07 21:30:29 JST 2024 +LAST_ENTRY_NUMBER=1 +LAST_ENTRY_SRC=\u592A\u5E73\u5BFA\u4E2D\u7684\u6587\u7B14\u5854 +LAST_ENTRY_FILE=source.txt diff --git a/test-acceptance/data/project_CN_JP/omegat/learned_words.txt b/test-acceptance/data/project_CN_JP/omegat/learned_words.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test-acceptance/data/project_CN_JP/omegat/project_save.tmx b/test-acceptance/data/project_CN_JP/omegat/project_save.tmx new file mode 100644 index 0000000000..7ee75cc495 --- /dev/null +++ b/test-acceptance/data/project_CN_JP/omegat/project_save.tmx @@ -0,0 +1,17 @@ + + + +
+ + + + + 太平寺中的文笔塔 + + + 太平寺の中心的なペン塔 + + + + + diff --git a/test-acceptance/data/project_CN_JP/source/source.txt b/test-acceptance/data/project_CN_JP/source/source.txt new file mode 100644 index 0000000000..8236143511 --- /dev/null +++ b/test-acceptance/data/project_CN_JP/source/source.txt @@ -0,0 +1,5 @@ +太平寺中的文笔塔 + +文筆塔原是江苏省常州市太平寺中的塔。太平寺始建于南北朝齐梁时期,是常州最古老的佛寺之一,今已不存。 +文笔塔为砖木结构,七级八面,每级4个拱门,中有旋梯。塔下有曲池、拱桥。 +“夕照塔影”为文笔胜景。现存塔为光绪末年(1905-1908年)重建 diff --git a/test-acceptance/data/project_CN_JP/target/.keep b/test-acceptance/data/project_CN_JP/target/.keep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test-acceptance/data/project_CN_JP/tm/.keep b/test-acceptance/data/project_CN_JP/tm/.keep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java b/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java index 5da153e28f..98423a93e7 100644 --- a/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java +++ b/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java @@ -15,7 +15,7 @@ (at your option) any later version. OmegaT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of + but WITHOUT ANY WARRANTY; without eve太平寺中的文笔塔n the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
@@ -26,19 +26,30 @@ package org.omegat.gui.editor; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import java.io.File; import java.util.Locale; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import javax.swing.SwingUtilities; import javax.swing.text.BadLocationException; import javax.swing.text.JTextComponent; +import org.apache.commons.io.FileUtils; import org.junit.Rule; import org.junit.Test; import org.junit.experimental.runners.Enclosed; +import org.junit.rules.TemporaryFolder; import org.junit.runner.RunWith; +import org.omegat.core.Core; +import org.omegat.core.CoreEvents; +import org.omegat.gui.main.ProjectUICommands; import org.omegat.gui.main.TestCoreGUI; import org.omegat.util.LocaleRule; +import org.omegat.util.Preferences; /** * @author Hiroshi Miura @@ -46,13 +57,13 @@ @RunWith(Enclosed.class) public class EditorUtilsTest { - public static class EditorUtilsEnTest extends TestCoreGUI { + public static class EditorUtilsFirstStepsTest extends TestCoreGUI { @Rule public final LocaleRule localeRule = new LocaleRule(new Locale("en")); @Test - public void testEditorUtilsGetWordEn() throws BadLocationException { + public void testEditorUtilsGetWordFirstSteps() throws BadLocationException { int offs = 518; JTextComponent editPane = window.panel("First Steps").textBox("IntroPane").target(); int posStart = EditorUtils.getWordStart(editPane, offs); @@ -64,4 +75,49 @@ public void testEditorUtilsGetWordEn() throws BadLocationException { } } + public static class EditorUtilsLoadedProjectTest extends TestCoreGUI { + + @Rule + public final LocaleRule localeRule = new LocaleRule(new Locale("en")); + + @Rule + public final TemporaryFolder folder = TemporaryFolder.builder().assureDeletion().build(); + + @Test + public void testEditorUtilsGetWordLoadedProject() throws Exception { + // Prepare a sample project + File tmpDir = folder.newFolder("omegat-sample-project-"); + File projSrc = new 
File("test-acceptance/data/project_CN_JP/"); + FileUtils.copyDirectory(projSrc, tmpDir); + FileUtils.forceDeleteOnExit(tmpDir); + Preferences.setPreference(Preferences.PROJECT_FILES_SHOW_ON_LOAD, false); + // Load the project and wait a completion + CountDownLatch latch = new CountDownLatch(1); + CoreEvents.registerProjectChangeListener(eventType -> { + if (Core.getProject().isProjectLoaded()) { + latch.countDown(); + } + }); + SwingUtilities.invokeAndWait(() -> ProjectUICommands.projectOpen(tmpDir, true)); + try { + assertTrue(latch.await(5, TimeUnit.SECONDS)); + } catch (InterruptedException ignored) { + } + // + final JTextComponent editPane = window.panel("Editor - source.txt").textBox().target(); + // select word from a source text + int offs = 102; + int posStart = EditorUtils.getWordStart(editPane, offs); + int posEnd = EditorUtils.getWordEnd(editPane, offs); + String word = editPane.getText(posStart, posEnd - posStart); + assertEquals("太平寺中的文笔塔", word); + // select word from a translation + offs = 109; + posStart = EditorUtils.getWordStart(editPane, offs); + posEnd = EditorUtils.getWordEnd(editPane, offs); + word = editPane.getText(posStart, posEnd - posStart); + assertEquals("太平寺", word); + } + } + } From d815bb53bb9e39e278372ff97bd2156970513f51 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Tue, 12 Nov 2024 00:30:06 +0900 Subject: [PATCH 05/12] refactor: reduce duplicated code Signed-off-by: Hiroshi Miura --- src/org/omegat/gui/editor/EditorUtils.java | 50 ++++++++-------------- 1 file changed, 17 insertions(+), 33 deletions(-) diff --git a/src/org/omegat/gui/editor/EditorUtils.java b/src/org/omegat/gui/editor/EditorUtils.java index 1c11d3d11b..4dba5965a8 100644 --- a/src/org/omegat/gui/editor/EditorUtils.java +++ b/src/org/omegat/gui/editor/EditorUtils.java @@ -71,33 +71,7 @@ private EditorUtils() { * when there is no line found in the text component. 
*/ public static int getWordStart(JTextComponent c, int offs) throws BadLocationException { - int result = offs; - Element line = Utilities.getParagraphElement(c, offs); - if (line == null) { - throw new BadLocationException("No word at " + offs, offs); - } - int lineStart = line.getStartOffset(); - Document doc = c.getDocument(); - int lineEnd = Math.min(line.getEndOffset(), doc.getLength()); - if (lineEnd - lineStart > 0) { - String lineString = doc.getText(lineStart, lineEnd - lineStart); - Locale locale = c.getLocale(); - if (c instanceof EditorTextArea3 && Core.getProject().isProjectLoaded()) { - if (((EditorTextArea3) c).isInActiveTranslation(offs)) { - locale = Core.getProject().getProjectProperties().getTargetLanguage().getLocale(); - } else { - locale = Core.getProject().getProjectProperties().getSourceLanguage().getLocale(); - } - } - BreakIterator words = BreakIterator.getWordInstance(locale); - words.setText(lineString); - int wordPosition = offs - lineStart; - if (wordPosition >= words.last()) { - wordPosition = words.last() - 1; - } - words.following(wordPosition); - result = lineStart + words.previous(); - } + int result = getWordBoundary(c, offs, false); char ch = c.getDocument().getText(result, 1).charAt(0); if (isDirectionChar(ch)) { result++; @@ -116,6 +90,17 @@ public static int getWordStart(JTextComponent c, int offs) throws BadLocationExc * when there is no line found in the text component. 
*/ public static int getWordEnd(JTextComponent c, int offs) throws BadLocationException { + int result = getWordBoundary(c, offs, true); + if (result > 0) { + char ch = c.getDocument().getText(result - 1, 1).charAt(0); + if (isDirectionChar(ch)) { + result--; + } + } + return result; + } + + private static int getWordBoundary(JTextComponent c, int offs, boolean end) throws BadLocationException { int result = offs; Element line = Utilities.getParagraphElement(c, offs); if (line == null) { @@ -140,12 +125,11 @@ public static int getWordEnd(JTextComponent c, int offs) throws BadLocationExcep if (wordPosition >= words.last()) { wordPosition = words.last() - 1; } - result = lineStart + words.following(wordPosition); - } - if (result > 0) { - char ch = c.getDocument().getText(result - 1, 1).charAt(0); - if (isDirectionChar(ch)) { - result--; + if (end) { + result = lineStart + words.following(wordPosition); + } else { + words.following(wordPosition); + result = lineStart + words.previous(); } } return result; From e51d17a0cf234249881436fc1907230d25f707f6 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Wed, 27 Nov 2024 15:15:36 +0900 Subject: [PATCH 06/12] fix: fix copyright header typo Signed-off-by: Hiroshi Miura --- test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java b/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java index 98423a93e7..1c5bfe8126 100644 --- a/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java +++ b/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java @@ -15,7 +15,7 @@ (at your option) any later version. OmegaT is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without eve太平寺中的文笔塔n the implied warranty of + but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. From 66a84b1d13e76df6ff02891652060e9d6ff7b9b8 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Tue, 3 Dec 2024 07:41:15 +0900 Subject: [PATCH 07/12] feat: use ICU4J for BreakIterator to support CJ - Add unit test for getWordBoundary method with English, Japanese and Chinese Signed-off-by: Hiroshi Miura --- build.gradle | 1 + gradle/libs.versions.toml | 2 +- language-modules/ja/build.gradle | 4 +- src/org/omegat/gui/editor/EditorUtils.java | 35 +++++---- .../omegat/gui/editor/EditorUtilsTest.java | 2 +- .../omegat/gui/editor/EditorUtilsTest.java | 74 +++++++++++++++++++ tipoftheday/build.gradle | 6 +- 7 files changed, 103 insertions(+), 21 deletions(-) create mode 100644 test/src/org/omegat/gui/editor/EditorUtilsTest.java diff --git a/build.gradle b/build.gradle index 829ab42f1f..7c857f290e 100644 --- a/build.gradle +++ b/build.gradle @@ -300,6 +300,7 @@ dependencies { } runtimeOnly(libs.language.detector) runtimeOnly(libs.dumont.hunspell) + implementation(libs.icu4j) // Lucene for tokenizers implementation(libs.bundles.lucene) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 90c3fe0f11..e56cf0dc02 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -65,7 +65,7 @@ commons-lang3 = {group = "org.apache.commons", name = "commons-lang3", version.r commons-text = {group = "org.apache.commons", name = "commons-text", version.ref = "commons_text"} commons-validator = {group = "commons-validator", name = "commons-validator", version.ref = "commons_validator"} jsoup = {group = "org.jsoup", name = "jsoup", version.ref = "jsoup"} -icj4j = {group = "com.ibm.icu", name = "icu4j", version.ref = "icu4j"} +icu4j = {group = "com.ibm.icu", name = "icu4j", version.ref = "icu4j"} stax2-api = {group = "org.codehaus.woodstox", name = "stax2-api", version.ref = "stax2api"} woodstox-core = {group = "com.fasterxml.woodstox", name = "woodstox-core", version.ref = "woodstox"} languagetool-all = 
{group = "org.languagetool", name = "language-all", version.ref = "languagetool"} diff --git a/language-modules/ja/build.gradle b/language-modules/ja/build.gradle index 1855d1888f..4a1deeda4d 100644 --- a/language-modules/ja/build.gradle +++ b/language-modules/ja/build.gradle @@ -25,7 +25,7 @@ dependencies { exclude module: 'icu4j' } implementation(dependencies.variantOf(libs.lucene.gosen) { classifier("ipadic") }) - implementation(libs.icj4j) + compileOnly(libs.icu4j) } testImplementation(libs.junit4) @@ -43,7 +43,7 @@ dependencies { exclude module: 'icu4j' } testRuntimeOnly(dependencies.variantOf(libs.lucene.gosen) { classifier("ipadic") }) - testRuntimeOnly(libs.icj4j) + testRuntimeOnly(libs.icu4j) testImplementation(libs.assertj) testImplementation(testFixtures(project.rootProject)) diff --git a/src/org/omegat/gui/editor/EditorUtils.java b/src/org/omegat/gui/editor/EditorUtils.java index 4dba5965a8..0eac3b860a 100644 --- a/src/org/omegat/gui/editor/EditorUtils.java +++ b/src/org/omegat/gui/editor/EditorUtils.java @@ -27,7 +27,6 @@ package org.omegat.gui.editor; -import java.text.BreakIterator; import java.util.List; import java.util.Locale; @@ -37,6 +36,8 @@ import javax.swing.text.JTextComponent; import javax.swing.text.Utilities; +import com.ibm.icu.text.BreakIterator; + import org.omegat.core.Core; import org.omegat.core.data.ProtectedPart; import org.omegat.core.data.SourceTextEntry; @@ -111,30 +112,36 @@ private static int getWordBoundary(JTextComponent c, int offs, boolean end) thro int lineEnd = Math.min(line.getEndOffset(), doc.getLength()); if (lineEnd - lineStart > 0) { String lineString = doc.getText(lineStart, lineEnd - lineStart); + // Detection of target string locale. + // A default is UI component locale, and when OmegaT project + // is loaded, it uses a source or a target language as a + // processing locale. 
Locale locale = c.getLocale(); if (c instanceof EditorTextArea3 && Core.getProject().isProjectLoaded()) { if (((EditorTextArea3) c).isInActiveTranslation(offs)) { locale = Core.getProject().getProjectProperties().getTargetLanguage().getLocale(); } else { - locale = Core.getProject().getProjectProperties().getSourceLanguage().getLocale(); + locale = Core.getProject().getProjectProperties().getSourceLanguage().getLocale(); } } - BreakIterator words = BreakIterator.getWordInstance(locale); - words.setText(lineString); - int wordPosition = offs - lineStart; - if (wordPosition >= words.last()) { - wordPosition = words.last() - 1; - } - if (end) { - result = lineStart + words.following(wordPosition); - } else { - words.following(wordPosition); - result = lineStart + words.previous(); - } + result = lineStart + getWordBoundary(locale, lineString, offs - lineStart, end); } return result; } + static int getWordBoundary(Locale locale, String lineString, int wordPosition, boolean end) { + BreakIterator words = com.ibm.icu.text.BreakIterator.getWordInstance(locale); + words.setText(lineString); + if (wordPosition >= words.last()) { + wordPosition = words.last() - 1; + } + if (end) { + return words.following(wordPosition); + } + words.following(wordPosition); + return words.previous(); +} + /** * Check if char is direction char(u202A,u202B,u202C). 
* diff --git a/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java b/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java index 1c5bfe8126..0c12fa54ee 100644 --- a/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java +++ b/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java @@ -110,7 +110,7 @@ public void testEditorUtilsGetWordLoadedProject() throws Exception { int posStart = EditorUtils.getWordStart(editPane, offs); int posEnd = EditorUtils.getWordEnd(editPane, offs); String word = editPane.getText(posStart, posEnd - posStart); - assertEquals("太平寺中的文笔塔", word); + assertEquals("太平寺", word); // select word from a translation offs = 109; posStart = EditorUtils.getWordStart(editPane, offs); diff --git a/test/src/org/omegat/gui/editor/EditorUtilsTest.java b/test/src/org/omegat/gui/editor/EditorUtilsTest.java new file mode 100644 index 0000000000..3e7173e913 --- /dev/null +++ b/test/src/org/omegat/gui/editor/EditorUtilsTest.java @@ -0,0 +1,74 @@ +/************************************************************************** + OmegaT - Computer Assisted Translation (CAT) tool + with fuzzy matching, translation memory, keyword search, + glossaries, and translation leveraging into updated projects. + + Copyright (C) 2024 Hiroshi Miura + Home page: https://www.omegat.org/ + Support center: https://omegat.org/support + + This file is part of OmegaT. + + OmegaT is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OmegaT is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ **************************************************************************/ +package org.omegat.gui.editor; + +import static org.junit.Assert.assertEquals; + +import java.util.Locale; + +import org.junit.Test; + +public class EditorUtilsTest { + @Test + public void testGetBoundarySimple() { + final String lineString = "Hello world of toys!"; + assertEquals(lineString.indexOf('w'), EditorUtils.getWordBoundary(new Locale("en"), lineString, 8, + false)); + assertEquals(lineString.indexOf('d') + 1, EditorUtils.getWordBoundary(new Locale("en"), lineString, 8, + true)); + assertEquals(lineString.indexOf('!'), EditorUtils.getWordBoundary(new Locale("en"), lineString, 15, + true)); + assertEquals(lineString.indexOf('!') + 1, EditorUtils.getWordBoundary(new Locale("en"), lineString, + lineString.length() + 2, true)); + } + + @Test + public void testGetWordBoundaryJa() { + final String lineString = "太平寺の中心的なペン塔"; + //太平寺-の-中心-的-な-ペン-塔 + assertEquals(lineString.indexOf('太'), EditorUtils.getWordBoundary(new Locale("ja"), lineString, 2, + false)); + assertEquals(lineString.indexOf('寺') + 1, EditorUtils.getWordBoundary(new Locale("ja"), lineString, 2, + true)); + assertEquals(lineString.indexOf('中'), EditorUtils.getWordBoundary(new Locale("ja"), lineString, 5, + false)); + assertEquals(lineString.indexOf('心') + 1, EditorUtils.getWordBoundary(new Locale("ja"), lineString, 5, + true)); + } + + @Test + public void testGetWordBoundaryCn() { + final String lineString = "太平寺中的文笔塔"; + // 太平寺-中的-文笔-塔 + assertEquals(lineString.indexOf('太'), EditorUtils.getWordBoundary(new Locale("zh_CN"), lineString, 2, + false)); + assertEquals(lineString.indexOf('寺') + 1, EditorUtils.getWordBoundary(new Locale("zh_CN"), lineString, 2, + true)); + assertEquals(lineString.indexOf('中'), EditorUtils.getWordBoundary(new Locale("zh_CN"), lineString, 4, + false)); + assertEquals(lineString.indexOf('的') + 1, EditorUtils.getWordBoundary(new Locale("zh_CN"), lineString, 4, + true)); + } +} diff --git 
a/tipoftheday/build.gradle b/tipoftheday/build.gradle index b2159b1927..f85b9d1bb4 100644 --- a/tipoftheday/build.gradle +++ b/tipoftheday/build.gradle @@ -29,14 +29,14 @@ dependencies { exclude module: 'commons-io' } compileOnly(libs.commons.lang3) - constraints { - implementation(libs.icj4j) - } + runtimeOnly(libs.icu4j) + // jackson-databind-yaml is dependency of languagetool-core compileOnly(libs.jackson.yaml) } testImplementation(testFixtures(project.rootProject)) testImplementation(libs.commons.io) + testRuntimeOnly(libs.icu4j) } jar { From 5cfc7d07af1b8de8c86ed8b9b4100a3c43ed3b8f Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Tue, 3 Dec 2024 08:43:28 +0900 Subject: [PATCH 08/12] refactor: give locale by callers of EditorUtils Signed-off-by: Hiroshi Miura --- .../omegat/gui/editor/EditorController.java | 10 ++++--- .../omegat/gui/editor/EditorTextArea3.java | 22 ++++++++++++++-- src/org/omegat/gui/editor/EditorUtils.java | 26 ++++++++----------- .../omegat/gui/editor/EditorUtilsTest.java | 14 +++++----- 4 files changed, 45 insertions(+), 27 deletions(-) diff --git a/src/org/omegat/gui/editor/EditorController.java b/src/org/omegat/gui/editor/EditorController.java index 436df05c01..ac60c22bbf 100644 --- a/src/org/omegat/gui/editor/EditorController.java +++ b/src/org/omegat/gui/editor/EditorController.java @@ -704,9 +704,12 @@ protected void loadDocument() { doc.setDocumentFilter(new DocumentFilter3()); - // add locate for target language to editor + // add locales to editor Locale targetLocale = Core.getProject().getProjectProperties().getTargetLanguage().getLocale(); editor.setLocale(targetLocale); + editor.setTargetLocale(targetLocale); + Locale sourceLocale = Core.getProject().getProjectProperties().getSourceLanguage().getLocale(); + editor.setSourceLocale(sourceLocale); editor.setDocument(doc); @@ -1639,8 +1642,9 @@ public void changeCase(CHANGE_CASE_TO toWhat) { try { // no selection? 
make it the current word if (start == end) { - start = EditorUtils.getWordStart(editor, start); - end = EditorUtils.getWordEnd(editor, end); + Locale locale = Core.getProject().getProjectProperties().getTargetLanguage().getLocale(); + start = EditorUtils.getWordStart(editor, start, locale); + end = EditorUtils.getWordEnd(editor, end, locale); // adjust the bound again if (start < translationStart && end <= translationEnd) { diff --git a/src/org/omegat/gui/editor/EditorTextArea3.java b/src/org/omegat/gui/editor/EditorTextArea3.java index f7d7e4fe31..2461da1f73 100644 --- a/src/org/omegat/gui/editor/EditorTextArea3.java +++ b/src/org/omegat/gui/editor/EditorTextArea3.java @@ -42,6 +42,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Locale; import javax.swing.JEditorPane; import javax.swing.JPopupMenu; @@ -141,6 +142,9 @@ public class EditorTextArea3 extends JEditorPane { */ protected boolean overtypeMode = false; + private Locale targetLocale; + private Locale sourceLocale; + public EditorTextArea3(EditorController controller) { this.controller = controller; setEditorKit(new StyledEditorKit() { @@ -165,10 +169,16 @@ protected void createInputAttributes(Element element, MutableAttributeSet set) { c.setBlinkRate(getCaret().getBlinkRate()); setCaret(c); + sourceLocale = getLocale(); + targetLocale = getLocale(); + addCaretListener(e -> { try { - int start = EditorUtils.getWordStart(EditorTextArea3.this, e.getMark()); - int end = EditorUtils.getWordEnd(EditorTextArea3.this, e.getMark()); + // Detection of target string locale. + // It uses a source or a target language as a processing locale. + Locale locale = isInActiveTranslation(e.getMark()) ? 
targetLocale : sourceLocale; + int start = EditorUtils.getWordStart(EditorTextArea3.this, e.getMark(), locale); + int end = EditorUtils.getWordEnd(EditorTextArea3.this, e.getMark(), locale); if (end - start <= 0) { // word not defined return; @@ -200,6 +210,14 @@ public void setFont(Font font) { } } + void setTargetLocale(Locale targetLocale) { + this.targetLocale = targetLocale; + } + + void setSourceLocale(Locale sourceLocale) { + this.sourceLocale = sourceLocale; + } + /** * Return OmDocument instead just a Document. If editor was not initialized * with OmDocument, it will contains other Document implementation. In this diff --git a/src/org/omegat/gui/editor/EditorUtils.java b/src/org/omegat/gui/editor/EditorUtils.java index 0eac3b860a..3cf2e6e55b 100644 --- a/src/org/omegat/gui/editor/EditorUtils.java +++ b/src/org/omegat/gui/editor/EditorUtils.java @@ -72,7 +72,11 @@ private EditorUtils() { * when there is no line found in the text component. */ public static int getWordStart(JTextComponent c, int offs) throws BadLocationException { - int result = getWordBoundary(c, offs, false); + return getWordStart(c, offs, c.getLocale()); + } + + public static int getWordStart(JTextComponent c, int offs, Locale locale) throws BadLocationException { + int result = getWordBoundary(c, offs, locale, false); char ch = c.getDocument().getText(result, 1).charAt(0); if (isDirectionChar(ch)) { result++; @@ -91,7 +95,11 @@ public static int getWordStart(JTextComponent c, int offs) throws BadLocationExc * when there is no line found in the text component. 
*/ public static int getWordEnd(JTextComponent c, int offs) throws BadLocationException { - int result = getWordBoundary(c, offs, true); + return getWordEnd(c, offs, c.getLocale()); + } + + public static int getWordEnd(JTextComponent c, int offs, Locale locale) throws BadLocationException { + int result = getWordBoundary(c, offs, locale, true); if (result > 0) { char ch = c.getDocument().getText(result - 1, 1).charAt(0); if (isDirectionChar(ch)) { @@ -101,7 +109,7 @@ public static int getWordEnd(JTextComponent c, int offs) throws BadLocationExcep return result; } - private static int getWordBoundary(JTextComponent c, int offs, boolean end) throws BadLocationException { + private static int getWordBoundary(JTextComponent c, int offs, Locale locale, boolean end) throws BadLocationException { int result = offs; Element line = Utilities.getParagraphElement(c, offs); if (line == null) { @@ -112,18 +120,6 @@ private static int getWordBoundary(JTextComponent c, int offs, boolean end) thro int lineEnd = Math.min(line.getEndOffset(), doc.getLength()); if (lineEnd - lineStart > 0) { String lineString = doc.getText(lineStart, lineEnd - lineStart); - // Detection of target string locale. - // A default is UI component locale, and when OmegaT project - // is loaded, it uses a source or a target language as a - // processing locale. 
- Locale locale = c.getLocale(); - if (c instanceof EditorTextArea3 && Core.getProject().isProjectLoaded()) { - if (((EditorTextArea3) c).isInActiveTranslation(offs)) { - locale = Core.getProject().getProjectProperties().getTargetLanguage().getLocale(); - } else { - locale = Core.getProject().getProjectProperties().getSourceLanguage().getLocale(); - } - } result = lineStart + getWordBoundary(locale, lineString, offs - lineStart, end); } return result; diff --git a/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java b/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java index 0c12fa54ee..c693196519 100644 --- a/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java +++ b/test-acceptance/src/org/omegat/gui/editor/EditorUtilsTest.java @@ -60,14 +60,14 @@ public class EditorUtilsTest { public static class EditorUtilsFirstStepsTest extends TestCoreGUI { @Rule - public final LocaleRule localeRule = new LocaleRule(new Locale("en")); + public final LocaleRule localeRule = new LocaleRule(Locale.ENGLISH); @Test public void testEditorUtilsGetWordFirstSteps() throws BadLocationException { int offs = 518; JTextComponent editPane = window.panel("First Steps").textBox("IntroPane").target(); - int posStart = EditorUtils.getWordStart(editPane, offs); - int posEnd = EditorUtils.getWordEnd(editPane, offs); + int posStart = EditorUtils.getWordStart(editPane, offs, Locale.ENGLISH); + int posEnd = EditorUtils.getWordEnd(editPane, offs, Locale.ENGLISH); String word = editPane.getText(posStart, posEnd - posStart); assertEquals("translation", word); assertEquals(508, posStart); @@ -107,14 +107,14 @@ public void testEditorUtilsGetWordLoadedProject() throws Exception { final JTextComponent editPane = window.panel("Editor - source.txt").textBox().target(); // select word from a source text int offs = 102; - int posStart = EditorUtils.getWordStart(editPane, offs); - int posEnd = EditorUtils.getWordEnd(editPane, offs); + int posStart = 
EditorUtils.getWordStart(editPane, offs, Locale.SIMPLIFIED_CHINESE); + int posEnd = EditorUtils.getWordEnd(editPane, offs, Locale.SIMPLIFIED_CHINESE); String word = editPane.getText(posStart, posEnd - posStart); assertEquals("太平寺", word); // select word from a translation offs = 109; - posStart = EditorUtils.getWordStart(editPane, offs); - posEnd = EditorUtils.getWordEnd(editPane, offs); + posStart = EditorUtils.getWordStart(editPane, offs, Locale.JAPANESE); + posEnd = EditorUtils.getWordEnd(editPane, offs, Locale.JAPANESE); word = editPane.getText(posStart, posEnd - posStart); assertEquals("太平寺", word); } From 9a692a1c9d36701b95152145236884c1ae080907 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Tue, 3 Dec 2024 09:59:02 +0900 Subject: [PATCH 09/12] docs: javadoc of EditorUtils - Update javadoc - @Deprecated for methods with old signature - Simplify some lines Signed-off-by: Hiroshi Miura --- src/org/omegat/gui/editor/EditorUtils.java | 44 +++++++++++++++++++--- 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/src/org/omegat/gui/editor/EditorUtils.java b/src/org/omegat/gui/editor/EditorUtils.java index 3cf2e6e55b..7a97a30916 100644 --- a/src/org/omegat/gui/editor/EditorUtils.java +++ b/src/org/omegat/gui/editor/EditorUtils.java @@ -71,10 +71,22 @@ private EditorUtils() { * @throws BadLocationException * when there is no line found in the text component. */ + @Deprecated public static int getWordStart(JTextComponent c, int offs) throws BadLocationException { return getWordStart(c, offs, c.getLocale()); } + /** + * Determines the start of a word for the given model location. This method + * skips direction char. + * + * @param c TextComponent of the editor area. + * @param offs offset of the text. + * @param locale locale of the text. + * @return position of word start on the text component. + * @throws BadLocationException + * when there is no line found in the text component. 
+ */ public static int getWordStart(JTextComponent c, int offs, Locale locale) throws BadLocationException { int result = getWordBoundary(c, offs, locale, false); char ch = c.getDocument().getText(result, 1).charAt(0); @@ -94,10 +106,22 @@ public static int getWordStart(JTextComponent c, int offs, Locale locale) throws * @throws BadLocationException * when there is no line found in the text component. */ + @Deprecated public static int getWordEnd(JTextComponent c, int offs) throws BadLocationException { return getWordEnd(c, offs, c.getLocale()); } + /** + * Determines the end of a word for the given model location. This method + * skips direction char. + * + * @param c TextComponent of the editor area. + * @param offs offset of the text. + * @param locale locale of the text. + * @return position of the word end on the text component. + * @throws BadLocationException + * when there is no line found in the text component. + */ public static int getWordEnd(JTextComponent c, int offs, Locale locale) throws BadLocationException { int result = getWordBoundary(c, offs, locale, true); if (result > 0) { @@ -125,6 +149,17 @@ private static int getWordBoundary(JTextComponent c, int offs, Locale locale, bo return result; } + /** + * Get word boundary. + *

+ * When the end argument is true, return a word end. + * Otherwise, return a start of word. + * @param locale locale of the line string. + * @param lineString a string of the line. + * @param wordPosition target position of the line. + * @param end return end of word, otherwise start of word. + * @return index of the word boundary. + */ static int getWordBoundary(Locale locale, String lineString, int wordPosition, boolean end) { BreakIterator words = com.ibm.icu.text.BreakIterator.getWordInstance(locale); words.setText(lineString); @@ -459,7 +494,7 @@ public static String addBidiAroundTags(String text, SourceTextEntry ste) { StringBuilder s = new StringBuilder(text.length() * 12 / 10); for (Tag t : tags) { if (pos < t.pos) { - s.append(text.substring(pos, t.pos)); + s.append(text, pos, t.pos); } s.append(SegmentBuilder.BIDI_RLM_CHAR); s.append(SegmentBuilder.BIDI_LRM_CHAR); @@ -476,11 +511,8 @@ public static String addBidiAroundTags(String text, SourceTextEntry ste) { public static boolean hasBidiAroundTag(String text, String tag, int pos) { try { - boolean has = true; - if (text.charAt(pos - 1) != SegmentBuilder.BIDI_LRM_CHAR - || text.charAt(pos - 2) != SegmentBuilder.BIDI_RLM_CHAR) { - has = false; - } + boolean has = text.charAt(pos - 1) == SegmentBuilder.BIDI_LRM_CHAR + && text.charAt(pos - 2) == SegmentBuilder.BIDI_RLM_CHAR; if (text.charAt(pos + tag.length()) != SegmentBuilder.BIDI_LRM_CHAR || text.charAt(pos + tag.length() + 1) != SegmentBuilder.BIDI_RLM_CHAR) { has = false; From 07be105666a17d255ffea564f799861b456971bf Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Tue, 3 Dec 2024 22:42:05 +0900 Subject: [PATCH 10/12] chore: bump icu4j@74.1 Signed-off-by: Hiroshi Miura --- gradle/libs.versions.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index e56cf0dc02..0b7271e951 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -8,7 +8,7 @@ commons_io = "2.16.1" 
commons_text = "1.11.0" commons_validator = "1.9.0" jsoup = "1.18.1" -icu4j = { require = "[70,73.2[", prefer = "72.1" } +icu4j = { require = "[71.1,76.1[", prefer = "74.2" } stax2api = "4.2.2" woodstox = "6.5.0" languagetool = "6.1" From abaae7b4be095fd655a4f14c6ce70b77ab735057 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Thu, 12 Dec 2024 19:31:39 +0900 Subject: [PATCH 11/12] chore: fix typo in dependency Signed-off-by: Hiroshi Miura --- gradle/libs.versions.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 5a2abd2899..a75737c71b 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -65,7 +65,7 @@ commons-lang3 = {group = "org.apache.commons", name = "commons-lang3", version.r commons-text = {group = "org.apache.commons", name = "commons-text", version.ref = "commons_text"} commons-validator = {group = "commons-validator", name = "commons-validator", version.ref = "commons_validator"} jsoup = {group = "org.jsoup", name = "jsoup", version.ref = "jsoup"} -icj4j = {group = "com.ibm.icu", name = "icu4j", version.ref = "icu4j"} +icu4j = {group = "com.ibm.icu", name = "icu4j", version.ref = "icu4j"} stax2-api = {group = "org.codehaus.woodstox", name = "stax2-api", version.ref = "stax2api"} woodstox-core = {group = "com.fasterxml.woodstox", name = "woodstox-core", version.ref = "woodstox"} languagetool-all = {group = "org.languagetool", name = "language-all", version.ref = "languagetool"} From 207ecd3e40a747a1f4595c3fc979cbe0d9170efc Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Thu, 12 Dec 2024 21:41:10 +0900 Subject: [PATCH 12/12] chore: fix degraded merge for dependencies Signed-off-by: Hiroshi Miura --- gradle/libs.versions.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index a75737c71b..dd5389310a 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -25,7 
+25,7 @@ jackson = "2.16.1" bc = "1.78.1" nashorn = "15.4" caffeine = "3.1.8" -wiremock = "3.9.2" +wiremock = "3.10.0" vldocking = "3.1.1" xmlunit = "2.10.0" assertj = "3.26.0" @@ -187,7 +187,7 @@ xmlunit = ["xmlunit-core", "xmlunit-assertj", "assertj"] [plugins] spotbugs = {id = "com.github.spotbugs", version = "6.0.26"} -spotless = {id = "com.diffplug.spotless", version = "6.25.0"} +spotless = {id = "com.diffplug.spotless", version = "7.0.0.BETA4"} launch4j = {id = "edu.sc.seis.launch4j", version = "3.0.6"} versions = {id = "com.github.ben-manes.versions", version = "0.51.0"} ssh = {id = "org.hidetake.ssh", version = "2.11.2"}