From 1a15bd724c37069ffe8c8e867087068c80133585 Mon Sep 17 00:00:00 2001 From: Thomas CORDONNIER Date: Mon, 19 Feb 2024 08:24:02 +0100 Subject: [PATCH] Apply max penalty for sub-segment match --- src/org/omegat/core/statistics/FindMatches.java | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/org/omegat/core/statistics/FindMatches.java b/src/org/omegat/core/statistics/FindMatches.java index e6cbfcedd9..fcca0109e9 100644 --- a/src/org/omegat/core/statistics/FindMatches.java +++ b/src/org/omegat/core/statistics/FindMatches.java @@ -7,7 +7,7 @@ 2008 Alex Buloichik 2012 Thomas Cordonnier, Martin Fleurke 2013 Aaron Madlon-Kay, Alex Buloichik - 2024 Hiroshi Miura + 2024 Hiroshi Miura, Thomas Cordonnier Home page: https://www.omegat.org/ Support center: https://omegat.org/support @@ -271,6 +271,7 @@ public List search(String searchText, boolean requiresTranslation, b if (segments.size() > 1) { List fsrc = new ArrayList<>(segments.size()); List ftrans = new ArrayList<>(segments.size()); + int maxPenalty = 0; // multiple segments for (String onesrc : segments) { // find match for a separate segment @@ -280,6 +281,18 @@ public List search(String searchText, boolean requiresTranslation, b && segmentMatch.get(0).scores[0].score >= SUBSEGMENT_MATCH_THRESHOLD) { fsrc.add(segmentMatch.get(0).source); ftrans.add(segmentMatch.get(0).translation); + if (segmentMatch.get(0).fuzzyMark) { + if (maxPenalty < PENALTY_FOR_FUZZY) { + maxPenalty = PENALTY_FOR_FUZZY; + } + } + Matcher matcher = SEARCH_FOR_PENALTY.matcher(segmentMatch.get(0).projs[0]); + if (matcher.find()) { + int penalty = Integer.parseInt(matcher.group(1)); + if (penalty > maxPenalty) { + maxPenalty = penalty; + } + } } else { fsrc.add(""); ftrans.add(""); @@ -289,7 +302,7 @@ public List search(String searchText, boolean requiresTranslation, b PrepareTMXEntry entry = new PrepareTMXEntry(); entry.source = segmenter.glue(sourceLang, sourceLang, fsrc, spaces, brules); entry.translation = segmenter.glue(sourceLang, targetLang, ftrans, spaces, brules); - processEntry(null, entry, "", NearString.MATCH_SOURCE.TM, false, 0); + processEntry(null, entry, "", NearString.MATCH_SOURCE.TM, false, maxPenalty); } } // fill similarity data only for a result