omegat-org · miurahr · Nov 12, 2024 · Oct 31, 2024 · Nov 2, 2024 · Oct 31, 2024
@@ -1734,8 +1734,6 @@ CORE_SRX_RULES_LANG_DEFAULT=Default
 CORE_SRX_RULES_FORMATTING_TEXT=Text files segmentation
 CORE_SRX_RULES_FORMATTING_HTML=HTML, XHTML, ODF and Infix segmentation
 
-CORE_SRX_RULES_UNKNOWN_LANGUAGE_CODE=Unknown language code {0} specified
-
 # org.omegat.core.spellchecker.SpellCheckerManager
 CORE_SPELLCHECKER_NO_ENGINE=No active spell checker engine found
 

@@ -1555,8 +1555,6 @@ CORE_SRX_RULES_LANG_DEFAULT=Standaard
 CORE_SRX_RULES_FORMATTING_TEXT=Segmentatie van tekstbestanden
 CORE_SRX_RULES_FORMATTING_HTML=segmentatie voor HTML, XHTML, ODF en Infix
 
-CORE_SRX_RULES_UNKNOWN_LANGUAGE_CODE=Onbekende taalcode {0} gespecificeerd
-
 # org.omegat.core.spellchecker.SpellCheckerManager
 CORE_SPELLCHECKER_NO_ENGINE=Geen actief programma voor spellingscontrole gevonden
 

diff --git a/src/org/omegat/core/segmentation/LanguageCodes.java b/src/org/omegat/core/segmentation/LanguageCodes.java
@@ -43,48 +43,65 @@ public final class LanguageCodes {
     private LanguageCodes() {
     }
 
-    // Language Codes
-    public static final String CATALAN_CODE = "Catalan";
-    public static final String CZECH_CODE = "Czech";
-    public static final String GERMAN_CODE = "German";
-    public static final String ENGLISH_CODE = "English";
-    public static final String SPANISH_CODE = "Spanish";
-    public static final String FINNISH_CODE = "Finnish";
-    public static final String FRENCH_CODE = "French";
-    public static final String ITALIAN_CODE = "Italian";
-    public static final String JAPANESE_CODE = "Japanese";
-    public static final String DUTCH_CODE = "Dutch";
-    public static final String POLISH_CODE = "Polish";
-    public static final String RUSSIAN_CODE = "Russian";
-    public static final String SWEDISH_CODE = "Swedish";
-    public static final String SLOVAK_CODE = "Slovak";
-    public static final String CHINESE_CODE = "Chinese";
-    public static final String DEFAULT_CODE = "Default";
-    public static final String F_TEXT_CODE = "Text";
-    public static final String F_HTML_CODE = "HTML";
+    // Codes of "languagerulename".
+    static final String CATALAN_CODE = "Catalan";
+    static final String CZECH_CODE = "Czech";
+    static final String GERMAN_CODE = "German";
+    static final String ENGLISH_CODE = "English";
+    static final String SPANISH_CODE = "Spanish";
+    static final String FINNISH_CODE = "Finnish";
+    static final String FRENCH_CODE = "French";
+    static final String ITALIAN_CODE = "Italian";
+    static final String JAPANESE_CODE = "Japanese";
+    static final String DUTCH_CODE = "Dutch";
+    static final String POLISH_CODE = "Polish";
+    static final String RUSSIAN_CODE = "Russian";
+    static final String SWEDISH_CODE = "Swedish";
+    static final String SLOVAK_CODE = "Slovak";
+    static final String CHINESE_CODE = "Chinese";
+    static final String DEFAULT_CODE = "Default";
+    static final String F_TEXT_CODE = "Text";
+    static final String F_HTML_CODE = "HTML";
 
     // Language Keys from Resource Bundle
-    public static final String CATALAN_KEY = "CORE_SRX_RULES_LANG_CATALAN";
-    public static final String CZECH_KEY = "CORE_SRX_RULES_LANG_CZECH";
-    public static final String GERMAN_KEY = "CORE_SRX_RULES_LANG_GERMAN";
-    public static final String ENGLISH_KEY = "CORE_SRX_RULES_LANG_ENGLISH";
-    public static final String SPANISH_KEY = "CORE_SRX_RULES_LANG_SPANISH";
-    public static final String FINNISH_KEY = "CORE_SRX_RULES_LANG_FINNISH";
-    public static final String FRENCH_KEY = "CORE_SRX_RULES_LANG_FRENCH";
-    public static final String ITALIAN_KEY = "CORE_SRX_RULES_LANG_ITALIAN";
-    public static final String JAPANESE_KEY = "CORE_SRX_RULES_LANG_JAPANESE";
-    public static final String DUTCH_KEY = "CORE_SRX_RULES_LANG_DUTCH";
-    public static final String POLISH_KEY = "CORE_SRX_RULES_LANG_POLISH";
-    public static final String RUSSIAN_KEY = "CORE_SRX_RULES_LANG_RUSSIAN";
-    public static final String SWEDISH_KEY = "CORE_SRX_RULES_LANG_SWEDISH";
-    public static final String SLOVAK_KEY = "CORE_SRX_RULES_LANG_SLOVAK";
-    public static final String CHINESE_KEY = "CORE_SRX_RULES_LANG_CHINESE";
-    public static final String DEFAULT_KEY = "CORE_SRX_RULES_LANG_DEFAULT";
-    public static final String F_TEXT_KEY = "CORE_SRX_RULES_FORMATTING_TEXT";
-    public static final String F_HTML_KEY = "CORE_SRX_RULES_FORMATTING_HTML";
+    static final String CATALAN_KEY = "CORE_SRX_RULES_LANG_CATALAN";
+    static final String CZECH_KEY = "CORE_SRX_RULES_LANG_CZECH";
+    static final String GERMAN_KEY = "CORE_SRX_RULES_LANG_GERMAN";
+    static final String ENGLISH_KEY = "CORE_SRX_RULES_LANG_ENGLISH";
+    static final String SPANISH_KEY = "CORE_SRX_RULES_LANG_SPANISH";
+    static final String FINNISH_KEY = "CORE_SRX_RULES_LANG_FINNISH";
+    static final String FRENCH_KEY = "CORE_SRX_RULES_LANG_FRENCH";
+    static final String ITALIAN_KEY = "CORE_SRX_RULES_LANG_ITALIAN";
+    static final String JAPANESE_KEY = "CORE_SRX_RULES_LANG_JAPANESE";
+    static final String DUTCH_KEY = "CORE_SRX_RULES_LANG_DUTCH";
+    static final String POLISH_KEY = "CORE_SRX_RULES_LANG_POLISH";
+    static final String RUSSIAN_KEY = "CORE_SRX_RULES_LANG_RUSSIAN";
+    static final String SWEDISH_KEY = "CORE_SRX_RULES_LANG_SWEDISH";
+    static final String SLOVAK_KEY = "CORE_SRX_RULES_LANG_SLOVAK";
+    static final String CHINESE_KEY = "CORE_SRX_RULES_LANG_CHINESE";
+    static final String DEFAULT_KEY = "CORE_SRX_RULES_LANG_DEFAULT";
+    static final String F_TEXT_KEY = "CORE_SRX_RULES_FORMATTING_TEXT";
+    static final String F_HTML_KEY = "CORE_SRX_RULES_FORMATTING_HTML";
+
+    private static final String CATALAN_PATTERN = "CA.*";
+    private static final String CZECH_PATTERN = "CS.*";
+    private static final String GERMAN_PATTERN = "DE.*";
+    private static final String ENGLISH_PATTERN = "EN.*";
+    private static final String SPANISH_PATTERN = "ES.*";
+    private static final String FINNISH_PATTERN = "FI.*";
+    private static final String FRENCH_PATTERN = "FR.*";
+    private static final String ITALIAN_PATTERN = "IT.*";
+    private static final String JAPANESE_PATTERN = "JA.*";
+    private static final String DUTCH_PATTERN = "NL.*";
+    private static final String POLISH_PATTERN = "PL.*";
+    private static final String RUSSIAN_PATTERN = "RU.*";
+    private static final String SWEDISH_PATTERN = "SV.*";
+    private static final String SLOVAK_PATTERN = "SK.*";
+    private static final String CHINESE_PATTERN = "ZH.*";
 
     /** A Map from language codes to language keys. */
-    private static Map<String, String> codeKeyHash = new HashMap<>();
+    private static final Map<String, String> codeKeyHash = new HashMap<>();
+    private static final Map<String, String> patternHash = new HashMap<>();
 
     static {
         codeKeyHash.put(CATALAN_CODE, CATALAN_KEY);
@@ -105,6 +122,21 @@ private LanguageCodes() {
         codeKeyHash.put(DEFAULT_CODE, DEFAULT_KEY);
         codeKeyHash.put(F_TEXT_CODE, F_TEXT_KEY);
         codeKeyHash.put(F_HTML_CODE, F_HTML_KEY);
+        patternHash.put(CATALAN_PATTERN, CATALAN_CODE);
+        patternHash.put(CZECH_PATTERN, CZECH_CODE);
+        patternHash.put(GERMAN_PATTERN, GERMAN_CODE);
+        patternHash.put(ENGLISH_PATTERN, ENGLISH_CODE);
+        patternHash.put(SPANISH_PATTERN, SPANISH_CODE);
+        patternHash.put(FINNISH_PATTERN, FINNISH_CODE);
+        patternHash.put(FRENCH_PATTERN, FRENCH_CODE);
+        patternHash.put(ITALIAN_PATTERN, ITALIAN_CODE);
+        patternHash.put(JAPANESE_PATTERN, JAPANESE_CODE);
+        patternHash.put(DUTCH_PATTERN, DUTCH_CODE);
+        patternHash.put(POLISH_PATTERN, POLISH_CODE);
+        patternHash.put(RUSSIAN_PATTERN, RUSSIAN_CODE);
+        patternHash.put(SWEDISH_PATTERN, SWEDISH_CODE);
+        patternHash.put(SLOVAK_PATTERN, SLOVAK_CODE);
+        patternHash.put(CHINESE_PATTERN, CHINESE_CODE);
     }
 
     /**
@@ -126,11 +158,24 @@ public static boolean isLanguageCodeKnown(String code) {
     }
 
     public static String getLanguageCodeByName(String name) {
-        for (Map.Entry<String, String> entry: codeKeyHash.entrySet()) {
+        if (name == null) {
+            return null;
+        }
+        for (Map.Entry<String, String> entry : codeKeyHash.entrySet()) {
             if (OStrings.getString(entry.getValue()).equals(name)) {
                 return entry.getKey();
             }
         }
+        // Migration heuristic: the German translation changed in v5.5.
+        // See:
+        // https://github.com/omegat-org/omegat/pull/1158#issuecomment-2448788253
+        if (name.contains("Textdateien")) {
+            return LanguageCodes.F_TEXT_CODE;
+        }
         return null;
     }
+
+    public static String getLanguageCodeByPattern(String pattern) {
+        return patternHash.get(pattern);
+    }
 }
diff --git a/src/org/omegat/core/segmentation/MapRule.java b/src/org/omegat/core/segmentation/MapRule.java
@@ -4,6 +4,7 @@
           glossaries, and translation leveraging into updated projects.
 
  Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
+               2024 Hiroshi Miura
                Home page: https://www.omegat.org/
                Support center: https://omegat.org/support
 
@@ -31,7 +32,8 @@
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
 
-import gen.core.segmentation.Languagemap;
+import tokyo.northside.logging.ILogger;
+
 import org.omegat.util.Log;
 import org.omegat.util.StringUtil;
 
@@ -44,56 +46,95 @@
 public class MapRule implements Serializable {
 
     private static final long serialVersionUID = -5868132953113679291L;
+    private static final ILogger LOGGER = Log.getLogger(MapRule.class);
+
+    /** Language Name */
+    private String languageCode;
 
-    /** creates a new empty MapRule */
+    /**
+     * Creates a new empty MapRule.
+     * <p>
+     * When SRX.loadSrxFile loads segmentation.conf, java.beans.XMLDecoder
+     * creates an empty object, then calls setLanguage and setPattern methods.
+     * </p>
+     */
     public MapRule() {
     }
 
-    /** creates an initialized MapRule */
+    /**
+     * Creates an initialized MapRule object.
+     * 
+     * @param language
+     *            localized language name (from segmentation.conf), or language
+     *            code (from SRX)
+     * @param pattern
+     *            language pattern such as "EN.*" or ".*"
+     * @param rules
+     *            segmentation rules.
+     */
     public MapRule(String language, String pattern, List<Rule> rules) {
-        this.setLanguage(language);
+        String code = LanguageCodes.getLanguageCodeByPattern(pattern);
+        this.setLanguage(code != null ? code : language);
         this.setPattern(pattern);
         this.setRules(rules);
     }
 
-    /** Language Name */
-    private String languageCode;
-
-    public MapRule(Languagemap languagemap, List<Rule> rules) {
-        this.setLanguage(languagemap.getLanguagerulename());
-        this.setPattern(languagemap.getLanguagepattern());
-        this.setRules(rules);
-    }
-
     /** Returns Language Name (to display it in a dialog). */
     public String getLanguageName() {
+        /*
+         * When migration to an SRX file store has already happened, the
+         * languageCode field holds a name defined as "LanguageCodes.*_CODE".
+         * Otherwise, the MapRule object was created from the
+         * "segmentation.conf" Java beans file, so it holds the localized name
+         * of the language. We first assume the latter. If res is empty, the
+         * object was created from an SRX file, so we return languageCode itself.
+         */
         String res = LanguageCodes.getLanguageName(languageCode);
         return StringUtil.isEmpty(res) ? languageCode : res;
     }
 
     /** Sets Language Code */
     public void setLanguage(String code) {
+        /*
+         * The setLanguage method is called by the XMLDecoder of the Java beans
+         * library when migrating from the "segmentation.conf" beans file; the
+         * argument is then the localized name of the language. When the object
+         * is created from a standard SRX file, the argument is the standard
+         * language name, defined as "LanguageCodes.*_CODE". This behavior
+         * changed in the OmegaT 6.0.0 release in 2023. We first check whether
+         * the argument is a standard code. If it is not, we try to resolve it
+         * as a localized language name. Once all OmegaT 4.x and 5.x users
+         * have migrated to OmegaT 6.x or later, this workaround can be removed.
+         */
         if (!LanguageCodes.isLanguageCodeKnown(code)) {
             String alt = LanguageCodes.getLanguageCodeByName(code);
             if (alt != null) {
                 languageCode = alt;
                 return;
             } else {
-                Log.logWarningRB("CORE_SRX_RULES_UNKNOWN_LANGUAGE_CODE", code);
+                LOGGER.atDebug().setMessage("Unknown languagerulename '{}'").addArgument(code).log();
             }
         }
         languageCode = code;
     }
 
-    /** Returns Language Code for programmatic usage. */
+    /**
+     * Returns Language Code for programmatic usage.
+     */
     public String getLanguage() {
         return languageCode;
     }
 
-    /** Pattern for the language/country ISO code (of a form LL-CC). */
+    /*
+     * Pattern for the language/country ISO code (of a form LL-CC). It is like
+     * "EN.*".
+     */
     private Pattern pattern;
 
-    /** Returns Pattern for the language/country ISO code (of a form LL-CC). */
+    /**
+     * Returns Pattern for the language/country ISO code (of a form LL-CC).
+     */
     public String getPattern() {
         if (pattern != null) {
             return pattern.pattern();
@@ -110,14 +151,24 @@ public Pattern getCompiledPattern() {
         return pattern;
     }
 
-    /** Sets Pattern for the language/country ISO code (of a form LL-CC). */
+    /**
+     * Sets Pattern for the language/country ISO code (of a form LL-CC).
+     * 
+     * @param pattern
+     *            pattern string such as "EN.*"
+     */
     public void setPattern(String pattern) throws PatternSyntaxException {
         // Fix for bug [1643500]
-        // language code in segmentation rule is case sensitive
+        // language code in segmentation rule is case-sensitive
         // Correction contributed by Tiago Saboga.
         this.pattern = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE);
     }
 
+    /**
+     * Deep copy of the object, mandatory for java beans.
+     * 
+     * @return new MapRule object
+     */
     public MapRule copy() {
         MapRule result = new MapRule();
         result.languageCode = languageCode;
@@ -142,23 +193,28 @@ public void setRules(List<Rule> rules) {
         this.rules = rules;
     }
 
-    /** Indicates whether some other MapRule is "equal to" this one. */
+    /**
+     * Indicates whether some other MapRule is "equal to" this one.
+     */
     public boolean equals(Object obj) {
-        if (obj == null || !(obj instanceof MapRule)) {
+        if (!(obj instanceof MapRule)) {
             return false;
         }
         MapRule that = (MapRule) obj;
-        return this.getPattern().equals(that.getPattern())
-                && this.getLanguage().equals(that.getLanguage())
+        return this.getPattern().equals(that.getPattern()) && this.getLanguage().equals(that.getLanguage())
                 && this.getRules().equals(that.getRules());
     }
 
-    /** Returns a hash code value for the object. */
+    /**
+     * Returns a hash code value for the object.
+     */
     public int hashCode() {
         return this.getPattern().hashCode() + this.getLanguage().hashCode() + this.getRules().hashCode();
     }
 
-    /** Returns a string representation of the MapRule for debugging purposes. */
+    /**
+     * Returns a string representation of the MapRule for debugging purposes.
+     */
     public String toString() {
         return getLanguage() + " (" + getPattern() + ") " + getRules().toString();
     }