-
-
Notifications
You must be signed in to change notification settings - Fork 114
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: o.o.c.segmentation.SRX to load conf and save srx in more robust way and remove warning message #1159
Merged
Merged
fix: o.o.c.segmentation.SRX to load conf and save srx in more robust way and remove warning message #1159
Changes from 3 commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
257992c
chore: update tests
miurahr e962706
feat: SRX.saveToSrx to use standard name
miurahr a69a418
fix: workaround for an unknown language code
miurahr b0c3e17
Update src/org/omegat/core/segmentation/MapRule.java
miurahr d144d42
refactor: adjust review feedbacks
miurahr d7d7975
revert unrelated changes
miurahr 36005ab
fix: remove a warning message in Bundle.properties
miurahr File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ | |
glossaries, and translation leveraging into updated projects. | ||
|
||
Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk | ||
2024 Hiroshi Miura | ||
Home page: https://www.omegat.org/ | ||
Support center: https://omegat.org/support | ||
|
||
|
@@ -31,7 +32,8 @@ | |
import java.util.regex.Pattern; | ||
import java.util.regex.PatternSyntaxException; | ||
|
||
import gen.core.segmentation.Languagemap; | ||
import tokyo.northside.logging.ILogger; | ||
|
||
import org.omegat.util.Log; | ||
import org.omegat.util.StringUtil; | ||
|
||
|
@@ -44,56 +46,99 @@ | |
public class MapRule implements Serializable { | ||
|
||
private static final long serialVersionUID = -5868132953113679291L; | ||
private static final ILogger LOGGER = Log.getLogger(MapRule.class); | ||
|
||
/** Language Name */ | ||
private String languageCode; | ||
|
||
/** creates a new empty MapRule */ | ||
public MapRule() { | ||
} | ||
|
||
/** creates an initialized MapRule */ | ||
/** | ||
* Create initialized MapRule object. | ||
* | ||
* @param language | ||
* localized language name (from segmentation.conf), or language | ||
* code (from SRX) | ||
* @param pattern | ||
* language pattern such as "EN.*" or ".*" | ||
* @param rules | ||
* segmentation rules. | ||
*/ | ||
public MapRule(String language, String pattern, List<Rule> rules) { | ||
this.setLanguage(language); | ||
String code = LanguageCodes.getLanguageCodeByPattern(pattern); | ||
this.setLanguage(code != null ? code : language); | ||
this.setPattern(pattern); | ||
this.setRules(rules); | ||
} | ||
|
||
/** Language Name */ | ||
private String languageCode; | ||
|
||
public MapRule(Languagemap languagemap, List<Rule> rules) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I removed the ctor with |
||
this.setLanguage(languagemap.getLanguagerulename()); | ||
this.setPattern(languagemap.getLanguagepattern()); | ||
this.setRules(rules); | ||
} | ||
|
||
/** Returns Language Name (to display it in a dialog). */ | ||
public String getLanguageName() { | ||
/* | ||
* If the rules have already been migrated to an SRX file store, the | |
* languageCode field holds a name defined as "LanguageCodes.*_CODE". | |
* Otherwise, the MapRule object was created from the "segmentation.conf" | |
* Java beans file, so the field holds the localized name of the language. | |
* We first assume the latter. If res is empty, the object was created | |
* from an SRX file, so we return languageCode itself. | |
*/ | ||
String res = LanguageCodes.getLanguageName(languageCode); | ||
return StringUtil.isEmpty(res) ? languageCode : res; | ||
} | ||
|
||
/** Sets Language Code */ | ||
public void setLanguage(String code) { | ||
/* | ||
* The setLanguage method is called from XmlDecoder of the Java beans | |
* library when migrating from the "segmentation.conf" beans file. In that | |
* case the argument is the localized name of the language. When the | |
* object is created from a standard SRX file, the argument is a standard | |
* language name, defined as "LanguageCodes.*_CODE". The behavior was | |
* changed in the OmegaT 6.0.0 release in 2023. We first detect whether | |
* the argument is a standard code. If it is not, we try to resolve it as | |
* a localized language name. If you are reading this comment long after | |
* OmegaT 6.x, and you believe all OmegaT 4.x and 5.x users have migrated | |
* to OmegaT 6.x or later, you may want to remove the chunk below. | |
*/ | ||
if (!LanguageCodes.isLanguageCodeKnown(code)) { | ||
String alt = LanguageCodes.getLanguageCodeByName(code); | ||
if (alt != null) { | ||
languageCode = alt; | ||
return; | ||
} else { | ||
Log.logWarningRB("CORE_SRX_RULES_UNKNOWN_LANGUAGE_CODE", code); | ||
// migration heuristics: German translation changed in v5.5. | |
// See: | ||
miurahr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// https://github.com/omegat-org/omegat/pull/1158#issuecomment-2448788253 | ||
if (code != null && code.contains("Textdateien")) { | ||
languageCode = LanguageCodes.F_TEXT_CODE; | ||
} else { | ||
LOGGER.atDebug().setMessageRB("CORE_SRX_RULES_UNKNOWN_LANGUAGE_CODE").addArgument(code) | ||
miurahr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
.log(); | ||
languageCode = code; | ||
} | ||
} | ||
return; | ||
} | ||
languageCode = code; | ||
} | ||
|
||
/** Returns Language Code for programmatic usage. */ | ||
/** | ||
* Returns Language Code for programmatic usage. | ||
*/ | ||
public String getLanguage() { | ||
return languageCode; | ||
} | ||
|
||
/** Pattern for the language/country ISO code (of a form LL-CC). */ | ||
/* | ||
* Pattern for the language/country ISO code (of a form LL-CC). It is like | ||
* "EN.*". | ||
*/ | ||
private Pattern pattern; | ||
|
||
/** Returns Pattern for the language/country ISO code (of a form LL-CC). */ | ||
/** | ||
* Returns Pattern for the language/country ISO code (of a form LL-CC). | ||
*/ | ||
public String getPattern() { | ||
if (pattern != null) { | ||
return pattern.pattern(); | ||
|
@@ -110,14 +155,24 @@ public Pattern getCompiledPattern() { | |
return pattern; | ||
} | ||
|
||
/** Sets Pattern for the language/country ISO code (of a form LL-CC). */ | ||
/** | ||
* Sets Pattern for the language/country ISO code (of a form LL-CC). | ||
* | ||
* @param pattern | ||
* pattern string such as "EN.*" | ||
*/ | ||
public void setPattern(String pattern) throws PatternSyntaxException { | ||
// Fix for bug [1643500] | ||
// language code in segmentation rule is case sensitive | ||
// language code in segmentation rule is case-sensitive | |
// Correction contributed by Tiago Saboga. | ||
this.pattern = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE); | ||
} | ||
|
||
/** | ||
* Deep copy of the object, mandatory for java beans. | ||
* | ||
* @return new MapRule object | ||
*/ | ||
public MapRule copy() { | ||
MapRule result = new MapRule(); | ||
result.languageCode = languageCode; | ||
|
@@ -142,23 +197,28 @@ public void setRules(List<Rule> rules) { | |
this.rules = rules; | ||
} | ||
|
||
/** Indicates whether some other MapRule is "equal to" this one. */ | ||
/** | ||
* Indicates whether some other MapRule is "equal to" this one. | ||
*/ | ||
public boolean equals(Object obj) { | ||
if (obj == null || !(obj instanceof MapRule)) { | ||
if (!(obj instanceof MapRule)) { | ||
return false; | ||
} | ||
MapRule that = (MapRule) obj; | ||
return this.getPattern().equals(that.getPattern()) | ||
&& this.getLanguage().equals(that.getLanguage()) | ||
return this.getPattern().equals(that.getPattern()) && this.getLanguage().equals(that.getLanguage()) | ||
&& this.getRules().equals(that.getRules()); | ||
} | ||
|
||
/** Returns a hash code value for the object. */ | ||
/** | ||
* Returns a hash code value for the object. | ||
*/ | ||
public int hashCode() { | ||
return this.getPattern().hashCode() + this.getLanguage().hashCode() + this.getRules().hashCode(); | ||
} | ||
|
||
/** Returns a string representation of the MapRule for debugging purposes. */ | ||
/** | ||
* Returns a string representation of the MapRule for debugging purposes. | ||
*/ | ||
public String toString() { | ||
return getLanguage() + " (" + getPattern() + ") " + getRules().toString(); | ||
} | ||
|
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We get the language from the pattern: if the pattern is "EN.*", we know it is for English.