-
Notifications
You must be signed in to change notification settings - Fork 6
/
TranslateStrings.groovy
161 lines (137 loc) · 5.65 KB
/
TranslateStrings.groovy
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
/*
* This script is based on 'StringsResourceTranslator.java' created by
* the ZXing team/ Sean Owen
* http://zxing.googlecode.com/svn-history/r1442/trunk/javase/src/com/google/zxing/StringsResourceTranslator.java
*
* Re-written to Groovy by Thom Nichols
* IMPROVEMENTS:
* - Using a real JSON parser, which handles JSON-encoded (\u1234) characters
* - HTML entity un-escaping
* - Resource string tokens (like %s) are preserved (Google translate mangles it.)
* x Strings without a translation are ignored, not put in translated output file.
* (disabled, since it causes a lot of strings to be re-translated every time.)
*/
import java.util.regex.Matcher
import java.util.regex.Pattern
import org.json.JSONObject
// Script-wide constants. They are assigned without 'def' or a type so that
// they are stored in the script binding and are therefore visible from the
// methods declared further down in this script.

// Matches one '<string name="...">text</string>' element on a single line.
// Group 1 is the resource name, group 2 the text (which may not contain '<').
ENTRY_PATTERN = Pattern.compile('<string name="([^"]+)">([^<]+)</string>')
// Matches a 'values-XX' directory name; group 1 is everything after 'values-'.
STRINGS_FILE_NAME_PATTERN = Pattern.compile("values-(.+)")
// Matches a %d, %s, %f or %x format token, together with an optional
// (optionally backslash-escaped) single or double quote on either side.
STRING_TOKEN_PATTERN = Pattern.compile( /\\?['"]?%[dsfx]\\?['"]?/ )
// Matches a decimal numeric HTML entity such as '&#39;'; group 1 is the number.
HTML_ENTITY_PATTERN = Pattern.compile( /&#(\d+);/ )
// Decimal entity number (as a string, as captured by HTML_ENTITY_PATTERN)
// mapped to the character it stands for. Only these five are known.
HTML_ENTITIES = [
"34": '"',
"38": '&',
"39": "'",
"60": '<',
"62": '>' ]
// Maps the suffix of a 'values-XX' directory name to the language code that
// is sent to the translation service when the two differ.
LANGUAGE_CODE_MASSAGINGS = [
"ja-rJP": "ja",
"zh-rCN": "zh-cn",
"zh-rTW": "zh-tw" ]
/**
 * Brings one translated strings file up to date with the English one.
 *
 * Every entry of the English file is written to a temporary file, re-using
 * the existing translation when there is one and requesting a fresh
 * translation when the entry is missing or its name is listed in
 * forceRetranslation. The translated file is overwritten with the temporary
 * file only when at least one string was actually (re)translated, so a
 * failure part-way through leaves the translated file untouched.
 *
 * The target language is taken from the name of the translated file's parent
 * directory ('values-XX') and mapped through LANGUAGE_CODE_MASSAGINGS.
 *
 * @param englishFile        the reference strings.xml
 * @param translatedFile     the strings.xml inside a 'values-XX' directory
 * @param forceRetranslation names of strings to translate again even though
 *                           a translation already exists
 */
@Grab(group='org.json', module='json', version='20090211')
void translate(File englishFile, File translatedFile, Collection<String> forceRetranslation) {
    SortedMap<String,String> sourceStrings = readLines(englishFile)
    SortedMap<String,String> existingStrings = readLines(translatedFile)

    // Derive the language code from the 'values-XX' directory name.
    String dirName = translatedFile.parentFile.name
    Matcher dirMatcher = STRINGS_FILE_NAME_PATTERN.matcher(dirName)
    dirMatcher.find()
    String language = dirMatcher.group(1)
    language = LANGUAGE_CODE_MASSAGINGS[language] ?: language
    println "Translating " + language

    File scratchFile = File.createTempFile(dirName, ".xml")
    scratchFile.deleteOnExit()

    boolean modified = false
    scratchFile.withWriter('UTF-8') { out ->
        out << '<?xml version="1.0" encoding="UTF-8"?>\n'
        out << "<resources>\n"
        for (Map.Entry<String,String> entry : sourceStrings.entrySet()) {
            String key = entry.key
            String text = existingStrings[key]
            if (text == null || forceRetranslation.contains(key)) {
                modified = true
                text = translateString(entry.value, language)
            }
            // Strings identical to the English text are still written out.
            out << """ <string name="$key">"""
            out << text
            out << "</string>\n"
        }
        out << "</resources>\n"
    }

    // Nothing new was translated: leave the existing file alone.
    if (!modified) return

    println " Writing translations for $language"
    scratchFile.withInputStream { source ->
        translatedFile.withOutputStream { sink -> sink << source }
    }
}
/**
 * Translates one English resource string through the Google AJAX
 * translate endpoint and post-processes the answer.
 *
 * Steps:
 *  1. Each format token matched by STRING_TOKEN_PATTERN is swapped for a
 *     unique placeholder character so the service does not mangle it.
 *  2. The service is queried, up to five times, until it reports status 200.
 *  3. Decimal HTML entities in the answer are decoded using HTML_ENTITIES.
 *  4. The placeholders are swapped back for the original tokens.
 *  5. Apostrophes that are not already backslash-escaped get escaped.
 *
 * The returned text is not XML-escaped by this method.
 *
 * @param english  the English text to translate
 * @param language the target language code, appended to 'langpair=en|'
 * @return the translated text
 * @throws IOException if no request out of five returned status 200, or if
 *         the answer contains a numeric entity missing from HTML_ENTITIES
 */
String translateString(String english, String language) {
    println " >> " + english

    /* Google translate mangles tokens like '%s' so we substitute each of
     * them with a single unicode character that will be left alone. */
    Matcher tokenMatch = STRING_TOKEN_PATTERN.matcher(english)
    StringBuffer sb = new StringBuffer()
    Map<String,String> tokens = [:] // placeholder -> original token
    // Start with a character that is virtually guaranteed not to be used in
    // text. It lies above U+FFFF, so it must be built with Character.toChars:
    // casting the int to a single char would keep only the low 16 bits and
    // yield U+0330, a combining mark, instead.
    int codePoint = 0x10330
    while (tokenMatch.find()) {
        String placeholder = new String(Character.toChars(codePoint++))
        tokens[placeholder] = tokenMatch.group(0)
        tokenMatch.appendReplacement(sb, placeholder)
    }
    english = tokenMatch.appendTail(sb).toString()

    URL translateURL = new URL(
        "http://ajax.googleapis.com/ajax/services/language/translate?v=1.0&q=" +
        URLEncoder.encode(english, "UTF-8") +
        "&langpair=en%7C" + language)

    /* Request Google translation. Sometimes Google replies back with a
     * 'not ok' status code, but it usually works after a couple retries. */
    final int maxAttempts = 5
    def resp = null
    boolean ok = false
    for (int attempt = 0; attempt < maxAttempts && !ok; attempt++) {
        // Parsing as JSON also takes care of \uXXXX-encoded characters.
        resp = new JSONObject(translateURL.getText("UTF-8"))
        ok = resp.getInt('responseStatus') == 200
    }
    if (!ok) throw new IOException("No translate result")

    String translation = resp.getJSONObject('responseData').getString('translatedText')

    // Replace HTML entities:
    Matcher entityMatch = HTML_ENTITY_PATTERN.matcher(translation)
    sb = new StringBuffer()
    while (entityMatch.find()) {
        String entityCode = entityMatch.group(1)
        if (!HTML_ENTITIES.containsKey(entityCode)) {
            // Double quotes are required here: a single-quoted Groovy string
            // would print the variable name instead of its value.
            throw new IOException("Unknown entity code: &#${entityCode};")
        }
        // quoteReplacement keeps '$' and '\' in the replacement literal.
        entityMatch.appendReplacement(sb, Matcher.quoteReplacement(HTML_ENTITIES[entityCode]))
    }
    translation = entityMatch.appendTail(sb).toString()

    // Replace each placeholder with the original token:
    tokens.each { placeholder, token ->
        translation = translation.replace(placeholder, token)
    }

    // Escape every apostrophe that is not already preceded by a backslash.
    // A lookbehind is used (rather than consuming the preceding character)
    // so that an apostrophe at the very start of the text, or one directly
    // following another apostrophe, is escaped as well.
    translation = translation.replaceAll(/(?<!\\)'/, /\\'/)

    println " << " + translation
    return translation
}
/**
 * Reads the string resources of a strings.xml file, line by line.
 *
 * Only the first '<string name="...">text</string>' element found on a line
 * (as matched by ENTRY_PATTERN) is picked up; lines without one are ignored.
 * The file is decoded as UTF-8.
 *
 * @param file the strings.xml file to read; it does not have to exist
 * @return the entries keyed and sorted by resource name; empty when the file
 *         does not exist
 */
SortedMap<String,String> readLines(File file) {
    SortedMap<String,String> result = new TreeMap<String,String>()
    if (file.exists()) {
        file.eachLine('UTF-8') { String text ->
            Matcher entry = ENTRY_PATTERN.matcher(text)
            if (entry.find()) {
                result[entry.group(1)] = entry.group(2)
            }
        }
    }
    return result
}
// main():
// Usage: TranslateStrings.groovy <res-directory> [string-name ...]
//   <res-directory>  directory holding 'values' and the 'values-XX' directories
//   [string-name]    names of strings to translate again even when a
//                    translation already exists

// Fail with a usable message instead of an ArrayIndexOutOfBoundsException.
if (!args) {
    throw new IllegalArgumentException(
        "Usage: TranslateStrings.groovy <res-directory> [string-name-to-retranslate ...]")
}

File resDir = new File(args[0])
// listFiles() returns null for a missing or non-directory path, which would
// make the script silently do nothing; report the mistake instead.
if (!resDir.isDirectory()) {
    throw new IllegalArgumentException("Not a directory: " + resDir)
}

File valueDir = new File(resDir, "values")
File stringsFile = new File(valueDir, "strings.xml")
Collection<String> forceRetranslation = args.length > 1 ? args[1..-1] : []

// Every 'values-XX' sub-directory holds one translation.
File[] translatedValuesDirs = resDir.listFiles({ File candidate ->
    candidate.isDirectory() && candidate.name.startsWith("values-")
} as FileFilter)
if (translatedValuesDirs == null) {
    throw new IOException("Cannot list directory: " + resDir)
}

translatedValuesDirs.each { translatedValuesDir ->
    File translatedStringsFile = new File(translatedValuesDir, "strings.xml")
    translate stringsFile, translatedStringsFile, forceRetranslation
}