-
+
diff --git a/DotifyTranslator/lib/org.daisy.util.jar b/DotifyTranslator/lib/org.daisy.util.jar
deleted file mode 100644
index b1f04e80..00000000
Binary files a/DotifyTranslator/lib/org.daisy.util.jar and /dev/null differ
diff --git a/DotifyTranslator/src/org/daisy/dotify/text/CharFilter.java b/DotifyTranslator/src/org/daisy/dotify/text/CharFilter.java
index 8db972c9..f5a5fbd8 100644
--- a/DotifyTranslator/src/org/daisy/dotify/text/CharFilter.java
+++ b/DotifyTranslator/src/org/daisy/dotify/text/CharFilter.java
@@ -2,8 +2,6 @@
import java.net.URL;
-import org.daisy.util.i18n.UCharReplacer;
-
/**
* Implements StringFilter using UCharReplacer.
*
@@ -12,14 +10,14 @@
* @since 1.0
*/
public class CharFilter implements StringFilter {
- private UCharReplacer ucr;
+ private final SimpleCharReplacer ucr;
/**
* Create a new CharFilter
* @param table relative path to replacement table, see UCharReplacement for more information
*/
public CharFilter(URL table) {
- this.ucr = new UCharReplacer();
+ this.ucr = new SimpleCharReplacer();
try {
this.ucr.addSubstitutionTable(table);
} catch (Exception e) {
@@ -27,6 +25,10 @@ public CharFilter(URL table) {
}
}
+ /**
+ * Create a new CharFilter that uses the supplied replacer.
+ * @param replacer the replacer that {@link #filter(String)} delegates to; it is stored as-is, not copied
+ */
+ public CharFilter(SimpleCharReplacer replacer) {
+ this.ucr = replacer;
+ }
+
public String filter(String str) {
return ucr.replace(str).toString();
}
diff --git a/DotifyTranslator/src/org/daisy/dotify/text/SimpleCharReplacer.java b/DotifyTranslator/src/org/daisy/dotify/text/SimpleCharReplacer.java
new file mode 100644
index 00000000..ac06cd49
--- /dev/null
+++ b/DotifyTranslator/src/org/daisy/dotify/text/SimpleCharReplacer.java
@@ -0,0 +1,163 @@
+package org.daisy.dotify.text;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+
+import com.ibm.icu.text.Normalizer;
+import com.ibm.icu.text.UCharacterIterator;
+
+/**
+ *
+ * Provides substitution for unicode characters with replacement strings.
+ *
+ *
+ *
+ * This is a much simplified version of UCharReplacer by Markus Gylling from the
+ * org.daisy.util package.
+ *
+ *
+ *
+ * The use of this class may result in a change in unicode character
+ * composition between input and output. If you need a certain normalization
+ * form, normalize after the use of this class.
+ *
+ *
+ *
+ * Usage example:
+ *
+ * <pre>
+ * SimpleCharReplacer ucr = new SimpleCharReplacer();
+ * ucr.addSubstitutionTable(fileURL);
+ * ucr.addSubstitutionTable(fileURL2);
+ * String ret = ucr.replace(input).toString();
+ * </pre>
+ *
+ *
+ * The translation table file is using the same xml format as that of
+ * java.util.Properties [1][2], using the HEX representation (without the
+ * characteristic 0x-prefix!) of a unicode character as the key
+ * attribute and the replacement string as value of the entry element.
+ *
+ *
+ *
+ * If the key attribute contains exactly one unicode codepoint (one
+ * character) it will be treated literally. It will not be interpreted as a HEX
+ * representation of another character, even if theoretically possible. E.g. if
+ * the key is "a", it will be treated as 0x0061 rather than as 0x000a
+ *
+ *
+ *
+ * Note - there is a significant difference between a unicode codepoint (32 bit
+ * int) and a UTF16 codeunit (=char) - a codepoint consists of one or two
+ * codeunits.
+ *
+ *
+ * To make sure an int represents a codepoint and not a codeunit, use for
+ * example {@code com.ibm.icu.text.Normalizer} to NFC compose, followed by
+ * {@code com.ibm.icu.text.UCharacterIterator} to retrieve possibly non-BMP
+ * codepoints from a string.
+ *
+ *
+ * @see [1] http://java.sun.com/j2se/1.5.0/docs/api/java/util/Properties.html
+ * @see [2] http://java.sun.com/dtd/properties.dtd
+ *
+ * @author Joel Håkansson
+ * @author Markus Gylling (UCharReplacer)
+ */
+public class SimpleCharReplacer {
+    /** Maps a unicode code point to its replacement string. */
+    private final Map<Integer, String> mSubstitutionTable;
+
+    /**
+     * Creates a replacer with an empty substitution table. Tables are added
+     * afterwards with {@link #addSubstitutionTable(URL)}.
+     */
+    public SimpleCharReplacer() {
+        // explicit type arguments (no diamond) so this also compiles on
+        // pre-Java-7 compilers
+        mSubstitutionTable = new HashMap<Integer, String>();
+    }
+
+    /**
+     * Loads the substitution table at the given URL into this replacer.
+     * Entries are added to those already loaded; an entry for a code point
+     * that is already present replaces the earlier one.
+     *
+     * @param table location of a table in java.util.Properties XML format
+     * @throws IOException if the table cannot be loaded; the underlying
+     *         exception is attached as the cause
+     */
+    public void addSubstitutionTable(URL table) throws IOException {
+        try {
+            loadTable(table);
+        } catch (Exception e) {
+            // Keep the original exception as the cause instead of only its
+            // message. initCause is used rather than the
+            // IOException(String, Throwable) constructor, which needs Java 6.
+            throw (IOException) new IOException(e.getMessage()).initCause(e);
+        }
+    }
+
+    /**
+     * Replaces every code point of the input that has an entry in the loaded
+     * substitution tables with its replacement string. Code points without an
+     * entry, or whose replacement is empty, are copied through unchanged.
+     * <p>
+     * The input is NFC-normalized before lookup, so the result may differ in
+     * unicode composition from the input even where nothing was substituted.
+     *
+     * @param input the text to process
+     * @return the text with all substitutions applied
+     */
+    public CharSequence replace(String input) {
+        // normalize to eliminate any ambiguities vis-a-vis the user tables;
+        // normalize() returns a new string, so the result has to be kept
+        String normalized = Normalizer.normalize(input, Normalizer.NFC);
+        StringBuilder sb = new StringBuilder(normalized.length());
+
+        // iterate by code point (not by char) so that non-BMP characters are
+        // looked up as a whole
+        UCharacterIterator uci = UCharacterIterator.getInstance(normalized);
+        int codePoint;
+        while ((codePoint = uci.nextCodePoint()) != UCharacterIterator.DONE) {
+            CharSequence substitution = substitute(codePoint);
+            if (null != substitution && substitution.length() > 0) {
+                // a replacement occurred
+                sb.append(substitution);
+            } else {
+                // a replacement didn't occur
+                sb.appendCodePoint(codePoint);
+            }
+        }
+        return sb;
+    }
+
+    /**
+     * Loads a table using the Properties class and adds its entries to the
+     * substitution table.
+     * <p>
+     * A key consisting of exactly one code point is taken literally; any
+     * other key is parsed as the hexadecimal value of a code point. Keys that
+     * are not valid hex numbers are reported on System.err and skipped.
+     *
+     * @param tableURL location of the table in java.util.Properties XML format
+     * @throws IOException if the table cannot be read or is not a valid
+     *         properties document
+     */
+    private void loadTable(URL tableURL) throws IOException {
+        Properties props = new Properties();
+        // loadFromXML closes the stream itself once it returns
+        props.loadFromXML(tableURL.openStream());
+        for (Object entry : props.keySet()) {
+            String key = (String) entry;
+            String replacement = props.getProperty(key);
+            if (key.codePointCount(0, key.length()) == 1) {
+                // single character key: use the character itself, never
+                // interpret it as a hex digit
+                mSubstitutionTable.put(key.codePointAt(0), replacement);
+            } else {
+                try {
+                    mSubstitutionTable.put(Integer.decode("0x" + key), replacement);
+                } catch (NumberFormatException e) {
+                    System.err.println("error in translation table " + tableURL.toString() + ": attribute key=\"" + key + "\" is not a hex number.");
+                }
+            }
+        }
+    }
+
+    /**
+     * Looks up the replacement for a single code point.
+     *
+     * @param codePoint the unicode code point to look up
+     * @return a substitute string if available in tables, or null if not
+     *         available
+     */
+    private String substitute(int codePoint) {
+        Integer key = Integer.valueOf(codePoint);
+        return mSubstitutionTable.get(key);
+    }
+
+}