Skip to content
This repository has been archived by the owner on Nov 9, 2018. It is now read-only.

Commit

Permalink
removed dependency on org.daisy.util in dotify-translator
Browse files Browse the repository at this point in the history
  • Loading branch information
Joel Håkansson committed Dec 27, 2012
1 parent 79c9ade commit fdc708a
Show file tree
Hide file tree
Showing 6 changed files with 172 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,6 @@ DotifyTranslator/build-properties.xml -text
DotifyTranslator/build.xml -text
DotifyTranslator/lib/icu4j-3_8.jar -text
DotifyTranslator/lib/junit-4.7.jar -text
DotifyTranslator/lib/org.daisy.util.jar -text
DotifyTranslator/lib/texhyphj.jar -text
DotifyTranslator/logging.properties -text
DotifyTranslator/src/META-INF/services/org.daisy.dotify.hyphenator.HyphenatorFactory -text
Expand Down Expand Up @@ -515,6 +514,7 @@ DotifyTranslator/src/org/daisy/dotify/text/FilterFactory.java -text svneol=unset
DotifyTranslator/src/org/daisy/dotify/text/FilterLocale.java -text svneol=unset#text/plain
DotifyTranslator/src/org/daisy/dotify/text/IdentityFilter.java -text
DotifyTranslator/src/org/daisy/dotify/text/RegexFilter.java -text svneol=unset#text/plain
DotifyTranslator/src/org/daisy/dotify/text/SimpleCharReplacer.java -text
DotifyTranslator/src/org/daisy/dotify/text/SplitResult.java -text svneol=unset#text/plain
DotifyTranslator/src/org/daisy/dotify/text/StringFilter.java -text svneol=unset#text/plain
DotifyTranslator/src/org/daisy/dotify/text/StringSplitter.java -text svneol=unset#text/plain
Expand Down
2 changes: 1 addition & 1 deletion DotifyTranslator/.classpath
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5"/>
<classpathentry kind="lib" path="lib/junit-4.7.jar"/>
<classpathentry kind="lib" path="lib/texhyphj.jar"/>
<classpathentry kind="lib" path="lib/org.daisy.util.jar"/>
<classpathentry kind="lib" path="lib/icu4j-3_8.jar"/>
<classpathentry kind="output" path="bin/src"/>
</classpath>
2 changes: 1 addition & 1 deletion DotifyTranslator/build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@
<patternset refid="dist-jar.patternset"/>
</fileset>
<manifest>
<attribute name="Class-Path" value="icu4j-3_8.jar org.daisy.util.jar texhyphj.jar"/>
<attribute name="Class-Path" value="icu4j-3_8.jar texhyphj.jar"/>
<attribute name="Main-Class" value="org.daisy.dotify.Main"/>
<attribute name="Built-By" value="${user.name}"/>
<attribute name="Built-On" value="${ISO-TODAY}"/>
Expand Down
Binary file removed DotifyTranslator/lib/org.daisy.util.jar
Binary file not shown.
10 changes: 6 additions & 4 deletions DotifyTranslator/src/org/daisy/dotify/text/CharFilter.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

import java.net.URL;

import org.daisy.util.i18n.UCharReplacer;

/**
* Implements StringFilter using SimpleCharReplacer.
*
Expand All @@ -12,21 +10,25 @@
* @since 1.0
*/
public class CharFilter implements StringFilter {
private UCharReplacer ucr;
private final SimpleCharReplacer ucr;

/**
* Create a new CharFilter
* @param table URL of the replacement table, see SimpleCharReplacer for more information
*/
public CharFilter(URL table) {
this.ucr = new UCharReplacer();
this.ucr = new SimpleCharReplacer();
try {
this.ucr.addSubstitutionTable(table);
} catch (Exception e) {
e.printStackTrace();
}
}

public CharFilter(SimpleCharReplacer replacer) {
this.ucr = replacer;
}

public String filter(String str) {
return ucr.replace(str).toString();
}
Expand Down
163 changes: 163 additions & 0 deletions DotifyTranslator/src/org/daisy/dotify/text/SimpleCharReplacer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
package org.daisy.dotify.text;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import com.ibm.icu.text.Normalizer;
import com.ibm.icu.text.UCharacterIterator;

/**
* <p>
* Provides substitution for unicode characters with replacement strings.
* </p>
*
* <p>
* This is a much simplified version of UCharReplacer by Markus Gylling from the
* org.daisy.util package.
* </p>
*
* <p>
* The use of this class <em>may</em> result in a change in unicode character
* composition between input and output. If you need a certain normalization
* form, normalize after the use of this class.
* </p>
*
* <p>
* Usage example:
* </p>
* <code><pre>
* SimpleCharReplacer ucr = new SimpleCharReplacer();
* ucr.addSubstitutionTable(fileURL);
* ucr.addSubstitutionTable(fileURL2);
* String ret = ucr.replace(input).toString();
* </pre></code>
*
* <p>
* The translation table file is using the same xml format as that of
* java.util.Properties [1][2], using the HEX representation (without the
* characteristic 0x-prefix!) of a unicode character as the <tt>key</tt>
* attribute and the replacement string as value of the <tt>entry</tt> element.
* </p>
*
* <p>
* If the <tt>key</tt> attribute contains exactly one unicode codepoint (one
* character) it will be treated literally. It will not be interpreted as a HEX
* representation of another character, even if theoretically possible. E.g. if
* the <tt>key</tt> is "a", it will be treated as 0x0061 rather than as 0x000a
* </p>
*
* <p>
* Note - there is a significant difference between a unicode codepoint (32 bit
* int) and a UTF16 codeunit (=char) - a codepoint consists of one or two
* codeunits.
* </p>
* <p>
* To make sure an int represents a codepoint and not a codeunit, use for
* example <code>com.ibm.icu.text.Normalizer</code> to NFC compose, followed by
* <code>com.ibm.icu.text.UCharacterIterator</code> to retrieve possibly non-BMP
* codepoints from a string.
* </p>
*
* @see [1] http://java.sun.com/j2se/1.5.0/docs/api/java/util/Properties.html
* @see [2] http://java.sun.com/dtd/properties.dtd
*
* @author Joel Håkansson
* @author Markus Gylling (UCharReplacer)
*/
public class SimpleCharReplacer {
    /** Maps a unicode code point to the string that replaces it. */
    private final Map<Integer, String> mSubstitutionTable = new HashMap<Integer, String>();

    /**
     * Creates a replacer with an empty substitution table. Until a table has
     * been added, {@link #replace(String)} performs no substitutions.
     */
    public SimpleCharReplacer() {
        // entries are added through addSubstitutionTable
    }

    /**
     * Adds the entries of a substitution table to this replacer. An entry for
     * a code point that is already mapped overwrites the existing mapping.
     *
     * @param table the location of a table in the java.util.Properties xml
     *            format
     * @throws IOException if the table cannot be read or parsed. An unchecked
     *             exception raised while loading is wrapped in an IOException
     *             that carries the same message and has the original
     *             exception as its cause.
     */
    public void addSubstitutionTable(URL table) throws IOException {
        try {
            loadTable(table);
        } catch (IOException e) {
            // already the declared type; rethrow untouched to keep its stack trace
            throw e;
        } catch (RuntimeException e) {
            // IOException(String, Throwable) requires Java 6, initCause does not
            IOException wrapped = new IOException(e.getMessage());
            wrapped.initCause(e);
            throw wrapped;
        }
    }

    /**
     * Replaces every code point of the input that has a non-empty entry in
     * the substitution tables with its replacement string. Code points
     * without an entry, or with an empty replacement, are copied unchanged.
     *
     * <p>
     * The input is NFC normalized before the tables are consulted, so the
     * output may differ in unicode composition from the input even where no
     * substitution took place.
     * </p>
     *
     * @param input the string to process
     * @return the processed character sequence
     */
    public CharSequence replace(String input) {
        // normalize to eliminate any ambiguities vis-a-vis the user tables;
        // normalize returns a new string, so its result has to be used
        String normalized = Normalizer.normalize(input, Normalizer.NFC);

        StringBuilder sb = new StringBuilder(normalized.length());

        // iterate over code points rather than chars, so that characters
        // outside the BMP are looked up as a single unit
        UCharacterIterator uci = UCharacterIterator.getInstance(normalized);
        int codePoint;
        while ((codePoint = uci.nextCodePoint()) != UCharacterIterator.DONE) {
            String substitution = substitute(codePoint);
            if (substitution != null && substitution.length() > 0) {
                // a replacement occurred
                sb.append(substitution);
            } else {
                // a replacement didn't occur
                sb.appendCodePoint(codePoint);
            }
        }
        return sb;
    }

    /**
     * Loads a table using the Properties class and adds its entries to the
     * substitution table.
     *
     * <p>
     * A key consisting of exactly one code point is used literally. Any other
     * key is parsed as a hexadecimal code point value; keys that cannot be
     * parsed are reported on System.err and skipped.
     * </p>
     *
     * @param tableURL the location of the table
     * @throws IOException if the table cannot be read or parsed
     */
    private void loadTable(URL tableURL) throws IOException {
        Properties props = new Properties();
        InputStream in = tableURL.openStream();
        try {
            props.loadFromXML(in);
        } finally {
            // loadFromXML is documented to close the stream when it returns;
            // closing here as well also covers the case where parsing fails
            try {
                in.close();
            } catch (IOException ignored) {
                // nothing useful can be done, and a failure to close must
                // not mask an exception thrown while parsing
            }
        }
        for (Object rawKey : props.keySet()) {
            String key = (String) rawKey;
            String value = props.getProperty(key);
            if (key.codePointCount(0, key.length()) == 1) {
                // a single character is treated literally, never as a hex digit
                mSubstitutionTable.put(key.codePointAt(0), value);
            } else {
                try {
                    mSubstitutionTable.put(Integer.decode("0x" + key), value);
                } catch (NumberFormatException e) {
                    System.err.println("error in translation table " + tableURL.toString() + ": attribute key=\"" + key + "\" is not a hex number.");
                }
            }
        }
    }

    /**
     * Looks up the replacement for a code point.
     *
     * @param codePoint the code point to look up
     * @return a substitute string if available in tables, or null if not
     *         available
     */
    private String substitute(int codePoint) {
        return mSubstitutionTable.get(Integer.valueOf(codePoint));
    }

}

0 comments on commit fdc708a

Please sign in to comment.