Skip to content

Commit

Permalink
[optimise] Rewrite of XMLWrite#writeChars(CharSequence, boolean) to u…
Browse files Browse the repository at this point in the history
…se a single loop and avoid an additional copy
  • Loading branch information
adamretter committed Dec 4, 2024
1 parent 7bcfa93 commit 0f7111c
Showing 1 changed file with 82 additions and 77 deletions.
159 changes: 82 additions & 77 deletions exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,23 @@ public class XMLWriter implements SerializerWriter {
defaultProperties.setProperty(EXistOutputKeys.XDM_SERIALIZATION, "no");
}

private static final BitSet TEXT_SPECIAL_CHARS = new BitSet(128);
private static final BitSet ATTR_SPECIAL_CHARS = new BitSet(128);
static {
TEXT_SPECIAL_CHARS.set('<');
TEXT_SPECIAL_CHARS.set('>');
// TEXT_SPECIAL_CHARS('\r');
TEXT_SPECIAL_CHARS.set('&');

ATTR_SPECIAL_CHARS.set('<');
ATTR_SPECIAL_CHARS.set('>');
ATTR_SPECIAL_CHARS.set('\r');
ATTR_SPECIAL_CHARS.set('\n');
ATTR_SPECIAL_CHARS.set('\t');
ATTR_SPECIAL_CHARS.set('&');
ATTR_SPECIAL_CHARS.set('"');
}

protected Writer writer = null;

protected CharacterSet charSet;
Expand All @@ -79,10 +96,6 @@ public class XMLWriter implements SerializerWriter {

private final char[] charref = new char[10];

private static final boolean[] textSpecialChars;

private static final boolean[] attrSpecialChars;

private String defaultNamespace = "";

/**
Expand All @@ -98,24 +111,12 @@ public class XMLWriter implements SerializerWriter {
private LazyVal<Set<QName>> cdataSectionElements = new LazyVal<>(this::parseCdataSectionElementNames);
private boolean cdataSetionElement = false;

static {
textSpecialChars = new boolean[128];
Arrays.fill(textSpecialChars, false);
textSpecialChars['<'] = true;
textSpecialChars['>'] = true;
// textSpecialChars['\r'] = true;
textSpecialChars['&'] = true;

attrSpecialChars = new boolean[128];
Arrays.fill(attrSpecialChars, false);
attrSpecialChars['<'] = true;
attrSpecialChars['>'] = true;
attrSpecialChars['\r'] = true;
attrSpecialChars['\n'] = true;
attrSpecialChars['\t'] = true;
attrSpecialChars['&'] = true;
attrSpecialChars['"'] = true;
}
/**
* Buffer for use in {@link #writeChars(CharSequence, boolean)}.
*/
private static final int WRITE_CHARS_BUFFER_SIZE = 1024;
private final char[] writeCharsBuffer = new char[WRITE_CHARS_BUFFER_SIZE];
private int writeCharsBufferIdx = 0;

public XMLWriter() {
charSet = CharacterSet.getCharacterSet(UTF_8.name());
Expand Down Expand Up @@ -573,66 +574,70 @@ protected void writeDoctype(final String rootElement) throws TransformerExceptio
protected boolean needsEscape(final char ch) {
return true;
}

protected void writeChars(final CharSequence s, final boolean inAttribute) throws IOException {
final boolean[] specialChars = inAttribute ? attrSpecialChars : textSpecialChars;
char ch = 0;

void writeChars(final CharSequence s, final boolean inAttribute) throws IOException {
final BitSet specialChars = inAttribute ? ATTR_SPECIAL_CHARS : TEXT_SPECIAL_CHARS;

char c;
boolean specialChar = false;
final int len = s.length();
int pos = 0, i;
while(pos < len) {
i = pos;
while(i < len) {
ch = s.charAt(i);
if(ch < 128) {
if(specialChars[ch]) {
break;
} else {
i++;
}
} else if(!charSet.inCharacterSet(ch)) {
break;
} else {
i++;
}
for (int i = 0; i < len; i++) {
c = s.charAt(i);

// the largest special char we have to worry about is '>' (ASCII code 62 (decimal))
if ('>' > c && specialChars.get(c) || !charSet.inCharacterSet(c)) {
// character is special and needs to be escaped, or character is not in expected character set
specialChar = true;
} else {
// normal character
writeCharsBuffer[writeCharsBufferIdx++] = c;
}
writeCharSeq(s, pos, i);
// writer.write(s.subSequence(pos, i).toString());

if (i >= len) {
return;

if (specialChar || WRITE_CHARS_BUFFER_SIZE == writeCharsBufferIdx) {
// encountered a special char, or buffer is full, in either case we flush the buffer
writer.write(writeCharsBuffer, 0, writeCharsBufferIdx); // write the buffer
writeCharsBufferIdx = 0; // reset the buffer
}

if(needsEscape(ch)) {
switch(ch) {
case '<':
writer.write("&lt;");
break;
case '>':
writer.write("&gt;");
break;
case '&':
writer.write("&amp;");
break;
case '\r':
writer.write("&#xD;");
break;
case '\n':
writer.write("&#xA;");
break;
case '\t':
writer.write("&#x9;");
break;
case '"':
writer.write("&#34;");
break;
default:
writeCharacterReference(ch);

if (specialChar) {
if (needsEscape(c)) {
switch (c) {
case '<':
writer.write("&lt;");
break;
case '>':
writer.write("&gt;");
break;
case '&':
writer.write("&amp;");
break;
case '\r':
writer.write("&#xD;");
break;
case '\n':
writer.write("&#xA;");
break;
case '\t':
writer.write("&#x9;");
break;
case '"':
writer.write("&#34;");
break;
default:
writeCharacterReference(c);
}
} else {
writer.write(c);
}
} else {
writer.write(ch);

specialChar = false; // reset specialChar flag
}

pos = ++i;
}

// flush out anything left in the buffer
if (writeCharsBufferIdx > 0) {
writer.write(writeCharsBuffer, 0, writeCharsBufferIdx); // write the buffer
writeCharsBufferIdx = 0; // reset the buffer for use next time
}
}

Expand Down

0 comments on commit 0f7111c

Please sign in to comment.