Skip to content

Commit

Permalink
Changed default XML parser to Woodstox (#482)
Browse files Browse the repository at this point in the history
Changes the default StAX parser to Woodstox. This gives us more control
over the parser's behavior. It is also a bit faster than the JDK's
default parser.
  • Loading branch information
andrecsilva authored Dec 6, 2024
1 parent 4eecd14 commit f69332a
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,14 @@ public Optional<XPathStreamProcessChange> process(
XMLEventWriter xmlWriter = outputFactory.createXMLEventWriter(sw);
while (xmlReader.hasNext()) {
final XMLEvent currentEvent = xmlReader.nextEvent();
Location location = currentEvent.getLocation();
if (doesPositionMatch(httpMethodPositions, location)) {
handler.handle(xmlReader, xmlWriter, currentEvent);
// get the position of the last character of the event, that is, the start of the next one
if (xmlReader.hasNext()) {
Location location = xmlReader.peek().getLocation();
if (doesPositionMatch(httpMethodPositions, location)) {
handler.handle(xmlReader, xmlWriter, currentEvent);
} else {
xmlWriter.add(currentEvent);
}
} else {
xmlWriter.add(currentEvent);
}
Expand Down
2 changes: 2 additions & 0 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ picocli = "4.7.0"
slf4j = "2.0.6"
guice = "5.1.0"
dom4j = "2.1.4"
woodstox = "7.1.0"

[libraries]
autovalue-annotations = { module = "com.google.auto.value:auto-value-annotations", version.ref = "auto-value" }
Expand All @@ -27,6 +28,7 @@ contrast-sarif = "com.contrastsecurity:java-sarif:2.0"
gson = "com.google.code.gson:gson:2.9.0"
guice = { module = "com.google.inject:guice", version.ref = "guice" }
immutables = "org.immutables:value:2.9.0"
woodstox = { module = "com.fasterxml.woodstox:woodstox-core", version.ref = "woodstox" }
jackson-core = { module = "com.fasterxml.jackson.core:jackson-core", version.ref = "jackson" }
jackson-yaml = { module = "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml", version.ref = "jackson" }
javadiff = "io.github.java-diff-utils:java-diff-utils:4.12"
Expand Down
1 change: 1 addition & 0 deletions plugins/codemodder-plugin-maven/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@ dependencies {
implementation(libs.diff.match.patch)
implementation(libs.slf4j.simple)
implementation(libs.slf4j.api)
implementation(libs.woodstox)
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
*
* <p>a. We skip parent finding if there's not a relativePath declaration (this is by design), so
* sometimes pom finding will fail on purpose b. there are several flags on ProjectModelFactory
* which aren't applied. They relate to verisons, upgrading and particularly: Actives Profiles c. If
* which aren't applied. They relate to versions, upgrading and particularly: Active Profiles c. If
* you need anything declared in a ~/.m2/settings.xml, we don't support that (e.g., passwords or
* proxies) d. Haven't tested, but I'm almost sure that it wouldn't work on any repo other than
* central e. We allow this module to do online resolution. HOWEVER, by default it's offline f. You
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class FormatCommand extends AbstractCommand {
private static final Logger LOGGER = LoggerFactory.getLogger(FormatCommand.class);

/** StAX InputFactory */
private XMLInputFactory inputFactory = hardenFactory(XMLInputFactory.newInstance());
private XMLInputFactory inputFactory = XMLInputFactory.newInstance().newInstance();

/** StAX OutputFactory */
private XMLOutputFactory outputFactory = XMLOutputFactory.newInstance();
Expand Down Expand Up @@ -324,8 +324,27 @@ private void parseXmlAndCharset(POMDocument pomFile) throws XMLStreamException,
String originalPomCharsetString =
new String(pomFile.getOriginalPom(), pomFile.getCharset());

String untrimmedOriginalContent =
originalPomCharsetString.substring(elementStart, offset);
var prev = prevEvents.get(prevEvents.size() - 1);
String untrimmedOriginalContent = "";
// self-closing element: the tag is contained within the offset of the next element
if (prev instanceof StartElement
&& prev.getLocation().getCharacterOffset()
== endElementEvent.getLocation().getCharacterOffset()) {
untrimmedOriginalContent =
originalPomCharsetString.substring(
offset, eventReader.peek().getLocation().getCharacterOffset());
} else {
// empty tag: the last characters event is not in between the tags
if (prev.isStartElement()) {
untrimmedOriginalContent =
originalPomCharsetString.substring(
prev.getLocation().getCharacterOffset(),
eventReader.peek().getLocation().getCharacterOffset());

} else {
untrimmedOriginalContent = originalPomCharsetString.substring(elementStart, offset);
}
}

String trimmedOriginalContent = untrimmedOriginalContent.trim();

Expand Down

0 comments on commit f69332a

Please sign in to comment.