From f69332a08afc420bcbaefca1a105580c37c79124 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20C=2E=20Silva?= <12188364+andrecsilva@users.noreply.github.com> Date: Fri, 6 Dec 2024 11:31:31 -0300 Subject: [PATCH] Changed default XML parser to Woodstox (#482) Changes the default StAX parser to Woodstox. This will allow us to be more in control of the parser's behavior. Also it's a bit faster than Java's default one. --- .../DefaultXPathStreamProcessor.java | 11 +++++--- gradle/libs.versions.toml | 2 ++ .../codemodder-plugin-maven/build.gradle.kts | 1 + .../plugins/maven/MavenProvider.java | 2 +- .../plugins/maven/operator/FormatCommand.java | 25 ++++++++++++++++--- 5 files changed, 34 insertions(+), 7 deletions(-) diff --git a/framework/codemodder-base/src/main/java/io/codemodder/DefaultXPathStreamProcessor.java b/framework/codemodder-base/src/main/java/io/codemodder/DefaultXPathStreamProcessor.java index 6fc938056..4898e1e43 100644 --- a/framework/codemodder-base/src/main/java/io/codemodder/DefaultXPathStreamProcessor.java +++ b/framework/codemodder-base/src/main/java/io/codemodder/DefaultXPathStreamProcessor.java @@ -73,9 +73,14 @@ public Optional process( XMLEventWriter xmlWriter = outputFactory.createXMLEventWriter(sw); while (xmlReader.hasNext()) { final XMLEvent currentEvent = xmlReader.nextEvent(); - Location location = currentEvent.getLocation(); - if (doesPositionMatch(httpMethodPositions, location)) { - handler.handle(xmlReader, xmlWriter, currentEvent); + // get the position of the last character of the event, that is, the start of the next one + if (xmlReader.hasNext()) { + Location location = xmlReader.peek().getLocation(); + if (doesPositionMatch(httpMethodPositions, location)) { + handler.handle(xmlReader, xmlWriter, currentEvent); + } else { + xmlWriter.add(currentEvent); + } } else { xmlWriter.add(currentEvent); } diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 151a1040b..a4ca3f94c 100644 --- a/gradle/libs.versions.toml 
+++ b/gradle/libs.versions.toml @@ -15,6 +15,7 @@ picocli = "4.7.0" slf4j = "2.0.6" guice = "5.1.0" dom4j = "2.1.4" +woodstox = "7.1.0" [libraries] autovalue-annotations = { module = "com.google.auto.value:auto-value-annotations", version.ref = "auto-value" } @@ -27,6 +28,7 @@ contrast-sarif = "com.contrastsecurity:java-sarif:2.0" gson = "com.google.code.gson:gson:2.9.0" guice = { module = "com.google.inject:guice", version.ref = "guice" } immutables = "org.immutables:value:2.9.0" +woodstox = { module = "com.fasterxml.woodstox:woodstox-core", version.ref = "woodstox" } jackson-core = { module = "com.fasterxml.jackson.core:jackson-core", version.ref = "jackson" } jackson-yaml = { module = "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml", version.ref = "jackson" } javadiff = "io.github.java-diff-utils:java-diff-utils:4.12" diff --git a/plugins/codemodder-plugin-maven/build.gradle.kts b/plugins/codemodder-plugin-maven/build.gradle.kts index 754a60150..a385b4d50 100644 --- a/plugins/codemodder-plugin-maven/build.gradle.kts +++ b/plugins/codemodder-plugin-maven/build.gradle.kts @@ -31,4 +31,5 @@ dependencies { implementation(libs.diff.match.patch) implementation(libs.slf4j.simple) implementation(libs.slf4j.api) + implementation(libs.woodstox) } diff --git a/plugins/codemodder-plugin-maven/src/main/java/io/codemodder/plugins/maven/MavenProvider.java b/plugins/codemodder-plugin-maven/src/main/java/io/codemodder/plugins/maven/MavenProvider.java index 72dce755f..ae470e984 100644 --- a/plugins/codemodder-plugin-maven/src/main/java/io/codemodder/plugins/maven/MavenProvider.java +++ b/plugins/codemodder-plugin-maven/src/main/java/io/codemodder/plugins/maven/MavenProvider.java @@ -18,7 +18,7 @@ * *

a. We skip parent finding if there's not a relativePath declaration (this is by design), so * sometimes pom finding will fail on purpose b. there are several flags on ProjectModelFactory - * which aren't applied. They relate to verisons, upgrading and particularly: Actives Profiles c. If + * which aren't applied. They relate to versions, upgrading and particularly: Actives Profiles c. If * you need anything declared in a ~/.m2/settings.xml, we don't support that (e.g., passwords or * proxies) d. Haven't tested, but I'm almost sure that it wouldn't work on any repo other than * central e. We allow on this module to do online resolution. HOWEVER by default its offline f. You diff --git a/plugins/codemodder-plugin-maven/src/main/java/io/codemodder/plugins/maven/operator/FormatCommand.java b/plugins/codemodder-plugin-maven/src/main/java/io/codemodder/plugins/maven/operator/FormatCommand.java index 4ada1c5a7..96b5810c7 100644 --- a/plugins/codemodder-plugin-maven/src/main/java/io/codemodder/plugins/maven/operator/FormatCommand.java +++ b/plugins/codemodder-plugin-maven/src/main/java/io/codemodder/plugins/maven/operator/FormatCommand.java @@ -38,7 +38,7 @@ class FormatCommand extends AbstractCommand { private static final Logger LOGGER = LoggerFactory.getLogger(FormatCommand.class); /** StAX InputFactory */ - private XMLInputFactory inputFactory = hardenFactory(XMLInputFactory.newInstance()); + private XMLInputFactory inputFactory = XMLInputFactory.newInstance().newInstance(); /** StAX OutputFactory */ private XMLOutputFactory outputFactory = XMLOutputFactory.newInstance(); @@ -324,8 +324,27 @@ private void parseXmlAndCharset(POMDocument pomFile) throws XMLStreamException, String originalPomCharsetString = new String(pomFile.getOriginalPom(), pomFile.getCharset()); - String untrimmedOriginalContent = - originalPomCharsetString.substring(elementStart, offset); + var prev = prevEvents.get(prevEvents.size() - 1); + String untrimmedOriginalContent = ""; + // is self-closing 
element, tag is contained within the offset of the next element + if (prev instanceof StartElement + && prev.getLocation().getCharacterOffset() + == endElementEvent.getLocation().getCharacterOffset()) { + untrimmedOriginalContent = + originalPomCharsetString.substring( + offset, eventReader.peek().getLocation().getCharacterOffset()); + } else { + // is an empty tag: the last character event is not in between the tags + if (prev.isStartElement()) { + untrimmedOriginalContent = + originalPomCharsetString.substring( + prev.getLocation().getCharacterOffset(), + eventReader.peek().getLocation().getCharacterOffset()); + + } else { + untrimmedOriginalContent = originalPomCharsetString.substring(elementStart, offset); + } + } String trimmedOriginalContent = untrimmedOriginalContent.trim();