Skip to content

Commit 2b18f63

Browse files
Copilot and slachiewicz committed
Fix polynomial regex vulnerability in ENCODING_PATTERN
Changed the regex pattern from `.*` to `.*?` to use non-greedy (reluctant) matching, preventing catastrophic backtracking on malicious input. Added a test case to validate the fix with various edge cases.

Co-authored-by: slachiewicz <[email protected]>
1 parent e45c0a1 commit 2b18f63

File tree

2 files changed

+22
-1
lines changed

2 files changed

+22
-1
lines changed

src/main/java/org/codehaus/plexus/util/xml/XmlReader.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -597,7 +597,7 @@ private static String getXMLGuessEncoding(BufferedInputStream is) throws IOExcep
597597
}
598598

599599
static final Pattern ENCODING_PATTERN =
600-
Pattern.compile("<\\?xml.*encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))", Pattern.MULTILINE);
600+
Pattern.compile("<\\?xml.*?encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))", Pattern.MULTILINE);
601601

602602
// returns the encoding declared in the <?xml encoding=...?>, NULL if none
603603
private static String getXmlProlog(BufferedInputStream is, String guessedEnc) throws IOException {

src/test/java/org/codehaus/plexus/util/xml/XmlStreamReaderTest.java

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -261,4 +261,25 @@ void encodingAttribute() throws IOException {
261261
xml = "<element encoding='attribute value'/>";
262262
checkXmlContent(xml, "UTF-8");
263263
}
264+
265+
/**
266+
* Test that the regex pattern handles edge cases efficiently without catastrophic backtracking.
267+
* This validates the fix for polynomial regex vulnerability.
268+
*
269+
* @throws java.io.IOException if any.
270+
*/
271+
@Test
272+
void encodingPatternWithManyAttributes() throws IOException {
273+
// Test with many attributes before encoding to ensure non-greedy matching works
274+
String xml = "<?xml version='1.0' a='1' b='2' c='3' d='4' e='5' encoding='UTF-8'?><root/>";
275+
checkXmlContent(xml, "UTF-8");
276+
277+
// Test with whitespace variations
278+
xml = "<?xml version='1.0' encoding = 'US-ASCII' ?><root/>";
279+
checkXmlContent(xml, "US-ASCII");
280+
281+
// Test with longer prolog (but still valid)
282+
xml = "<?xml version='1.0' standalone='yes' encoding='ISO-8859-1'?><root/>";
283+
checkXmlContent(xml, "ISO-8859-1");
284+
}
264285
}

0 commit comments

Comments
 (0)