Skip to content

Commit 3d2bdaa

Browse files
authored
Have the parser reject quant bounds over UInt16.max (#812)
* Have the parser reject quant bounds over UInt16.max
1 parent e1611c5 commit 3d2bdaa

File tree

3 files changed

+54
-5
lines changed

3 files changed

+54
-5
lines changed

Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,9 @@ extension Parser {
331331
///
332332
/// Diagnoses on overflow
333333
///
334-
mutating func lexNumber(_ kind: RadixKind = .decimal) -> AST.Atom.Number? {
334+
mutating func lexNumber(
335+
_ kind: RadixKind = .decimal
336+
) -> AST.Atom.Number? {
335337
guard let str = tryEatPrefix(kind.characterFilter) else {
336338
return nil
337339
}
@@ -342,6 +344,26 @@ extension Parser {
342344
return .init(i, at: str.location)
343345
}
344346

347+
/// Try to eat a quantification bound, such as appears in `/x{3,12}`
348+
///
349+
/// Returns: `nil` if there's no number; otherwise the number, whose value is `nil` if it overflowed
350+
///
351+
/// Diagnoses on overflow. Currently, we will diagnose `.numberOverflow` for any value that does not fit in `UInt16` (i.e. over `UInt16.max`).
352+
///
353+
mutating func lexQuantBound() -> AST.Atom.Number? {
354+
let kind = RadixKind.decimal
355+
guard let str = tryEatPrefix(kind.characterFilter) else {
356+
return nil
357+
}
358+
guard let i = UInt16(str.value, radix: kind.radix) else {
359+
error(.numberOverflow(str.value), at: str.location)
360+
return .init(nil, at: str.location)
361+
}
362+
363+
return .init(Int(i), at: str.location)
364+
}
365+
366+
345367
/// Expect a number of a given `kind`, diagnosing if a number cannot be
346368
/// parsed.
347369
mutating func expectNumber(_ kind: RadixKind = .decimal) -> AST.Atom.Number {
@@ -492,7 +514,7 @@ extension Parser {
492514

493515
return p.tryEating { p in
494516
guard p.tryEat("{"),
495-
let range = p.lexRange(trivia: &trivia),
517+
let range = p.lexQuantRange(trivia: &trivia),
496518
p.tryEat("}")
497519
else { return nil }
498520
return range.value
@@ -519,12 +541,14 @@ extension Parser {
519541
/// | ExpRange
520542
/// ExpRange -> '..<' <Int> | '...' <Int>
521543
/// | <Int> '..<' <Int> | <Int> '...' <Int>?
522-
mutating func lexRange(trivia: inout [AST.Trivia]) -> Located<Quant.Amount>? {
544+
mutating func lexQuantRange(
545+
trivia: inout [AST.Trivia]
546+
) -> Located<Quant.Amount>? {
523547
recordLoc { p in
524548
p.tryEating { p in
525549
if let t = p.lexWhitespace() { trivia.append(t) }
526550

527-
let lowerOpt = p.lexNumber()
551+
let lowerOpt = p.lexQuantBound()
528552

529553
if let t = p.lexWhitespace() { trivia.append(t) }
530554

@@ -546,7 +570,7 @@ extension Parser {
546570

547571
if let t = p.lexWhitespace() { trivia.append(t) }
548572

549-
var upperOpt = p.lexNumber()
573+
var upperOpt = p.lexQuantBound()
550574
if closedRange == false {
551575
// If we have an open range, the upper bound should be adjusted down.
552576
upperOpt?.value? -= 1

Tests/RegexTests/LexTests.swift

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,25 @@ extension RegexTests {
6363
_ = p.lexNumber()
6464
}
6565

66+
let invalidQuantBounds: Array<String> = [
67+
"65536", // UInt16.max + 1
68+
"2147483646", // Int32.max - 1
69+
"9223372036854775806", // Int64.max - 1
70+
]
71+
72+
for invalidNum in invalidQuantBounds {
73+
let regexes: Array<String> = [
74+
"x{\(invalidNum)}",
75+
"x{1,\(invalidNum)}",
76+
"x{\(invalidNum),1}",
77+
]
78+
for regex in regexes {
79+
diagnose(regex, expecting: .numberOverflow(invalidNum)) { p in
80+
_ = p.parse()
81+
}
82+
}
83+
}
84+
6685
// TODO: want to dummy print out source ranges, etc, test that.
6786
}
6887

Tests/RegexTests/MatchTests.swift

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -751,6 +751,12 @@ extension RegexTests {
751751
firstMatchTest("(?U)a??", input: "a", match: "a")
752752
firstMatchTest("(?U)a??a", input: "aaa", match: "aa")
753753

754+
// Quantification syntax is somewhat dependent on the contents.
755+
// In JS, PCRE2, Python, and some others, /x{-1}/ will be literally "x{-1}"
756+
// Note that Java8 and Rust throw an (unhelpful) error
757+
firstMatchTest("x{-1}", input: "x{-1}", match: "x{-1}")
758+
firstMatchTest("x{-1}", input: "xax{-2}bx{-1}c", match: "x{-1}")
759+
754760
// TODO: After captures, easier to test these
755761
}
756762

0 commit comments

Comments
 (0)