From bbaeb150734853a5a18af26d3ca050eb11238aa4 Mon Sep 17 00:00:00 2001 From: Sindre Sorhus Date: Mon, 17 May 2021 16:12:53 +0700 Subject: [PATCH] Fix Unicode handling (#5) --- Sources/Regex/Regex.swift | 21 ++++-- Sources/Regex/Utilities.swift | 14 ++++ Tests/RegexTests/RegexTests.swift | 120 +++++++++++++++++++++++++++++- 3 files changed, 146 insertions(+), 9 deletions(-) diff --git a/Sources/Regex/Regex.swift b/Sources/Regex/Regex.swift index f186cab..0d8d23d 100644 --- a/Sources/Regex/Regex.swift +++ b/Sources/Regex/Regex.swift @@ -157,7 +157,7 @@ extension Regex { public let range: Range fileprivate init(originalString: String, range: NSRange) { - self.range = Range(range, in: originalString)! + self.range = originalString.range(fromNSRange: range) self.value = String(originalString[self.range]) } } @@ -193,7 +193,10 @@ extension Regex { public func group(named name: String) -> Group? { let range = checkingResult.range(withName: name) - guard range.length > 0 else { + guard + range.location != NSNotFound, + range.length > 0 + else { return nil } @@ -203,12 +206,20 @@ extension Regex { fileprivate init(checkingResult: NSTextCheckingResult, string: String) { self.checkingResult = checkingResult self.originalString = string - self.value = string[nsRange: checkingResult.range]!.string - self.range = Range(checkingResult.range, in: string)! + self.range = string.range(fromNSRange: checkingResult.range) + self.value = String(string[self.range]) // The first range is the full range, so we ignore that. - self.groups = (1.. 0 + else { + return nil + } + return Group(originalString: string, range: range) } } diff --git a/Sources/Regex/Utilities.swift b/Sources/Regex/Utilities.swift index 71ca65b..5199927 100644 --- a/Sources/Regex/Utilities.swift +++ b/Sources/Regex/Utilities.swift @@ -25,6 +25,20 @@ extension String { } +extension String { + /** + Get a string range from a `NSRange`. + + This works better than the built-in `Range(nsRange, in: string)`, which doesn't correctly handle some Unicode compositions. + */ + func range(fromNSRange nsRange: NSRange) -> Range { + let startIndex = utf16.index(utf16.startIndex, offsetBy: nsRange.lowerBound) + let endIndex = utf16.index(startIndex, offsetBy: nsRange.length) + return rangeOfComposedCharacterSequences(for: startIndex..