From 3c8d0370497e3aa89f001ed2fcfacc38bc4be70f Mon Sep 17 00:00:00 2001 From: David Mohundro Date: Sat, 6 Jan 2018 12:04:28 -0600 Subject: [PATCH 1/6] Add handling for XML parsing errors The parsing error is optional as there is a good amount of prior usage of this library for HTML parsing, which isn't always valid XML. As a result, defaulting it to false ensures that pre-existing code doesn't begin failing. --- Source/SWXMLHash.swift | 39 ++++++++++++++++++---- Tests/SWXMLHashTests/XMLParsingTests.swift | 24 ++++++++++++- 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/Source/SWXMLHash.swift b/Source/SWXMLHash.swift index 27f0540e..db8ed678 100644 --- a/Source/SWXMLHash.swift +++ b/Source/SWXMLHash.swift @@ -53,7 +53,10 @@ public class SWXMLHashOptions { /// Any contextual information set by the user for encoding public var userInfo = [CodingUserInfoKey: Any]() -} + + /// Detect XML parsing errors... defaults to false as this library will + /// attempt to handle HTML which isn't always XML-compatible + public var detectParsingErrors = false } /// Simple XML parser public class SWXMLHash { @@ -223,9 +226,11 @@ extension XMLParserDelegate { didStartMappingPrefix prefix: String, toURI namespaceURI: String) { } - func parser(_ parser: Foundation.XMLParser, didEndMappingPrefix prefix: String) { } + func parser(_ parser: Foundation.XMLParser, + didEndMappingPrefix prefix: String) { } - func parser(_ parser: Foundation.XMLParser, foundCharacters string: String) { } + func parser(_ parser: Foundation.XMLParser, + foundCharacters string: String) { } func parser(_ parser: Foundation.XMLParser, foundIgnorableWhitespace whitespaceString: String) { } @@ -234,15 +239,18 @@ extension XMLParserDelegate { foundProcessingInstructionWithTarget target: String, data: String?) { } - func parser(_ parser: Foundation.XMLParser, foundComment comment: String) { } + func parser(_ parser: Foundation.XMLParser, + foundComment comment: String) { } - func parser(_ parser: Foundation.XMLParser, foundCDATA CDATABlock: Data) { } + func parser(_ parser: Foundation.XMLParser, + foundCDATA CDATABlock: Data) { } func parser(_ parser: Foundation.XMLParser, resolveExternalEntityName name: String, systemID: String?) -> Data? { return nil } - func parser(_ parser: Foundation.XMLParser, parseErrorOccurred parseError: NSError) { } + func parser(_ parser: Foundation.XMLParser, + parseErrorOccurred parseError: NSError) { } func parser(_ parser: Foundation.XMLParser, validationErrorOccurred validationError: NSError) { } @@ -360,6 +368,7 @@ class FullXMLParser: NSObject, SimpleXmlParser, XMLParserDelegate { let root: XMLElement var parentStack = Stack() let options: SWXMLHashOptions + var parsingError: ParsingError? func parse(_ data: Data) -> XMLIndexer { // clear any prior runs of parse... expected that this won't be necessary, @@ -373,7 +382,11 @@ class FullXMLParser: NSObject, SimpleXmlParser, XMLParserDelegate { parser.delegate = self _ = parser.parse() - return XMLIndexer(root) + if options.detectParsingErrors, let err = parsingError { + return XMLIndexer.parsingError(err) + } else { + return XMLIndexer(root) + } } func parser(_ parser: Foundation.XMLParser, @@ -410,6 +423,12 @@ class FullXMLParser: NSObject, SimpleXmlParser, XMLParserDelegate { current.addText(cdataText) } } + + func parser(_ parser: XMLParser, parseErrorOccurred parseError: Error) { + let err = parseError as NSError + parsingError = ParsingError(line: err.userInfo["NSXMLParserErrorLineNumber"] as? Int ?? 0, + column: err.userInfo["NSXMLParserErrorColumn"] as? Int ?? 0) + } } /// Represents an indexed operation against a lazily parsed `XMLIndexer` @@ -465,6 +484,11 @@ public class IndexOps { } } +public struct ParsingError: Error { + public let line: Int + public let column: Int +} + /// Error type that is thrown when an indexing or parsing operation fails. public enum IndexingError: Error { case attribute(attr: String) @@ -510,6 +534,7 @@ public enum XMLIndexer { case list([XMLElement]) case stream(IndexOps) case xmlError(IndexingError) + case parsingError(ParsingError) // swiftlint:disable identifier_name // unavailable diff --git a/Tests/SWXMLHashTests/XMLParsingTests.swift b/Tests/SWXMLHashTests/XMLParsingTests.swift index 9315805b..58fea9b6 100644 --- a/Tests/SWXMLHashTests/XMLParsingTests.swift +++ b/Tests/SWXMLHashTests/XMLParsingTests.swift @@ -302,6 +302,27 @@ class XMLParsingTests: XCTestCase { XCTAssertEqual(subIndexer.children[1].element?.text, "Fantasy") XCTAssertEqual(subIndexer.children[2].element?.text, "5.95") } + + func testShouldThrowErrorForInvalidXML() { + let invalidXML = "what is this" + var err: ParsingError? = nil + let parser = SWXMLHash.config { config in + config.detectParsingErrors = true + }.parse(invalidXML) + + switch parser { + case .parsingError(let error): + err = error + default: + err = nil + } + + XCTAssertNotNil(err) + + if err != nil { + XCTAssert(err!.line == 1) + } + } } extension XMLParsingTests { @@ -329,7 +350,8 @@ extension XMLParsingTests { ("testShouldProvideAnErrorElementWhenIndexersDontMatch", testShouldProvideAnErrorElementWhenIndexersDontMatch), ("testShouldStillReturnErrorsWhenAccessingViaSubscripting", testShouldStillReturnErrorsWhenAccessingViaSubscripting), ("testShouldBeAbleToCreateASubIndexerFromFilter", testShouldBeAbleToCreateASubIndexerFromFilter), - ("testShouldBeAbleToFilterOnIndexer", testShouldBeAbleToFilterOnIndexer) + ("testShouldBeAbleToFilterOnIndexer", testShouldBeAbleToFilterOnIndexer), + ("testShouldThrowErrorForInvalidXML", testShouldThrowErrorForInvalidXML) ] } } From 742a646a7fe7cef569fe17ff90dd0ff43a126286 Mon Sep 17 00:00:00 2001 From: David Mohundro Date: Sun, 7 Jan 2018 12:41:58 -0600 Subject: [PATCH 2/6] Fix issue with Error/NSError on Linux --- Source/SWXMLHash.swift | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Source/SWXMLHash.swift b/Source/SWXMLHash.swift index db8ed678..32e14fac 100644 --- a/Source/SWXMLHash.swift +++ b/Source/SWXMLHash.swift @@ -250,10 +250,10 @@ extension XMLParserDelegate { systemID: String?) -> Data? { return nil } func parser(_ parser: Foundation.XMLParser, - parseErrorOccurred parseError: NSError) { } + parseErrorOccurred parseError: Error) { } func parser(_ parser: Foundation.XMLParser, - validationErrorOccurred validationError: NSError) { } + validationErrorOccurred validationError: Error) { } } #endif @@ -425,9 +425,10 @@ class FullXMLParser: NSObject, SimpleXmlParser, XMLParserDelegate { } func parser(_ parser: XMLParser, parseErrorOccurred parseError: Error) { - let err = parseError as NSError - parsingError = ParsingError(line: err.userInfo["NSXMLParserErrorLineNumber"] as? Int ?? 0, - column: err.userInfo["NSXMLParserErrorColumn"] as? Int ?? 0) + if let err = parseError as? NSError { + parsingError = ParsingError(line: err.userInfo["NSXMLParserErrorLineNumber"] as? Int ?? 0, + column: err.userInfo["NSXMLParserErrorColumn"] as? Int ?? 0) + } } } From ebadc773cacce55d1f379193e0de4f17191b5a5b Mon Sep 17 00:00:00 2001 From: David Mohundro Date: Mon, 15 Jan 2018 11:49:42 -0600 Subject: [PATCH 3/6] Fix cast warning --- Source/SWXMLHash.swift | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Source/SWXMLHash.swift b/Source/SWXMLHash.swift index 32e14fac..9acfdca5 100644 --- a/Source/SWXMLHash.swift +++ b/Source/SWXMLHash.swift @@ -425,10 +425,9 @@ class FullXMLParser: NSObject, SimpleXmlParser, XMLParserDelegate { } func parser(_ parser: XMLParser, parseErrorOccurred parseError: Error) { - if let err = parseError as? NSError { - parsingError = ParsingError(line: err.userInfo["NSXMLParserErrorLineNumber"] as? Int ?? 0, - column: err.userInfo["NSXMLParserErrorColumn"] as? Int ?? 0) - } + let err = parseError as NSError + parsingError = ParsingError(line: err.userInfo["NSXMLParserErrorLineNumber"] as? Int ?? 0, + column: err.userInfo["NSXMLParserErrorColumn"] as? Int ?? 0) } } From b6e96365a79d1748df093f5d02981398f69935f9 Mon Sep 17 00:00:00 2001 From: David Mohundro Date: Fri, 23 Feb 2018 16:34:54 -0600 Subject: [PATCH 4/6] Fix swiftlint issues --- Tests/SWXMLHashTests/XMLParsingTests.swift | 1 + 1 file changed, 1 insertion(+) diff --git a/Tests/SWXMLHashTests/XMLParsingTests.swift b/Tests/SWXMLHashTests/XMLParsingTests.swift index 58fea9b6..e4087423 100644 --- a/Tests/SWXMLHashTests/XMLParsingTests.swift +++ b/Tests/SWXMLHashTests/XMLParsingTests.swift @@ -27,6 +27,7 @@ import SWXMLHash import XCTest // swiftlint:disable line_length +// swiftlint:disable type_body_length class XMLParsingTests: XCTestCase { let xmlToParse = """ From 0483ffef0673caa9249948c7bdebf0ce825671e4 Mon Sep 17 00:00:00 2001 From: David Mohundro Date: Fri, 23 Feb 2018 16:56:35 -0600 Subject: [PATCH 5/6] Fix for Linux --- Source/SWXMLHash.swift | 7 +++++++ Tests/SWXMLHashTests/XMLParsingTests.swift | 2 ++ 2 files changed, 9 insertions(+) diff --git a/Source/SWXMLHash.swift b/Source/SWXMLHash.swift index 9acfdca5..231c19d5 100644 --- a/Source/SWXMLHash.swift +++ b/Source/SWXMLHash.swift @@ -425,9 +425,16 @@ class FullXMLParser: NSObject, SimpleXmlParser, XMLParserDelegate { } func parser(_ parser: XMLParser, parseErrorOccurred parseError: Error) { +#if os(Linux) + if let err = parseError as? NSError { + parsingError = ParsingError(line: err.userInfo["NSXMLParserErrorLineNumber"] as? Int ?? 0, + column: err.userInfo["NSXMLParserErrorColumn"] as? Int ?? 0) + } +#else let err = parseError as NSError parsingError = ParsingError(line: err.userInfo["NSXMLParserErrorLineNumber"] as? Int ?? 0, column: err.userInfo["NSXMLParserErrorColumn"] as? Int ?? 0) +#endif } } diff --git a/Tests/SWXMLHashTests/XMLParsingTests.swift b/Tests/SWXMLHashTests/XMLParsingTests.swift index e4087423..49c20a87 100644 --- a/Tests/SWXMLHashTests/XMLParsingTests.swift +++ b/Tests/SWXMLHashTests/XMLParsingTests.swift @@ -320,9 +320,11 @@ class XMLParsingTests: XCTestCase { XCTAssertNotNil(err) +#if !os(Linux) if err != nil { XCTAssert(err!.line == 1) } +#endif } } From 46393861eefbd981b1c26d58be2a23b7c85cc40d Mon Sep 17 00:00:00 2001 From: David Mohundro Date: Sat, 10 Mar 2018 14:36:52 -0600 Subject: [PATCH 6/6] Tweak last curly brace --- Source/SWXMLHash.swift | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Source/SWXMLHash.swift b/Source/SWXMLHash.swift index 231c19d5..60304cc8 100644 --- a/Source/SWXMLHash.swift +++ b/Source/SWXMLHash.swift @@ -56,7 +56,8 @@ public class SWXMLHashOptions { /// Detect XML parsing errors... defaults to false as this library will /// attempt to handle HTML which isn't always XML-compatible - public var detectParsingErrors = false } + public var detectParsingErrors = false +} /// Simple XML parser public class SWXMLHash {