diff --git a/Sources/Kanna/Kanna.swift b/Sources/Kanna/Kanna.swift index 6e8f9d6..c02a71a 100755 --- a/Sources/Kanna/Kanna.swift +++ b/Sources/Kanna/Kanna.swift @@ -119,9 +119,6 @@ public protocol Searchable { @param xpath */ func xpath(_ xpath: String, namespaces: [String: String]?) -> XPathObject - func xpath(_ xpath: String) -> XPathObject - func at_xpath(_ xpath: String, namespaces: [String: String]?) -> XMLElement? - func at_xpath(_ xpath: String) -> XMLElement? /** Search for node from current node by CSS selector. @@ -129,9 +126,24 @@ public protocol Searchable { @param selector a CSS selector */ func css(_ selector: String, namespaces: [String: String]?) -> XPathObject - func css(_ selector: String) -> XPathObject - func at_css(_ selector: String, namespaces: [String: String]?) -> XMLElement? - func at_css(_ selector: String) -> XMLElement? +} + +public extension Searchable { + func xpath(_ xpath: String, namespaces: [String: String]? = nil) -> XPathObject { + self.xpath(xpath, namespaces: namespaces) + } + + func at_xpath(_ xpath: String, namespaces: [String: String]? = nil) -> XMLElement? { + self.xpath(xpath, namespaces: namespaces).nodeSetValue.first + } + + func css(_ selector: String, namespaces: [String: String]? = nil) -> XPathObject { + self.css(selector, namespaces: namespaces) + } + + func at_css(_ selector: String, namespaces: [String: String]? = nil) -> XMLElement? { + self.css(selector, namespaces: namespaces).nodeSetValue.first + } } /** @@ -181,7 +193,7 @@ public protocol HTMLDocument: XMLDocument { XMLNodeSet */ public final class XMLNodeSet { - private var nodes: [XMLElement] = [] + private var nodes: [XMLElement] public var toHTML: String? 
{ let html = nodes.reduce("") { @@ -219,9 +231,7 @@ public final class XMLNodeSet { public var count: Int { nodes.count } - init() {} - - init(nodes: [XMLElement]) { + init(nodes: [XMLElement] = []) { self.nodes = nodes } @@ -271,16 +281,15 @@ extension XPathObject { init(document: XMLDocument?, docPtr: xmlDocPtr, object: xmlXPathObject) { switch object.type { case XPATH_NODESET: - let nodeSet = object.nodesetval - if nodeSet == nil || nodeSet?.pointee.nodeNr == 0 || nodeSet?.pointee.nodeTab == nil { + guard let nodeSet = object.nodesetval, nodeSet.pointee.nodeNr != 0, let nodeTab = nodeSet.pointee.nodeTab else { self = .none return } var nodes: [XMLElement] = [] - let size = Int((nodeSet?.pointee.nodeNr)!) + let size = Int(nodeSet.pointee.nodeNr) for i in 0 ..< size { - let node: xmlNodePtr = nodeSet!.pointee.nodeTab[i]! + let node: xmlNodePtr = nodeTab[i]! let htmlNode = libxmlHTMLNode(document: document, docPtr: docPtr, node: node) nodes.append(htmlNode) } diff --git a/Sources/Kanna/libxmlHTMLDocument.swift b/Sources/Kanna/libxmlHTMLDocument.swift index 9a5e226..4a19613 100755 --- a/Sources/Kanna/libxmlHTMLDocument.swift +++ b/Sources/Kanna/libxmlHTMLDocument.swift @@ -220,36 +220,14 @@ final class libxmlHTMLDocument: HTMLDocument { var head: XMLElement? { at_xpath("//head") } var body: XMLElement? { at_xpath("//body") } - func xpath(_ xpath: String, namespaces: [String: String]?) -> XPathObject { - rootNode?.xpath(xpath, namespaces: namespaces) ?? .none + func xpath(_ xpath: String, namespaces: [String: String]? = nil) -> XPathObject { + guard let docPtr = docPtr else { return .none } + return XPath(doc: self, docPtr: docPtr).xpath(xpath, namespaces: namespaces) } - func xpath(_ xpath: String) -> XPathObject { - self.xpath(xpath, namespaces: nil) - } - - func at_xpath(_ xpath: String, namespaces: [String: String]?) -> XMLElement? { - rootNode?.at_xpath(xpath, namespaces: namespaces) - } - - func at_xpath(_ xpath: String) -> XMLElement? 
{ - self.at_xpath(xpath, namespaces: nil) - } - - func css(_ selector: String, namespaces: [String: String]?) -> XPathObject { - rootNode?.css(selector, namespaces: namespaces) ?? .none - } - - func css(_ selector: String) -> XPathObject { - self.css(selector, namespaces: nil) - } - - func at_css(_ selector: String, namespaces: [String: String]?) -> XMLElement? { - rootNode?.at_css(selector, namespaces: namespaces) - } - - func at_css(_ selector: String) -> XMLElement? { - self.at_css(selector, namespaces: nil) + func css(_ selector: String, namespaces: [String: String]? = nil) -> XPathObject { + guard let docPtr = docPtr else { return .none } + return XPath(doc: self, docPtr: docPtr).css(selector, namespaces: namespaces) } } @@ -328,36 +306,66 @@ final class libxmlXMLDocument: XMLDocument { xmlFreeDoc(docPtr) } - func xpath(_ xpath: String, namespaces: [String: String]?) -> XPathObject { - rootNode?.xpath(xpath, namespaces: namespaces) ?? .none + func xpath(_ xpath: String, namespaces: [String: String]? = nil) -> XPathObject { + guard let docPtr = docPtr else { return .none } + return XPath(doc: self, docPtr: docPtr).xpath(xpath, namespaces: namespaces) } - func xpath(_ xpath: String) -> XPathObject { - self.xpath(xpath, namespaces: nil) + func css(_ selector: String, namespaces: [String: String]? = nil) -> XPathObject { + guard let docPtr = docPtr else { return .none } + return XPath(doc: self, docPtr: docPtr).css(selector, namespaces: namespaces) } +} - func at_xpath(_ xpath: String, namespaces: [String: String]?) -> XMLElement? { - rootNode?.at_xpath(xpath, namespaces: namespaces) +struct XPath { + private let doc: XMLDocument + private var docPtr: xmlDocPtr + private var nodePtr: xmlNodePtr? + private var isRoot: Bool { + guard let nodePtr = nodePtr else { return true } + return xmlDocGetRootElement(docPtr) == nodePtr } - func at_xpath(_ xpath: String) -> XMLElement? 
{ - self.at_xpath(xpath, namespaces: nil) + init(doc: XMLDocument, docPtr: xmlDocPtr, nodePtr: xmlNodePtr? = nil) { + self.doc = doc + self.docPtr = docPtr + self.nodePtr = nodePtr } - func css(_ selector: String, namespaces: [String: String]?) -> XPathObject { - rootNode?.css(selector, namespaces: namespaces) ?? .none - } + func xpath(_ xpath: String, namespaces: [String: String]? = nil) -> XPathObject { + guard let ctxt = xmlXPathNewContext(docPtr) else { return .none } + defer { xmlXPathFreeContext(ctxt) } + + if let nsDictionary = namespaces { + for (ns, name) in nsDictionary { + xmlXPathRegisterNs(ctxt, ns, name) + } + } - func css(_ selector: String) -> XPathObject { - self.css(selector, namespaces: nil) + if let node = nodePtr { + ctxt.pointee.node = node + } + + guard let result = xmlXPathEvalExpression(adoptXpath(xpath), ctxt) else { return .none } + defer { xmlXPathFreeObject(result) } + + return XPathObject(document: doc, docPtr: docPtr, object: result.pointee) } - func at_css(_ selector: String, namespaces: [String: String]?) -> XMLElement? { - rootNode?.at_css(selector, namespaces: namespaces) + func css(_ selector: String, namespaces: [String: String]? = nil) -> XPathObject { + if let xpath = try? CSS.toXPath(selector, isRoot: isRoot) { + return self.xpath(xpath, namespaces: namespaces) + } + return .none } - func at_css(_ selector: String) -> XMLElement? { - self.at_css(selector, namespaces: nil) + private func adoptXpath(_ xpath: String) -> String { + guard !isRoot else { return xpath } + if xpath.hasPrefix("/") { + return "." + xpath + } else { + return xpath + } } } diff --git a/Sources/Kanna/libxmlHTMLNode.swift b/Sources/Kanna/libxmlHTMLNode.swift index 4c53b6a..a59daee 100755 --- a/Sources/Kanna/libxmlHTMLNode.swift +++ b/Sources/Kanna/libxmlHTMLNode.swift @@ -30,7 +30,6 @@ libxmlHTMLNode */ final class libxmlHTMLNode: XMLElement { var text: String? 
{ - guard let nodePtr = nodePtr else { return nil } return libxmlGetNodeContent(nodePtr) } @@ -63,7 +62,7 @@ final class libxmlHTMLNode: XMLElement { var tagName: String? { get { - guard let name = nodePtr?.pointee.name else { + guard let name = nodePtr.pointee.name else { return nil } return String(cString: name) @@ -87,7 +86,7 @@ final class libxmlHTMLNode: XMLElement { var parent: XMLElement? { get { - libxmlHTMLNode(document: doc, docPtr: docPtr!, node: (nodePtr?.pointee.parent)!) + libxmlHTMLNode(document: doc, docPtr: docPtr, node: nodePtr.pointee.parent) } set { if let node = newValue as? libxmlHTMLNode { @@ -106,16 +105,15 @@ final class libxmlHTMLNode: XMLElement { private weak var weakDocument: XMLDocument? private var document: XMLDocument? - private var docPtr: htmlDocPtr? - private var nodePtr: xmlNodePtr? - private var isRoot = false + private var docPtr: htmlDocPtr + private var nodePtr: xmlNodePtr private var doc: XMLDocument? { weakDocument ?? document } subscript(attributeName: String) -> String? { get { - var attr = nodePtr?.pointee.properties + var attr = nodePtr.pointee.properties while attr != nil { let mem = attr?.pointee if let tagName = String(validatingUTF8: UnsafeRawPointer((mem?.name)!).assumingMemoryBound(to: CChar.self)) { @@ -144,7 +142,6 @@ final class libxmlHTMLNode: XMLElement { self.weakDocument = document self.docPtr = docPtr self.nodePtr = xmlDocGetRootElement(docPtr) - self.isRoot = true } init(document: XMLDocument?, docPtr: xmlDocPtr, node: xmlNodePtr) { @@ -154,60 +151,14 @@ final class libxmlHTMLNode: XMLElement { } // MARK: Searchable - func xpath(_ xpath: String, namespaces: [String: String]?) 
-> XPathObject { - let ctxt = xmlXPathNewContext(docPtr) - if ctxt == nil { - return .none - } - ctxt?.pointee.node = nodePtr - - if let nsDictionary = namespaces { - for (ns, name) in nsDictionary { - xmlXPathRegisterNs(ctxt, ns, name) - } - } - - let result = xmlXPathEvalExpression(xpath, ctxt) - defer { - xmlXPathFreeObject(result) - } - xmlXPathFreeContext(ctxt) - if result == nil { - return .none - } - - return XPathObject(document: doc, docPtr: docPtr!, object: result!.pointee) - } - - func xpath(_ xpath: String) -> XPathObject { - self.xpath(xpath, namespaces: nil) - } - - func at_xpath(_ xpath: String, namespaces: [String: String]?) -> XMLElement? { - self.xpath(xpath, namespaces: namespaces).nodeSetValue.first - } - - func at_xpath(_ xpath: String) -> XMLElement? { - self.at_xpath(xpath, namespaces: nil) - } - - func css(_ selector: String, namespaces: [String: String]?) -> XPathObject { - if let xpath = try? CSS.toXPath(selector, isRoot: isRoot) { - return self.xpath(xpath, namespaces: namespaces) - } - return .none - } - - func css(_ selector: String) -> XPathObject { - self.css(selector, namespaces: nil) - } - - func at_css(_ selector: String, namespaces: [String: String]?) -> XMLElement? { - self.css(selector, namespaces: namespaces).nodeSetValue.first + func xpath(_ xpath: String, namespaces: [String: String]? = nil) -> XPathObject { + guard let doc = doc else { return .none } + return XPath(doc: doc, docPtr: docPtr, nodePtr: nodePtr).xpath(xpath, namespaces: namespaces) } - func at_css(_ selector: String) -> XMLElement? { - self.css(selector, namespaces: nil).nodeSetValue.first + func css(_ selector: String, namespaces: [String: String]? = nil) -> XPathObject { + guard let doc = doc else { return .none } + return XPath(doc: doc, docPtr: docPtr, nodePtr: nodePtr).css(selector, namespaces: namespaces) } func addPrevSibling(_ node: XMLElement) { @@ -241,7 +192,7 @@ final class libxmlHTMLNode: XMLElement { } private func node(from ptr: xmlNodePtr?) 
-> XMLElement? { - guard let doc = doc, let docPtr = docPtr, let nodePtr = ptr else { + guard let doc = doc, let nodePtr = ptr else { return nil } diff --git a/Tests/KannaTests/KannaHTMLTests.swift b/Tests/KannaTests/KannaHTMLTests.swift index 2c6dd16..c0ce5a2 100755 --- a/Tests/KannaTests/KannaHTMLTests.swift +++ b/Tests/KannaTests/KannaHTMLTests.swift @@ -202,6 +202,30 @@ class KannaHTMLTests: XCTestCase { XCTFail("Abnormal test data") } } + + func testInnerXpath() { + let input = """ + <html> + <head> + <title>test title</title> + </head> + <body> + <div id="1"><h1>test header 1</h1></div> + <div id="2"><h1>test header 2</h1></div> + </body> + </html> + """ + do { + let doc = try HTML(html: input, encoding: .utf8) + XCTAssertNil(doc.at_xpath("//head")?.at_xpath("//h1")) + XCTAssertNil(doc.at_xpath("//head")?.at_xpath("//body")) + XCTAssertNil(doc.at_xpath("//body")?.at_xpath("//title")) + XCTAssertEqual(doc.at_xpath("//body/div[@id='2']//h1")?.text, "test header 2") + XCTAssertEqual(doc.at_xpath("//body/div[@id='2']")?.at_xpath("//h1")?.text, "test header 2") + } catch { + XCTFail("Abnormal test data") + } + } } extension KannaHTMLTests {