Skip to content

Commit

Permalink
Merge pull request #238 from anivaros/xpath
Browse files Browse the repository at this point in the history
Fixed own issue #237
  • Loading branch information
tid-kijyun committed Apr 19, 2020
2 parents 609367a + 377ae43 commit 73db627
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 120 deletions.
37 changes: 23 additions & 14 deletions Sources/Kanna/Kanna.swift
Original file line number Diff line number Diff line change
Expand Up @@ -119,19 +119,31 @@ public protocol Searchable {
@param xpath
*/
func xpath(_ xpath: String, namespaces: [String: String]?) -> XPathObject
func xpath(_ xpath: String) -> XPathObject
func at_xpath(_ xpath: String, namespaces: [String: String]?) -> XMLElement?
func at_xpath(_ xpath: String) -> XMLElement?

/**
Search for nodes, starting from the current node, by CSS selector.
@param selector a CSS selector
*/
func css(_ selector: String, namespaces: [String: String]?) -> XPathObject
func css(_ selector: String) -> XPathObject
func at_css(_ selector: String, namespaces: [String: String]?) -> XMLElement?
func at_css(_ selector: String) -> XMLElement?
}

public extension Searchable {
    /// Variant of `xpath(_:namespaces:)` whose `namespaces` argument defaults to `nil`.
    ///
    /// Forwards to `self.xpath(_:namespaces:)` with the same arguments.
    /// NOTE(review): conforming types appear to be expected to supply their own
    /// `xpath(_:namespaces:)`; otherwise this forwarding call would resolve back
    /// to this method — confirm against the conformers.
    func xpath(_ xpath: String, namespaces: [String: String]? = nil) -> XPathObject {
        return self.xpath(xpath, namespaces: namespaces)
    }

    /// Returns the first element of the node set produced by `xpath(_:namespaces:)`,
    /// or `nil` when that node set has no first element.
    func at_xpath(_ xpath: String, namespaces: [String: String]? = nil) -> XMLElement? {
        let matches = self.xpath(xpath, namespaces: namespaces)
        return matches.nodeSetValue.first
    }

    /// Variant of `css(_:namespaces:)` whose `namespaces` argument defaults to `nil`.
    ///
    /// Forwards to `self.css(_:namespaces:)` with the same arguments.
    /// NOTE(review): same expectation as for `xpath` — conformers presumably
    /// provide the real implementation; confirm.
    func css(_ selector: String, namespaces: [String: String]? = nil) -> XPathObject {
        return self.css(selector, namespaces: namespaces)
    }

    /// Returns the first element of the node set produced by `css(_:namespaces:)`,
    /// or `nil` when that node set has no first element.
    func at_css(_ selector: String, namespaces: [String: String]? = nil) -> XMLElement? {
        let matches = self.css(selector, namespaces: namespaces)
        return matches.nodeSetValue.first
    }
}

/**
Expand Down Expand Up @@ -181,7 +193,7 @@ public protocol HTMLDocument: XMLDocument {
XMLNodeSet
*/
public final class XMLNodeSet {
private var nodes: [XMLElement] = []
private var nodes: [XMLElement]

public var toHTML: String? {
let html = nodes.reduce("") {
Expand Down Expand Up @@ -219,9 +231,7 @@ public final class XMLNodeSet {

/// The number of elements currently held in `nodes`.
public var count: Int { nodes.count }

init() {}

init(nodes: [XMLElement]) {
/// Creates a node set that stores `nodes`.
/// - Parameter nodes: The elements to hold; defaults to an empty array.
init(nodes: [XMLElement] = []) {
self.nodes = nodes
}

Expand Down Expand Up @@ -271,16 +281,15 @@ extension XPathObject {
init(document: XMLDocument?, docPtr: xmlDocPtr, object: xmlXPathObject) {
switch object.type {
case XPATH_NODESET:
let nodeSet = object.nodesetval
if nodeSet == nil || nodeSet?.pointee.nodeNr == 0 || nodeSet?.pointee.nodeTab == nil {
guard let nodeSet = object.nodesetval, nodeSet.pointee.nodeNr != 0, let nodeTab = nodeSet.pointee.nodeTab else {
self = .none
return
}

var nodes: [XMLElement] = []
let size = Int((nodeSet?.pointee.nodeNr)!)
let size = Int(nodeSet.pointee.nodeNr)
for i in 0 ..< size {
let node: xmlNodePtr = nodeSet!.pointee.nodeTab[i]!
let node: xmlNodePtr = nodeTab[i]!
let htmlNode = libxmlHTMLNode(document: document, docPtr: docPtr, node: node)
nodes.append(htmlNode)
}
Expand Down
98 changes: 53 additions & 45 deletions Sources/Kanna/libxmlHTMLDocument.swift
Original file line number Diff line number Diff line change
Expand Up @@ -220,36 +220,14 @@ final class libxmlHTMLDocument: HTMLDocument {
/// The element returned by `at_xpath("//head")`, or `nil` when there is none.
var head: XMLElement? { at_xpath("//head") }
/// The element returned by `at_xpath("//body")`, or `nil` when there is none.
var body: XMLElement? { at_xpath("//body") }

func xpath(_ xpath: String, namespaces: [String: String]?) -> XPathObject {
rootNode?.xpath(xpath, namespaces: namespaces) ?? .none
/// Evaluates `xpath` against this document.
///
/// Builds an `XPath` for this document without a node pointer and delegates
/// the evaluation to it.
/// - Parameters:
///   - xpath: The XPath expression to evaluate.
///   - namespaces: Optional namespace mapping passed through to the evaluation.
/// - Returns: The evaluation result, or `.none` when `docPtr` is `nil`.
func xpath(_ xpath: String, namespaces: [String: String]? = nil) -> XPathObject {
    if let documentPointer = docPtr {
        let evaluator = XPath(doc: self, docPtr: documentPointer)
        return evaluator.xpath(xpath, namespaces: namespaces)
    }
    return .none
}

func xpath(_ xpath: String) -> XPathObject {
self.xpath(xpath, namespaces: nil)
}

func at_xpath(_ xpath: String, namespaces: [String: String]?) -> XMLElement? {
rootNode?.at_xpath(xpath, namespaces: namespaces)
}

func at_xpath(_ xpath: String) -> XMLElement? {
self.at_xpath(xpath, namespaces: nil)
}

func css(_ selector: String, namespaces: [String: String]?) -> XPathObject {
rootNode?.css(selector, namespaces: namespaces) ?? .none
}

func css(_ selector: String) -> XPathObject {
self.css(selector, namespaces: nil)
}

func at_css(_ selector: String, namespaces: [String: String]?) -> XMLElement? {
rootNode?.at_css(selector, namespaces: namespaces)
}

func at_css(_ selector: String) -> XMLElement? {
self.at_css(selector, namespaces: nil)
/// Searches this document with a CSS selector.
///
/// Builds an `XPath` for this document without a node pointer and delegates
/// the selector search to it.
/// - Parameters:
///   - selector: The CSS selector to search with.
///   - namespaces: Optional namespace mapping passed through to the search.
/// - Returns: The search result, or `.none` when `docPtr` is `nil`.
func css(_ selector: String, namespaces: [String: String]? = nil) -> XPathObject {
    if let documentPointer = docPtr {
        let evaluator = XPath(doc: self, docPtr: documentPointer)
        return evaluator.css(selector, namespaces: namespaces)
    }
    return .none
}
}

Expand Down Expand Up @@ -328,36 +306,66 @@ final class libxmlXMLDocument: XMLDocument {
xmlFreeDoc(docPtr)
}

func xpath(_ xpath: String, namespaces: [String: String]?) -> XPathObject {
rootNode?.xpath(xpath, namespaces: namespaces) ?? .none
/// Evaluates `xpath` against this document.
///
/// Delegates to an `XPath` created for this document with no node pointer.
/// - Parameters:
///   - xpath: The XPath expression to evaluate.
///   - namespaces: Optional namespace mapping passed through to the evaluation.
/// - Returns: The evaluation result, or `.none` when `docPtr` is `nil`.
func xpath(_ xpath: String, namespaces: [String: String]? = nil) -> XPathObject {
    if let documentPointer = docPtr {
        let evaluator = XPath(doc: self, docPtr: documentPointer)
        return evaluator.xpath(xpath, namespaces: namespaces)
    }
    return .none
}

func xpath(_ xpath: String) -> XPathObject {
self.xpath(xpath, namespaces: nil)
/// Searches this document with a CSS selector.
///
/// Delegates to an `XPath` created for this document with no node pointer.
/// - Parameters:
///   - selector: The CSS selector to search with.
///   - namespaces: Optional namespace mapping passed through to the search.
/// - Returns: The search result, or `.none` when `docPtr` is `nil`.
func css(_ selector: String, namespaces: [String: String]? = nil) -> XPathObject {
    if let documentPointer = docPtr {
        let evaluator = XPath(doc: self, docPtr: documentPointer)
        return evaluator.css(selector, namespaces: namespaces)
    }
    return .none
}
}

func at_xpath(_ xpath: String, namespaces: [String: String]?) -> XMLElement? {
rootNode?.at_xpath(xpath, namespaces: namespaces)
struct XPath {
private let doc: XMLDocument
private var docPtr: xmlDocPtr
private var nodePtr: xmlNodePtr?
/// Whether this evaluator is anchored at the document root.
///
/// `true` when no node pointer was supplied, or when the supplied node pointer
/// equals the element returned by `xmlDocGetRootElement(docPtr)`.
private var isRoot: Bool {
    if let current = nodePtr {
        return xmlDocGetRootElement(docPtr) == current
    }
    return true
}

func at_xpath(_ xpath: String) -> XMLElement? {
self.at_xpath(xpath, namespaces: nil)
/// Creates an evaluator for `doc`.
/// - Parameters:
///   - doc: The document handed to the `XPathObject` results.
///   - docPtr: The document pointer used to create XPath contexts.
///   - nodePtr: Optional node to use as the context node; defaults to `nil`.
init(doc: XMLDocument, docPtr: xmlDocPtr, nodePtr: xmlNodePtr? = nil) {
    // The three stored properties are independent; order is irrelevant.
    self.nodePtr = nodePtr
    self.docPtr = docPtr
    self.doc = doc
}

func css(_ selector: String, namespaces: [String: String]?) -> XPathObject {
rootNode?.css(selector, namespaces: namespaces) ?? .none
}
func xpath(_ xpath: String, namespaces: [String: String]? = nil) -> XPathObject {
guard let ctxt = xmlXPathNewContext(docPtr) else { return .none }
defer { xmlXPathFreeContext(ctxt) }

if let nsDictionary = namespaces {
for (ns, name) in nsDictionary {
xmlXPathRegisterNs(ctxt, ns, name)
}
}

func css(_ selector: String) -> XPathObject {
self.css(selector, namespaces: nil)
if let node = nodePtr {
ctxt.pointee.node = node
}

guard let result = xmlXPathEvalExpression(adoptXpath(xpath), ctxt) else { return .none }
defer { xmlXPathFreeObject(result) }

return XPathObject(document: doc, docPtr: docPtr, object: result.pointee)
}

func at_css(_ selector: String, namespaces: [String: String]?) -> XMLElement? {
rootNode?.at_css(selector, namespaces: namespaces)
/// Searches with a CSS selector by first translating it to an XPath expression.
///
/// The selector is converted with `CSS.toXPath(_:isRoot:)`, passing this
/// evaluator's `isRoot`, and the resulting expression is evaluated by `xpath`.
/// - Parameters:
///   - selector: The CSS selector to translate.
///   - namespaces: Optional namespace mapping passed through to `xpath`.
/// - Returns: The evaluation result, or `.none` when the conversion fails.
func css(_ selector: String, namespaces: [String: String]? = nil) -> XPathObject {
    guard let translated = try? CSS.toXPath(selector, isRoot: isRoot) else {
        return .none
    }
    return self.xpath(translated, namespaces: namespaces)
}

func at_css(_ selector: String) -> XMLElement? {
self.at_css(selector, namespaces: nil)
/// Adapts an XPath expression to the current context node.
///
/// When this evaluator is not at the root and the expression starts with `/`,
/// a leading `.` is prepended; in every other case the expression is returned
/// unchanged.
/// - Parameter xpath: The expression to adapt.
/// - Returns: The possibly prefixed expression.
private func adoptXpath(_ xpath: String) -> String {
    let needsDotPrefix = !isRoot && xpath.hasPrefix("/")
    return needsDotPrefix ? "." + xpath : xpath
}
}

Expand Down
73 changes: 12 additions & 61 deletions Sources/Kanna/libxmlHTMLNode.swift
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ libxmlHTMLNode
*/
final class libxmlHTMLNode: XMLElement {
var text: String? {
guard let nodePtr = nodePtr else { return nil }
return libxmlGetNodeContent(nodePtr)
}

Expand Down Expand Up @@ -63,7 +62,7 @@ final class libxmlHTMLNode: XMLElement {

var tagName: String? {
get {
guard let name = nodePtr?.pointee.name else {
guard let name = nodePtr.pointee.name else {
return nil
}
return String(cString: name)
Expand All @@ -87,7 +86,7 @@ final class libxmlHTMLNode: XMLElement {

var parent: XMLElement? {
get {
libxmlHTMLNode(document: doc, docPtr: docPtr!, node: (nodePtr?.pointee.parent)!)
libxmlHTMLNode(document: doc, docPtr: docPtr, node: nodePtr.pointee.parent)
}
set {
if let node = newValue as? libxmlHTMLNode {
Expand All @@ -106,16 +105,15 @@ final class libxmlHTMLNode: XMLElement {

private weak var weakDocument: XMLDocument?
private var document: XMLDocument?
private var docPtr: htmlDocPtr?
private var nodePtr: xmlNodePtr?
private var isRoot = false
private var docPtr: htmlDocPtr
private var nodePtr: xmlNodePtr
/// The document associated with this node: `weakDocument` when it is non-nil,
/// otherwise `document`.
private var doc: XMLDocument? {
    if let weaklyHeld = weakDocument {
        return weaklyHeld
    }
    return document
}

subscript(attributeName: String) -> String? {
get {
var attr = nodePtr?.pointee.properties
var attr = nodePtr.pointee.properties
while attr != nil {
let mem = attr?.pointee
if let tagName = String(validatingUTF8: UnsafeRawPointer((mem?.name)!).assumingMemoryBound(to: CChar.self)) {
Expand Down Expand Up @@ -144,7 +142,6 @@ final class libxmlHTMLNode: XMLElement {
self.weakDocument = document
self.docPtr = docPtr
self.nodePtr = xmlDocGetRootElement(docPtr)
self.isRoot = true
}

init(document: XMLDocument?, docPtr: xmlDocPtr, node: xmlNodePtr) {
Expand All @@ -154,60 +151,14 @@ final class libxmlHTMLNode: XMLElement {
}

// MARK: Searchable
func xpath(_ xpath: String, namespaces: [String: String]?) -> XPathObject {
let ctxt = xmlXPathNewContext(docPtr)
if ctxt == nil {
return .none
}
ctxt?.pointee.node = nodePtr

if let nsDictionary = namespaces {
for (ns, name) in nsDictionary {
xmlXPathRegisterNs(ctxt, ns, name)
}
}

let result = xmlXPathEvalExpression(xpath, ctxt)
defer {
xmlXPathFreeObject(result)
}
xmlXPathFreeContext(ctxt)
if result == nil {
return .none
}

return XPathObject(document: doc, docPtr: docPtr!, object: result!.pointee)
}

func xpath(_ xpath: String) -> XPathObject {
self.xpath(xpath, namespaces: nil)
}

func at_xpath(_ xpath: String, namespaces: [String: String]?) -> XMLElement? {
self.xpath(xpath, namespaces: namespaces).nodeSetValue.first
}

func at_xpath(_ xpath: String) -> XMLElement? {
self.at_xpath(xpath, namespaces: nil)
}

func css(_ selector: String, namespaces: [String: String]?) -> XPathObject {
if let xpath = try? CSS.toXPath(selector, isRoot: isRoot) {
return self.xpath(xpath, namespaces: namespaces)
}
return .none
}

func css(_ selector: String) -> XPathObject {
self.css(selector, namespaces: nil)
}

func at_css(_ selector: String, namespaces: [String: String]?) -> XMLElement? {
self.css(selector, namespaces: namespaces).nodeSetValue.first
/// Evaluates `xpath` with this node as the context node.
///
/// Delegates to an `XPath` built from this node's document, document pointer
/// and node pointer.
/// - Parameters:
///   - xpath: The XPath expression to evaluate.
///   - namespaces: Optional namespace mapping passed through to the evaluation.
/// - Returns: The evaluation result, or `.none` when `doc` is `nil`.
func xpath(_ xpath: String, namespaces: [String: String]? = nil) -> XPathObject {
    if let owner = doc {
        let evaluator = XPath(doc: owner, docPtr: docPtr, nodePtr: nodePtr)
        return evaluator.xpath(xpath, namespaces: namespaces)
    }
    return .none
}

func at_css(_ selector: String) -> XMLElement? {
self.css(selector, namespaces: nil).nodeSetValue.first
/// Searches with a CSS selector using this node as the context node.
///
/// Delegates to an `XPath` built from this node's document, document pointer
/// and node pointer.
/// - Parameters:
///   - selector: The CSS selector to search with.
///   - namespaces: Optional namespace mapping passed through to the search.
/// - Returns: The search result, or `.none` when `doc` is `nil`.
func css(_ selector: String, namespaces: [String: String]? = nil) -> XPathObject {
    if let owner = doc {
        let evaluator = XPath(doc: owner, docPtr: docPtr, nodePtr: nodePtr)
        return evaluator.css(selector, namespaces: namespaces)
    }
    return .none
}

func addPrevSibling(_ node: XMLElement) {
Expand Down Expand Up @@ -241,7 +192,7 @@ final class libxmlHTMLNode: XMLElement {
}

private func node(from ptr: xmlNodePtr?) -> XMLElement? {
guard let doc = doc, let docPtr = docPtr, let nodePtr = ptr else {
guard let doc = doc, let nodePtr = ptr else {
return nil
}

Expand Down
24 changes: 24 additions & 0 deletions Tests/KannaTests/KannaHTMLTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,30 @@ class KannaHTMLTests: XCTestCase {
XCTFail("Abnormal test data")
}
}

/// Checks XPath queries issued from an element obtained by a previous query:
/// `//`-prefixed expressions run on `head`/`body` must not match elements that
/// live outside that element, and must match those inside it.
func testInnerXpath() {
    let markup = """
        <html>
        <head>
        <title>test title</title>
        </head>
        <body>
        <div id="1"><div><h1>test header 1</h1></div></div>
        <div id="2"><div><h1>test header 2</h1></div></div>
        </body>
        </html>
        """

    // Parsing failure means the fixture itself is broken, not the code under test.
    guard let doc = try? HTML(html: markup, encoding: .utf8) else {
        XCTFail("Abnormal test data")
        return
    }

    // Queries scoped to one element must not see the other element's children.
    XCTAssertNil(doc.at_xpath("//head")?.at_xpath("//h1"))
    XCTAssertNil(doc.at_xpath("//head")?.at_xpath("//body"))
    XCTAssertNil(doc.at_xpath("//body")?.at_xpath("//title"))

    // The same header is reachable in one query or in two chained queries.
    XCTAssertEqual(doc.at_xpath("//body/div[@id='2']//h1")?.text, "test header 2")
    XCTAssertEqual(doc.at_xpath("//body/div[@id='2']")?.at_xpath("//h1")?.text, "test header 2")
}
}

extension KannaHTMLTests {
Expand Down

0 comments on commit 73db627

Please sign in to comment.