Skip to content

Commit c5791a6

Browse files
committed
Extract XML::Document from XML::Node
Introduces the XML::Document type to wrap a Pointer(LibXML::Doc), while XLM::Node is meant to wrap the subtree Pointer(LibXML::Node). The XML::Document inherits from XML::Node and acts exactly like a XML::Node, so it should hopefully be a transparent change. The main advantage is that we now register a single finalizer instead of one for every node (only the document needed a finalizer). Sadly, it doesn't allow to have a dual strong reference from a document to its nodes, and from the nodes to their document. Boehm GC still complains about cyclic finalization, despite only the document having a finalizer.
1 parent 7202519 commit c5791a6

File tree

5 files changed

+116
-86
lines changed

5 files changed

+116
-86
lines changed

src/xml.cr

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ module XML
5454
# Parses an XML document from *string* with *options* into an `XML::Node`.
5555
#
5656
# See `ParserOptions.default` for default options.
57-
def self.parse(string : String, options : ParserOptions = ParserOptions.default) : Node
57+
def self.parse(string : String, options : ParserOptions = ParserOptions.default) : Document
5858
raise XML::Error.new("Document is empty", 0) if string.empty?
5959
ctxt = LibXML.xmlNewParserCtxt
6060
from_ptr(ctxt) do
@@ -65,7 +65,7 @@ module XML
6565
# Parses an XML document from *io* with *options* into an `XML::Node`.
6666
#
6767
# See `ParserOptions.default` for default options.
68-
def self.parse(io : IO, options : ParserOptions = ParserOptions.default) : Node
68+
def self.parse(io : IO, options : ParserOptions = ParserOptions.default) : Document
6969
ctxt = LibXML.xmlNewParserCtxt
7070
from_ptr(ctxt) do
7171
LibXML.xmlCtxtReadIO(ctxt, ->read_callback, ->close_callback, Box(IO).box(io), nil, nil, options)
@@ -75,7 +75,7 @@ module XML
7575
# Parses an HTML document from *string* with *options* into an `XML::Node`.
7676
#
7777
# See `HTMLParserOptions.default` for default options.
78-
def self.parse_html(string : String, options : HTMLParserOptions = HTMLParserOptions.default) : Node
78+
def self.parse_html(string : String, options : HTMLParserOptions = HTMLParserOptions.default) : Document
7979
raise XML::Error.new("Document is empty", 0) if string.empty?
8080
ctxt = LibXML.htmlNewParserCtxt
8181
from_ptr(ctxt) do
@@ -86,7 +86,7 @@ module XML
8686
# Parses an HTML document from *io* with *options* into an `XML::Node`.
8787
#
8888
# See `HTMLParserOptions.default` for default options.
89-
def self.parse_html(io : IO, options : HTMLParserOptions = HTMLParserOptions.default) : Node
89+
def self.parse_html(io : IO, options : HTMLParserOptions = HTMLParserOptions.default) : Document
9090
ctxt = LibXML.htmlNewParserCtxt
9191
from_ptr(ctxt) do
9292
LibXML.htmlCtxtReadIO(ctxt, ->read_callback, ->close_callback, Box(IO).box(io), nil, "utf-8", options)
@@ -119,7 +119,7 @@ module XML
119119
{% end %}
120120
raise Error.new(LibXML.xmlGetLastError) unless doc
121121

122-
Node.new(doc, errors)
122+
Document.new(doc, errors)
123123
end
124124

125125
{% unless LibXML.has_method?(:xmlSaveSetIndentString) %}

src/xml/document.cr

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
require "weak_ref"
2+
3+
class XML::Document < XML::Node
4+
# The constructors allocate a XML::Node for a libxml node once, so we don't
5+
# finalize a document twice for example.
6+
#
7+
# We store the reference into the libxml struct (_private) for documents
8+
# because a document's XML::Node lives as long as its libxml doc.
9+
#
10+
# However we can lose references to subtree XML::Node, so using _private would
11+
# leave dangling pointers. We thus keep a cache of weak references to all
12+
# nodes in the document, so we can still collect lost references, and at worst
13+
# reinstantiate a XML::Node if needed.
14+
protected getter cache : Hash(LibXML::Node*, WeakRef(Node))
15+
16+
# :nodoc:
17+
def self.new(doc : LibXML::Doc*, errors : Array(Error)? = nil) : Document
18+
if ptr = doc.value._private
19+
ptr.as(Document)
20+
else
21+
new(doc_: doc, errors_: errors)
22+
end
23+
end
24+
25+
# must never be called directly, use the constructors above
26+
private def initialize(*, doc_ : LibXML::Doc*, errors_ : Array(Error)?)
27+
@node = doc_.as(LibXML::Node*)
28+
@errors = errors_
29+
@cache = Hash(LibXML::Node*, WeakRef(Node)).new
30+
@document = nil
31+
doc_.value._private = self.as(Void*)
32+
end
33+
34+
# :nodoc:
35+
def finalize
36+
# free unlinked nodes and their subtrees
37+
@cache.each do |node, ref|
38+
if (obj = ref.value) && obj.unlinked?
39+
LibXML.xmlFreeNode(node)
40+
end
41+
end
42+
43+
# free the doc and its subtree
44+
LibXML.xmlFreeDoc(@node.as(LibXML::Doc*))
45+
end
46+
47+
# Returns the encoding of this node's document.
48+
def encoding : String?
49+
if encoding = @node.as(LibXML::Doc*).value.encoding
50+
String.new(encoding)
51+
end
52+
end
53+
54+
# Returns the version of this node's document.
55+
def version : String?
56+
if version = @node.as(LibXML::Doc*).value.version
57+
String.new(version)
58+
end
59+
end
60+
61+
# :nodoc:
62+
def document : Document
63+
self
64+
end
65+
66+
# Returns the list of `XML::Error` found when parsing this document.
67+
# Returns `nil` if no errors were found.
68+
def errors : Array(XML::Error)?
69+
@errors unless @errors.try &.empty?
70+
end
71+
end

src/xml/namespace.cr

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
11
class XML::Namespace
2-
getter document : Node
2+
getter document : Document
33

44
# :nodoc:
5-
def initialize(@document : Node, @ns : LibXML::NS*)
5+
@[Deprecated]
6+
def self.new(document : Node, ns : LibXML::NS*)
7+
new(document.as(Document), ns)
8+
end
9+
10+
# :nodoc:
11+
def initialize(@document : Document, @ns : LibXML::NS*)
612
end
713

814
# See `Object#hash(hasher)`

src/xml/node.cr

Lines changed: 30 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,11 @@
1-
require "weak_ref"
2-
31
class XML::Node
42
LOOKS_LIKE_XPATH = /^(\.\/|\/|\.\.|\.$)/
53

64
# Every Node must keep a reference to its document Node. To keep things
75
# simple, a document Node merely references itself. An unlinked node must
86
# still reference its original document Node until adopted into another
97
# document's tree (the libxml nodes keep a pointer to their libxml doc).
10-
@document : Node
11-
12-
# The constructors allocate a XML::Node for a libxml node once, so we don't
13-
# finalize a document twice for example.
14-
#
15-
# We store the reference into the libxml struct (_private) for documents
16-
# because a document's XML::Node lives as long as its libxml doc.
17-
#
18-
# However we can lose references to subtree XML::Node, so using _private would
19-
# leave dangling pointers. We thus keep a cache of weak references to all
20-
# nodes in the document, so we can still collect lost references, and at worst
21-
# reinstantiate a XML::Node if needed.
22-
protected getter! cache : Hash(LibXML::Node*, WeakRef(Node))?
8+
@document : Document?
239

2410
# Unlinked Nodes, and all their descendant nodes, don't appear in the
2511
# document's tree anymore, and must be manually freed. Yet, the finalizer
@@ -28,16 +14,7 @@ class XML::Node
2814
protected getter? unlinked = false
2915

3016
# :nodoc:
31-
def self.new(doc : LibXML::Doc*, errors : Array(Error)? = nil)
32-
if ptr = doc.value._private
33-
ptr.as(Node)
34-
else
35-
new(doc_: doc, errors_: errors)
36-
end
37-
end
38-
39-
# :nodoc:
40-
def self.new(node : LibXML::Node*, document : self) : self
17+
def self.new(node : LibXML::Node*, document : Document) : self
4118
if node == document.@node
4219
# should never happen, but just in case
4320
return document
@@ -54,47 +31,28 @@ class XML::Node
5431

5532
# :nodoc:
5633
@[Deprecated]
57-
def self.new(node : LibXML::Node*) : self
58-
new(node, new(node.value.doc))
34+
def self.new(doc : LibXML::Doc*, errors : Array(Error)? = nil)
35+
Document.new(doc, errors).as(Node)
5936
end
6037

6138
# :nodoc:
6239
@[Deprecated]
63-
def self.new(node : LibXML::Attr*) : self
64-
new(node.as(LibXML::Node*), new(node.value.doc))
40+
def self.new(node : LibXML::Node*) : self
41+
new(node, Document.new(node.value.doc))
6542
end
6643

67-
# the initializers must never be called directly, use the constructors above
68-
69-
private def initialize(*, doc_ : LibXML::Doc*, errors_ : Array(Error)?)
70-
@node = doc_.as(LibXML::Node*)
71-
@errors = errors_
72-
@cache = Hash(LibXML::Node*, WeakRef(Node)).new
73-
@document = uninitialized Node
74-
@document = self
75-
doc_.value._private = self.as(Void*)
44+
# :nodoc:
45+
@[Deprecated]
46+
def self.new(node : LibXML::Attr*) : self
47+
new(node.as(LibXML::Node*), Document.new(node.value.doc))
7648
end
7749

78-
private def initialize(*, node_ : LibXML::Node*, document_ : self)
50+
# must never be called directly, use the constructors above
51+
private def initialize(*, node_ : LibXML::Node*, document_ : Document)
7952
@node = node_.as(LibXML::Node*)
8053
@document = document_
8154
end
8255

83-
# :nodoc:
84-
def finalize
85-
return unless @document == self
86-
87-
# free unlinked nodes and their subtrees
88-
cache.each do |node, ref|
89-
if (obj = ref.value) && obj.unlinked?
90-
LibXML.xmlFreeNode(node)
91-
end
92-
end
93-
94-
# free the doc and its subtree
95-
LibXML.xmlFreeDoc(@node.as(LibXML::Doc*))
96-
end
97-
9856
# Gets the attribute content for the *attribute* given by name.
9957
# Raises `KeyError` if attribute is not found.
10058
def [](attribute : String) : String
@@ -152,7 +110,7 @@ class XML::Node
152110

153111
child = @node.value.children
154112
nodes = Slice(Node).new(size) do
155-
node = Node.new(child, @document)
113+
node = Node.new(child, document)
156114
child = child.value.next
157115
node
158116
end
@@ -179,9 +137,9 @@ class XML::Node
179137
LibXML.xmlNodeSetContent(self, content)
180138
end
181139

182-
# Gets the document for this Node as a `XML::Node`.
183-
def document : XML::Node
184-
@document
140+
# Gets the document for this Node as a `Document`.
141+
def document : Document
142+
@document.not_nil!
185143
end
186144

187145
# Returns `true` if this is a Document or HTML Document node.
@@ -197,16 +155,12 @@ class XML::Node
197155

198156
# Returns the encoding of this node's document.
199157
def encoding : String?
200-
if encoding = @document.@node.as(LibXML::Doc*).value.encoding
201-
String.new(encoding)
202-
end
158+
document.encoding
203159
end
204160

205161
# Returns the version of this node's document.
206162
def version : String?
207-
if version = @document.@node.as(LibXML::Doc*).value.version
208-
String.new(version)
209-
end
163+
document.version
210164
end
211165

212166
# Returns `true` if this is an Element node.
@@ -220,7 +174,7 @@ class XML::Node
220174
child = @node.value.children
221175
while child
222176
if child.value.type == XML::Node::Type::ELEMENT_NODE
223-
return Node.new(child, @document)
177+
return Node.new(child, document)
224178
end
225179
child = child.value.next
226180
end
@@ -360,7 +314,7 @@ class XML::Node
360314
# Returns the next sibling node or `nil` if not found.
361315
def next : XML::Node?
362316
next_node = @node.value.next
363-
next_node ? Node.new(next_node, @document) : nil
317+
next_node ? Node.new(next_node, document) : nil
364318
end
365319

366320
# :ditto:
@@ -373,7 +327,7 @@ class XML::Node
373327
next_node = @node.value.next
374328
while next_node
375329
if next_node.value.type == XML::Node::Type::ELEMENT_NODE
376-
return Node.new(next_node, @document)
330+
return Node.new(next_node, document)
377331
end
378332
next_node = next_node.value.next
379333
end
@@ -423,7 +377,7 @@ class XML::Node
423377
nil
424378
else
425379
ns = @node.value.ns
426-
ns ? Namespace.new(@document, ns) : nil
380+
ns ? Namespace.new(document, ns) : nil
427381
end
428382
end
429383

@@ -433,7 +387,7 @@ class XML::Node
433387

434388
ns = @node.value.ns_def
435389
while ns
436-
namespaces << Namespace.new(@document, ns)
390+
namespaces << Namespace.new(document, ns)
437391
ns = ns.value.next
438392
end
439393

@@ -481,7 +435,7 @@ class XML::Node
481435

482436
if ns_list
483437
while ns_list.value
484-
yield Namespace.new(@document, ns_list.value)
438+
yield Namespace.new(document, ns_list.value)
485439
ns_list += 1
486440
end
487441
end
@@ -495,21 +449,21 @@ class XML::Node
495449
# Returns the parent node or `nil` if not found.
496450
def parent : XML::Node?
497451
parent = @node.value.parent
498-
parent ? Node.new(parent, @document) : nil
452+
parent ? Node.new(parent, document) : nil
499453
end
500454

501455
# Returns the previous sibling node or `nil` if not found.
502456
def previous : XML::Node?
503457
prev_node = @node.value.prev
504-
prev_node ? Node.new(prev_node, @document) : nil
458+
prev_node ? Node.new(prev_node, document) : nil
505459
end
506460

507461
# Returns the previous sibling node that is an element or `nil` if not found.
508462
def previous_element : XML::Node?
509463
prev_node = @node.value.prev
510464
while prev_node
511465
if prev_node.value.type == XML::Node::Type::ELEMENT_NODE
512-
return Node.new(prev_node, @document)
466+
return Node.new(prev_node, document)
513467
end
514468
prev_node = prev_node.value.prev
515469
end
@@ -530,7 +484,7 @@ class XML::Node
530484
# Returns the root node for this document or `nil`.
531485
def root : XML::Node?
532486
root = LibXML.xmlDocGetRootElement(@node.value.doc)
533-
root ? Node.new(root, @document) : nil
487+
root ? Node.new(root, document) : nil
534488
end
535489

536490
# Same as `#content`.
@@ -736,10 +690,9 @@ class XML::Node
736690
xpath(path, namespaces, variables).as(String)
737691
end
738692

739-
# Returns the list of `XML::Error` found when parsing this document.
740-
# Returns `nil` if no errors were found.
693+
@[Deprecated("Access XML::Document#errors instead.")]
741694
def errors : Array(XML::Error)?
742-
return @errors unless @errors.try &.empty?
695+
document.errors
743696
end
744697

745698
private def check_no_null_byte(string)

src/xml/reader.cr

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,9 @@ class XML::Reader
188188
XML::Node.new(xml, document) if xml
189189
end
190190

191-
def document
191+
def document : Document
192192
@document ||= if doc = LibXML.xmlTextReaderCurrentDoc(@reader)
193-
Node.new(doc)
193+
Document.new(doc)
194194
else
195195
raise XML::Error.new("Failed to get current doc for XML::Reader", 0)
196196
end

0 commit comments

Comments
 (0)