diff --git a/src/xml/attributes.cr b/src/xml/attributes.cr index 7a91c619eeb2..fff7c9db4573 100644 --- a/src/xml/attributes.cr +++ b/src/xml/attributes.cr @@ -3,13 +3,13 @@ require "./node" class XML::Attributes include Enumerable(Node) + # :nodoc: def initialize(@node : Node) end def empty? : Bool return true unless @node.element? - props = self.props props.null? end @@ -39,14 +39,45 @@ class XML::Attributes end def []=(name : String, value) + if prop = find_prop(name) + # manually unlink the prop's children if we have live references, so + # xmlSetProp won't free them immediately + @node.document.unlink_cached_children(prop) + end + LibXML.xmlSetProp(@node, name, value.to_s) value end def delete(name : String) : String? - value = self[name]?.try &.content - res = LibXML.xmlUnsetProp(@node, name) - value if res == 0 + prop = find_prop(name) + return unless prop + + value = "" + if content = LibXML.xmlNodeGetContent(prop) + value = String.new(content) + end + + if node = @node.document.cached?(prop) + # can't call xmlUnsetProp: it would free the node + node.unlink + value + else + # manually unlink the prop's children if we have live references, so + # xmlUnsetProp won't free them immediately + @node.document.unlink_cached_children(prop) + value if LibXML.xmlUnsetProp(@node, name) == 0 + end + end + + private def find_prop(name) + prop = @node.to_unsafe.value.properties.as(LibXML::Node*) + while prop + if String.new(prop.value.name) == name + return prop + end + prop = prop.value.next + end end def each(&) : Nil @@ -54,8 +85,8 @@ class XML::Attributes props = self.props until props.null? - yield Node.new(props) - props = props.value.next + yield Node.new(props.as(LibXML::Node*), @node.document) + props = props.value.next.as(LibXML::Attr*) end end @@ -73,7 +104,7 @@ class XML::Attributes pp.list("[", self, "]") end - protected def props + protected def props : LibXML::Attr* @node.to_unsafe.value.properties end end diff --git a/src/xml/builder.cr b/src/xml/builder.cr index 54e5f45adddc..0669821d3276 100644 --- a/src/xml/builder.cr +++ b/src/xml/builder.cr @@ -28,6 +28,11 @@ class XML::Builder @writer = LibXML.xmlNewTextWriter(buffer) end + # :nodoc: + def finalize + LibXML.xmlFreeTextWriter(@writer) + end + # Emits the start of the document. def start_document(version = nil, encoding = nil) : Nil call StartDocument, string_to_unsafe(version), string_to_unsafe(encoding), nil diff --git a/src/xml/libxml2.cr b/src/xml/libxml2.cr index a9008ce0e103..4745495ec234 100644 --- a/src/xml/libxml2.cr +++ b/src/xml/libxml2.cr @@ -149,6 +149,7 @@ lib LibXML fun xmlTextReaderReadOuterXml(reader : XMLTextReader) : UInt8* fun xmlTextReaderExpand(reader : XMLTextReader) : Node* fun xmlTextReaderCurrentNode(reader : XMLTextReader) : Node* + fun xmlTextReaderCurrentDoc(reader : XMLTextReader) : Doc* fun xmlTextReaderSetErrorHandler(reader : XMLTextReader, f : TextReaderErrorFunc) : Void fun xmlTextReaderSetStructuredErrorHandler(reader : XMLTextReader, f : StructuredErrorFunc, arg : Void*) : Void @@ -372,18 +373,14 @@ lib LibXML {% if compare_versions(LibXML::VERSION, "2.14.0") >= 0 %} fun xmlSaveSetIndentString(SaveCtxPtr, UInt8*) {% end %} + + fun xmlFreeDoc(Doc*) + fun xmlFreeNode(Node*) + fun xmlFreeTextReader(XMLTextReader) + fun xmlFreeTextWriter(TextWriter) + fun xmlXPathFreeContext(XPathContext*) + fun xmlXPathFreeNodeSet(NodeSet*) + fun xmlXPathFreeObject(XPathObject*) end LibXML.xmlInitParser - -LibXML.xmlMemSetup( - ->GC.free, - ->GC.malloc(LibC::SizeT), - ->GC.realloc(Void*, LibC::SizeT), - ->(str) { - len = LibC.strlen(str) + 1 - copy = Pointer(UInt8).malloc(len) - copy.copy_from(str, len) - copy - } -) diff --git a/src/xml/namespace.cr b/src/xml/namespace.cr index 78553011a725..0f8233949a77 100644 --- a/src/xml/namespace.cr +++ b/src/xml/namespace.cr @@ -1,6 +1,7 @@ class XML::Namespace getter document : Node + # :nodoc: def initialize(@document : Node, @ns : LibXML::NS*) end diff --git a/src/xml/node.cr b/src/xml/node.cr index 4361be5ae7e5..52ade58aab78 100644 --- a/src/xml/node.cr +++ b/src/xml/node.cr @@ -1,18 +1,121 @@ +require "weak_ref" + class XML::Node LOOKS_LIKE_XPATH = /^(\.\/|\/|\.\.|\.$)/ - # Creates a new node. - def initialize(node : LibXML::Attr*) - initialize(node.as(LibXML::Node*)) + # Every Node must keep a reference to its document XML::Node. To keep things + # simple, a document XML::Node merely references itself. An unlinked node must + # still reference its original document XML::Node until adopted into another + # document's tree (the libxml nodes keep a pointer to their libxml doc). + # + # NOTE: when a libxml node is moved to another document, then the @document + # reference of its XML::Node and any instantiated descendant must be updated + # to pointer to the new document Node. + @document : Node + + # :nodoc: + # + # The constructors allocate a XML::Node for a libxml node once, so we don't + # finalize a document twice for example. + # + # We store the reference into the libxml struct (_private) for documents + # because a document's XML::Node lives as long as its libxml doc. However we + # can lose references to subtree XML::Node, so using _private would leave + # dangling pointers. We thus keep a cache of weak references to all nodes in + # the document, so we can still collect lost references, and at worst + # reinstantiate a XML::Node if needed. + # + # NOTE: when a XML::Node is moved to another document, the XML::Node and any + # instantiated descendant XML::Node shall be cleaned from the original + # document's cache, and must be added to the new document's cache. + protected getter! cache : Hash(LibXML::Node*, WeakRef(Node))? + + # :nodoc: + # + # Unlinked libxml nodes, and all their descendant nodes, don't appear in the + # document's tree anymore, and must be manually freed, yet we can't merely + # free the libxml node in a finalizer, because it would free the whole + # subtree, while we may still have live XML::Node instances. + # + # We keep an explicit list of unlinked libxml nodes. We can't rely on the + # cache because it uses weak references and the XML::Node could be collected, + # leaking the libxml node and its subtree. + # + # NOTE: the libxml node, along with any descendant shall be removed from the + # list when relinked into a tree, be it the same document or another. + protected getter! unlinked_nodes : Set(LibXML::Node*)? + + # :nodoc: + def self.new(doc : LibXML::Doc*, errors : Array(Error)? = nil) + if ptr = doc.value._private + ptr.as(Node) + else + new(doc_: doc, errors_: errors) + end end - # :ditto: - def initialize(node : LibXML::Doc*, @errors : Array(XML::Error)? = nil) - initialize(node.as(LibXML::Node*)) + # :nodoc: + def self.new(node : LibXML::Node*, document : self) : self + if node == document.@node + # should never happen, but just in case + return document + end + + if obj = document.cached?(node) + return obj + end + + obj = new(node_: node, document_: document) + document.cache[node] = WeakRef.new(obj) + obj end - # :ditto: - def initialize(@node : LibXML::Node*) + # :nodoc: + @[Deprecated] + def self.new(node : LibXML::Node*) : self + new(node, new(node.value.doc)) + end + + # :nodoc: + @[Deprecated] + def self.new(node : LibXML::Attr*) : self + new(node.as(LibXML::Node*), new(node.value.doc)) + end + + # the initializers must never be called directly, use the constructors above + + private def initialize(*, doc_ : LibXML::Doc*, errors_ : Array(Error)?) + @node = doc_.as(LibXML::Node*) + @errors = errors_ + @cache = Hash(LibXML::Node*, WeakRef(Node)).new + @unlinked_nodes = Set(LibXML::Node*).new + @document = uninitialized Node + @document = self + doc_.value._private = self.as(Void*) + end + + private def initialize(*, node_ : LibXML::Node*, document_ : self) + @node = node_.as(LibXML::Node*) + @document = document_ + end + + # :nodoc: + def finalize + return unless @document == self + + doc = @node.as(LibXML::Doc*) + + # free unlinked nodes and their subtrees + unlinked_nodes.each do |node| + if node.value.doc == doc + LibXML.xmlFreeNode(node) + else + # the node has been adopted into another document, don't free! + end + end + + # free the doc and its subtree + LibXML.xmlFreeDoc(@node.as(LibXML::Doc*)) end # Gets the attribute content for the *attribute* given by name. @@ -63,18 +166,21 @@ class XML::Node # Gets the list of children for this node as a `XML::NodeSet`. def children : XML::NodeSet child = @node.value.children + return NodeSet.new unless child - set = LibXML.xmlXPathNodeSetCreate(child) + size = 1 + while child = child.value.next + size += 1 + end - if child + child = @node.value.children + nodes = Slice(Node).new(size) do + node = Node.new(child, @document) child = child.value.next - while child - LibXML.xmlXPathNodeSetAddUnique(set, child) - child = child.value.next - end + node end - NodeSet.new(document, set) + NodeSet.new(nodes) end # Returns `true` if this is a comment node. @@ -93,12 +199,29 @@ class XML::Node # The string gets XML escaped, not interpreted as markup. def content=(content) check_no_null_byte(content) + + if fragment? || element? || attribute? + # libxml will immediately free all the children nodes, while we may have + # live references to a child or a descendant; explicitly unlink all the + # children before replacing the node's contents + child = @node.value.children + while child + if node = document.cached?(child) + node.unlink + else + document.unlinked_nodes << child + LibXML.xmlUnlinkNode(child) + end + child = child.value.next + end + end + LibXML.xmlNodeSetContent(self, content) end # Gets the document for this Node as a `XML::Node`. def document : XML::Node - Node.new @node.value.doc + @document end # Returns `true` if this is a Document or HTML Document node. @@ -114,21 +237,15 @@ class XML::Node # Returns the encoding of this node's document. def encoding : String? - if document? - encoding = @node.as(LibXML::Doc*).value.encoding - encoding ? String.new(encoding) : nil - else - document.encoding + if encoding = @document.@node.as(LibXML::Doc*).value.encoding + String.new(encoding) end end # Returns the version of this node's document. def version : String? - if document? - version = @node.as(LibXML::Doc*).value.version - version ? String.new(version) : nil - else - document.version + if version = @document.@node.as(LibXML::Doc*).value.version + String.new(version) end end @@ -143,7 +260,7 @@ class XML::Node child = @node.value.children while child if child.value.type == XML::Node::Type::ELEMENT_NODE - return Node.new(child) + return Node.new(child, @document) end child = child.value.next end @@ -283,7 +400,7 @@ class XML::Node # Returns the next sibling node or `nil` if not found. def next : XML::Node? next_node = @node.value.next - next_node ? Node.new(next_node) : nil + next_node ? Node.new(next_node, @document) : nil end # :ditto: @@ -296,7 +413,7 @@ class XML::Node next_node = @node.value.next while next_node if next_node.value.type == XML::Node::Type::ELEMENT_NODE - return Node.new(next_node) + return Node.new(next_node, @document) end next_node = next_node.value.next end @@ -346,7 +463,7 @@ class XML::Node nil else ns = @node.value.ns - ns ? Namespace.new(document, ns) : nil + ns ? Namespace.new(@document, ns) : nil end end @@ -356,7 +473,7 @@ class XML::Node ns = @node.value.ns_def while ns - namespaces << Namespace.new(document, ns) + namespaces << Namespace.new(@document, ns) ns = ns.value.next end @@ -404,7 +521,7 @@ class XML::Node if ns_list while ns_list.value - yield Namespace.new(document, ns_list.value) + yield Namespace.new(@document, ns_list.value) ns_list += 1 end end @@ -418,13 +535,13 @@ class XML::Node # Returns the parent node or `nil` if not found. def parent : XML::Node? parent = @node.value.parent - parent ? Node.new(parent) : nil + parent ? Node.new(parent, @document) : nil end # Returns the previous sibling node or `nil` if not found. def previous : XML::Node? prev_node = @node.value.prev - prev_node ? Node.new(prev_node) : nil + prev_node ? Node.new(prev_node, @document) : nil end # Returns the previous sibling node that is an element or `nil` if not found. @@ -432,7 +549,7 @@ class XML::Node prev_node = @node.value.prev while prev_node if prev_node.value.type == XML::Node::Type::ELEMENT_NODE - return Node.new(prev_node) + return Node.new(prev_node, @document) end prev_node = prev_node.value.prev end @@ -453,7 +570,7 @@ class XML::Node # Returns the root node for this document or `nil`. def root : XML::Node? root = LibXML.xmlDocGetRootElement(@node.value.doc) - root ? Node.new(root) : nil + root ? Node.new(root, @document) : nil end # Same as `#content`. @@ -548,7 +665,7 @@ class XML::Node LibC::Int.new(0) end - # Returns underlying `LibXML::Node*` instance. + # :nodoc: def to_unsafe @node end @@ -560,7 +677,8 @@ class XML::Node # Removes the node from the XML document. def unlink : Nil - LibXML.xmlUnlinkNode(self) + document.unlinked_nodes << @node + LibXML.xmlUnlinkNode(@node) end # Returns `true` if this is an xml Document node. @@ -667,4 +785,22 @@ class XML::Node raise XML::Error.new("Cannot escape string containing null character", 0) end end + + # these helpers must only be called on document nodes: + + protected def cached?(node : LibXML::Node*) : Node? + cache[node]?.try(&.value) + end + + protected def unlink_cached_children(node : LibXML::Node*) : Nil + child = node.value.children + while child + if obj = cached?(node) + obj.unlink + else + unlink_cached_children(child) + end + child = child.value.next + end + end end diff --git a/src/xml/node_set.cr b/src/xml/node_set.cr index 8f6a8a3474e4..e89c6f5242f8 100644 --- a/src/xml/node_set.cr +++ b/src/xml/node_set.cr @@ -1,36 +1,35 @@ -class XML::NodeSet +struct XML::NodeSet include Enumerable(Node) - def initialize(@doc : Node, @set : LibXML::NodeSet*) - end + # :nodoc: + def self.new(doc : Node, set : LibXML::NodeSet*) + return NodeSet.new unless set || set.value.node_nr > 0 - def self.new(doc : Node) - new doc, LibXML.xmlXPathNodeSetCreate(nil) + nodes = Slice(Node).new(set.value.node_nr) do |i| + Node.new(set.value.node_tab[i], doc) + end + NodeSet.new(nodes) end - def [](index : Int) : XML::Node - index += size if index < 0 + @nodes : Slice(Node) - unless 0 <= index < size - raise IndexError.new - end + # :nodoc: + def initialize(nodes : Slice(Node)? = nil) + @nodes = nodes || Slice(Node).new(0, Pointer(Void).null.as(Node)) + end - internal_at(index) + def [](index : Int) : Node + @nodes[index] end def each(&) : Nil - size.times do |i| - yield internal_at(i) - end + @nodes.each { |node| yield node } end def empty? : Bool - size == 0 + @nodes.empty? end - # See `Object#hash(hasher)` - def_hash object_id - def inspect(io : IO) : Nil io << '[' join io, ", ", &.inspect(io) @@ -42,22 +41,10 @@ class XML::NodeSet end def size : Int32 - @set.value.node_nr - end - - def object_id - @set.address + @nodes.size end def to_s(io : IO) : Nil join io, '\n' end - - def to_unsafe - @set - end - - private def internal_at(index) - Node.new(@set.value.node_tab[index]) - end end diff --git a/src/xml/reader.cr b/src/xml/reader.cr index 767e27c557a7..c600a969c6d2 100644 --- a/src/xml/reader.cr +++ b/src/xml/reader.cr @@ -49,6 +49,11 @@ class XML::Reader LibXML.xmlTextReaderSetStructuredErrorHandler(@reader, ->Error.structured_callback, Box.box(@errors)) end + # :nodoc: + def finalize + LibXML.xmlFreeTextReader(@reader) + end + # Moves the reader to the next node. def read : Bool LibXML.xmlTextReaderRead(@reader) == 1 @@ -180,7 +185,15 @@ class XML::Reader # Returns `nil` if the node could not be expanded. def expand? : XML::Node? xml = LibXML.xmlTextReaderExpand(@reader) - XML::Node.new(xml) if xml + XML::Node.new(xml, document) if xml + end + + def document + @document ||= if doc = LibXML.xmlTextReaderCurrentDoc(@reader) + Node.new(doc) + else + raise XML::Error.new("Failed to get current doc for XML::Reader", 0) + end end # Returns the text content of the node. @@ -189,7 +202,7 @@ class XML::Reader value ? String.new(value) : "" end - # Returns a reference to the underlying `LibXML::XMLTextReader`. + # :nodoc: def to_unsafe @reader end diff --git a/src/xml/xpath_context.cr b/src/xml/xpath_context.cr index c52a87a3c147..f057a57a074a 100644 --- a/src/xml/xpath_context.cr +++ b/src/xml/xpath_context.cr @@ -1,17 +1,22 @@ class XML::XPathContext getter errors = [] of XML::Error - def initialize(node : Node) - @ctx = LibXML.xmlXPathNewContext(node.to_unsafe.value.doc) - @ctx.value.node = node.to_unsafe + def initialize(@node : Node) + @ctx = LibXML.xmlXPathNewContext(@node.to_unsafe.value.doc) + @ctx.value.node = @node.to_unsafe {% if LibXML.has_method?(:xmlXPathSetErrorHandler) %} LibXML.xmlXPathSetErrorHandler(@ctx, ->Error.structured_callback, Box.box(@errors)) {% end %} end + # :nodoc: + def finalize + LibXML.xmlXPathFreeContext(@ctx) + end + def evaluate(search_path : String) - xpath = + xpath_object = {% if LibXML.has_method?(:xmlXPathSetErrorHandler) %} LibXML.xmlXPathEvalExpression(search_path, self) {% else %} @@ -20,7 +25,7 @@ class XML::XPathContext end {% end %} - unless xpath + unless xpath_object if error = @errors.first? raise error else @@ -28,22 +33,22 @@ class XML::XPathContext end end - case xpath.value.type - when LibXML::XPathObjectType::STRING - String.new(xpath.value.stringval) - when LibXML::XPathObjectType::NUMBER - xpath.value.floatval - when LibXML::XPathObjectType::BOOLEAN - xpath.value.boolval != 0 - when LibXML::XPathObjectType::NODESET - if xpath.value.nodesetval - NodeSet.new(Node.new(@ctx.value.doc), xpath.value.nodesetval) + retval = + case xpath_object.value.type + when LibXML::XPathObjectType::STRING + String.new(xpath_object.value.stringval) + when LibXML::XPathObjectType::NUMBER + xpath_object.value.floatval + when LibXML::XPathObjectType::BOOLEAN + xpath_object.value.boolval != 0 + when LibXML::XPathObjectType::NODESET + NodeSet.new(@node.document, xpath_object.value.nodesetval) else - NodeSet.new(Node.new(@ctx.value.doc)) + NodeSet.new end - else - NodeSet.new(Node.new(@ctx.value.doc)) - end + + LibXML.xmlXPathFreeObject(xpath_object) + retval end def register_namespaces(namespaces) : Nil @@ -76,6 +81,7 @@ class XML::XPathContext LibXML.xmlXPathRegisterVariable(self, name, obj) end + # :nodoc: def to_unsafe @ctx end