From 7cb5eaeb221c322b9912f724183294d8ce96bae3 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 17 Aug 2024 17:45:52 +0900 Subject: [PATCH] parser tree: improve namespace conflicted attribute check performance It was slow for deep element. Reported by l33thaxor. Thanks!!! --- lib/rexml/element.rb | 11 ----------- lib/rexml/parsers/baseparser.rb | 15 +++++++++++++++ test/parse/test_element.rb | 14 ++++++++++++++ test/test_core.rb | 4 ++++ 4 files changed, 33 insertions(+), 11 deletions(-) diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index eb802165..4e3a60b9 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -2384,17 +2384,6 @@ def []=( name, value ) elsif old_attr.kind_of? Hash old_attr[value.prefix] = value elsif old_attr.prefix != value.prefix - # Check for conflicting namespaces - if value.prefix != "xmlns" and old_attr.prefix != "xmlns" - old_namespace = old_attr.namespace - new_namespace = value.namespace - if old_namespace == new_namespace - raise ParseException.new( - "Namespace conflict in adding attribute \"#{value.name}\": "+ - "Prefix \"#{old_attr.prefix}\" = \"#{old_namespace}\" and "+ - "prefix \"#{value.prefix}\" = \"#{new_namespace}\"") - end - end store value.name, {old_attr.prefix => old_attr, value.prefix => value} else diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 9ed032d3..d11c2766 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -754,6 +754,7 @@ def process_instruction def parse_attributes(prefixes) attributes = {} + expanded_names = {} closed = false while true if @source.match(">", true) @@ -805,6 +806,20 @@ def parse_attributes(prefixes) raise REXML::ParseException.new(msg, @source, self) end + unless prefix == "xmlns" + uri = @namespaces[prefix] + expanded_name = [uri, local_part] + existing_prefix = expanded_names[expanded_name] + if existing_prefix + message = "Namespace conflict in adding attribute " + + "\"#{local_part}\": " + + "Prefix \"#{existing_prefix}\" = \"#{uri}\" and " + + "prefix \"#{prefix}\" = \"#{uri}\"" + raise REXML::ParseException.new(message, @source, self) + end + expanded_names[expanded_name] = prefix + end + attributes[name] = value else message = "Invalid attribute name: <#{@source.buffer.split(%r{[/>\s]}).first}>" diff --git a/test/parse/test_element.rb b/test/parse/test_element.rb index 2b0746ea..ab4818da 100644 --- a/test/parse/test_element.rb +++ b/test/parse/test_element.rb @@ -131,5 +131,19 @@ def test_linear_performance_attribute_value_gt REXML::Document.new('" * n + '">') end end + + def test_linear_performance_deep_same_name_attributes + seq = [100, 500, 1000, 1500, 2000] + assert_linear_performance(seq, rehearsal: 10) do |n| + xml = <<-XML + + +#{"\n" * n} +#{"\n" * n} + + XML + REXML::Document.new(xml) + end + end end end diff --git a/test/test_core.rb b/test/test_core.rb index b079c203..48666c86 100644 --- a/test/test_core.rb +++ b/test/test_core.rb @@ -136,6 +136,10 @@ def test_attribute_namespace_conflict # https://www.w3.org/TR/xml-names/#uniqAttrs message = <<-MESSAGE.chomp Namespace conflict in adding attribute "a": Prefix "n1" = "http://www.w3.org" and prefix "n2" = "http://www.w3.org" +Line: 4 +Position: 140 +Last 80 unconsumed characters: +/> MESSAGE assert_raise(REXML::ParseException.new(message)) do Document.new(<<-XML)