Skip to content

Commit

Permalink
Fix to not allow Parameter Entity References at internal subsets.
Browse files Browse the repository at this point in the history
## Why?
In the internal subset of DTD, references to parameter entities are not allowed within markup declarations.

See: https://www.w3.org/TR/xml/#wfc-PEinInternalSubset

> Well-formedness constraint: PEs in Internal Subset

> In the internal DTD subset, parameter-entity references MUST NOT occur within markup declarations; they may occur where markup declarations can occur. (This does not apply to references that occur in external parameter entities or to the external subset.)
  • Loading branch information
naitoh committed Aug 12, 2024
1 parent 9fcad51 commit 5d75caf
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 118 deletions.
56 changes: 9 additions & 47 deletions lib/rexml/entity.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@ class Entity < Child
EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
NDATADECL = "\\s+NDATA\\s+#{NAME}"
PEREFERENCE = "%#{NAME};"
PEREFERENCE_RE = /#{PEREFERENCE}/um
ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um

attr_reader :name, :external, :ref, :ndata, :pubid
attr_reader :name, :external, :ref, :ndata, :pubid, :value

# Create a new entity. Simple entities can be constructed by passing a
# name, value to the constructor; this creates a generic, plain entity
Expand Down Expand Up @@ -59,6 +60,10 @@ def initialize stream, value=nil, parent=nil, reference=false
@name = stream
@value = value
end

if PEREFERENCE_RE.match?(@value)
raise "Parameter Entity References forbidden in internal subset: #{@value}"
end
end

# Evaluates whether the given string matches an entity definition,
Expand All @@ -68,14 +73,11 @@ def Entity::matches? string
end

# Evaluates to the unnormalized value of this entity; that is, replacing
# all entities -- both %ent; and &ent; entities. This differs from
# +value()+ in that +value+ only replaces %ent; entities.
# &ent; entities.
def unnormalized
document.record_entity_expansion unless document.nil?
v = value()
return nil if v.nil?
@unnormalized = Text::unnormalize(v, parent)
@unnormalized
return nil if @value.nil?
@unnormalized = Text::unnormalize(@value, parent)
end

#once :unnormalized
Expand Down Expand Up @@ -121,46 +123,6 @@ def to_s
write rv
rv
end

PEREFERENCE_RE = /#{PEREFERENCE}/um
# Returns the value of this entity. At the moment, only internal entities
# are processed. If the value contains internal references (IE,
# %blah;), those are replaced with their values. IE, if the doctype
# contains:
# <!ENTITY % foo "bar">
# <!ENTITY yada "nanoo %foo; nanoo>
# then:
# doctype.entity('yada').value #-> "nanoo bar nanoo"
def value
@resolved_value ||= resolve_value
end

def parent=(other)
@resolved_value = nil
super
end

private
def resolve_value
return nil if @value.nil?
return @value unless @value.match?(PEREFERENCE_RE)

matches = @value.scan(PEREFERENCE_RE)
rv = @value.clone
if @parent
sum = 0
matches.each do |entity_reference|
entity_value = @parent.entity( entity_reference[0] )
if sum + entity_value.bytesize > Security.entity_expansion_text_limit
raise "entity expansion has grown too large"
else
sum += entity_value.bytesize
end
rv.gsub!( /%#{entity_reference.join};/um, entity_value )
end
end
rv
end
end

# This is a set of entity constants -- the ones defined in the XML
Expand Down
3 changes: 3 additions & 0 deletions lib/rexml/parsers/baseparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ class BaseParser
}

module Private
PEREFERENCE_RE = /#{PEREFERENCE}/um
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
Expand Down Expand Up @@ -334,6 +335,8 @@ def pull_event
match[4] = match[4][1..-2] # HREF
match.delete_at(5) if match.size > 5 # Chop out NDATA decl
# match is [ :entity, name, PUBLIC, pubid, href(, ndata)? ]
elsif Private::PEREFERENCE_RE.match?(match[2])
raise REXML::ParseException.new("Parameter Entity References forbidden in internal subset: #{match[2]}", @source)
else
match[2] = match[2][1..-2]
match.pop if match.size == 4
Expand Down
50 changes: 0 additions & 50 deletions test/test_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -147,56 +147,6 @@ def test_entity_expansion_text_limit
assert_equal(90, doc.root.children.first.value.bytesize)
end
end

class ParameterEntityTest < self
def test_have_value
xml = <<EOF
<!DOCTYPE root [
<!ENTITY % a "BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.BOOM.">
<!ENTITY % b "%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;">
<!ENTITY % c "%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;">
<!ENTITY % d "%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;">
<!ENTITY % e "%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;">
<!ENTITY % f "%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;">
<!ENTITY % g "%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;">
<!ENTITY test "test %g;">
]>
<cd></cd>
EOF

assert_raise(REXML::ParseException) do
REXML::Document.new(xml)
end
REXML::Security.entity_expansion_limit = 100
assert_equal(100, REXML::Security.entity_expansion_limit)
assert_raise(REXML::ParseException) do
REXML::Document.new(xml)
end
end

def test_empty_value
xml = <<EOF
<!DOCTYPE root [
<!ENTITY % a "">
<!ENTITY % b "%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;%a;">
<!ENTITY % c "%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;%b;">
<!ENTITY % d "%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;%c;">
<!ENTITY % e "%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;%d;">
<!ENTITY % f "%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;%e;">
<!ENTITY % g "%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;%f;">
<!ENTITY test "test %g;">
]>
<cd></cd>
EOF

REXML::Document.new(xml)
REXML::Security.entity_expansion_limit = 90
assert_equal(90, REXML::Security.entity_expansion_limit)
assert_raise(REXML::ParseException) do
REXML::Document.new(xml)
end
end
end
end

def test_tag_in_cdata_with_not_ascii_only_but_ascii8bit_encoding_source
Expand Down
46 changes: 25 additions & 21 deletions test/test_entity.rb
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,7 @@ def test_parse_entity

def test_constructor
one = [ %q{<!ENTITY % YN '"Yes"'>},
%q{<!ENTITY % YN2 "Yes">},
%q{<!ENTITY WhatHeSaid "He said %YN;">},
%q{<!ENTITY WhatHeSaid 'He said "Yes"'>},
'<!ENTITY open-hatch
SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">',
'<!ENTITY open-hatch2
Expand All @@ -71,8 +70,7 @@ def test_constructor
NDATA gif>' ]
source = %q{<!DOCTYPE foo [
<!ENTITY % YN '"Yes"'>
<!ENTITY % YN2 "Yes">
<!ENTITY WhatHeSaid "He said %YN;">
<!ENTITY WhatHeSaid 'He said "Yes"'>
<!ENTITY open-hatch
SYSTEM "http://www.textuality.com/boilerplate/OpenHatch.xml">
<!ENTITY open-hatch2
Expand Down Expand Up @@ -138,6 +136,28 @@ def test_doctype_entity_with_nested_references
assert_equal("X", doctype.entities["b"].unnormalized)
end

def test_parameter_entity_reference_forbidden_by_internal_subset_in_new
assert_raise(RuntimeError.new('Parameter Entity References forbidden in internal subset: %a;')) do
REXML::Entity.new( [:entitydecl, "c", "%a;"])
end
end

def test_parameter_entity_reference_forbidden_by_internal_subset_in_parser
source = '<!DOCTYPE root [ <!ENTITY % a "B" > <!ENTITY c "%a;" > ]><root/>'
parser = REXML::Parsers::BaseParser.new(source)
exception = assert_raise(REXML::ParseException) do
while parser.has_next?
parser.pull
end
end
assert_equal(<<-DETAIL, exception.to_s)
Parameter Entity References forbidden in internal subset: "%a;"
Line: 1
Position: 54
Last 80 unconsumed characters:
DETAIL
end

def test_entity_string_limit
template = '<!DOCTYPE bomb [ <!ENTITY a "^" > ]> <bomb>$</bomb>'
len = 5120 # 5k per entity
Expand All @@ -156,22 +176,6 @@ def test_entity_string_limit
end
end

def test_entity_string_limit_for_parameter_entity
template = '<!DOCTYPE bomb [ <!ENTITY % a "^" > <!ENTITY bomb "$" > ]><root/>'
len = 5120 # 5k per entity
template.sub!(/\^/, "B" * len)

# 10k is OK
entities = '%a;' * 2 # 5k entity * 2 = 10k
REXML::Document.new(template.sub(/\$/, entities))

# above 10k explodes
entities = '%a;' * 3 # 5k entity * 2 = 15k
assert_raise(REXML::ParseException) do
REXML::Document.new(template.sub(/\$/, entities))
end
end

def test_raw
source = '<!DOCTYPE foo [
<!ENTITY ent "replace">
Expand All @@ -195,7 +199,7 @@ def test_lazy_evaluation
def test_entity_replacement
source = %q{<!DOCTYPE foo [
<!ENTITY % YN '"Yes"'>
<!ENTITY WhatHeSaid "He said %YN;">]>
<!ENTITY WhatHeSaid 'He said "Yes"'>]>
<a>&WhatHeSaid;</a>}

d = REXML::Document.new( source )
Expand Down

0 comments on commit 5d75caf

Please sign in to comment.