Skip to content

Commit

Permalink
Handle closing tag with whitespace (#128)
Browse files Browse the repository at this point in the history
  • Loading branch information
ashabhasa authored Jul 4, 2024
1 parent 2e08d54 commit daff30c
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 12 deletions.
29 changes: 17 additions & 12 deletions lib/saxy/parser/builder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -851,7 +851,7 @@ defmodule Saxy.Parser.Builder do
open_tag_name(rest, more?, original, pos, state, Utils.compute_char_len(codepoint))

"/" <> rest ->
close_tag_name(rest, more?, original, pos + 1, state, 0)
close_tag_name(rest, more?, original, pos + 1, state, 0, 0)

"![CDATA[" <> rest ->
element_cdata(rest, more?, original, pos + 8, state, 0)
Expand Down Expand Up @@ -1161,30 +1161,31 @@ defmodule Saxy.Parser.Builder do
end
end

defp close_tag_name(<<buffer::bits>>, more?, original, pos, state, 0) do
defp close_tag_name(<<buffer::bits>>, more?, original, pos, state, 0, 0) do
lookahead buffer, @streaming do
char <> rest when is_ascii_name_start_char(char) ->
close_tag_name(rest, more?, original, pos, state, 1)
close_tag_name(rest, more?, original, pos, state, 1, 1)

token in unquote(utf8_binaries()) when more? ->
halt!(close_tag_name(token, more?, original, pos, state, 0))
halt!(close_tag_name(token, more?, original, pos, state, 0, 0))

<<codepoint::utf8>> <> rest when is_utf8_name_start_char(codepoint) ->
close_tag_name(rest, more?, original, pos, state, Utils.compute_char_len(codepoint))
len = Utils.compute_char_len(codepoint)
close_tag_name(rest, more?, original, pos, state, len, len)

_ in [""] when more? ->
halt!(close_tag_name("", more?, original, pos, state, 0))
halt!(close_tag_name("", more?, original, pos, state, 0, 0))

_ ->
Utils.parse_error(original, pos, state, {:token, :end_tag})
end
end

defp close_tag_name(<<buffer::bits>>, more?, original, pos, state, len) do
defp close_tag_name(<<buffer::bits>>, more?, original, pos, state, len, copy_to) do
lookahead buffer, @streaming do
">" <> rest ->
[open_tag | stack] = state.stack
ending_tag = binary_part(original, pos, len)
ending_tag = binary_part(original, pos, copy_to)
pos = pos + len + 1

if open_tag == ending_tag do
Expand All @@ -1205,16 +1206,20 @@ defmodule Saxy.Parser.Builder do
end

char <> rest when is_ascii_name_char(char) ->
close_tag_name(rest, more?, original, pos, state, len + 1)
close_tag_name(rest, more?, original, pos, state, len + 1, copy_to + 1)

char <> rest when is_whitespace(char) ->
close_tag_name(rest, more?, original, pos, state, len + 1, copy_to)

token in unquote(utf8_binaries()) when more? ->
halt!(close_tag_name(token, more?, original, pos, state, len))
halt!(close_tag_name(token, more?, original, pos, state, len, copy_to))

<<codepoint::utf8>> <> rest when is_utf8_name_char(codepoint) ->
close_tag_name(rest, more?, original, pos, state, len + Utils.compute_char_len(codepoint))
char_len = Utils.compute_char_len(codepoint)
close_tag_name(rest, more?, original, pos, state, len + char_len, copy_to + char_len)

_ in [""] when more? ->
halt!(close_tag_name("", more?, original, pos, state, len))
halt!(close_tag_name("", more?, original, pos, state, len, copy_to))

_ ->
Utils.parse_error(original, pos + len, state, {:token, :end_tag})
Expand Down
18 changes: 18 additions & 0 deletions test/saxy_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,20 @@ defmodule SaxyTest do
]
end

# Regression test for #128: a closing tag may carry whitespace between its
# name and the terminating `>`; the reported element name must not include it.
test "parse_string/4 parses XML binary with closing tags containing whitespaces" do
  xml = "<foo>Some data</foo >"

  # The expected events are listed with the last one first.
  expected_events = [
    end_document: {},
    end_element: "foo",
    characters: "Some data",
    start_element: {"foo", []},
    start_document: []
  ]

  assert {:ok, events} = parse(xml, StackHandler, [])
  assert events == expected_events
end

test "handles trailing Unicode codepoints during streaming" do
data = "<foo>𠜎𠜱𠝹𠱓</foo>"
stream = for byte <- :binary.bin_to_list(data), do: <<byte>>
Expand Down Expand Up @@ -134,6 +148,10 @@ defmodule SaxyTest do
data = "<foo><bar></bee></foo>"
assert {:error, exception} = parse(data, StackHandler, [])
assert Exception.message(exception) == "unexpected ending tag \"bee\", expected tag: \"bar\""

data = "<foo>Some data</foo bar >"
assert {:error, exception} = parse(data, StackHandler, [])
assert Exception.message(exception) == "unexpected ending tag \"foo \", expected tag: \"foo\""
end

describe "encode!/2" do
Expand Down

0 comments on commit daff30c

Please sign in to comment.