From db5e8b6b822e1793f21fbc637802372d6f49a1dd Mon Sep 17 00:00:00 2001 From: Brad Hanks Date: Mon, 12 Feb 2024 13:47:32 -0700 Subject: [PATCH] Bug with line starting with HTML comment #361 fix (#486) * Bug with line starting with HTML comment #361 * lib/earmark_parser/line_scanner.ex: regex refactor --- lib/earmark_parser/line_scanner.ex | 158 ++++++++++++++++++----------- 1 file changed, 97 insertions(+), 61 deletions(-) diff --git a/lib/earmark_parser/line_scanner.ex b/lib/earmark_parser/line_scanner.ex index b5e898c1..04d5a2e4 100644 --- a/lib/earmark_parser/line_scanner.ex +++ b/lib/earmark_parser/line_scanner.ex @@ -1,6 +1,6 @@ defmodule Earmark.Parser.LineScanner do @moduledoc false - + require Logger alias Earmark.Parser.{Helpers, Line, Options} # This is the re that matches the ridiculous "[id]: url title" syntax @@ -39,6 +39,35 @@ defmodule Earmark.Parser.LineScanner do .*? > '''x + + @rgx_map %{ + block_quote: ~r/\A>\s?(.*)/, + column_rgx: ~r{\A[\s|:-]+\z}, + comment_rest: ~r/()(.*)/, + fence: ~r/\A(\s*)(`{3,}|~{3,})\s*([^`\s]*)\s*\z/u, + footnote_definition: ~r/\A\[\^([^\s\]]+)\]:\s+(.*)/, + heading: ~r/^(\#{1,6})\s+(?|(.*?)\s*#*\s*$|(.*))/u, + html_close_tag: ~r/\A<\/([-\w]+?)>/, + html_comment_complete: ~r/\A \z/x, + html_comment_start: ~r/\A /u, options, recursive)] - - other -> - [other | _with_lookahead(lines, options, recursive)] - end + process_line(line_lnb, options, recursive) ++ + _with_lookahead(lines, options, recursive) end defp _with_lookahead([], _options, _recursive), do: [] - defp _lookahead_until_match([], _, _, _), do: [] + defp process_line({line, lnb}, options, recursive) do + case regex_run(:comment_rest, line, capture: :all_but_first) do + [comment, rest] -> + [type_of({comment, lnb}, options, recursive)] ++ + [type_of({rest, lnb}, options, recursive)] - defp _lookahead_until_match([{line, lnb} | lines], regex, options, recursive) do - if line =~ regex do - [type_of({line, lnb}, options, recursive) | _with_lookahead(lines, options, recursive)] - else - [ - %{_create_text(line) | lnb: lnb} - | _lookahead_until_match(lines, regex, options, recursive) - ] + nil -> + [type_of({line, lnb}, options, recursive)] end end - @column_rgx ~r{\A[\s|:-]+\z} defp _determine_if_header(columns) do columns - |> Enum.all?(fn col -> Regex.run(@column_rgx, col) end) + |> Enum.all?(fn col -> regex_run(:column_rgx, col) end) end defp _split_table_columns(line) do line |> String.split(~r{(? Enum.map(&String.trim/1) - |> Enum.map(fn col -> Regex.replace(~r{\\\|}, col, "|") end) + |> Enum.map(fn col -> + Regex.replace(~r{\\\|}, col, "|") + |> String.trim() + end) + end + + defp regex_run(key, target), do: regex_run(key, target, []) + + defp regex_run(key, target, opts) do + @rgx_map + |> Map.get(key) + |> Regex.run(target, opts) + end + + defp table_line?(line), do: table_line?(line, :none) + + defp table_line?(line, opt) do + line + |> String.replace(@rgx_map.wiki_link, "") + |> case do + line when opt in [:gfm] -> String.match?(line, @rgx_map.table_line_gfm) + _ -> String.match?(line, @rgx_map.table_line) + end end end