From 3732a3b20aeb19958c15823a0d32f1da450fa685 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Diogo=20Os=C3=B3rio?= Date: Thu, 8 Oct 2020 16:54:05 +0100 Subject: [PATCH] Fixes unintentional newline characters within lists with paragraphs This commit aims to fix a problem when parsing markup similar to: ```ruby [8] pry(main)> ReverseMarkdown.convert("<ul><li> <p>a</p></li></ul>") => "- \n\na\n\n" ``` Note that the output has newlines between the markdown list marker and the actual text. If you remove the space between the list item tag and the paragraph tag: ```ruby [7] pry(main)> ReverseMarkdown.convert("<ul><li><p>a</p></li></ul>") => "- a\n\n" ``` ReverseMarkdown then produces what I believe to be the intended behaviour. There was already a test in place to account for this situation, but it was `xit`'ed. The proposed patch is to ask Nokogiri for the first child element of the list item, instead of its first child node (which might be a text node containing only whitespace, such as a space or a newline). --- lib/reverse_markdown/converters/li.rb | 4 ++-- spec/assets/lists.html | 2 ++ spec/components/lists_spec.rb | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/reverse_markdown/converters/li.rb b/lib/reverse_markdown/converters/li.rb index 549ca17..478b317 100644 --- a/lib/reverse_markdown/converters/li.rb +++ b/lib/reverse_markdown/converters/li.rb @@ -2,8 +2,8 @@ module ReverseMarkdown module Converters class Li < Base def convert(node, state = {}) - contains_child_paragraph = node.children.first ? node.children.first.name == 'p' : false - content_node = contains_child_paragraph ? node.children.first : node + contains_child_paragraph = node.first_element_child ? node.first_element_child.name == 'p' : false + content_node = contains_child_paragraph ? 
node.first_element_child : node content = treat_children(content_node, state) indentation = indentation_from(state) prefix = prefix_for(node) diff --git a/spec/assets/lists.html b/spec/assets/lists.html index a23922c..8a2ba9e 100644 --- a/spec/assets/lists.html +++ b/spec/assets/lists.html @@ -44,6 +44,8 @@ + +