From ad8eea87e4a674f2cd412d9fe3ca9dfe0b6ff51d Mon Sep 17 00:00:00 2001 From: Hofi Date: Fri, 31 May 2024 16:30:58 +0200 Subject: [PATCH] Fixing of markdown processing - processing our special markdown notation [[ ]] still had numerous issues, like parsed inside code blocks - some markdown links still not skipped - processing order of [[ ]] [[ | ]] was bogus - added handling of a special form of our notation [[title|-]] that can protect the given title from further autolink/tooltip processing - prepared possible processing of our markdown notation in H2-H6 headings Signed-off-by: Hofi Signed-off-by: Hofi --- _plugins/generate_tooltips.rb | 96 ++++++++++++++++++++++++++--------- 1 file changed, 73 insertions(+), 23 deletions(-) diff --git a/_plugins/generate_tooltips.rb b/_plugins/generate_tooltips.rb index 1be9c976..5861f034 100644 --- a/_plugins/generate_tooltips.rb +++ b/_plugins/generate_tooltips.rb @@ -47,39 +47,60 @@ def prefixed_url(url, base_url) return url end + def is_modifiable_markdown_part?(part) + # TODO: Allowing our custom markdown notation in the headings require removing the custom notation during link generation + return part.start_with?('[[') # || part.start_with?('#') + end + def make_tooltip(page, page_links, id, url, match) match_parts = match.split(/\|/) + # If the text has an '|' it means it comes from our special autolink/tooltip [[text|id]] markdown block # We have to reparse it a bit and get the id we must use if match_parts.length > 1 #puts "match_parts: #{match_parts}" - match = match_parts[0] + title = match_parts[0] + if title.length <= 0 + puts "Error: Empty title in matching part: '#{match}' -> #{match_parts}" + # nil means, show the original markdown part, instead of a half rendered one + return nil + end id = match_parts[1] + # This is a special use case [[title|-]] that protects the given title from further processing + if id == '-' + # Just use the original title text + return title + end link_data = page_links[id] if link_data 
!= nil url = link_data["url"] url = prefixed_url(url, page.site.config["baseurl"]) else - puts "Error: Unknown ID in matching part: #{match_parts}" - return match + puts "Error: Unknown ID in matching part: '#{match}' -> #{match_parts}" + # nil means, show the original markdown part, instead of a half rendered one + return nil end + else + title = match end if id == nil or id.length <= 0 - puts "Error: Empty ID in matching part: #{match}" - return match + puts "Error: Empty ID in matching part: '#{match}' -> #{match_parts}" + # nil means, show the original markdown part, instead of a half rendered one + return nil end if url == nil or url.length <= 0 - puts "Error: Empty URL for ID: #{id} in matching part: #{match}" - return match + puts "Error: Empty URL for ID: '#{id}' in matching part: '#{match}' -> #{match_parts}" + # nil means, show the original markdown part, instead of a half rendered one + return nil end # NOTE: Now we treat every link that has protocol prefix part as an external one # that allows usage of direct links anywhere if needed (not recommended, plz use external_links.yml instead) # but, at the same time requires e.g. 
all the really external links to be fully qualified (even in external_links.yml as well) external_url = is_prefixed_url?(url) - match = save_from_markdownify(match) - replacement_text = '' + match + '' + title = save_from_markdownify(title) + replacement_text = '' + title + '' # puts "replacement_text: " + replacement_text return replacement_text @@ -91,11 +112,15 @@ def process_markdown_part(page, markdown_part, page_links, full_pattern, id, url left_separator = $1 matched_text = $2 right_separator = $3 - #puts "\nmatch: #{match}\nleft_separator: #{left_separator}\nmatched_text: #{matched_text}\nright_separator: #{right_separator}" + # puts "\nmatch: #{match}\nleft_separator: #{left_separator}\nmatched_text: #{matched_text}\nright_separator: #{right_separator}" replacement_text = make_tooltip(page, page_links, id, url, matched_text) - if add_separator - replacement_text = left_separator + replacement_text + right_separator + if replacement_text != nil + if add_separator + replacement_text = left_separator + replacement_text + right_separator + end + else + replacement_text = markdown_part.gsub(/\|/, "\\\|") end replacement_text end @@ -110,10 +135,26 @@ def process_markdown_parts(page, markdown) # Regular expression pattern to match special Markdown blocks # Unlike the others this needs grouping as we use do |match| for enumeration # NOTE: Use multi line matching partially as e.g. 
code blocks can span to multiple lines - special_markdown_blocks_pattern = /((?m:````.*?````|```.*?```|``.*?``|`.*?`)|\[\[.*?\]\]|\[.*?\]\(.*?\)\{\:.*?\}|\[.*?\]\(.*?\)|\[.*?\]\{.*?\}|^#+\s.*?$)/ + markdown_blocks_pattern = /((?m:````.*?````|```.*?```|``.*?``|`.*?`)|\[\[[^\]^\[]*?\]\]|\[[^\]^\[]*?\]\(.*?\)\{\:.*?\}|\[[^\]^\[]*?\]\(.*?\)|\[[^\]^\[]*?\]:.*?$|\[[^\]^\[]*?\]\s*\[.*?\]|^#+\s.*?$)/ + # TODO: Always keep the /x version below in sync with the one-liner version; it exists for readability + # FIXME: The /x version below does not work the same way, most likely because in extended mode an unescaped '#' starts a comment, so the header alternative '^#+\s.*?$' must be written as '^\#+\s.*?$' + # markdown_blocks_pattern = /( # Either Code blocks + # (?m: # Even Multiline ones + # ````.*?```` | # Code block with 4 backticks + # ```.*?``` | # Code block with 3 backticks + # ``.*?`` | # Code block with 2 backticks + # `.*?` # Inline code with 1 backtick + # ) | # + # \[\[[^\]^\[]*?\]\] | # or Our special, custom markdown notation + # \[[^\]^\[]*?\]\(.*?\)\{\:.*?\} | # or Link with attribute + # \[[^\]^\[]*?\]\(.*?\) | # Link without attribute + # \[[^\]^\[]*?\]:.*?$ | # Link reference label declaration + # \[[^\]^\[]*?\]\s*\[.*?\] | # Link using reference label + # ^#+\s.*?$ # or Headers + # )/x # Split the content by special Markdown blocks - markdown_parts = markdown.split(special_markdown_blocks_pattern) + markdown_parts = markdown.split(markdown_blocks_pattern) #puts markdown_parts markdown_parts.each_with_index do |markdown_part, markdown_index| # puts "---------------\nmarkdown_index: " + markdown_index.to_s + "\n" + (markdown_index.even? ? "NOT " : "") + "markdown_part: " + markdown_part @@ -134,27 +175,36 @@ def process_markdown_parts(page, markdown) #puts "searching for #{title} with pattern #{pattern}" if markdown_index.even? - # Content outside of special Markdown blocks, aka. pure text (NOTE: Also excludes the reqursively self added title tooltips/links) + # Content outside of Markdown blocks, aka. 
pure text # Search for known link titles # NOTE: Using multi line matching here will not help either if the pattern itself is in the middle broken/spaned to multiple lines, so # using whitespace replacements now inside the patter to handle this, see above! + # NOTE: Also excludes the recursively self-added title tooltips/links full_pattern = /(^|[\s.,;:&'"(])(#{pattern})([\s.,;:&'")]|\z)(?![^<]*?<\/a>)/ markdown_part = process_markdown_part(page, markdown_part, page_links, full_pattern, id, url, true) else - # Content inside of special Markdown blocks + # Content inside of Markdown blocks - # Handle own auto\tooltip links [[title]], but NOT [[title|id]], see bellow why - full_pattern = /(\[\[)(#{pattern})(\]\])/ - markdown_part = process_markdown_part(page, markdown_part, page_links, full_pattern, id, url, false) + # Handle our special markdown notation autolink/tooltip links [[title]], but NOT [[title|id]], see below why + if is_modifiable_markdown_part?(markdown_part) + full_pattern = /(\[\[)(#{pattern})(\]\])/ + markdown_part = process_markdown_part(page, markdown_part, page_links, full_pattern, id, url, false) + end end end + # Handle our special markdown notation autolink/tooltip links [[title|id]] + # This must be a separate run because, independently of the given title, an ID, if present, always overrides the title, and the title exclusion as well if markdown_index.odd? - # Handle own auto\tooltip links [[title|id]] - # This must be a separate run, as independent from the given title, if ID is presented it will always override title, and title exclusion as well - full_pattern = /(\[\[)(.+\|.+)(\]\])/ - markdown_part = process_markdown_part(page, markdown_part, page_links, full_pattern, nil, nil, false) + # Content inside of Markdown blocks + + if is_modifiable_markdown_part?(markdown_part) + # puts "\nmarkdown_index: " + markdown_index.to_s + "\n" + (markdown_index.even? ? 
"NOT " : "") + "markdown_part: " + markdown_part + # NOTE: The difference in the pattern is intentional; allowing an empty part on either side of | keeps the same flow inside process_markdown_part + full_pattern = /(\[\[)(.*?\|.*?)(\]\])/ + markdown_part = process_markdown_part(page, markdown_part, page_links, full_pattern, nil, nil, false) + end end #puts "new markdown_part: " + markdown_part