From cbb23240143571fe65a11211240c94726427783e Mon Sep 17 00:00:00 2001
From: Markus Neteler
Date: Thu, 31 Oct 2024 09:28:39 +0100
Subject: [PATCH] docs: script to convert HTML manual pages to markdown

Script to convert recursively all .html files to .md (GitHub flavoured
Markdown).

(see related #3849)
---
 utils/grass_html2md.sh     | 54 ++++++++++++++++++++++++++++++++++++++
 utils/pandoc_codeblock.lua |  8 ++++++
 2 files changed, 62 insertions(+)
 create mode 100755 utils/grass_html2md.sh
 create mode 100644 utils/pandoc_codeblock.lua

diff --git a/utils/grass_html2md.sh b/utils/grass_html2md.sh
new file mode 100755
index 0000000000..f09695f407
--- /dev/null
+++ b/utils/grass_html2md.sh
@@ -0,0 +1,54 @@
+#!/bin/sh
+set -eu
+
+###############################################################################
+# Convert recursively all .html files to .md (GitHub flavoured Markdown)
+#
+# Dependencies:
+#    pandoc
+#
+# Author(s):
+#    Martin Landa, Markus Neteler
+#
+# Usage:
+#    If you have "pandoc" in PATH, execute for HTML file conversion in
+#    current directory and subdirectories:
+#      ./utils/grass_html2md.sh
+#
+# COPYRIGHT: (C) 2024 by the GRASS Development Team
+#
+#            This program is free software under the GNU General Public
+#            License (>=v2). Read the file COPYING that comes with GRASS
+#            for details.
+#
+###############################################################################
+
+# The Lua filter is shipped next to this script; resolve it relative to the
+# script location so that the conversion can be started from any directory.
+script_dir=$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)
+lua_filter="${script_dir}/pandoc_codeblock.lua"
+
+# fail early with a clear message instead of once per HTML file
+if ! command -v pandoc > /dev/null 2>&1 ; then
+    echo "ERROR: pandoc not found in PATH" >&2
+    exit 1
+fi
+if test ! -f "${lua_filter}" ; then
+    echo "ERROR: Lua filter not found: ${lua_filter}" >&2
+    exit 1
+fi
+
+# run recursively: HTML to MD
+# - the pattern is quoted so that find, not the shell, expands it
+# - reading the file list line by line keeps paths with blanks intact
+# - <div class="code"><pre> ... </pre></div> is rewritten to
+#   <pre><code> ... </code></pre> before pandoc reads the page
+# - afterwards a trailing blank is stripped from each line and links ending
+#   in ".html)" are pointed to the converted ".md)" pages
+find . -name '*.html' | while IFS= read -r f ; do
+    echo "${f}"
+    sed -e 's#<div class="code"><pre>#<pre><code>#g' \
+        -e 's#</pre></div>#</code></pre>#g' "${f}" | \
+        pandoc --from=html --to=gfm --lua-filter "${lua_filter}" | \
+        sed -e 's+ $++g' -e 's+\.html)+.md)+g' > "${f%.html}.md"
+done
diff --git a/utils/pandoc_codeblock.lua b/utils/pandoc_codeblock.lua
new file mode 100644
index 0000000000..ebdce56c2d
--- /dev/null
+++ b/utils/pandoc_codeblock.lua
@@ -0,0 +1,8 @@
+-- Pandoc Lua filter to handle code blocks
+-- Test cases
+-- raster/r.sun/r.sun.html
+
+-- Function to convert code blocks to markdown
+function CodeBlock (cb)
+  return pandoc.RawBlock('markdown', '```bash\n' .. cb.text .. '\n```\n')
+end