-
Notifications
You must be signed in to change notification settings - Fork 4
/
pandoc-html-md-cleanup.lua
93 lines (84 loc) · 2.73 KB
/
pandoc-html-md-cleanup.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
-- Refuse to run on pandoc releases older than 2.17.1.1.
PANDOC_VERSION:must_be_at_least '2.17.1.1'
-- pandoc's utility module; `utils.stringify` is used when rewriting headers.
local utils = require 'pandoc.utils'
--- Replace an element's attributes with an empty attribute set.
-- Elements whose `attr` field is nil or false are passed through untouched.
-- @param el element to clean (modified in place)
-- @return the same element
local function removeAttr(el)
  if not el.attr then
    -- nothing to strip on this element
    return el
  end
  el.attr = pandoc.Attr()
  return el
end
--- Build one title-cased word from its two pieces.
-- @param first leading part of the word; returned upper-cased
-- @param rest remainder of the word; returned lower-cased
-- @return `first` upper-cased followed by `rest` lower-cased
local function titleCase(first, rest)
  local head = first:upper()
  local tail = rest:lower()
  return head .. tail
end
--- Rewrite a header's content as trimmed, title-cased plain text.
-- The content is flattened with `utils.stringify`, so any inline formatting
-- inside the header is discarded.
-- @param el Header element (modified in place)
-- @return the same element
local function header(el)
  -- stringify to get only text
  local text = utils.stringify(el.content)
  -- Title-case every word: a letter followed by any run of alphanumerics,
  -- underscores or apostrophes. The parentheses discard gsub's match count.
  -- NOTE(review): %a / %w are byte-based character classes, so words with
  -- non-ASCII letters are presumably not handled as a single word -- confirm
  -- if such headers occur.
  local titled = (string.gsub(text, "(%a)([%w_']*)", titleCase))
  -- trim whitespace (front and back)
  el.content = string.match(titled, "^%s*(.-)%s*$")
  return el
end
--- Normalize the path of an image attachment.
-- Sources that contain "article_attachments" have the
-- "/hc/article_attachments/<digits>/" part replaced by `image_sub_directory`,
-- underscores turned into hyphens, and the result lower-cased. Any other
-- source is returned unchanged.
-- @param source image source string
-- @return the (possibly rewritten) source string
local function fixImageLinks(source)
  -- munge for image links. note that the image include macro automatically
  -- prepends "images/" to the source. before running the script, feel free to
  -- customize the sub-directory further, for example, "<topic>".
  -- (It is used as a gsub replacement string, so a literal "%" must be
  -- written as "%%".)
  local image_sub_directory = ""
  -- plain-text search: the needle is a fixed string, not a pattern
  if string.find(source, "article_attachments", 1, true) then
    source = string.gsub(source, "/hc/article_attachments/(%d*)/", image_sub_directory)
    source = string.gsub(source, "_", "-")
    source = source:lower()
  end
  return source
end
--- Replace an Image element with the site's image include macro.
-- The element's caption is ignored (most captions are just the filename), so
-- the macro is always emitted with an empty `alt`.
-- @param el Image element
-- @return a Str inline holding the `{% include image.html ... %}` macro text
local function image(el)
  local src = fixImageLinks(el.src)
  -- src is inserted verbatim between double quotes; a source containing a
  -- double quote would break the generated macro.
  return pandoc.Str([[{% include image.html alt="" file="]] .. src .. [[" %}]])
end
--- Replace every SoftBreak and LineBreak in an element's content.
-- A break with no neighbour on one side (first or last item) becomes an
-- empty Str; any other break becomes a Str holding a single space.
-- @param el element with a `content` list (modified in place)
-- @return the same element
local function removeBreaks(el)
  local inlines = el.content
  for idx, inline in ipairs(inlines) do
    local kind = inline.t
    if kind == "LineBreak" or kind == "SoftBreak" then
      -- a missing neighbour means the break sits at the edge of the content
      local at_edge = inlines[idx - 1] == nil or inlines[idx + 1] == nil
      -- "" is truthy in Lua, so the and/or idiom is safe here
      inlines[idx] = pandoc.Str(at_edge and "" or " ")
    end
  end
  return el
end
--- Replace all soft breaks, and some line breaks, in an element's content.
-- * a break that is the first or last item becomes an empty Str;
-- * any other SoftBreak becomes a single-space Str;
-- * any other LineBreak becomes a single-space Str only when the item right
--   after it is an Image; otherwise the LineBreak is kept.
-- The Image test only matches genuine Image elements: if images have already
-- been replaced by other inlines when this runs, such line breaks are kept.
-- @param el element with a `content` list (modified in place)
-- @return the same element
local function removePlainBreaks(el)
  local inlines = el.content
  for idx, inline in ipairs(inlines) do
    local kind = inline.t
    local is_soft = kind == "SoftBreak"
    local is_hard = kind == "LineBreak"
    if is_soft or is_hard then
      local following = inlines[idx + 1]
      if inlines[idx - 1] == nil or following == nil then
        -- break at the edge of the content: blank it out
        inlines[idx] = pandoc.Str("")
      elseif is_soft or following.t == "Image" then
        -- interior soft break, or hard break directly before an image
        inlines[idx] = pandoc.Str(" ")
      end
    end
  end
  return el
end
-- The filters below are applied one after another, each to the result of the
-- previous one, so their order matters.
return {
  -- strip attributes from every inline and block element
  { Inline = removeAttr, Block = removeAttr },
  -- plain, title-cased header text
  { Header = header },
  { Span = removeBreaks, Para = removeBreaks },
  -- Must run while Image elements still exist: removePlainBreaks looks for a
  -- LineBreak directly followed by an Image.
  { Plain = removePlainBreaks },
  -- Last, because it replaces every Image with a Str holding the include
  -- macro; running it earlier made the Image check above unreachable.
  { Image = image },
}