Skip to content

Commit

Permalink
Add support for diff pdfs (#161)
Browse files Browse the repository at this point in the history
* Add support for diff pdfs

This change adds new parameters:

--diffpdf
--diffbase
--diffpdflog

These parameters control the creation of a pdf diff-document against the
provided ref, using latexdiff.

* update invocation of --diff in action
  • Loading branch information
chrisfenner authored Sep 5, 2024
1 parent 73d7936 commit 9d6f51c
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 32 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/render-samples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ jobs:
uses: trustedcomputinggroup/markdown@latest
with:
input-md: guide.tcg
extra-build-options: "--versioned_filenames --pr_number=${{ github.event.number }} --pr_repo=${{ github.repository }} --diff=${{ github.event.pull_request.base.sha }}"
extra-build-options: "--versioned_filenames --pr_number=${{ github.event.number }} --pr_repo=${{ github.repository }} --diffbase=${{ github.event.pull_request.base.sha }}"
output-pdf: guide.pdf
output-tex: guide.tex
output-docx: guide.docx
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@
*.lot
*.toc
*.fdb_latexmk
*.upa
*.upb
10 changes: 10 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ ENV PATH="/usr/local/texlive/bin/aarch64-linux:/usr/local/texlive/bin/x86_64-lin

# Packages that are needed despite not being used explicitly by the template:
# bigfoot, catchfile, fancyvrb, footmisc, hardwrap, lineno, ltablex, latexmk, needspace, pgf, zref
# Package dependencies introduced by latexdiff:
# changebar, datetime2, latexdiff, listings, marginnote, pdfcomment, soulpos, ulem
RUN tlmgr update --self && tlmgr install \
accsupp \
adjustbox \
Expand All @@ -165,6 +167,8 @@ RUN tlmgr update --self && tlmgr install \
booktabs \
caption \
catchfile \
changebar \
datetime2 \
draftwatermark \
enumitem \
etoolbox \
Expand All @@ -181,29 +185,35 @@ RUN tlmgr update --self && tlmgr install \
hyperref \
hyphenat \
koma-script \
latexdiff \
latexmk \
lineno \
listings \
ltablex \
lualatex-math \
luatex \
luatex85 \
luatexbase \
makecell \
marginnote \
mathtools \
mdframed \
microtype \
multirow \
needspace \
newunicodechar \
pagecolor \
pdfcomment \
pdflscape \
pgf \
polyglossia \
ragged2e \
selnolig \
setspace \
soulpos \
textpos \
titling \
ulem \
unicode-math \
upquote \
varwidth \
Expand Down
135 changes: 104 additions & 31 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ RESOURCE_DIR="/" #default to root of pandoc container buildout
DO_GITVERSION="yes"
DO_GITSTATUS="yes"
PDF_OUTPUT=""
DIFFPDF_OUTPUT=""
DOCX_OUTPUT=""
HTML_OUTPUT=""
LATEX_OUTPUT=""
Expand Down Expand Up @@ -41,7 +42,9 @@ print_usage() {
echo " --latex=output: enable output of latex and specify the output file name."
echo " --html=output: enable output of html and specify the output file name."
echo " --pdflog=output: enable logging of pdf engine and specify the output file name."
echo " --diff=commit: create diff documents against the provided commit"
# Diff options: --diffpdf names the output file and only works together with
# --diffbase, which names the Git ref to diff against.
echo " --diffpdf=output: enable output of pdf diff and specify the output file name (requires --diffbase)"
echo " --diffbase=ref: create diff documents against the provided commit (no effect if --diffpdf is not provided)"
echo " --diffpdflog=output: enable logging of pdf engine during diffing and specify the output file name."
echo
echo "Miscellaneous"
echo " --resourcedir=dir: Set the resource directory, defaults to root for pandoc containers"
Expand All @@ -57,7 +60,7 @@ print_usage() {
}


if ! options=$(getopt --longoptions=help,puppeteer,gitversion,gitstatus,nogitversion,table_rules,plain_quotes,versioned_filenames,pr_number:,pr_repo:,diff:,pdf:,latex:,pdflog:,pdf_engine:,docx:,html:,resourcedir: --options="" -- "$@"); then
if ! options=$(getopt --longoptions=help,puppeteer,gitversion,gitstatus,nogitversion,table_rules,plain_quotes,versioned_filenames,pr_number:,pr_repo:,diffbase:,pdf:,diffpdf:,diffpdflog:,latex:,pdflog:,pdf_engine:,docx:,html:,resourcedir: --options="" -- "$@"); then
echo "Incorrect options provided"
print_usage
exit 1
Expand All @@ -66,7 +69,7 @@ fi
eval set -- "${options}"
while true; do
case "$1" in
--diff)
--diffbase)
DIFFBASE="${2}"
shift 2
;;
Expand Down Expand Up @@ -115,6 +118,14 @@ while true; do
PDF_OUTPUT="${2}"
shift 2
;;
--diffpdf)
DIFFPDF_OUTPUT="${2}"
shift 2
;;
--diffpdflog)
DIFFPDFLOG_OUTPUT="${2}"
shift 2
;;
--html)
HTML_OUTPUT="${2}"
shift 2
Expand Down Expand Up @@ -157,6 +168,7 @@ readonly PR_REPO
readonly DIFFBASE
readonly PDF_ENGINE
readonly PDFLOG_OUTPUT
readonly DIFFPDFLOG_OUTPUT

shift "$(( OPTIND - 1 ))"

Expand Down Expand Up @@ -198,6 +210,25 @@ mkdir -p "${BUILD_DIR}"
cp -r . "${BUILD_DIR}"
cd "${BUILD_DIR}"

# Let git work
git config --global --add safe.directory "${BUILD_DIR}"

# Make sure the diff arguments make sense before any rendering starts.
if [ -n "${DIFFPDF_OUTPUT}" ]; then
  # --diffbase must be provided, and it must resolve to a commit in Git.
  if [ -z "${DIFFBASE}" ]; then
    >&2 echo "--diffpdf was provided, but --diffbase was not."
    print_usage
    exit 1
  fi
  # Test the command directly rather than inspecting $? afterwards.
  # The ^{commit} suffix peels tags down to the commit they point at and
  # rejects objects that are not commits (blobs, trees); the diff step later
  # hands this ref to `git reset --hard`, which needs a commit.
  if ! git rev-parse --verify --quiet "${DIFFBASE}^{commit}" > /dev/null 2>&1; then
    >&2 echo "--diffbase was provided, but it was not a valid Git commit, tag, or branch name."
    print_usage
    exit 1
  fi
fi

# Get the default browser
if ! browser=$(command -v "chromium-browser"); then
if ! browser=$(command -v "chromium"); then
Expand All @@ -210,8 +241,6 @@ fi
# figure out git version and revision if needed.
EXTRA_PANDOC_OPTIONS=""
if test "${DO_GITVERSION}" == "yes"; then
git config --global --add safe.directory /workspace

# TODO: Should we fail if dirty?
raw_version="$(git describe --always --tags)"
echo "Git version: ${raw_version}"
Expand Down Expand Up @@ -348,6 +377,9 @@ if [ "${VERSIONED_FILENAMES}" == "yes" ]; then
if [ ! -z "${PDF_OUTPUT}" ]; then
PDF_OUTPUT=$(prefix_filename "${version_prefix}" "${PDF_OUTPUT}")
fi
if [ ! -z "${DIFFPDF_OUTPUT}" ]; then
DIFFPDF_OUTPUT=$(prefix_filename "${DIFFBASE}_to_${version_prefix}" "${DIFFPDF_OUTPUT}")
fi
if [ ! -z "${LATEX_OUTPUT}" ]; then
LATEX_OUTPUT=$(prefix_filename "${version_prefix}" "${LATEX_OUTPUT}")
fi
Expand All @@ -356,6 +388,7 @@ if [ "${VERSIONED_FILENAMES}" == "yes" ]; then
fi
fi
readonly PDF_OUTPUT
readonly DIFFPDF_OUTPUT
readonly DOCX_OUTPUT
readonly HTML_OUTPUT
readonly LATEX_OUTPUT
Expand All @@ -364,16 +397,15 @@ echo "Starting Build with"
echo "file: ${INPUT_FILE}"
echo "docx: ${DOCX_OUTPUT:-none}"
echo "pdf: ${PDF_OUTPUT:-none} (engine: ${PDF_ENGINE})"
echo "diff pdf: ${DIFFPDF_OUTPUT:-none} (engine: ${PDF_ENGINE})"
# Report the requested LaTeX and HTML outputs. These must read the same
# variables the option parser fills in (LATEX_OUTPUT / HTML_OUTPUT); the
# previous misspelled lowercase names were never set, so "none" was always shown.
echo "latex: ${LATEX_OUTPUT:-none}"
echo "html: ${HTML_OUTPUT:-none}"
echo "resource dir: ${RESOURCE_DIR}"
echo "build dir: ${BUILD_DIR}"
echo "browser: ${browser}"
echo "use git version: ${DO_GITVERSION}"
echo "use table rules: ${TABLE_RULES}"
echo "make block quotes Informative Text: ${BLOCK_QUOTES_ARE_INFORMATIVE_TEXT}"
if [ ! -z "${DIFFBASE}" ]; then
echo "diff against: ${DIFFBASE}"
echo "diff against: ${DIFFBASE} ($(git rev-parse --verify ${DIFFBASE}))"
fi
if test "${DO_GITVERSION}" == "yes"; then
echo "Git Generated Document Version Information"
Expand Down Expand Up @@ -418,18 +450,27 @@ if [ "${BLOCK_QUOTES_ARE_INFORMATIVE_TEXT}" == "yes" ]; then
EXTRA_PANDOC_OPTIONS+=" --lua-filter=informative-quote-blocks.lua"
fi

# Hacks

# \newpage is rendered as the string "\newpage" in GitHub markdown.
# Transform horizontal rules into \newpages.
# Exception: the YAML front matter of the document, so undo the instance on the first line.
# TODO: Turn this into a Pandoc filter.
sed -i.bak 's/^---$/\\newpage/g;1s/\\newpage/---/g' "${BUILD_DIR}/${INPUT_FILE}"

# Transform sections before the table of contents into section*, which does not number them.
# While we're doing this, transform the case to all-caps.
# TODO: Turn this into a Pandoc filter.
sed -i.bak '0,/\\tableofcontents/s/^# \(.*\)/\\section*\{\U\1\}/g' "${BUILD_DIR}/${INPUT_FILE}"
# Use sed to perform some basic fixups on certain input files.
do_md_fixups() {
  # Rewrites the Markdown file given as $1 in place (GNU sed; each pass leaves
  # a ".bak" copy of the file as it was before that pass).
  local md_file="$1"

  # \newpage is rendered as the string "\newpage" in GitHub markdown, so
  # authors write horizontal rules instead; turn every "---" line into \newpage.
  local -r rules_to_newpage='s/^---$/\\newpage/g'
  # Exception: line 1 opens the YAML front matter, so undo the change there.
  local -r restore_first_line='1s/\\newpage/---/g'
  # Up to and including the first \tableofcontents line, turn "# Title" into an
  # unnumbered, upper-cased \section*{TITLE}.
  local -r unnumber_front_sections='0,/\\tableofcontents/s/^# \(.*\)/\\section*\{\U\1\}/g'

  # TODO: Turn both passes into Pandoc filters.
  sed -i.bak -e "${rules_to_newpage}" -e "${restore_first_line}" "${md_file}"
  sed -i.bak -e "${unnumber_front_sections}" "${md_file}"
}
do_tex_fixups() {
  # Rewrites the LaTeX file given as $1 in place (a ".bak" copy is left behind).
  local tex_file="$1"

  # We have a "code" environment that displays everything, including comments.
  # Sometimes latexdiff injects comments that it thinks won't be displayed.
  # Delete those latexdiff comments: everything from %DIFDELCMD to end of line.
  local -r strip_difdelcmd='s/%DIFDELCMD.*//g'
  sed -i.bak -e "${strip_difdelcmd}" "${tex_file}"
}

if test "${DO_GITVERSION}" == "yes"; then
# If using the git information for versioning, grab the date from there
Expand Down Expand Up @@ -565,7 +606,7 @@ do_pdf() {

local logfile=$3
# LaTeX engines choose this filename based on TEMP_TEX_FILE's basename. It also emits a bunch of other files.
readonly temp_pdf_file="$(basename ${input%.*}).pdf"
local temp_pdf_file="$(basename ${input%.*}).pdf"

echo "Rendering PDF"
local start=$(date +%s)
Expand All @@ -580,16 +621,15 @@ do_pdf() {
# Write any LaTeX errors to stderr.
>&2 grep -A 5 "] ! " "${logfile}"

# Copy aux, lof, lot, and toc files back to the source directory so they can be cached and speed up future runs.
if [ -n "${PDFLOG_OUTPUT}" ]; then
cp "${logfile}" "${SOURCE_DIR}/${PDFLOG_OUTPUT}"
fi
cp *.aux "${SOURCE_DIR}"
cp *.lof "${SOURCE_DIR}"
cp *.lot "${SOURCE_DIR}"
cp *.toc "${SOURCE_DIR}"
# Copy aux, lof, lot, toc, upa, and upb files (if any) back to the source directory so they can be cached and speed up future runs.
cp *.aux "${SOURCE_DIR}" 2>/dev/null
cp *.lof "${SOURCE_DIR}" 2>/dev/null
cp *.lot "${SOURCE_DIR}" 2>/dev/null
cp *.toc "${SOURCE_DIR}" 2>/dev/null
cp *.upa "${SOURCE_DIR}" 2>/dev/null
cp *.upb "${SOURCE_DIR}" 2>/dev/null
# Copy converted images so they can be cached as well.
cp *.convert.pdf "${SOURCE_DIR}"
cp *.convert.pdf "${SOURCE_DIR}" 2>/dev/null
echo "Elapsed time: $(($end-$start)) seconds"
# Write any LaTeX errors to stderr.
>&2 grep -A 5 "! " "${logfile}"
Expand Down Expand Up @@ -700,7 +740,8 @@ do_html() {
# Generate .tex output if either latex or pdf formats were requested, because
# the .tex is an intermediate requirement to the pdf.
readonly TEMP_TEX_FILE="${BUILD_DIR}/${INPUT_FILE}.tex"
if [ -n "${PDF_OUTPUT}" -o -n "${LATEX_OUTPUT}" ]; then
if [ -n "${PDF_OUTPUT}" -o -n "${LATEX_OUTPUT}" -o -n "${DIFFPDF_OUTPUT}" ]; then
do_md_fixups "${BUILD_DIR}/${INPUT_FILE}"
do_latex "${BUILD_DIR}/${INPUT_FILE}" "${TEMP_TEX_FILE}"
fi
if [ -n "${LATEX_OUTPUT}" ]; then
Expand All @@ -711,6 +752,12 @@ fi
# Render the PDF (if requested) from the intermediate .tex file; the engine's
# log file is passed to do_pdf as LATEX_LOG.
readonly LATEX_LOG="${BUILD_DIR}/latex.log"
if [ -n "${PDF_OUTPUT}" ]; then
  do_pdf "${TEMP_TEX_FILE}" "${SOURCE_DIR}/${PDF_OUTPUT}" "${LATEX_LOG}"

  # Copy the logs, if requested.
  if [ -n "${PDFLOG_OUTPUT}" ]; then
    # Quote the path inside the command substitution as well, so a source
    # directory or log name containing whitespace is not word-split.
    mkdir -p "$(dirname "${SOURCE_DIR}/${PDFLOG_OUTPUT}")"
    cp "${LATEX_LOG}" "${SOURCE_DIR}/${PDFLOG_OUTPUT}"
  fi
fi

# Generate the docx output
Expand All @@ -724,6 +771,32 @@ if [ -n "${HTML_OUTPUT}" ]; then
do_html "${BUILD_DIR}/${INPUT_FILE}" "${SOURCE_DIR}/${HTML_OUTPUT}"
fi

# Generate the diff output
# Do this last so we can do whatever we want to the build directory
readonly TEMP_DIFFBASE_TEX_FILE="${BUILD_DIR}/${INPUT_FILE}.diffbase.tex"
readonly TEMP_DIFF_TEX_FILE="${BUILD_DIR}/${INPUT_FILE}.diff.tex"
readonly TEMP_LATEXDIFF_LOG="${BUILD_DIR}/latexdiff.log"
export MERMAID_FILTER_FORMAT="pdf"
if [ -n "${DIFFPDF_OUTPUT}" ]; then
  # Move the build directory's checkout to the base revision. The ref is
  # quoted so it always reaches git as a single argument.
  git reset --hard "${DIFFBASE}"

  # Render the base revision to LaTeX, diff it against the LaTeX already
  # produced for the current revision, then render the diff document.
  do_md_fixups "${BUILD_DIR}/${INPUT_FILE}"
  do_latex "${BUILD_DIR}/${INPUT_FILE}" "${TEMP_DIFFBASE_TEX_FILE}"
  latexdiff --type PDFCOMMENT --driver "${PDF_ENGINE}" "${TEMP_DIFFBASE_TEX_FILE}" "${TEMP_TEX_FILE}" > "${TEMP_DIFF_TEX_FILE}" 2>"${TEMP_LATEXDIFF_LOG}"
  do_tex_fixups "${TEMP_DIFF_TEX_FILE}"
  do_pdf "${TEMP_DIFF_TEX_FILE}" "${SOURCE_DIR}/${DIFFPDF_OUTPUT}" "${LATEX_LOG}"

  # Copy the logs, if requested. Note that this file gets the latexdiff and PDF driver output.
  if [ -n "${DIFFPDFLOG_OUTPUT}" ]; then
    # Quote the path inside the command substitution so whitespace in the
    # source directory or log name does not split it.
    mkdir -p "$(dirname "${SOURCE_DIR}/${DIFFPDFLOG_OUTPUT}")"
    {
      echo "latexdiff output:"
      cat "${TEMP_LATEXDIFF_LOG}"
      echo ""
      echo "${PDF_ENGINE} output:"
      cat "${LATEX_LOG}"
    } > "${SOURCE_DIR}/${DIFFPDFLOG_OUTPUT}"
  fi
fi

if [ "${FAILED}" = "true" ]; then
echo "Overall workflow failed"
exit 1
Expand Down

0 comments on commit 9d6f51c

Please sign in to comment.