From 1d8ee00a280358e24684b0eef65a53a1e9401dd4 Mon Sep 17 00:00:00 2001 From: Milton Mazzarri Date: Thu, 25 Jan 2024 21:27:02 -0600 Subject: [PATCH] epub: fix fatal errors while parsing EPUB files After generating the EPUB file for the Elixir docs with this version, and reviewing the result with `epubcheck`, I got the following summary: ```console $ epubcheck doc/elixir/Elixir.epub --json elixir_docs.json (base) Check finished with errors Messages: 0 fatals / 141 errors / 0 warnings / 0 infos ``` If you compare the previous result with what we had in #1851 ``` Messages: 9 fatals / 425 errors / 0 warnings / 0 infos ``` you can see that we no longer have messages with `fatal` severity and that we have considerably reduced the number of errors =) I manually checked the generated EPUB on Apple Books and the previously truncated sections are fixed: I no longer see the banner _Below is a rendering of the page up to the first error_, and the links to the different anchors seem to work. Fixes: #1851 --- lib/ex_doc/formatter/epub.ex | 29 +++++++++++++++++++++++++++-- test/ex_doc/formatter/epub_test.exs | 18 ++++++++++++++++++ test/fixtures/README.md | 4 ++++ 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/lib/ex_doc/formatter/epub.ex b/lib/ex_doc/formatter/epub.ex index 6c14b693f..5d4111b72 100644 --- a/lib/ex_doc/formatter/epub.ex +++ b/lib/ex_doc/formatter/epub.ex @@ -50,6 +50,23 @@ defmodule ExDoc.Formatter.EPUB do Path.relative_to_cwd(epub) end + @doc """ + Helper that replaces anchor names and links that could potentially cause problems on EPUB documents + + This helper replaces all the `&` with `&amp;` found in anchors like + `Kernel.xhtml#&&/2` or `

...

` + + These anchor names cause a fatal error while EPUB readers parse the files, + resulting in truncated content. + + For more details, see: https://github.com/elixir-lang/ex_doc/issues/1851 + """ + def fix_anchors(content) do + content + |> String.replace(~r{id="&+/\d+[^"]*}, &String.replace(&1, "&", "&")) + |> String.replace(~r{href="[^#"]*#&+/\d+[^"]*}, &String.replace(&1, "&", "&")) + end + defp normalize_config(config) do output = config.output @@ -63,7 +80,11 @@ defmodule ExDoc.Formatter.EPUB do for {_title, extras} <- config.extras do Enum.each(extras, fn %{id: id, title: title, title_content: title_content, content: content} -> output = "#{config.output}/OEBPS/#{id}.xhtml" - html = Templates.extra_template(config, title, title_content, content) + + html = + config + |> Templates.extra_template(title, title_content, content) + |> fix_anchors() if File.regular?(output) do ExDoc.Utils.warn("file #{Path.relative_to_cwd(output)} already exists", []) @@ -157,7 +178,11 @@ defmodule ExDoc.Formatter.EPUB do end defp generate_module_page(module_node, config) do - content = Templates.module_page(config, module_node) + content = + config + |> Templates.module_page(module_node) + |> fix_anchors() + File.write("#{config.output}/OEBPS/#{module_node.id}.xhtml", content) end diff --git a/test/ex_doc/formatter/epub_test.exs b/test/ex_doc/formatter/epub_test.exs index c1f02225c..eeb8681c9 100644 --- a/test/ex_doc/formatter/epub_test.exs +++ b/test/ex_doc/formatter/epub_test.exs @@ -151,6 +151,9 @@ defmodule ExDoc.Formatter.EPUBTest do assert content =~ ~r{TypesAndSpecs.Sub} + assert content =~ + ~r{&&/2} + content = File.read!(tmp_dir <> "/epub/OEBPS/nav.xhtml") assert content =~ ~r{
  • README
  • } end @@ -248,4 +251,19 @@ defmodule ExDoc.Formatter.EPUBTest do after File.rm_rf!("test/tmp/epub_assets") end + + describe "fix_anchors/1" do + test "adapts anchor names to avoid parsing errors from EPUB readers" do + for {source, expected} <- [ + {~S|its documentation|, + ~S|its documentation|}, + {~S|&&/2|, + ~S|&&/2|}, + {~S|

    title

    |, + ~S|

    title

    |} + ] do + assert ExDoc.Formatter.EPUB.fix_anchors(source) == expected + end + end + end end diff --git a/test/fixtures/README.md b/test/fixtures/README.md index 6963347fa..ab032b853 100644 --- a/test/fixtures/README.md +++ b/test/fixtures/README.md @@ -15,3 +15,7 @@ hello ## more > than

    raw content

    + +The following text includes a reference to an anchor that causes problems in EPUB documents. + +To remove this anti-pattern, we can replace `&&/2`, `||/2`, and `!/1` by `and/2`, `or/2`, and `not/1` respectively.