Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automatically fill the id attributes of headings #294

Merged
merged 3 commits into from
Nov 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
unreleased
----------

- Support generation of identifiers in headers (#294, @tatchi)

- Support GitHub-Flavoured Markdown tables (#292, @bobatkey)

- Update parser to support CommonMark Spec 0.30 (#266, @SquidDev)
Expand Down
15 changes: 11 additions & 4 deletions bin/main.ml
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ let with_open_out fn f =
close_out_noerr oc;
raise e

let process ic oc =
let process ?auto_identifiers ic oc =
let md = Omd.of_channel ic in
output_string oc (Omd.to_html md)
output_string oc (Omd.to_html ?auto_identifiers md)

let print_version () =
let version =
Expand All @@ -33,11 +33,15 @@ let print_version () =

let input = ref []
let output = ref ""
let auto_identifiers = ref None

let spec =
[ ( "-o"
, Arg.Set_string output
, " file.html Specify the output file (default is stdout)." )
; ( "--auto-identifiers"
, Arg.Bool (fun x -> auto_identifiers := Some x)
, " Should identifiers be automatically assigned to headings." )
; ( "--version"
, Arg.Unit print_version
, " Display the version of the currently installed omd." )
Expand All @@ -54,10 +58,13 @@ let main () =
let with_output f =
if !output = "" then f stdout else with_open_out !output f
in
let auto_identifiers = !auto_identifiers in
with_output @@ fun oc ->
if !input = [] then process stdin oc
if !input = [] then process ?auto_identifiers stdin oc
else
let f filename = with_open_in filename @@ fun ic -> process ic oc in
let f filename =
with_open_in filename @@ fun ic -> process ?auto_identifiers ic oc
in
List.(iter f (rev !input))

let () =
Expand Down
113 changes: 109 additions & 4 deletions src/html.ml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,78 @@ let escape_uri s =
s;
Buffer.contents b

let trim_start_while p s =
let start = ref true in
let b = Buffer.create (String.length s) in
Uutf.String.fold_utf_8
(fun () _ -> function
| `Malformed _ -> Buffer.add_string b s
| `Uchar u when p u && !start -> ()
| `Uchar u when !start ->
start := false;
Uutf.Buffer.add_utf_8 b u
| `Uchar u -> Uutf.Buffer.add_utf_8 b u)
()
s;
Buffer.contents b

let underscore = Uchar.of_char '_'
let hyphen = Uchar.of_char '-'
let period = Uchar.of_char '.'
let is_white_space = Uucp.White.is_white_space
let is_alphabetic = Uucp.Alpha.is_alphabetic
let is_hex_digit = Uucp.Num.is_hex_digit

module Identifiers : sig
type t

val empty : t

val touch : string -> t -> int * t
(** Bump the frequency count for the given string.
It returns the previous count (before bumping) *)
end = struct
module SMap = Map.Make (String)

type t = int SMap.t

let empty = SMap.empty
let count s t = match SMap.find_opt s t with None -> 0 | Some x -> x
let incr s t = SMap.add s (count s t + 1) t

let touch s t =
let count = count s t in
(count, incr s t)
end

(* Based on pandoc algorithm to derive id's.
See: https://pandoc.org/MANUAL.html#extension-auto_identifiers *)
let slugify s =
let s = trim_start_while (fun c -> not (is_alphabetic c)) s in
let length = String.length s in
let b = Buffer.create length in
let last_is_ws = ref false in
let add_to_buffer u =
if !last_is_ws = true then begin
Uutf.Buffer.add_utf_8 b (Uchar.of_char '-');
last_is_ws := false
end;
Uutf.Buffer.add_utf_8 b u
in
let fold () _ = function
| `Malformed _ -> add_to_buffer Uutf.u_rep
| `Uchar u when is_white_space u && not !last_is_ws -> last_is_ws := true
| `Uchar u when is_white_space u && !last_is_ws -> ()
| `Uchar u ->
(if is_alphabetic u || is_hex_digit u then
match Uucp.Case.Map.to_lower u with
| `Self -> add_to_buffer u
| `Uchars us -> List.iter add_to_buffer us);
if u = underscore || u = hyphen || u = period then add_to_buffer u
in
Uutf.String.fold_utf_8 fold () s;
Buffer.contents b

let to_plain_text t =
let buf = Buffer.create 1024 in
let rec go : _ inline -> unit = function
Expand Down Expand Up @@ -177,9 +249,13 @@ let table_body headers rows =
row)))
rows))

let rec block = function
let rec block ~auto_identifiers = function
| Blockquote (attr, q) ->
elt Block "blockquote" attr (Some (concat nl (concat_map block q)))
elt
Block
"blockquote"
attr
(Some (concat nl (concat_map (block ~auto_identifiers) q)))
| Paragraph (attr, md) -> elt Block "p" attr (Some (inline md))
| List (attr, ty, sp, bl) ->
let name = match ty with Ordered _ -> "ol" | Bullet _ -> "ul" in
Expand All @@ -192,7 +268,7 @@ let rec block = function
let block' t =
match (t, sp) with
| Paragraph (_, t), Tight -> concat (inline t) nl
| _ -> block t
| _ -> block ~auto_identifiers t
in
let nl = if sp = Tight then Null else nl in
elt Block "li" [] (Some (concat nl (concat_map block' t)))
Expand Down Expand Up @@ -235,7 +311,36 @@ let rec block = function
attr
(Some (concat (table_header headers) (table_body headers rows)))

let of_doc doc = concat_map block doc
let of_doc ?(auto_identifiers = true) doc =
let identifiers = Identifiers.empty in
let f identifiers = function
| Heading (attr, level, text) ->
let attr, identifiers =
if (not auto_identifiers) || List.mem_assoc "id" attr then
(attr, identifiers)
else
let id = slugify (to_plain_text text) in
(* Default identifier if empty. It matches what pandoc does. *)
let id = if id = "" then "section" else id in
let count, identifiers = Identifiers.touch id identifiers in
let id =
if count = 0 then id else Printf.sprintf "%s-%i" id count
in
(("id", id) :: attr, identifiers)
in
(Heading (attr, level, text), identifiers)
| _ as c -> (c, identifiers)
in
let html, _ =
List.fold_left
(fun (accu, ids) x ->
let x', ids = f ids x in
let el = concat accu (block ~auto_identifiers x') in
(el, ids))
(Null, identifiers)
doc
in
html

let to_string t =
let buf = Buffer.create 1024 in
Expand Down
2 changes: 1 addition & 1 deletion src/html.mli
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ type t =
| Null
| Concat of t * t

val of_doc : attributes block list -> t
val of_doc : ?auto_identifiers:bool -> attributes block list -> t
val to_string : t -> string
5 changes: 4 additions & 1 deletion src/omd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,8 @@ let parse_inlines (md, defs) : doc =

let of_channel ic : doc = parse_inlines (Block_parser.Pre.of_channel ic)
let of_string s = parse_inlines (Block_parser.Pre.of_string s)
let to_html (doc : doc) = Html.to_string (Html.of_doc doc)

let to_html ?auto_identifiers doc =
Html.to_string (Html.of_doc ?auto_identifiers doc)

let to_sexp ast = Format.asprintf "@[%a@]@." Sexp.print (Sexp.create ast)
2 changes: 1 addition & 1 deletion src/omd.mli
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,5 @@ val toc : ?start:int list -> ?depth:int -> doc -> doc

val of_channel : in_channel -> doc
val of_string : string -> doc
val to_html : doc -> string
val to_html : ?auto_identifiers:bool -> doc -> string
val to_sexp : doc -> string
32 changes: 32 additions & 0 deletions tests/blackbox/heading-id.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
$ omd << "MD"
> ### This is an Header Without Id
> ### 1 2 Header that starts with 2 numbers
> ### Header with an id {#header-id}
> ### Maître d'hÔtel 😬
> ### 👋👋 ÔHey! 👋👋
> ### 👋👋 ÔHey! 👋👋
> ### *Dogs*?--in *my* house?
> ### [HTML], [S5], or [RTF]?
> ### 3. Applications
> ### hello.world
> ### -hello-
> ### with multiple spaces
> ### with&nbsp;&nbsp;&nbsp;spaces
> ### 33
> ###
> MD
<h3 id="this-is-an-header-without-id">This is an Header Without Id</h3>
<h3 id="header-that-starts-with-2-numbers">1 2 Header that starts with 2 numbers</h3>
<h3 id="header-id">Header with an id</h3>
<h3 id="maître-dhôtel">Maître d'hÔtel 😬</h3>
<h3 id="ôhey">👋👋 ÔHey! 👋👋</h3>
<h3 id="ôhey-1">👋👋 ÔHey! 👋👋</h3>
<h3 id="dogs--in-my-house"><em>Dogs</em>?--in <em>my</em> house?</h3>
<h3 id="html-s5-or-rtf">[HTML], [S5], or [RTF]?</h3>
<h3 id="applications">3. Applications</h3>
<h3 id="hello.world">hello.world</h3>
<h3 id="hello-">-hello-</h3>
<h3 id="with-multiple-spaces">with multiple spaces</h3>
<h3 id="with-spaces">with   spaces</h3>
<h3 id="section">33</h3>
<h3 id="section-1"></h3>
5 changes: 3 additions & 2 deletions tests/expect_tests.ml
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@ let%expect_test "construct inline elements" =

let%expect_test "construct headings" =
show Omd.Ctor.[ h 1 ~attrs:[ ("class", "my-class") ] [ txt "Heading 1" ] ];
[%expect {| <h1 class="my-class">Heading 1</h1> |}];
[%expect {| <h1 id="heading-1" class="my-class">Heading 1</h1> |}];
show Omd.Ctor.[ h 6 [ txt "Heading 6"; em "with emphasis!" ] ];
[%expect {| <h6>Heading 6<em>with emphasis!</em></h6> |}]
[%expect
{| <h6 id="heading-6with-emphasis">Heading 6<em>with emphasis!</em></h6> |}]

let%expect_test "construct lists" =
show
Expand Down
3 changes: 2 additions & 1 deletion tests/omd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ let with_open_in fn f =

let () =
with_open_in Sys.argv.(1) @@ fun ic ->
print_string (normalize_html (Omd.to_html (Omd.of_channel ic)))
print_string
(normalize_html (Omd.to_html ~auto_identifiers:false (Omd.of_channel ic)))