From 4ac8493a3a31f1012b7787ed8fdb191721301695 Mon Sep 17 00:00:00 2001 From: Corentin Leruth Date: Tue, 3 May 2022 08:01:29 +0200 Subject: [PATCH 1/3] Automatically fill the id attributes of headings Fixes: #251 add an --auto-identifiers boolean option (Resolution of merge conflics) --- bin/main.ml | 15 +++-- src/html.ml | 113 ++++++++++++++++++++++++++++++++++-- src/html.mli | 2 +- src/omd.ml | 2 +- src/omd.mli | 2 +- tests/blackbox/heading-id.t | 32 ++++++++++ tests/omd.ml | 3 +- 7 files changed, 157 insertions(+), 12 deletions(-) create mode 100644 tests/blackbox/heading-id.t diff --git a/bin/main.ml b/bin/main.ml index 00d3efb9..5db8769c 100644 --- a/bin/main.ml +++ b/bin/main.ml @@ -18,9 +18,9 @@ let with_open_out fn f = close_out_noerr oc; raise e -let process ic oc = +let process ?auto_identifiers ic oc = let md = Omd.of_channel ic in - output_string oc (Omd.to_html md) + output_string oc (Omd.to_html ?auto_identifiers md) let print_version () = let version = @@ -33,11 +33,15 @@ let print_version () = let input = ref [] let output = ref "" +let auto_identifiers = ref None let spec = [ ( "-o" , Arg.Set_string output , " file.html Specify the output file (default is stdout)." ) + ; ( "--auto-identifiers" + , Arg.Bool (fun x -> auto_identifiers := Some x) + , " Should identifiers be automatically assigned to headings." ) ; ( "--version" , Arg.Unit print_version , " Display the version of the currently installed omd." ) @@ -54,10 +58,13 @@ let main () = let with_output f = if !output = "" then f stdout else with_open_out !output f in + let auto_identifiers = !auto_identifiers in with_output @@ fun oc -> - if !input = [] then process stdin oc + if !input = [] then process ?auto_identifiers stdin oc else - let f filename = with_open_in filename @@ fun ic -> process ic oc in + let f filename = + with_open_in filename @@ fun ic -> process ?auto_identifiers ic oc + in List.(iter f (rev !input)) let () = diff --git a/src/html.ml b/src/html.ml index 1dbe23ad..dd817d34 100644 --- a/src/html.ml +++ b/src/html.ml @@ -85,6 +85,78 @@ let escape_uri s = s; Buffer.contents b +let trim_start_while p s = + let start = ref true in + let b = Buffer.create (String.length s) in + Uutf.String.fold_utf_8 + (fun () _ -> function + | `Malformed _ -> Buffer.add_string b s + | `Uchar u when p u && !start -> () + | `Uchar u when !start -> + start := false; + Uutf.Buffer.add_utf_8 b u + | `Uchar u -> Uutf.Buffer.add_utf_8 b u) + () + s; + Buffer.contents b + +let underscore = Uchar.of_char '_' +let hyphen = Uchar.of_char '-' +let period = Uchar.of_char '.' +let is_white_space = Uucp.White.is_white_space +let is_alphabetic = Uucp.Alpha.is_alphabetic +let is_hex_digit = Uucp.Num.is_hex_digit + +module Identifiers : sig + type t + + val empty : t + + val touch : string -> t -> int * t + (** Bump the frequency count for the given string. + It returns the previous count (before bumping) *) +end = struct + module SMap = Map.Make (String) + + type t = int SMap.t + + let empty = SMap.empty + let count s t = match SMap.find_opt s t with None -> 0 | Some x -> x + let incr s t = SMap.add s (count s t + 1) t + + let touch s t = + let count = count s t in + (count, incr s t) +end + +(* Based on pandoc algorithm to derive id's. + See: https://pandoc.org/MANUAL.html#extension-auto_identifiers *) +let slugify s = + let s = trim_start_while (fun c -> not (is_alphabetic c)) s in + let length = String.length s in + let b = Buffer.create length in + let last_is_ws = ref false in + let add_to_buffer u = + if !last_is_ws = true then begin + Uutf.Buffer.add_utf_8 b (Uchar.of_char '-'); + last_is_ws := false + end; + Uutf.Buffer.add_utf_8 b u + in + let fold () _ = function + | `Malformed _ -> add_to_buffer Uutf.u_rep + | `Uchar u when is_white_space u && not !last_is_ws -> last_is_ws := true + | `Uchar u when is_white_space u && !last_is_ws -> () + | `Uchar u -> + (if is_alphabetic u || is_hex_digit u then + match Uucp.Case.Map.to_lower u with + | `Self -> add_to_buffer u + | `Uchars us -> List.iter add_to_buffer us); + if u = underscore || u = hyphen || u = period then add_to_buffer u + in + Uutf.String.fold_utf_8 fold () s; + Buffer.contents b + let to_plain_text t = let buf = Buffer.create 1024 in let rec go : _ inline -> unit = function @@ -177,9 +249,13 @@ let table_body headers rows = row))) rows)) -let rec block = function +let rec block ~auto_identifiers = function | Blockquote (attr, q) -> - elt Block "blockquote" attr (Some (concat nl (concat_map block q))) + elt + Block + "blockquote" + attr + (Some (concat nl (concat_map (block ~auto_identifiers) q))) | Paragraph (attr, md) -> elt Block "p" attr (Some (inline md)) | List (attr, ty, sp, bl) -> let name = match ty with Ordered _ -> "ol" | Bullet _ -> "ul" in @@ -192,7 +268,7 @@ let rec block = function let block' t = match (t, sp) with | Paragraph (_, t), Tight -> concat (inline t) nl - | _ -> block t + | _ -> block ~auto_identifiers t in let nl = if sp = Tight then Null else nl in elt Block "li" [] (Some (concat nl (concat_map block' t))) @@ -235,7 +311,36 @@ let rec block = function attr (Some (concat (table_header headers) (table_body headers rows))) -let of_doc doc = concat_map block doc +let of_doc ?(auto_identifiers = true) doc = + let identifiers = Identifiers.empty in + let f identifiers = function + | Heading (attr, level, text) -> + let attr, identifiers = + if (not auto_identifiers) || List.mem_assoc "id" attr then + (attr, identifiers) + else + let id = slugify (to_plain_text text) in + (* Default identifier if empty. It matches what pandoc does. *) + let id = if id = "" then "section" else id in + let count, identifiers = Identifiers.touch id identifiers in + let id = + if count = 0 then id else Printf.sprintf "%s-%i" id count + in + (("id", id) :: attr, identifiers) + in + (Heading (attr, level, text), identifiers) + | _ as c -> (c, identifiers) + in + let html, _ = + List.fold_left + (fun (accu, ids) x -> + let x', ids = f ids x in + let el = concat accu (block ~auto_identifiers x') in + (el, ids)) + (Null, identifiers) + doc + in + html let to_string t = let buf = Buffer.create 1024 in diff --git a/src/html.mli b/src/html.mli index da27c659..9973a282 100644 --- a/src/html.mli +++ b/src/html.mli @@ -12,5 +12,5 @@ type t = | Null | Concat of t * t -val of_doc : attributes block list -> t +val of_doc : ?auto_identifiers:bool -> attributes block list -> t val to_string : t -> string diff --git a/src/omd.ml b/src/omd.ml index 63cc80eb..1cbb406e 100644 --- a/src/omd.ml +++ b/src/omd.ml @@ -26,5 +26,5 @@ let parse_inlines (md, defs) : doc = let of_channel ic : doc = parse_inlines (Block_parser.Pre.of_channel ic) let of_string s = parse_inlines (Block_parser.Pre.of_string s) -let to_html (doc : doc) = Html.to_string (Html.of_doc doc) +let to_html ?auto_identifiers doc = Html.to_string (Html.of_doc ?auto_identifiers doc) let to_sexp ast = Format.asprintf "@[%a@]@." Sexp.print (Sexp.create ast) diff --git a/src/omd.mli b/src/omd.mli index bcfb4df7..b21951dc 100644 --- a/src/omd.mli +++ b/src/omd.mli @@ -21,5 +21,5 @@ val toc : ?start:int list -> ?depth:int -> doc -> doc val of_channel : in_channel -> doc val of_string : string -> doc -val to_html : doc -> string +val to_html : ?auto_identifiers:bool -> doc -> string val to_sexp : doc -> string diff --git a/tests/blackbox/heading-id.t b/tests/blackbox/heading-id.t new file mode 100644 index 00000000..b6c09258 --- /dev/null +++ b/tests/blackbox/heading-id.t @@ -0,0 +1,32 @@ + $ omd << "MD" + > ### This is an Header Without Id + > ### 1 2 Header that starts with 2 numbers + > ### Header with an id {#header-id} + > ### Maître d'hÔtel 😬 + > ### 👋👋 ÔHey! 👋👋 + > ### 👋👋 ÔHey! 👋👋 + > ### *Dogs*?--in *my* house? + > ### [HTML], [S5], or [RTF]? + > ### 3. Applications + > ### hello.world + > ### -hello- + > ### with multiple spaces + > ### with   spaces + > ### 33 + > ### + > MD +

This is an Header Without Id

+

1 2 Header that starts with 2 numbers

+

Header with an id

+

Maître d'hÔtel 😬

+

👋👋 ÔHey! 👋👋

+

👋👋 ÔHey! 👋👋

+

Dogs?--in my house?

+

[HTML], [S5], or [RTF]?

+

3. Applications

+

hello.world

+

-hello-

+

with multiple spaces

+

with   spaces

+

33

+

diff --git a/tests/omd.ml b/tests/omd.ml index d0d86eba..efef26f8 100644 --- a/tests/omd.ml +++ b/tests/omd.ml @@ -19,4 +19,5 @@ let with_open_in fn f = let () = with_open_in Sys.argv.(1) @@ fun ic -> - print_string (normalize_html (Omd.to_html (Omd.of_channel ic))) + print_string + (normalize_html (Omd.to_html ~auto_identifiers:false (Omd.of_channel ic))) From 34e396a77b83ac9e761f30a5b01bec6da98e17dd Mon Sep 17 00:00:00 2001 From: Shon Feder Date: Sun, 20 Nov 2022 18:55:39 -0500 Subject: [PATCH 2/3] Update changelog --- CHANGES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 3166ebd0..793421c5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,8 @@ unreleased ---------- +- Support generation of identifiers in headers (#294, @tatchi) + - Support GitHub-Flavoured Markdown tables (#292, @bobatkey) - Update parser to support CommonMark Spec 0.30 (#266, @SquidDev) From 04f098e5d1ae5cca350ccc08c86c4328d131a092 Mon Sep 17 00:00:00 2001 From: Shon Feder Date: Sun, 20 Nov 2022 19:14:51 -0500 Subject: [PATCH 3/3] Fix formatting and tests --- src/omd.ml | 5 ++++- tests/expect_tests.ml | 5 +++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/omd.ml b/src/omd.ml index 1cbb406e..97b0efd5 100644 --- a/src/omd.ml +++ b/src/omd.ml @@ -26,5 +26,8 @@ let parse_inlines (md, defs) : doc = let of_channel ic : doc = parse_inlines (Block_parser.Pre.of_channel ic) let of_string s = parse_inlines (Block_parser.Pre.of_string s) -let to_html ?auto_identifiers doc = Html.to_string (Html.of_doc ?auto_identifiers doc) + +let to_html ?auto_identifiers doc = + Html.to_string (Html.of_doc ?auto_identifiers doc) + let to_sexp ast = Format.asprintf "@[%a@]@." Sexp.print (Sexp.create ast) diff --git a/tests/expect_tests.ml b/tests/expect_tests.ml index 4df33cb0..4940fae0 100644 --- a/tests/expect_tests.ml +++ b/tests/expect_tests.ml @@ -34,9 +34,10 @@ let%expect_test "construct inline elements" = let%expect_test "construct headings" = show Omd.Ctor.[ h 1 ~attrs:[ ("class", "my-class") ] [ txt "Heading 1" ] ]; - [%expect {|

Heading 1

|}]; + [%expect {|

Heading 1

|}]; show Omd.Ctor.[ h 6 [ txt "Heading 6"; em "with emphasis!" ] ]; - [%expect {|
Heading 6with emphasis!
|}] + [%expect + {|
Heading 6with emphasis!
|}] let%expect_test "construct lists" = show