Skip to content

Commit

Permalink
unicode version of slugify
Browse files Browse the repository at this point in the history
  • Loading branch information
tatchi committed May 16, 2022
1 parent 0570385 commit eec7388
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 14 deletions.
1 change: 1 addition & 0 deletions src/dune
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; Build definition for the omd library.
(library
(name omd)
(public_name omd)
; Unicode libraries referenced by slugify in src/html.ml (Uucp.* and Uutf.*).
(libraries uucp uutf)
; Default flags, with compiler warning 30 switched off.
(flags :standard -w -30))

(rule
Expand Down
44 changes: 30 additions & 14 deletions src/html.ml
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,6 @@ let escape_uri s =
s;
Buffer.contents b

(** [is_alphabetical c] is [true] exactly when [c] is an ASCII letter, in
    either case; every other byte yields [false]. *)
let is_alphabetical c =
  (* [Char.lowercase_ascii] only remaps 'A'..'Z', so a single lowercase range
     test covers both cases without admitting any other character. *)
  match Char.lowercase_ascii c with
  | 'a' .. 'z' -> true
  | _ -> false

let skip_while p s =
let i = ref 0 in
while !i < String.length s && p s.[!i] do
Expand All @@ -94,17 +88,39 @@ let skip_while p s =
!i

(* [slugify s] derives a heading identifier (slug) from the text [s].

   NOTE(review): this span comes from a commit diff rendered without +/-
   markers, so it appears to interleave lines the commit removed (the
   ASCII-only offset computation and the [String.iter] loop) with the lines
   that replace them. Confirm against the committed file before compiling.

   Steps visible in the Unicode-aware lines: trim [s]; skip leading bytes
   until one passes the [Uucp.Alpha.is_alphabetic] test; then fold over the
   remainder as UTF-8 and, for each decoded value:
   - alphabetic, or accepted by [Uucp.Num.is_hex_digit]: append the result of
     [Uucp.Case.Map.to_lower];
   - ['.'], ['-'], ['_']: append unchanged;
   - [' ']: append ['-'];
   - any other character: append nothing;
   - malformed input: append [Uutf.u_rep]. *)
let slugify s =
(* Presumably the pre-commit offset computation: it relies on
   [is_alphabetical] and is shadowed by the [offset] binding below. *)
let offset = skip_while (fun c -> not (is_alphabetical c)) s in
let s = String.trim s in
(* NOTE(review): [Uchar.of_char] converts a single byte, so for multi-byte
   UTF-8 input this test looks at individual bytes rather than decoded
   characters. Confirm that is intended. *)
let offset =
skip_while (fun c -> not (Uucp.Alpha.is_alphabetic (Uchar.of_char c))) s
in
let length = String.length s - offset in
let s = String.sub s offset length in
let b = Buffer.create length in
(* Presumably the pre-commit byte-wise loop: lowercases ASCII capitals, turns
   spaces into dashes and copies every other byte unchanged. *)
String.iter
(function
| 'A' .. 'Z' as c -> Buffer.add_char b (Char.lowercase_ascii c)
| ' ' -> Buffer.add_char b '-'
| _ as c -> Printf.bprintf b "%c" c)
s;
Buffer.contents b
(* Folder for [Uutf.String.fold_utf_8]: the accumulator is [()] and the
   position argument is ignored; all output goes into the buffer [b]. *)
let fold () _ = function
| `Malformed _ -> Uutf.Buffer.add_utf_8 b Uutf.u_rep
| `Uchar u -> (
if Uucp.Alpha.is_alphabetic u || Uucp.Num.is_hex_digit u then
match Uucp.Case.Map.to_lower u with
| `Self -> Uutf.Buffer.add_utf_8 b u
| `Uchars us -> List.iter (Uutf.Buffer.add_utf_8 b) us
else
(* [Uchar.to_char] raises [Invalid_argument] when [u] does not fit in a
   [char]; the handler below turns that case into "append nothing".
   NOTE(review): a [Uchar.is_char] guard would avoid using the exception
   for control flow. *)
try
match Uchar.to_char u with
| '.'
| '-'
| '_' ->
Uutf.Buffer.add_utf_8 b u
| ' ' -> Uutf.Buffer.add_utf_8 b (Uchar.of_char '-')
| _ -> ()
with
| Invalid_argument _ -> ())
in
Uutf.String.fold_utf_8 fold () s;
let s = Buffer.contents b in
(* Disabled code: it would drop a single trailing '-' from the slug. While it
   stays commented out, the buffer contents are returned unchanged. *)
(* let length = String.length s in
if length > 0 && s.[length - 1] = '-' then
String.sub s 0 (length - 1)
else *)
s

let to_plain_text t =
let buf = Buffer.create 1024 in
Expand Down
20 changes: 20 additions & 0 deletions tests/blackbox/heading-id.t
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,27 @@
> ### This is an Header Without Id
> ### 1 2 Header that starts with 2 numbers
> ### Header with an id {#header-id}
> ### Maître d'hÔtel 😬
> ### 👋👋 ÔHey!
> ### *Dogs*?--in *my* house?
> ### [HTML], [S5], or [RTF]?
> ### 3. Applications
> ### hello.world
> ### -hello-
> ### with multiple spaces
> ### 33
> ###
> MD
<h3 id="this-is-an-header-without-id">This is an Header Without Id</h3>
<h3 id="header-that-starts-with-2-numbers">1 2 Header that starts with 2 numbers</h3>
<h3 id="header-id">Header with an id</h3>
<h3 id="maître-dhôtel-">Maître d'hÔtel 😬</h3>
<h3 id="----ôhey">👋👋 ÔHey!</h3>
<h3 id="dogs--in-my-house"><em>Dogs</em>?--in <em>my</em> house?</h3>
<h3 id="html-s5-or-rtf">[HTML], [S5], or [RTF]?</h3>
<h3 id="applications">3. Applications</h3>
<h3 id="hello.world">hello.world</h3>
<h3 id="hello-">-hello-</h3>
<h3 id="with----multiple---spaces">with multiple spaces</h3>
<h3 id="">33</h3>
<h3 id=""></h3>

0 comments on commit eec7388

Please sign in to comment.