Skip to content

Commit

Permalink
Better accents over dotless 'i' and 'j'. Cf. issue #29.
Browse files Browse the repository at this point in the history
<#29>
  • Loading branch information
maranget committed Oct 19, 2020
1 parent 9661fdb commit 95005c9
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 28 deletions.
7 changes: 3 additions & 4 deletions iso-symb.hva
Original file line number Diff line number Diff line change
Expand Up @@ -348,11 +348,10 @@
\DeclareSymbolHtml[\mbox{Re}]{\Re}{X211C}
\DeclareSymbolHtml{\top}{X22A4}
\DeclareSymbolHtml{\wp}{X2118}
\newcommand{\i}{i}%To have \^{\i} work. (No accent as fct of U-chars yet)
%Dotless 'i' and 'j'
\DeclareSymbolHtml[i]{\i}{X131}
\let\imath\i
%\DeclareSymbolHtml[i]{\imath}{X131}
\newcommand{\j}{j}
%\DeclareSymbolHtml[j]{\jmath}{X237}
\DeclareSymbolHtml[j]{\j}{X237}
\let\jmath\j
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Table 122: Math-mode Accents %
Expand Down
42 changes: 39 additions & 3 deletions outUnicode.ml
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,6 @@ let open_mapping name =

and close_mapping chan = try close_in chan with _ -> ()


let set_output_translator name =
let key = Filename.basename name in
match key with
Expand Down Expand Up @@ -247,6 +246,25 @@ let translate_out i = !translate_out_fun i
and translate_in c (next:unit -> int) = !translate_in_fun c next

(* Diacritical marks *)
let null = 0x00

let put_empty put_unicode empty =
if empty <> null then put_unicode empty
else raise CannotTranslate

let apply_accent put_char put_unicode f optg empty c =
begin try
if c = ' ' then put_empty put_unicode empty
else put_unicode (f c)
with CannotTranslate ->
begin match optg with
| None -> raise CannotTranslate
| Some g ->
let ext = g c in
put_char c ; put_unicode ext
end
end


(*
Tables from ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859
Expand Down Expand Up @@ -816,8 +834,7 @@ let html_put put put_char i = match i with

(* Constants *)

let null = 0x00
and space = 0X20
let space = 0X20
and nbsp = 0XA0
and acute_alone = 0xB4
and grave_alone = 0X60
Expand Down Expand Up @@ -858,7 +875,26 @@ let comb_cedilla = function
| 'o'|'O' -> 0x0327
| _ -> raise CannotTranslate

let comb_grave = function
| 'j'|'J' -> 0x0300
| _ -> raise CannotTranslate

let comb_acute = function
| 'j'|'J' -> 0x0301
| _ -> raise CannotTranslate



(* Double accents *)

let double_inverted_breve = 0x0361

(* Accent over numerical entities *)

let tr_entity = function
| "&#X131;"|"&#305;" -> 'i'
| "&#X237;"|"&#567;" -> 'j'
| _ -> raise CannotTranslate

let on_entity put_char put_unicode f optg empty s =
apply_accent put_char put_unicode f optg empty (tr_entity s)
14 changes: 14 additions & 0 deletions outUnicode.mli
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ val translate_in : char -> (unit -> int) -> unichar
val translate_out : unichar -> (char -> unit) -> unit

(* Diacritical marks *)
val put_empty : (unichar -> unit) -> unichar -> unit
val apply_accent :
(char -> unit) -> (unichar -> unit) ->
(char -> unichar) -> (char -> unichar) option ->
unichar -> char -> unit

val grave : char -> unichar
val acute : char -> unichar
val circumflex : char -> unichar
Expand Down Expand Up @@ -96,6 +102,14 @@ val rtprime : unichar

(* Combinations *)
val comb_cedilla : char -> unichar
val comb_grave : char -> unichar
val comb_acute : char -> unichar

(* Double diacritics *)
val double_inverted_breve : unichar

(* Apply accent on unicode entity *)
val on_entity :
(char -> unit) -> (unichar -> unit) ->
(char -> unichar) -> (char -> unichar) option ->
unichar -> string -> unit
32 changes: 11 additions & 21 deletions package.ml
Original file line number Diff line number Diff line change
Expand Up @@ -39,35 +39,24 @@ open Scan
(* See iso-sym.hva, for the definition of \text@accent *)
(*********************************************************)

let put_empty empty =
if empty <> OutUnicode.null then Dest.put_unicode empty
else raise OutUnicode.CannotTranslate

exception DiacriticFailed of string * string

let do_def_diacritic _verb _name f optg empty =
(fun lexbuf ->
let arg0 = save_arg lexbuf in
let arg = get_prim_onarg arg0 in
try match String.length arg with
| 0 -> put_empty empty
| 0 -> OutUnicode.put_empty Dest.put_unicode empty
| 1 ->
let c = arg.[0] in
begin try
if c = ' ' then put_empty empty
else Dest.put_unicode (f c)
with OutUnicode.CannotTranslate ->
begin match optg with
| None -> raise OutUnicode.CannotTranslate
| Some g ->
let ext = g c in
Dest.put_char c ; Dest.put_unicode ext
end
end
| _ -> raise OutUnicode.CannotTranslate
OutUnicode.apply_accent
Dest.put_char Dest.put_unicode f optg empty arg.[0]
| _ ->
OutUnicode.on_entity
Dest.put_char Dest.put_unicode f optg empty arg
with
| OutUnicode.CannotTranslate
| Misc.CannotPut -> raise (DiacriticFailed (Subst.do_subst_this arg0,arg)))
;;

let full_def_diacritic name internal f optg empty =
def_code name
Expand All @@ -79,16 +68,17 @@ let full_def_diacritic name internal f optg empty =
;;

let def_diacritic name internal f empty =
full_def_diacritic name internal f None empty
full_def_diacritic name internal f None empty

and def_diacritic_opt name internal f g empty =
full_def_diacritic name internal f (Some g) empty


open OutUnicode

let () =
def_diacritic "\\'" "acute" OutUnicode.acute acute_alone ;
def_diacritic "\\`" "grave" OutUnicode.grave grave_alone ;
def_diacritic_opt "\\'" "acute" OutUnicode.acute comb_acute acute_alone ;
def_diacritic_opt "\\`" "grave" OutUnicode.grave comb_grave grave_alone ;
def_diacritic "\\^" "circumflex" OutUnicode.circumflex circum_alone ;
def_diacritic "\\\"" "diaeresis" OutUnicode.diaeresis diaeresis_alone ;
def_diacritic_opt "\\c" "cedilla" OutUnicode.cedilla
Expand Down

0 comments on commit 95005c9

Please sign in to comment.