From 86cf829d9a29eb058cf8aafb9cd26a2ca5d03b11 Mon Sep 17 00:00:00 2001 From: zapashcanon Date: Fri, 10 Nov 2023 19:39:58 +0100 Subject: [PATCH] switch to menhir --- interpreter/dune | 5 ++- interpreter/dune-project | 5 ++- interpreter/jslib/wast.ml | 4 +- interpreter/script/js.ml | 9 +++- interpreter/script/run.ml | 35 +++++++++------ interpreter/text/arrange.ml | 10 +++-- interpreter/text/parse.ml | 88 +++++++++++++++++++++++++------------ interpreter/text/parse.mli | 30 +++++++++---- interpreter/text/parser.mly | 13 ++---- interpreter/wasm.opam | 1 + 10 files changed, 131 insertions(+), 69 deletions(-) diff --git a/interpreter/dune b/interpreter/dune index 9a853921db..e221bea7a1 100644 --- a/interpreter/dune +++ b/interpreter/dune @@ -6,7 +6,8 @@ ; Wasm REPL every time in all the dependencies. ; We exclude the 'wast' module as it is only used for the JS build. ; 'smallint' is a separate test module. - (modules :standard \ main wasm smallint wast)) + (modules :standard \ main wasm smallint wast) + (libraries menhirLib)) (executable (public_name wasm) @@ -43,7 +44,7 @@ (chdir %{workspace_root} (run %{bin:ocamllex} -ml -q -o %{target} %{deps})))) - (ocamlyacc + (menhir (modules parser))) (env diff --git a/interpreter/dune-project b/interpreter/dune-project index 0d15135d31..8392b339f5 100644 --- a/interpreter/dune-project +++ b/interpreter/dune-project @@ -3,6 +3,8 @@ (name wasm) (generate_opam_files true) +(using menhir 2.1) +(implicit_transitive_deps false) (license Apache-2.0) @@ -17,4 +19,5 @@ (synopsis "Library to read and write WebAssembly (Wasm) files and manipulate their AST") (tags (wasm webassembly spec interpreter)) (depends - (ocaml (>= 4.12)))) + (ocaml (>= 4.12)) + (menhir (>= 20220210)))) diff --git a/interpreter/jslib/wast.ml b/interpreter/jslib/wast.ml index 0ab4bd8fdd..407de555e1 100644 --- a/interpreter/jslib/wast.ml +++ b/interpreter/jslib/wast.ml @@ -4,12 +4,12 @@ open Wasm open Js_of_ocaml -let _ = +let () = Js.export "WebAssemblyText" (object%js (_self) method encode (s : Js.js_string Js.t) : (Typed_array.arrayBuffer Js.t) = - let def = Parse.string_to_module (Js.to_string s) in + let def = snd @@ Parse.Module.from_string (Js.to_string s) in let bs = match def.Source.it with | Script.Textual m -> (Encode.encode m) diff --git a/interpreter/script/js.ml b/interpreter/script/js.ml index 2eb849a6c1..d172f58026 100644 --- a/interpreter/script/js.ml +++ b/interpreter/script/js.ml @@ -526,7 +526,10 @@ let rec of_definition def = | Textual m -> of_bytes (Encode.encode m) | Encoded (_, bs) -> of_bytes bs | Quoted (_, s) -> - try of_definition (Parse.string_to_module s) with Parse.Syntax _ -> + try + let _v, m = Parse.Module.from_string s in + of_definition m + with Script.Syntax _ -> of_bytes "" let of_wrapper mods x_opt name wrap_action wrap_assertion at = @@ -594,7 +597,9 @@ let of_command mods cmd = match def.it with | Textual m -> m | Encoded (_, bs) -> Decode.decode "binary" bs - | Quoted (_, s) -> unquote (Parse.string_to_module s) + | Quoted (_, s) -> + let _v, m = Parse.Module.from_string s in + unquote m in bind mods x_opt (unquote def); "let " ^ current_var mods ^ " = instance(" ^ of_definition def ^ ");\n" ^ (if x_opt = None then "" else diff --git a/interpreter/script/run.ml b/interpreter/script/run.ml index e0019d84a0..8797718ba9 100644 --- a/interpreter/script/run.ml +++ b/interpreter/script/run.ml @@ -105,7 +105,7 @@ let input_from get_script run = true with | Decode.Code (at, msg) -> error at "decoding error" msg - | Parse.Syntax (at, msg) -> error at "syntax error" msg + | Syntax (at, msg) -> error at "syntax error" msg | Valid.Invalid (at, msg) -> error at "invalid module" msg | Import.Unknown (at, msg) -> error at "link failure" msg | Eval.Link (at, msg) -> error at "link failure" msg @@ -118,17 +118,26 @@ let input_from get_script run = | Assert (at, msg) -> error at "assertion failure" msg | Abort _ -> false -let input_script start name lexbuf run = - input_from (fun _ -> Parse.parse name lexbuf start) run +let input_script name lexbuf run = + input_from (fun () -> + Lexing.set_filename lexbuf name; + Parse.Script.from_lexbuf lexbuf) + run + +let input_script1 name lexbuf run = + input_from (fun () -> + Lexing.set_filename lexbuf name; + Parse.Script1.from_lexbuf lexbuf) + run let input_sexpr name lexbuf run = - input_from (fun _ -> - let var_opt, def = Parse.parse name lexbuf Parse.Module in + input_from (fun () -> + let var_opt, def = Parse.Module.from_lexbuf lexbuf in [Module (var_opt, def) @@ no_region]) run let input_binary name buf run = let open Source in - input_from (fun _ -> + input_from (fun () -> [Module (None, Encoded (name, buf) @@ no_region) @@ no_region]) run let input_sexpr_file input file run = @@ -162,8 +171,8 @@ let input_file file run = dispatch_file_ext input_binary_file (input_sexpr_file input_sexpr) - (input_sexpr_file (input_script Parse.Script)) - (input_sexpr_file (input_script Parse.Script)) + (input_sexpr_file (input_script)) + (input_sexpr_file (input_script)) input_js_file file run @@ -171,7 +180,7 @@ let input_string string run = trace ("Running (\"" ^ String.escaped string ^ "\")..."); let lexbuf = Lexing.from_string string in trace "Parsing..."; - input_script Parse.Script "string" lexbuf run + input_script "string" lexbuf run (* Interactive *) @@ -195,7 +204,7 @@ let lexbuf_stdin buf len = let input_stdin run = let lexbuf = Lexing.from_function lexbuf_stdin in let rec loop () = - let success = input_script Parse.Script1 "stdin" lexbuf run in + let success = input_script1 "stdin" lexbuf run in if not success then Lexing.flush_input lexbuf; if Lexing.(lexbuf.lex_curr_pos >= lexbuf.lex_buffer_len - 1) then continuing := false; @@ -337,8 +346,8 @@ let rec run_definition def : Ast.module_ = Decode.decode name bs | Quoted (_, s) -> trace "Parsing quote..."; - let def' = Parse.string_to_module s in - run_definition def' + let def' = Parse.Module.from_string s in + run_definition (snd def') let run_action act : Values.value list = match act.it with @@ -443,7 +452,7 @@ let run_assertion ass = trace "Asserting malformed..."; (match ignore (run_definition def) with | exception Decode.Code (_, msg) -> assert_message ass.at "decoding" msg re - | exception Parse.Syntax (_, msg) -> assert_message ass.at "parsing" msg re + | exception Syntax (_, msg) -> assert_message ass.at "parsing" msg re | _ -> Assert.error ass.at "expected decoding/parsing error" ) diff --git a/interpreter/text/arrange.ml b/interpreter/text/arrange.ml index dc56743eb6..a11e50045d 100644 --- a/interpreter/text/arrange.ml +++ b/interpreter/text/arrange.ml @@ -679,21 +679,25 @@ let definition mode x_opt def = match def.it with | Textual m -> m | Encoded (_, bs) -> Decode.decode "" bs - | Quoted (_, s) -> unquote (Parse.string_to_module s) + | Quoted (_, s) -> + let _v, m = Parse.Module.from_string s in + unquote m in module_with_var_opt x_opt (unquote def) | `Binary -> let rec unquote def = match def.it with | Textual m -> Encode.encode m | Encoded (_, bs) -> Encode.encode (Decode.decode "" bs) - | Quoted (_, s) -> unquote (Parse.string_to_module s) + | Quoted (_, s) -> + let _v, m = Parse.Module.from_string s in + unquote m in binary_module_with_var_opt x_opt (unquote def) | `Original -> match def.it with | Textual m -> module_with_var_opt x_opt m | Encoded (_, bs) -> binary_module_with_var_opt x_opt bs | Quoted (_, s) -> quoted_module_with_var_opt x_opt s - with Parse.Syntax _ -> + with Script.Syntax _ -> quoted_module_with_var_opt x_opt "" let access x_opt n = diff --git a/interpreter/text/parse.ml b/interpreter/text/parse.ml index 71c4cc4a9c..039867d29a 100644 --- a/interpreter/text/parse.ml +++ b/interpreter/text/parse.ml @@ -1,28 +1,60 @@ -type 'a start = - | Module : (Script.var option * Script.definition) start - | Script : Script.script start - | Script1 : Script.script start - -exception Syntax = Script.Syntax - -let parse' name lexbuf start = - lexbuf.Lexing.lex_curr_p <- - {lexbuf.Lexing.lex_curr_p with Lexing.pos_fname = name}; - try start Lexer.token lexbuf - with Syntax (region, s) -> - let region' = if region <> Source.no_region then region else - {Source.left = Lexer.convert_pos lexbuf.Lexing.lex_start_p; - Source.right = Lexer.convert_pos lexbuf.Lexing.lex_curr_p} in - raise (Syntax (region', s)) - -let parse (type a) name lexbuf : a start -> a = function - | Module -> parse' name lexbuf Parser.module1 - | Script -> parse' name lexbuf Parser.script - | Script1 -> parse' name lexbuf Parser.script1 - -let string_to start s = - let lexbuf = Lexing.from_string s in - parse "string" lexbuf start - -let string_to_script s = string_to Script s -let string_to_module s = snd (string_to Module s) +module Make (M : sig + type t + + val rule : (Lexing.lexbuf -> Parser.token) -> Lexing.lexbuf -> t + +end) = struct + + type nonrec t = M.t + + let from_lexbuf = + let parser = MenhirLib.Convert.Simplified.traditional2revised M.rule in + fun buf -> + let provider () = + let tok = Lexer.token buf in + let start = Lexing.lexeme_start_p buf in + let stop = Lexing.lexeme_end_p buf in + tok, start, stop + in + try parser provider with + | Parser.Error -> + let left = Lexer.convert_pos buf.Lexing.lex_start_p in + let right = Lexer.convert_pos buf.Lexing.lex_curr_p in + let region = { Source.left; right } in + raise (Script.Syntax (region, "unexpected token")) + | Script.Syntax (region, s) as exn -> + if region <> Source.no_region then raise exn + else + let region' = { + Source.left = Lexer.convert_pos buf.Lexing.lex_start_p; + Source.right = Lexer.convert_pos buf.Lexing.lex_curr_p } + in + raise (Script.Syntax (region', s)) + + let from_file filename = + let chan = open_in filename in + Fun.protect ~finally:(fun () -> close_in chan) + (fun () -> + let lb = Lexing.from_channel ~with_positions:true chan in + Lexing.set_filename lb filename; + from_lexbuf lb) + + let from_string s = from_lexbuf (Lexing.from_string ~with_positions:true s) + + let from_channel c = from_lexbuf (Lexing.from_channel ~with_positions:true c) +end + +module Module = Make (struct + type t = Script.var option * Script.definition + let rule = Parser.module1 +end) + +module Script1 = Make (struct + type t = Script.script + let rule = Parser.script1 +end) + +module Script = Make (struct + type t = Script.script + let rule = Parser.script +end) diff --git a/interpreter/text/parse.mli b/interpreter/text/parse.mli index 89f8e58024..4077329760 100644 --- a/interpreter/text/parse.mli +++ b/interpreter/text/parse.mli @@ -1,11 +1,23 @@ -type 'a start = - | Module : (Script.var option * Script.definition) start - | Script : Script.script start - | Script1 : Script.script start +module Module : sig + type t = Script.var option * Script.definition + val from_lexbuf : Lexing.lexbuf -> t + val from_file : string -> t + val from_string : string -> t + val from_channel : in_channel -> t +end -exception Syntax of Source.region * string +module Script1 : sig + type t = Script.script + val from_lexbuf : Lexing.lexbuf -> t + val from_file : string -> t + val from_string : string -> t + val from_channel : in_channel -> t +end -val parse : string -> Lexing.lexbuf -> 'a start -> 'a (* raises Syntax *) - -val string_to_script : string -> Script.script (* raises Syntax *) -val string_to_module : string -> Script.definition (* raises Syntax *) +module Script : sig + type t = Script.script + val from_lexbuf : Lexing.lexbuf -> t + val from_file : string -> t + val from_string : string -> t + val from_channel : in_channel -> t +end diff --git a/interpreter/text/parser.mly b/interpreter/text/parser.mly index e29be3ae3b..44ddd3651b 100644 --- a/interpreter/text/parser.mly +++ b/interpreter/text/parser.mly @@ -10,11 +10,6 @@ open Script let error at msg = raise (Script.Syntax (at, msg)) -let parse_error msg = - error Source.no_region - (if msg = "syntax error" then "unexpected token" else msg) - - (* Position handling *) let position_to_pos position = @@ -700,10 +695,10 @@ func_body : {ftype = -1l @@ at(); locals = []; body = $1 c'} } | LPAR LOCAL value_type_list RPAR func_body { fun c -> anon_locals c (lazy $3); let f = $5 c in - {f with locals = $3 @ f.locals} } + {f with locals = $3 @ f.Ast.locals} } | LPAR LOCAL bind_var value_type RPAR func_body /* Sugar */ { fun c -> ignore (bind_local c $3); let f = $6 c in - {f with locals = $4 :: f.locals} } + {f with locals = $4 :: f.Ast.locals} } /* Tables, Memories & Globals */ @@ -966,11 +961,11 @@ module_fields1 : | elem module_fields { fun c -> let ef = $1 c in let mf = $2 c in fun () -> let elems = ef () in let m = mf () in - {m with elems = elems :: m.elems} } + {m with elems = elems :: m.Ast.elems} } | data module_fields { fun c -> let df = $1 c in let mf = $2 c in fun () -> let data = df () in let m = mf () in - {m with datas = data :: m.datas} } + {m with datas = data :: m.Ast.datas} } | start module_fields { fun c -> let mf = $2 c in fun () -> let m = mf () in let x = $1 c in diff --git a/interpreter/wasm.opam b/interpreter/wasm.opam index 5d5984106c..ad8b60af23 100644 --- a/interpreter/wasm.opam +++ b/interpreter/wasm.opam @@ -11,6 +11,7 @@ bug-reports: "https://github.com/WebAssembly/spec/issues" depends: [ "dune" {>= "2.9"} "ocaml" {>= "4.12"} + "menhir" {>= "20220210"} "odoc" {with-doc} ] build: [