From daf812123ccc539cf11449d9a363f294906ec141 Mon Sep 17 00:00:00 2001 From: Guillaume Petiot Date: Sun, 3 Mar 2024 15:43:35 +0000 Subject: [PATCH] Inspect the expression instead of parsing its representation --- config/cfg_lang.ml | 163 ++++++++++++++-------------------------- config/cfg_lang_test.ml | 112 ++------------------------- config/cfg_ppx.ml | 1 - config/ppx.t/main.ml | 2 +- config/ppx.t/run.t | 14 ++-- 5 files changed, 70 insertions(+), 222 deletions(-) diff --git a/config/cfg_lang.ml b/config/cfg_lang.ml index 12b8249..c842a15 100644 --- a/config/cfg_lang.ml +++ b/config/cfg_lang.ml @@ -22,79 +22,7 @@ Contenxt: %s |} (* let log = Printf.printf *) -module Lexer = struct - type token = - | ATOM of string - | STRING of string - | NUMBER of int - | LPARENS - | EQ - | RPARENS - | COMMA - - let digit = [%sedlex.regexp? Plus '0' .. '9'] - let letter = [%sedlex.regexp? '_' | 'a' .. 'z' | 'A' .. 'Z'] - let ident = [%sedlex.regexp? letter, Star (letter | digit)] - - let pp_one fmt (t : token) = - match t with - | ATOM name -> Format.fprintf fmt "ATOM(%S)" name - | STRING s -> Format.fprintf fmt "STRING(%S)" s - | NUMBER n -> Format.fprintf fmt "NUMBER(%d)" n - | RPARENS -> Format.fprintf fmt "RPARENS" - | LPARENS -> Format.fprintf fmt "LPARENS" - | EQ -> Format.fprintf fmt "EQ" - | COMMA -> Format.fprintf fmt "COMMA" - - let pp fmt t = - Format.fprintf fmt "[\r\n "; - Format.pp_print_list - ~pp_sep:(fun fmt () -> Format.fprintf fmt ";\r\n ") - pp_one fmt t; - Format.fprintf fmt "\r\n]\r\n" - - let rec token ~loc buf (acc : token list) = - match%sedlex buf with - | white_space | "\n" | "\r" -> token ~loc buf acc - | ")" -> token ~loc buf (RPARENS :: acc) - | "(" -> token ~loc buf (LPARENS :: acc) - | "=" -> token ~loc buf (EQ :: acc) - | "," -> token ~loc buf (COMMA :: acc) - | digit -> - let num = Sedlexing.Utf8.lexeme buf in - token ~loc buf (NUMBER (Int64.of_string num |> Int64.to_int) :: acc) - | "\"" -> string ~loc buf acc - | ident -> - let atom = Sedlexing.Utf8.lexeme buf in - token ~loc buf (ATOM atom :: acc) - | eof -> acc - | _ -> - let char = Sedlexing.Utf8.lexeme buf in - failwith ~loc - (Format.sprintf "Syntax error, invalid character: %S" char) - - and string ~loc buf ?(str = []) acc = - match%sedlex buf with - | "\"" -> - let str = List.rev str |> String.concat "" in - (* log "%s\"%!" str; *) - token ~loc buf (STRING str :: acc) - | "\\r" -> string ~loc buf ~str:("\r" :: str) acc - | "\\n" -> string ~loc buf ~str:("\n" :: str) acc - | any -> - let ident = Sedlexing.Utf8.lexeme buf in - (* log "%s%!" ident; *) - string ~loc buf ~str:(ident :: str) acc - | _ -> failwith ~loc "unsupported character in string" - - let read ~loc str = - let lexbuf = Sedlexing.Utf8.from_string str in - token ~loc lexbuf [] |> List.rev -end - module Parser = struct - open Lexer - type value = Number of int | String of string type parsetree = @@ -123,43 +51,62 @@ module Parser = struct pp fmt parts; Format.fprintf fmt "\r\n]\r\n" - let rec parse ~loc str = - let tokens = Lexer.read ~loc str in - let tree, _rest = do_parse ~loc tokens in - tree - - and do_parse ~loc tokens = - match tokens with - | ATOM "any" :: LPARENS :: rest -> - let list, rest = parse_list ~loc rest in - (Any list, rest) - | ATOM "all" :: LPARENS :: rest -> - let list, rest = parse_list ~loc rest in - (All list, rest) - | ATOM "not" :: LPARENS :: rest -> ( - match do_parse ~loc rest with - | pred, RPARENS :: rest -> (Not pred, rest) - | _ -> failwith ~loc "Not expressions must have a single parameter") - | ATOM ("any" | "all" | "not") :: _ -> - failwith ~loc "Forms any/all/not must parenthesize its arguments" - | LPARENS :: ATOM var :: EQ :: STRING s :: RPARENS :: rest -> - (Pred { var; value = String s }, rest) - | LPARENS :: ATOM var :: EQ :: NUMBER n :: RPARENS :: rest -> - (Pred { var; value = Number n }, rest) - | [ ATOM var; EQ; STRING s; RPARENS ] -> (Pred { var; value = String s }, []) - | [ ATOM var; EQ; NUMBER n; RPARENS ] -> (Pred { var; value = Number n }, []) - | ATOM var :: rest -> (Pred { var; value = String "true" }, rest) - | _ -> + module Exp = struct + let id = function + | { pexp_desc = Pexp_ident { txt = Lident "any"; _ }; _ } -> `any + | { pexp_desc = Pexp_ident { txt = Lident "all"; _ }; _ } -> `all + | { pexp_desc = Pexp_ident { txt = Lident "not"; _ }; _ } -> `not + | { pexp_desc = Pexp_ident { txt = Lident "="; _ }; _ } -> `eq + | _ -> `invalid + + let arg_list loc = function + | [ (Nolabel, { pexp_desc = Pexp_tuple args; _ }) ] -> args + | [ (Nolabel, arg) ] -> [ arg ] + | _ -> failwith ~loc "Forms any/all/not must parenthesize its arguments" + + let one_arg loc = function + | [ (Nolabel, { pexp_desc = Pexp_tuple _; _ }) ] -> + failwith ~loc "Not expressions must have a single parameter" + | [ (Nolabel, arg) ] -> arg + | _ -> failwith ~loc "Forms any/all/not must parenthesize its arguments" + + let var loc = function + | Nolabel, { pexp_desc = Pexp_ident { txt = Lident var; _ }; _ } -> var + | _ -> failwith ~loc (Format.sprintf "Expected identifier") + + let value loc = function + | Nolabel, { pexp_desc = Pexp_constant (Pconst_integer (i, _)); _ } -> + Number (int_of_string i) + | Nolabel, { pexp_desc = Pexp_constant (Pconst_string (s, _, _)); _ } -> + String s + | ( Nolabel, + { pexp_desc = Pexp_construct ({ txt = Lident "true"; _ }, None); _ } ) + -> + String "true" + | _ -> failwith ~loc (Format.sprintf "Expected int, string or bool value") + + let rec parse ~loc (exp : expression) = + let fail () = failwith ~loc - (Format.asprintf "Invalid sequence of tokens: %a" Lexer.pp tokens) - - and parse_list ~loc ?(acc = []) tokens = - match tokens with - | [] -> (List.rev acc, []) - | COMMA :: RPARENS :: rest | RPARENS :: rest -> (List.rev acc, rest) - | COMMA :: rest | rest -> - let pred, rest = do_parse ~loc rest in - parse_list ~loc rest ~acc:(pred :: acc) + (Format.asprintf "Invalid expression: %a" Pprintast.expression exp) + in + match exp.pexp_desc with + | Pexp_apply (fn, args) -> ( + match id fn with + | `any -> Any (List.map (parse ~loc) (arg_list loc args)) + | `all -> All (List.map (parse ~loc) (arg_list loc args)) + | `not -> Not (parse ~loc (one_arg loc args)) + | `eq -> ( + match args with + | [ x; y ] -> Pred { var = var loc x; value = value loc y } + | _ -> fail ()) + | `invalid -> fail ()) + | Pexp_ident { txt = Lident var; _ } -> + Pred { var; value = String "true" } + | _ -> fail () + end + + let parse = Exp.parse end module Eval = struct diff --git a/config/cfg_lang_test.ml b/config/cfg_lang_test.ml index c294d70..89b29d9 100644 --- a/config/cfg_lang_test.ml +++ b/config/cfg_lang_test.ml @@ -4,117 +4,14 @@ open Cfg_lang let keyword fmt = Spices.(default |> fg (color "#00FF00") |> build) fmt let error fmt = Spices.(default |> fg (color "#FF0000") |> build) fmt let loc = Location.none - -let () = - let test str expected = - let actual_str = - match Lexer.read ~loc str with - | exception Cfg_lang.Error { error; _ } -> - Format.sprintf "Exception: %S" error - | actual -> Format.asprintf "%a" Lexer.pp actual - in - let expect_str = Format.asprintf "%a" Lexer.pp expected in - - if String.equal actual_str expect_str then - Format.printf "lexer test %S %s\r\n%!" str (keyword "OK") - else ( - Format.printf "%s\n\nExpected:\n\n%a\n\nbut found:\n\n%s\n\n" - (error "Tokens do not match") - Lexer.pp expected actual_str; - assert false) - in - - test "" []; - test "not" [ ATOM "not" ]; - test "not any" [ ATOM "not"; ATOM "any" ]; - test "(target_os = \"macos\")" - [ LPARENS; ATOM "target_os"; EQ; STRING "macos"; RPARENS ]; - test "any (macos)" [ ATOM "any"; LPARENS; ATOM "macos"; RPARENS ]; - test "all (target_os = \"macos\")" - [ ATOM "all"; LPARENS; ATOM "target_os"; EQ; STRING "macos"; RPARENS ]; - test "any ((target_os = \"macos\"))" - [ - ATOM "any"; - LPARENS; - LPARENS; - ATOM "target_os"; - EQ; - STRING "macos"; - RPARENS; - RPARENS; - ]; - test "any (architecture = 2112)" - [ ATOM "any"; LPARENS; ATOM "architecture"; EQ; NUMBER 2112; RPARENS ]; - test "any (target_os = \"macos\", another)" - [ - ATOM "any"; - LPARENS; - ATOM "target_os"; - EQ; - STRING "macos"; - COMMA; - ATOM "another"; - RPARENS; - ]; - test - {| -any - ( target_os = "macos", - target_os = "ios", - target_os = "watchos", - target_os = "tvos", - not(all(target_os = "freebsd", - target_os = "netbsd")), - target_os = "linux" ) - - - |} - [ - ATOM "any"; - LPARENS; - ATOM "target_os"; - EQ; - STRING "macos"; - COMMA; - ATOM "target_os"; - EQ; - STRING "ios"; - COMMA; - ATOM "target_os"; - EQ; - STRING "watchos"; - COMMA; - ATOM "target_os"; - EQ; - STRING "tvos"; - COMMA; - ATOM "not"; - LPARENS; - ATOM "all"; - LPARENS; - ATOM "target_os"; - EQ; - STRING "freebsd"; - COMMA; - ATOM "target_os"; - EQ; - STRING "netbsd"; - RPARENS; - RPARENS; - COMMA; - ATOM "target_os"; - EQ; - STRING "linux"; - RPARENS; - ]; - - () +let parse_expression str = Parse.expression (Lexing.from_string str) let () = let open Parser in let test str expected = let actual_str = - match Parser.parse ~loc str with + let exp = parse_expression str in + match Parser.parse ~loc exp with | exception Cfg_lang.Error { error; _ } -> Format.sprintf "Exception: %S" error | actual -> Format.asprintf "%a" Parser.pp actual @@ -195,7 +92,8 @@ any( (target_os = "macos"), let () = let test str env expected = let actual_str = - match Eval.eval ~loc ~env str with + let exp = parse_expression str in + match Eval.eval ~loc ~env exp with | exception Cfg_lang.Error { error; _ } -> Format.sprintf "Exception: %S" error | actual -> Format.asprintf "%b" actual diff --git a/config/cfg_ppx.ml b/config/cfg_ppx.ml index c5996a3..407a7c0 100644 --- a/config/cfg_ppx.ml +++ b/config/cfg_ppx.ml @@ -34,7 +34,6 @@ let eval_attr attr = (* Printf.printf "\n\nattr name: %S\n\n" attr.attr_name.txt; *) match attr.attr_payload with | PStr [ { pstr_desc = Pstr_eval (e, []); _ } ] -> - let e = Pprintast.string_of_expression e in if Cfg_lang.eval ~loc ~env e then `keep else `drop | _ -> `keep diff --git a/config/ppx.t/main.ml b/config/ppx.t/main.ml index 009cca9..dafeb27 100644 --- a/config/ppx.t/main.ml +++ b/config/ppx.t/main.ml @@ -52,5 +52,5 @@ module Sys = Sys_win32 module Sys = Sys_win64 [@@config all (target_os = "windows", target_arch = "arm")] -let () = Printf.printf "sys=%s" Sys.name +let () = Printf.printf "sys=%s env=%s" Sys.name Env.name (* will print "sys=unix" on my mac! *) diff --git a/config/ppx.t/run.t b/config/ppx.t/run.t index 1d94e68..c77c838 100644 --- a/config/ppx.t/run.t +++ b/config/ppx.t/run.t @@ -32,13 +32,14 @@ module Cond_recmod = Cond_recmod module Cond_class = Cond_class module Whole_mod = Whole_mod + module Env = struct let name = "unknown" end[@@config target_env = ""] module Sys = Sys_unix[@@config any ((target_os = "macos"), (target_os = "ios"), (target_os = "watchos"), (target_os = "tvos"), (target_os = "freebsd"), (target_os = "netbsd"), (target_os = "linux"))] - let () = Printf.printf "sys=%s" Sys.name + let () = Printf.printf "sys=%s env=%s" Sys.name Env.name $ dune clean $ target_os=windows target_arch=x86 dune describe pp main.ml [@@@ocaml.ppx.context @@ -74,9 +75,10 @@ module Cond_recmod = Cond_recmod module Cond_class = Cond_class module Whole_mod = Whole_mod + module Env = struct let name = "unknown" end[@@config target_env = ""] module Sys = Sys_win32[@@config all ((target_os = "windows"), (target_arch = "x86"))] - let () = Printf.printf "sys=%s" Sys.name + let () = Printf.printf "sys=%s env=%s" Sys.name Env.name $ dune clean $ target_os=windows target_arch=arm dune describe pp main.ml @@ -113,17 +115,18 @@ module Cond_recmod = Cond_recmod module Cond_class = Cond_class module Whole_mod = Whole_mod + module Env = struct let name = "unknown" end[@@config target_env = ""] module Sys = Sys_win64[@@config all ((target_os = "windows"), (target_arch = "arm"))] - let () = Printf.printf "sys=%s" Sys.name + let () = Printf.printf "sys=%s env=%s" Sys.name Env.name $ dune clean $ dune exec ./main.exe - sys=unix + sys=unix env=unknown $ dune clean $ target_os=windows target_arch=x86 dune exec ./main.exe - sys=win32 + sys=win32 env=unknown $ dune clean $ dune build @@ -147,3 +150,4 @@ unsafe_string = false; cookies = [] }] + external foo : unit -> int = "made_up_call"