diff --git a/CHANGES.md b/CHANGES.md index 8e1e85371..4535b8d01 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,15 +1,19 @@ -# Unreleased +# Unreleased (scheduled for 0.9.8) List of changes since the latest release. + +## Queries mixing set and bag semantics +Links now provides experimental support for SQL queries with grouping and aggregation. These require the _mixing_ normaliser (`mixing_norm=on` in the configuration file). + +The result of grouping over a relation is represented as a finite map, which in Links is treated as a list of (grouping key, associated subrelation) pairs. Aggregation can then be applied groupwise to a finite map to obtain a relation again. Such Links queries are translated to SQL queries using `group by` and aggregates. + +Further information on this feature is provided in the [Links GitHub wiki](https://github.com/links-lang/links/wiki/Grouping-and-aggregation). + +## Other changes and fixes * The package `links-mysql`, based on the `mysql` opam package, is no longer supported. Instead, the package `links-mysql8`, based on the `mysql8` package, is provided, which also supports versions of MySQL prior to 8. - -# 0.9.8 - -This release ... - * Control-flow linearity: Links now tracks control-flow linearity when the flag `--control-flow-linearity` is enabled. This extension fixes a long-standing soundness bug (see issue diff --git a/core/evalir.ml b/core/evalir.ml index ef3f92f17..0b20d45eb 100644 --- a/core/evalir.ml +++ b/core/evalir.ml @@ -775,7 +775,7 @@ struct begin match evaluator e with | None -> computation env cont e - | Some (db, q, t) -> + | Some (db, q, t, readback) -> let q = db#string_of_query ~range q in let (fieldMap, _, _) = let r, _ = Types.unwrap_row (TypeUtils.extract_row t) in @@ -790,7 +790,9 @@ struct fieldMap [] in - apply_cont cont env (Database.execute_select fields q db) + Database.execute_select fields q db + |> readback (* unflattens records/finite maps *) + |> apply_cont cont env end end | TemporalJoin (tmp, e, _t) -> diff --git a/core/json.ml b/core/json.ml index 4366917a4..40e420b5a 100644 --- a/core/json.ml +++ b/core/json.ml @@ -120,6 +120,8 @@ let rec jsonize_value' : Value.t -> Yojson.Basic.t = | #Value.primitive_value as p -> jsonize_primitive p | `Variant (label, value) -> lit ~tag:"Variant" [("_label", `String label); ("_value", jsonize_value' value)] + | `Entry (key, value) -> + lit ~tag:"MapEntry" [("_key", jsonize_value' key); ("_value", jsonize_value' value)] | `Record fields -> lit ~tag:"Record" (List.map (fun (k, v) -> (k, jsonize_value' v )) fields) | `List l -> cons_listify (List.map jsonize_value' l) diff --git a/core/lib.ml b/core/lib.ml index 556a987fd..a0801ba6b 100644 --- a/core/lib.ml +++ b/core/lib.ml @@ -237,6 +237,26 @@ let env : (string * (located_primitive * Types.datatype * pure)) list = [ "^.", float_op ( ** ) PURE; "^^", string_op ( ^ ) PURE; + "max_int", + (Value.box_int max_int, + datatype "Int", + PURE); + + "min_int", + (Value.box_int min_int, + datatype "Int", + PURE); + + "infinity", + (Value.box_float Float.infinity, + datatype "Float", + PURE); + + "neg_infinity", + (Value.box_float Float.neg_infinity, + datatype "Float", + PURE); + (* Comparisons *) "==", (p2 (fun v1 v2 -> Value.box_bool (equal v1 v2)), @@ -592,6 +612,11 @@ let env : (string * (located_primitive * Types.datatype * pure)) list = [ datatype "([a]) -> Int", PURE); + "Sum", + (p1 (Value.unbox_list ->- List.fold_left (fun x y -> x + Value.unbox_int y) 0 ->- Value.box_int), + datatype "([Int]) -> Int", + PURE); + "take", (p2 (fun n l -> 
Value.box_list (Utility.take (Value.unbox_int n) (Value.unbox_list l))), diff --git a/core/query/delateralize.ml b/core/query/delateralize.ml index d34ab5e6e..789b3eaae 100644 --- a/core/query/delateralize.ml +++ b/core/query/delateralize.ml @@ -22,15 +22,19 @@ let graph_query (q1,ty1) x (q2,ty2) = let y = Var.fresh_raw_var () in let p = Q.flattened_pair (QL.Var (x,ty1)) (QL.Var (y,ty2)) in let ftys = Q.flattened_pair_ft (QL.Var (x,ty1)) (QL.Var (y,ty2)) in - QL.For (None, [(x, q1); (y, q2)], [], QL.Singleton p), ftys + QL.For (None, [(QL.Entries, x, q1); (QL.Entries, y, q2)], [], QL.Singleton p), ftys (* DELATERALIZING REWRITE for Prom: for gs, y <- Prom(q3) do q1 -- s.t. x <- q2 in gs ~> for gs, p <- Prom(G(x <- Dedup q2; q3)) where x = p.1 do (\lambda y.q1) p.2 + + there's a similar rewrite for key comprehension, but the bag promotion is implicit: + this is why we require a genkind parameter *) -let prom_delateralize gs q1 x (q2,ty2) y (q3,ty3) = +let rew_delateralize genkind gs q1 x (q2,ty2) y (q3,ty3) = + let cast x = match genkind with QL.Entries -> QL.Prom x | _ -> x in let p = Var.fresh_raw_var () in let graph, ftys = graph_query (QL.Dedup q2,ty2) x (q3,ty3) in let vp = QL.Var (p,Types.make_record_type ftys) in @@ -58,7 +62,7 @@ let prom_delateralize gs q1 x (q2,ty2) y (q3,ty3) = let q1_rp = QL.subst q1 y rp in QL.For (None, - gs @ [(p, QL.Prom graph)], + gs @ [(QL.Entries, p, cast graph)], [], QL.If (eq_query, q1_rp, QL.nil)) @@ -70,23 +74,24 @@ let rec delateralize_step q = match q with | QL.For (_tag, gs, os, q) -> let rec findgs gsx = function - | (y,QL.Prom qy as gy)::gsy -> + | (QL.Entries as genkind, y,QL.Prom qy as gy)::gsy + | (QL.Keys as genkind, y, qy as gy)::gsy -> begin match QL.occurs_free_gens gsx qy with (* tail-consing is annoying, but occurs_free_list needs arguments in this order *) | None -> findgs (gsx@[gy]) gsy - | Some (x,qx,tyx) -> Some (gsx,x,qx,tyx,y,qy,gsy) + | Some (x,qx,tyx) -> Some (gsx,x,qx,tyx,genkind,y,qy,gsy) end | gy::gsy -> findgs (gy::gsx) gsy | [] -> None in begin match findgs [] gs with - | Some (gsx,x,qx,tyx,y,qy,gsy) -> + | Some (gsx,x,qx,tyx,gky,y,qy,gsy) -> let qf = QL.For (None, gsy, [], q) in - let tyy = Q.type_of_for_var qy in - Some (prom_delateralize gsx qf x (qx,tyx) y (qy,tyy)) + let tyy = Q.type_of_for_var gky qy in + Some (rew_delateralize gky gsx qf x (qx,tyx) y (qy,tyy)) | None -> - let ogs = gs >>==? (fun (z,qz) -> ds qz >>=? fun qz' -> Some (z,qz')) in + let ogs = gs >>==? (fun (genkind, z,qz) -> ds qz >>=? 
fun qz' -> Some (genkind,z,qz')) in let oq = ds q in begin match ogs, oq with diff --git a/core/query/evalMixingQuery.ml b/core/query/evalMixingQuery.ml index 7e532bf83..4a3f710a7 100644 --- a/core/query/evalMixingQuery.ml +++ b/core/query/evalMixingQuery.ml @@ -23,7 +23,7 @@ let eval_error fmt : 'r = let mapstrcat sep f l = l |> List.map f |> String.concat sep let dummy_sql_empty_query = - (S.All,S.Fields [(S.Constant (Constant.Int 42), "@unit@")], [], S.Constant (Constant.Bool false), []) + (S.All,S.Fields [(S.Constant (Constant.Int 42), "@unit@")], [], S.Constant (Constant.Bool false), [], []) let dependency_of_contains_free = function true -> S.Lateral | _ -> S.Standard @@ -36,25 +36,72 @@ and disjunct is_set = function | QL.Prom p -> sql_of_query S.Distinct p | QL.Singleton _ as j -> S.Select (body is_set [] [] j) | QL.For (_, gs, os, j) -> S.Select (body is_set gs os j) -| _arg -> Debug.print ("error in EvalMixingQuery.disjunct: unexpected arg = " ^ QL.show _arg); failwith "disjunct" +| QL.AggBy (ar, q) -> aggregator ar q +| _arg -> + Debug.print ("error in EvalMixingQuery.disjunct: unexpected arg = " ^ QL.show _arg); + failwith "disjunct" + +and aggregator ar q = + let aggr = function + | QL.Primitive "Sum" -> "sum" + | QL.Primitive "SumF" -> "sum" + | QL.Primitive "Avg" -> "avg" + | QL.Primitive "AvgF" -> "avg" + | QL.Primitive "Min" -> "min" + | QL.Primitive "Max" -> "max" + | QL.Primitive "MinF" -> "min" + | QL.Primitive "MaxF" -> "max" + | QL.Primitive "length" -> "count" + | _ -> assert false + in + let z = Var.fresh_raw_var () in + let tyk, _tyv = q |> QL.type_of_expression |> Types.unwrap_map_type in + let fsk, _, _ = tyk |> Types.extract_row |> Types.extract_row_parts in + let fields_k = fsk |> StringMap.to_alist |> List.map (fun (f,_) -> S.Project (z, "1@" ^ f), "1@" ^ f) in + let fields_v = ar |> StringMap.to_alist |> List.map (fun (f_out, (aggfun, f_in)) -> + S.Apply (aggr aggfun, [S.Project (z, "2@" ^ f_in)]), "2@" ^ f_out) + in + let fields = fields_k @ fields_v in + let gbys = List.map (fun (_,f) -> S.Project (z, f)) fields_k in + S.Select (S.All, S.Fields fields, [S.Subquery (S.Standard, sql_of_query S.All q, z)], S.Constant (Constant.Bool true), gbys, []) and generator locvars = function -| (v, QL.Prom p) -> (S.Subquery (dependency_of_contains_free (E.contains_free locvars p), sql_of_query S.Distinct p, v)) -| (v, QL.Table Value.Table.{ name; _}) -> (S.TableRef (name, v)) -| (v, QL.Dedup (QL.Table Value.Table.{ name; _ })) -> - S.Subquery (S.Standard, S.Select (S.Distinct, S.Star, [S.TableRef (name, v)], S.Constant (Constant.Bool true), []), v) -| (_, _arg) -> Debug.print ("error in EvalMixingQuery.disjunct: unexpected arg = " ^ QL.show _arg); failwith "generator" +| (QL.Entries, v, QL.Prom p) -> (S.Subquery (dependency_of_contains_free (E.contains_free locvars p), sql_of_query S.Distinct p, v)) +| (QL.Entries, v, QL.Table Value.Table.{ name; _}) -> (S.TableRef (name, v)) +| (QL.Entries, v, QL.Dedup (QL.Table Value.Table.{ name; _ })) -> + S.Subquery (S.Standard, S.Select (S.Distinct, S.Star, [S.TableRef (name, v)], S.Constant (Constant.Bool true), [], []), v) +| (QL.Keys, v, QL.GroupBy ((x, QL.Record gc), QL.Table Value.Table.{ name; _})) +| (QL.Keys, v, QL.GroupBy ((x, QL.Record gc), QL.Dedup (QL.Table Value.Table.{ name; _}))) -> + let fields = List.map (fun (f,e) -> (base_exp e, f)) (StringMap.to_alist gc) in + S.Subquery (dependency_of_contains_free (E.contains_free locvars (QL.Record gc)), + S.Select (S.Distinct, S.Fields fields, [S.TableRef (name, x)], 
S.Constant (Constant.Bool true), [], []), v) +| (QL.Keys, v, q) -> + let z = Var.fresh_raw_var () in + let tyk, _ = q |> QL.type_of_expression |> Types.unwrap_map_type in + let fsk, _, _ = tyk |> Types.extract_row |> Types.extract_row_parts in + let fields = + fsk + |> StringMap.to_alist + |> List.map (fun (f,_) -> S.Project (z, "1@" ^ f), f) + in + S.Subquery (dependency_of_contains_free (E.contains_free locvars q), + S.Select (S.Distinct, + S.Fields fields, + [S.Subquery (S.Standard, sql_of_query S.All q, z)], + S.Constant (Constant.Bool true), + [], []), v) +| (_genkind, _, _arg) -> Debug.print ("error in EvalMixingQuery.disjunct: unexpected arg = " ^ QL.show _arg); failwith "generator" and body is_set gs os j = let selquery body where = let froms = gs - |> List.fold_left (fun (locvars,acc) (v,_q as g) -> (v::locvars, generator locvars g::acc)) ([],[]) + |> List.fold_left (fun (locvars,acc) (_genkind, v,_q as g) -> (v::locvars, generator locvars g::acc)) ([],[]) |> snd |> List.rev in let os = List.map base_exp os in - (is_set, S.Fields body, froms, where, os) + (is_set, S.Fields body, froms, where, [], os) in match j with | QL.Concat [] -> dummy_sql_empty_query @@ -62,10 +109,20 @@ and body is_set gs os j = selquery <| List.map (fun (f,x) -> (base_exp x, f)) (StringMap.to_alist fields) <| Sql.Constant (Constant.Bool true) + | QL.Singleton (QL.MapEntry (QL.Record keys, QL.Record values)) -> + selquery + <| List.map (fun (f,x) -> (base_exp x, "1@" ^ f)) (StringMap.to_alist keys) + @ List.map (fun (f,x) -> (base_exp x, "2@" ^ f)) (StringMap.to_alist values) + <| Sql.Constant (Constant.Bool true) | QL.If (c, QL.Singleton (QL.Record fields), QL.Concat []) -> selquery <| List.map (fun (f,x) -> (base_exp x, f)) (StringMap.to_alist fields) <| base_exp c + | QL.If (c, QL.Singleton (QL.MapEntry (QL.Record keys, QL.Record values)), QL.Concat []) -> + selquery + <| List.map (fun (f,x) -> (base_exp x, "1@" ^ f)) (StringMap.to_alist keys) + @ List.map (fun (f,x) -> (base_exp x, "2@" ^ f)) (StringMap.to_alist values) + <| base_exp c | _ -> Debug.print ("error in EvalMixingQuery.body: unexpected j = " ^ QL.show j); failwith "body" and base_exp = function @@ -91,7 +148,16 @@ and base_exp = function Sql.Apply ("RLIKE", [base_exp s; r]) end | QL.Apply (QL.Primitive "Empty", [v]) -> S.Empty (sql_of_query S.All v) +(* length takes as input a collection of records so it cannot be converted to S.Aggr *) | QL.Apply (QL.Primitive "length", [v]) -> S.Length (sql_of_query S.All v) +| QL.Apply (QL.Primitive "Sum", [v]) -> S.Aggr ("sum", sql_of_query S.All v) +| QL.Apply (QL.Primitive "SumF", [v]) -> S.Aggr ("sum", sql_of_query S.All v) +| QL.Apply (QL.Primitive "Avg", [v]) -> S.Aggr ("avg", sql_of_query S.All v) +| QL.Apply (QL.Primitive "AvgF", [v]) -> S.Aggr ("avg", sql_of_query S.All v) +| QL.Apply (QL.Primitive "Min", [v]) -> S.Aggr ("min", sql_of_query S.All v) +| QL.Apply (QL.Primitive "MinF", [v]) -> S.Aggr ("min", sql_of_query S.All v) +| QL.Apply (QL.Primitive "Max", [v]) -> S.Aggr ("max", sql_of_query S.All v) +| QL.Apply (QL.Primitive "MaxF", [v]) -> S.Aggr ("max", sql_of_query S.All v) | QL.Apply (QL.Primitive f, vs) -> S.Apply (f, List.map base_exp vs) | QL.Constant c -> S.Constant c | e -> @@ -101,7 +167,7 @@ and base_exp = function (* external call will start with a bag query *) let sql_of_query = sql_of_query S.All -let compile_mixing : delateralize:QueryPolicy.t -> Value.env -> (int * int) option * Ir.computation -> (Value.database * Sql.query * Types.datatype) option = +let compile_mixing : 
delateralize:QueryPolicy.t -> Value.env -> (int * int) option * Ir.computation -> (Value.database * Sql.query * Types.datatype * (Value.t -> Value.t)) option = fun ~delateralize env (range, e) -> (* Debug.print ("env: "^Value.show_env env); Debug.print ("e: "^Ir.show_computation e); *) @@ -116,9 +182,34 @@ let compile_mixing : delateralize:QueryPolicy.t -> Value.env -> (int * int) opti match QL.used_database v with | None -> None | Some db -> - let t = Types.unwrap_list_type (QL.type_of_expression v) in - (* Debug.print ("Generated NRC query: " ^ QL.show v ); *) - let q = sql_of_query v in - let _range = None in - (* Debug.print ("Generated SQL query: "^(Sql.string_of_query db _range q)); *) - Some (db, q, t) + let strip_presence = function Types.Present t -> t | _ -> assert false in + let v_flat = QL.FlattenRecords.flatten_query v in + (* + Debug.print ("Generated NRC query: " ^ QL.show v); + Debug.print ("Flattened NRC query: " ^ QL.show v_flat); + *) + let readback = QL.FlattenRecords.unflatten_query (QL.type_of_expression v) in + (* the calling code expects the item type, not the list type *) + let t_flat = Types.unwrap_list_type (QL.type_of_expression v_flat) in + let t_flat = + try + let tyk, tyv = Types.unwrap_mapentry_type t_flat in + let rowk, _, _ = tyk |> Types.extract_row |> Types.extract_row_parts in + let rowv, _, _ = tyv |> Types.extract_row |> Types.extract_row_parts in + let row = StringMap.fold + <| (fun k v acc -> StringMap.add ("1@" ^ k) (strip_presence v) acc) + <| rowk + <| StringMap.empty + in + let row = StringMap.fold + <| (fun k v acc -> StringMap.add ("2@" ^ k) (strip_presence v) acc) + <| rowv + <| row + in + Types.make_record_type row + with _ -> t_flat + in + let q = sql_of_query v_flat in + let _range = None in + Debug.print ("Generated SQL query: "^(db#string_of_query ~range:_range q)); + Some (db, q, t_flat, readback) diff --git a/core/query/evalNestedQuery.ml b/core/query/evalNestedQuery.ml index 321425222..bf3817b8d 100644 --- a/core/query/evalNestedQuery.ml +++ b/core/query/evalNestedQuery.ml @@ -23,6 +23,7 @@ let tag_query : QL.t -> QL.t = If (tag c, tag t, tag e) | Table t -> Table t | Singleton e -> Singleton (tag e) + | MapEntry (k, v) -> MapEntry (tag k, tag v) | Concat es -> Concat (List.map tag es) | Dedup t -> Dedup (tag t) @@ -39,6 +40,10 @@ let tag_query : QL.t -> QL.t = | Var (x, t) -> Var (x, t) | Constant c -> Constant c | Database db -> Database db + | GroupBy ((x,k), q) -> GroupBy ((x,tag k), tag q) + (* XXX: defensive programming: recursion on ar not needed now, but might be in the future *) + | AggBy (ar, q) -> AggBy (StringMap.map (fun (x,y) -> tag x, y) ar, tag q) + | Lookup (q,k) -> Lookup (tag q, tag k) in tag e @@ -260,7 +265,7 @@ end *) module Split = struct - type gen = Var.var * QL.t + type gen = QL.genkind * Var.var * QL.t let rec query : gen list -> QL.t list -> QL.t -> QL.t -> QL.t list = fun gs os cond -> @@ -304,7 +309,7 @@ struct [@@deriving show] type cond = QL.t option - type gen = Var.var * QL.t + type gen = QL.genkind * Var.var * QL.t let where c e = match c with @@ -446,7 +451,7 @@ struct let rec lins c : let_clause = let gs_out = List.concat (init (gens c)) in - let ys = List.map fst gs_out in + let ys = List.map (fun (_,x,_) -> x) gs_out in let x_out = List.fold_right @@ -460,7 +465,7 @@ struct let r_out = tuple (List.map - (fun (x, source) -> + (fun (_genkind, x, source) -> match source with | QL.Table t -> let tyx = Types.make_record_type (QL.table_field_types t) in @@ -470,7 +475,7 @@ struct let r_out_type = 
Types.make_tuple_type (List.map - (fun (_, source) -> + (fun (_genkind,_, source) -> match source with | QL.Table Value.Table.{ row; _ } -> Types.Record (Types.Row row) diff --git a/core/query/evalQuery.ml b/core/query/evalQuery.ml index f3faa2b08..b0a3a609d 100644 --- a/core/query/evalQuery.ml +++ b/core/query/evalQuery.ml @@ -7,7 +7,7 @@ module QL = QueryLang semantics. *) module Order = struct - type gen = Var.var * QL.t + type gen = QL.genkind * Var.var * QL.t type context = gen list (* TODO: @@ -100,9 +100,9 @@ struct type preclause = (path * (context * QL.t)) * query_tree type clause = context * QL.t * orders - let gen : (Var.var * QL.t) -> QL.t list = + let gen : (QL.genkind * Var.var * QL.t) -> QL.t list = function - | (x, QL.Table t) -> + | (_genkind, x, QL.Table t) -> let field_types = QL.table_field_types t in let tyx = Types.make_record_type field_types in List.rev @@ -298,7 +298,7 @@ let ordered_query v = (* Debug.print ("concat vs: "^Q.string_of_t (`Concat vs)); *) Sql.Union (Sql.All, List.map QL.sql_of_query vs, n) -let compile : Value.env -> (int * int) option * Ir.computation -> (Value.database * Sql.query * Types.datatype) option = +let compile : Value.env -> (int * int) option * Ir.computation -> (Value.database * Sql.query * Types.datatype * (Value.t -> Value.t)) option = fun env (range, e) -> (* Debug.print ("e: "^Ir.show_computation e); *) let v = Q.Eval.eval QueryPolicy.Flat env e in @@ -308,5 +308,7 @@ let compile : Value.env -> (int * int) option * Ir.computation -> (Value.databas | Some db -> let t = Types.unwrap_list_type (QL.type_of_expression v) in let q = ordered_query v in - Debug.print ("Generated query: "^(db#string_of_query ~range q)); - Some (db, q, t) + Debug.print ("Generated query: "^(db#string_of_query ~range q)); + (* TODO: trivial readback, might be changed (see EvalMixingQuery) to allow nested records *) + let readback x = x in + Some (db, q, t, readback) diff --git a/core/query/evalQuery.mli b/core/query/evalQuery.mli index 16948ff0e..6304b67a9 100644 --- a/core/query/evalQuery.mli +++ b/core/query/evalQuery.mli @@ -1 +1 @@ -val compile : Value.env -> (int * int) option * Ir.computation -> (Value.database * Sql.query * Types.datatype) option +val compile : Value.env -> (int * int) option * Ir.computation -> (Value.database * Sql.query * Types.datatype * (Value.t -> Value.t)) option diff --git a/core/query/mixingQuery.ml b/core/query/mixingQuery.ml index 16173272f..8ba326716 100644 --- a/core/query/mixingQuery.ml +++ b/core/query/mixingQuery.ml @@ -10,7 +10,6 @@ open Utility open CommonTypes -open Var open Errors module Q = QueryLang @@ -28,9 +27,10 @@ let rec tail_of_t : Q.t -> Q.t = fun v -> | Q.For (_, _gs, _os, t) -> tt t | _ -> (* Debug.print ("v: "^string_of_t v); *) assert false -let type_of_for_var gen = - Q.type_of_expression gen - |> Types.unwrap_list_type +let type_of_for_var pol gen = + match pol, Q.type_of_expression gen with + | Q.Keys, ty -> fst <| Types.unwrap_map_type ty + | _, ty -> Types.unwrap_list_type ty let rec freshen_for_bindings : Var.var Env.Int.t -> Q.t -> Q.t = fun env v -> @@ -44,9 +44,9 @@ let rec freshen_for_bindings : Var.var Env.Int.t -> Q.t -> Q.t = | Q.For (tag, gs, os, b) -> let gs', env' = List.fold_left - (fun (gs', env') (x, source) -> + (fun (gs', env') (genkind, x, source) -> let y = Var.fresh_raw_var () in - ((y, ffb source)::gs', Env.Int.bind x y env')) + ((genkind, y, ffb source)::gs', Env.Int.bind x y env')) ([], env) gs in @@ -54,6 +54,7 @@ let rec freshen_for_bindings : Var.var Env.Int.t -> Q.t -> Q.t 
= | Q.If (c, t, e) -> Q.If (ffb c, ffb t, ffb e) | Q.Table _ as t -> t | Q.Singleton v -> Q.Singleton (ffb v) + | Q.MapEntry (k,v) -> Q.MapEntry (ffb k, ffb v) | Q.Database db -> Q.Database db | Q.Concat vs -> Q.Concat (List.map ffb vs) | Q.Dedup q -> Q.Dedup (ffb q) @@ -77,6 +78,13 @@ let rec freshen_for_bindings : Var.var Env.Int.t -> Q.t -> Q.t = end *) Q.Var (var_lookup x, ts) | Q.Constant c -> Q.Constant c + | Q.GroupBy ((v,i),q) -> + let y = Var.fresh_raw_var () in + let env' = Env.Int.bind v y env in + Q.GroupBy ((y, freshen_for_bindings env' i), ffb q) + (* XXX: defensive programming; recursion on ar not needed now, but may be in the future *) + | Q.AggBy (ar, q) -> Q.AggBy (StringMap.map (fun (x,y) -> ffb x, y) ar, ffb q) + | Q.Lookup (q,k) -> Q.Lookup (ffb q, ffb k) let flatfield f1 f2 = f1 ^ "@" ^ f2 @@ -88,7 +96,7 @@ let rec flattened_pair x y = | _, Q.Var (_ny, Types.Record row) -> let y' = Q.Record (StringMap.fold (fun f _ acc -> StringMap.add f (Q.Project (y,f)) acc) (Q.field_types_of_row row) StringMap.empty) in flattened_pair x y' - (* We uese a field with an empty name to deal with variables of non-record type *) + (* We use a field with an empty name to deal with variables of non-record type *) | Q.Var (_nx, _), _ -> let x' = Q.Record (StringMap.from_alist ["",x]) in flattened_pair x' y @@ -110,7 +118,7 @@ let rec flattened_pair_ft x y = StringMap.fold (fun f t acc -> StringMap.add (flatfield "1" f) t acc) (Q.field_types_of_row rowx) StringMap.empty in StringMap.fold (fun f t acc -> StringMap.add (flatfield "2" f) t acc) (Q.field_types_of_row rowy) out1 - (* XXX: same as above, using a field with an empty name to deal with variables of non-record type ... will it work? *) + (* XXX: same as above, using a field with an empty name to deal with variables of non-record type *) | Q.Var (nx, tyx), _ -> flattened_pair_ft (Q.Var (nx, Types.make_record_type (StringMap.from_alist ["", tyx]))) y | _, Q.Var (ny, tyy) -> flattened_pair_ft x (Q.Var (ny, Types.make_record_type (StringMap.from_alist ["", tyy]))) | _ -> assert false @@ -208,82 +216,6 @@ let rec reduce_where_then (c, t) = | _ -> Q.If (c, t, Q.Concat []) -let rec reduce_for_body (gs, os, body) = - let rb = reduce_for_body in - match body with - | Q.For (_, gs', os', body') -> rb (gs @ gs', os @ os', body') - (* | Prom _ as u -> - let z = Var.fresh_raw_var () in - let tyz = type_of_expression u in - let ftz = recdty_field_types (Types.unwrap_list_type tyz) in - let vz = Var (z, ftz) in - For (None, gs @ [(z, u)], [] (* os *), (Singleton vz)) *) - (* make sure when we reach this place, gs can NEVER be empty - | _ when gs = [] (* && _os = [] *) -> body *) - | Q.Concat vs -> reduce_concat (List.map (fun v -> rb (gs, os, v)) vs) - | _ -> Q.For (None, gs, os, body) - -let rec reduce_for_source : Q.env -> var * Q.t * Types.datatype -> (Q.env -> (Q.t list -> Q.t list) -> Q.t) -> Q.t = - fun env (x, source, ty) body -> - let empty_os = fun os -> os in - let add_os os = fun os' -> os@os' in - let rs = fun gen' -> reduce_for_source env gen' body in - match source with - | Q.Singleton v -> - begin - let env' = Q.bind env (x, v) in - match body env' empty_os with - (* the normal form of a For does not have Prom in its body - --> we hoist it to a generator *) - | Q.Prom _ as q -> - let z = Var.fresh_raw_var () in - let tyq = Q.type_of_expression q in - (* Debug.print ("reduce_for_source.Singleton fresh var: " ^ show (Var (z,tyq))); *) - reduce_for_body ([(z,q)], [], Q.Singleton (Q.eta_expand_var (z, tyq))) - | q -> q - end - | Q.Concat 
vs -> - reduce_concat (List.map (fun s -> rs (x,s,ty)) vs) - | Q.If (c, t, Q.Concat []) -> - reduce_for_source env (x, t, ty) (fun env' os_f -> reduce_where_then (c, body env' os_f)) - | Q.For (_, gs, os', v) -> - (* NOTE: - - We are relying on peculiarities of the way we manage - the environment in order to avoid having to - augment it with the generator bindings here. - - In particular, we rely on the fact that if a variable - is not found on a lookup then we return the eta - expansion of that variable rather than complaining that - it isn't bound in the environment. - - *) - let tyv = Q.type_of_expression v in - (* this ensures os' is added to the right of the final comprehension, and further inner orderings to the right of os' *) - let body' = fun env' os_f -> body env' (os_f ->- add_os os') in - reduce_for_body (gs, [], reduce_for_source env (x,v,tyv) body') - | Q.Table Value.Table.{ row; _ } - | Q.Dedup (Q.Table Value.Table.{ row; _ }) -> - (* we need to generate a fresh variable in order to - correctly handle self joins *) - let y = Var.fresh_raw_var () in - let ty_elem = Types.Record (Types.Row row) in - (* Debug.print ("reduce_for_source.Table fresh var: " ^ string_of_int y ^ " for " ^ string_of_int x); *) - let env' = Q.bind env (x, Q.Var (y, ty_elem)) in - (* Debug.print ("reduce_for_source.Table body before renaming: " ^ show (body env empty_os)); *) - let body' = body env' empty_os in - (* Debug.print ("reduce_for_source.Table body after renaming: " ^ show body'); *) - reduce_for_body ([(y, source)], [], body') - | Q.Prom _ -> - let y = Var.fresh_raw_var () in - let ty_elem = type_of_for_var source in - (* Debug.print ("reduce_for_source.Prom fresh var: " ^ string_of_int y); *) - let env' = Q.bind env (x, Q.Var (y, ty_elem)) in - let body' = body env' empty_os in - reduce_for_body ([(y,source)], [], body') - | v -> Q.query_error "Bad source in for comprehension: %s" (Q.string_of_t v) - let rec reduce_if_body (c, t, e) = match t with | Q.Record then_fields -> @@ -338,7 +270,21 @@ struct let open Q in Env.Int.bindings (e.qenv) - let reduce_artifacts = function + let rec reduce_project (r, label) = + match r with + | Q.Record fields -> + assert (StringMap.mem label fields); + StringMap.find label fields + | Q.If (c, t, e) -> + Q.If (c, reduce_project (t, label), reduce_project (e, label)) + | Q.Var (_x, Types.Record row) -> + let field_types = Q.field_types_of_row row in + assert (StringMap.mem label field_types); + Q.Project (r, label) + | _ -> Q.query_error ("Error projecting label %s from record: %s") label (Q.string_of_t r) + + (* this now receives a function xc (xlate computation) *) + let reduce_artifacts xc = function | Q.Apply (Q.Primitive "stringToXml", [u]) -> Q.Singleton (Q.XML (Value.Text (Q.unbox_string u))) | Q.Apply (Q.Primitive "AsList", [xs]) @@ -355,6 +301,57 @@ struct | Q.Apply (Q.Primitive "vtTo", [x]) -> Q.Project (x, TemporalField.to_field) | Q.Apply (Q.Primitive "Distinct", [u]) -> Q.Prom (Q.Dedup u) + | Q.Apply (Q.Primitive "AggBy", [q; aggs]) -> + let aggError x = Debug.print ("AggBy error --" ^ x); assert false in + let of_closure = function + | Q.Closure (([x], comp), env) -> + (* a dummy type because we cannot synthesize the right one here *) + let vty = Types.wrong_type in + let vx = Q.Var (x, vty) in + let env' = Q.bind env (x, vx) in + x, xc env' comp + | q -> aggError ("of_closure of " ^ (Q.show q)) + in + let of_apply = function + | Q.Apply (f, [arg]) -> f, arg + | q -> aggError ("of_apply of " ^ (Q.show q)) + in + let of_map_project = function + 
| Q.Apply (Q.Primitive "ConcatMap", [c;q]) -> c, q + | Q.Apply (Q.Project (r, l), [c;q]) when reduce_project (r, l) = Q.Primitive "ConcatMap" -> c, q + | q -> aggError ("of_map_project of " ^ (Q.show q)) + in + let of_project = function + | Q.Project (r, l) -> l, r + | q -> aggError ("of_project of " ^ (Q.show q)) + in + let of_singleton = function + (* | Q.Singleton q -> q -- but not really *) + | Q.Apply (Q.Primitive "Cons", [q; Q.Concat []]) -> q + | q -> aggError ("of_singleton of " ^ (Q.show q)) + in + let of_record _x = function + | Q.Record fields -> + StringMap.fold (fun label v acc -> + (* f is the aggregate function for this label *) + let f, arg = of_apply v in + let c, _q = of_map_project arg in + (* TODO we should check q = x *) + let y, cbody = of_closure c in + match of_project (of_singleton cbody) with + | l, Q.Var (var, _) when var = y -> + StringMap.add label (f, l) acc + | l, q -> aggError ("of_record label " ^ l ^ ": " ^ (Q.show q)) + ) + fields + StringMap.empty + | q -> aggError ("of_record " ^ (Q.show q)) + in + Debug.print ("Aggregating with: " ^ Q.show aggs); + let x, v = of_closure aggs in + let ar = of_record x v in + Q.AggBy (ar, q) + | u -> u let rec xlate env : Ir.value -> Q.t = let open Ir in function @@ -362,9 +359,6 @@ struct | Variable var -> begin match Q.lookup env var with - | Q.Var (x, tyx) -> - (* eta-expand record variables *) - Q.eta_expand_var (x, tyx) | Q.Primitive "Nil" -> Q.nil (* We could consider detecting and eta-expand tables here. The only other possible sources of table values would @@ -437,7 +431,7 @@ struct Q.Singleton (Q.XML (Value.Node (tag, children))) | ApplyPure (f, ps) -> - reduce_artifacts (Q.Apply (xlate env f, List.map (xlate env) ps)) + reduce_artifacts computation (Q.Apply (xlate env f, List.map (xlate env) ps)) | Closure (f, _, v) -> let open Q in let (_finfo, (xs, body), z_opt, _location) = Tables.find Tables.fun_defs f in @@ -475,7 +469,7 @@ struct and tail_computation env : Ir.tail_computation -> Q.t = let open Ir in function | Return v -> xlate env v | Apply (f, args) -> - reduce_artifacts (Q.Apply (xlate env f, List.map (xlate env) args)) + reduce_artifacts computation (Q.Apply (xlate env f, List.map (xlate env) args)) | Special (Ir.Query (None, policy, e, _)) -> let open Q in check_policies_compatible env.policy policy; @@ -540,22 +534,99 @@ struct | Q.Variant (_, t) -> cfree bvs t | Q.Concat tl -> List.exists (cfree bvs) tl | Q.For (_, gs, os, b) -> - let bvs'', res = List.fold_left (fun (bvs',acc) (w,q) -> w::bvs', acc || cfree bvs' q) (bvs, false) gs in + let bvs'', res = List.fold_left (fun (bvs',acc) (_genkind,w,q) -> w::bvs', acc || cfree bvs' q) (bvs, false) gs in res || cfree bvs'' b || List.exists (cfree bvs) os | Q.Record fl -> StringMap.exists (fun _ t -> cfree bvs t) fl | _ -> false in cfree [] - let mk_for_term env (x,xs) body_f = - let ty_elem = - Q.type_of_expression xs - |> TypeUtils.element_type ~overstep_quantifiers:true - in - (* let newx = Var.fresh_raw_var () in *) + let mk_for_term env (pol,x,xs) body_f = + let ty_elem = type_of_for_var pol xs in let vx = Q.Var (x, ty_elem) in let cenv = Q.bind env (x, vx) in - (* Debug.print ("mk_for_term: " ^ string_of_int newx ^ " for " ^ string_of_int x); *) - Q.For (None, [x,xs], [], body_f cenv) + Q.For (None, [pol,x,xs], [], body_f cenv) + + (* this has the effect of performing beta reduction when the generator in + * the main input is a Singleton *) + let reduce_for_source gsx env = function + (* in a value comprehesion, no need to unbox a MapEntry in v 
because + * NRC with grouping forces the generator to be a pure collection *) + | (Q.Entries, x, Q.Singleton v) -> gsx, Q.bind env (x,v) + | (Q.Keys, x, Q.Singleton (Q.MapEntry (k,_))) -> gsx, Q.bind env (x,k) + | (Q.Entries, x, (Q.Table (Value.Table.{ row; temporality; temporal_fields; _ } as table) as q)) + when temporality = Temporality.Transaction || temporality = Temporality.valid -> + let (from_field, to_field) = OptionUtils.val_of temporal_fields in + (* Transaction / Valid-time tables: Need to wrap as metadata *) + (* First, generate a fresh variable for the table *) + let make_spec_map = StringMap.map (fun x -> Types.Present x) in + let field_types = Q.table_field_types table in + let base_field_types = + StringMap.filter + (fun x _ -> x <> from_field && x <> to_field) + field_types in + let (_, row_var, dual) = row in + let z = Var.fresh_raw_var () in + let tyz = Types.(Record (Row (make_spec_map field_types, row_var, dual))) in + let base_ty_elem = Types.(Record (Row (make_spec_map base_field_types, row_var, dual))) in + let vz = Q.Var (z, tyz) in + + (* Second, generate a fresh variable for the metadata *) + let metadata_record = + StringMap.from_alist [ + (TemporalField.data_field, + Q.eta_expand_var (z, base_ty_elem)); + (TemporalField.from_field, + Q.Project (vz, from_field)); + (TemporalField.to_field, + Q.Project (vz, to_field)) + ] in + Debug.print ("reduce_for_source with temporal generator: " ^ (Q.show (Q.Record metadata_record))); + let env' = Q.bind env (x, Q.Record metadata_record) in + gsx@[Q.Entries, z, q], env' + | (pol, x, q) -> + let z = Var.fresh_raw_var () in + let tyz = type_of_for_var pol q in + let vz = Q.Var (z, tyz) in + let env' = Q.bind env (x, vz) in + gsx@[pol, z, q], env' + + (* when the head of a comprehension is a Prom, this lifts it to a generator + * by means of eta-expansion *) + let reduce_for_body gsx = function + | Q.Prom _ as u -> + let z = Var.fresh_raw_var () in + let tyz = + Q.type_of_expression u + |> TypeUtils.element_type + in + let vz = Q.Var (z, tyz) in + gsx@[Q.Entries,z,u], Q.Singleton vz + | u -> gsx, u + + (* auxiliary functions to pack/unpack normalised collections *) + let pack_for (body, c, gs, os) = + let body' = reduce_where_then (c,body) in + match gs, os with + | [], [] -> body' + | _ -> Q.For (None, gs, os, body') + + let pack_ncoll ql = reduce_concat (List.map pack_for ql) + + let unpack_where = function + | Q.If (c, t, Q.Concat []) -> t, c + | q -> q, Q.Constant (Constant.Bool true) + + let unpack_for = function + | Q.For (_, gs, os, body) -> + let body', c = unpack_where body in + body', c, gs, os + | q -> + let body', c = unpack_where q in + body', c, [], [] + + let unpack_ncoll = function + | Q.Concat ql -> List.map unpack_for ql + | q -> [unpack_for q] let rec norm in_dedup env : Q.t -> Q.t = function @@ -603,22 +674,10 @@ struct end | Q.Record fl -> Q.Record (StringMap.map (norm false env) fl) | Q.Singleton v -> Q.Singleton (norm false env v) + | Q.MapEntry (k,v) -> Q.MapEntry (norm false env k, norm false env v) | Q.Concat xs -> reduce_concat (List.map (norm in_dedup env) xs) | Q.Project (r, label) -> - let rec project (r, label) = - match r with - | Q.Record fields -> - assert (StringMap.mem label fields); - StringMap.find label fields - | Q.If (c, t, e) -> - Q.If (c, project (t, label), project (e, label)) - | Q.Var (_x, Types.Record row) -> - let field_types = Q.field_types_of_row row in - assert (StringMap.mem label field_types); - Q.Project (r, label) - | _ -> Q.query_error ("Error projecting label %s from 
record: %s") label (Q.string_of_t r) - in - retn in_dedup (project (norm false env r, label)) + retn in_dedup (reduce_project (norm false env r, label)) | Q.Erase (r, labels) -> let rec erase (r, labels) = match r with @@ -647,31 +706,36 @@ struct | Q.Apply (f, xs) as _orig -> apply in_dedup env (norm false env f, List.map (norm false env) xs) | Q.For (_, gs, os, u) as _orig -> - let rec reduce_gs env os_f body = function - | [] -> - begin - match norm in_dedup env body with - | Q.For (_, gs', os', u') -> - reduce_gs env (os_f -<- (fun os'' -> os'@os'')) u' gs' - (* this special case allows us to hoist a non-standard For body into a generator *) - | Q.Prom _ as u' -> - let z = Var.fresh_raw_var () in - let tyz = - Q.type_of_expression u' - |> TypeUtils.element_type - in - let vz = Q.Var (z, tyz) in - reduce_for_source env (z, u', tyz) (fun env' os_f' -> - Q.For (None, [], List.map (norm false env') (os_f' (os_f [])), - norm in_dedup env' (Q.Singleton vz))) - | u' -> - Q.For (None, [], List.map (norm false env) (os_f []), u') - end - | (x,g)::gs' -> (* equivalent to xs = For gs' u, body = g, but possibly the arguments aren't normalized *) - let tyg = Q.type_of_expression g in - reduce_for_source env (x, norm in_dedup env g, tyg) (fun env' os_f' -> reduce_gs env' (os_f -<- os_f') body gs') + let reduce_gs = + let rec rgs gsx cx env os body = function + | (pol,x,g)::gs' -> + let ng = unpack_ncoll (norm in_dedup env g) in + begin + match pol, in_dedup, ng with + (* if pol is Entries, we are in a set comprehension, or the generator is nil or a singleton, we can reduce *) + | Q.Entries, _, _ + | _, true, _ + | _, _, ([] | [(Q.Singleton _, _, [], _)]) -> + List.concat (List.map (fun (gsrc, gc, ggs, gos) -> + let gsx', env' = reduce_for_source (gsx@ggs) env (pol,x,gsrc) in + rgs gsx' (reduce_and (cx, gc)) env' (os@gos) body gs') ng) + (* pol is Keys, we are in a bag comprehension, and the generator is not nil or a singleton: no unnesting or splitting of unions *) + | _ -> + let z = Var.fresh_raw_var () in + let tyz = type_of_for_var pol g in + let vz = Q.Var (z, tyz) in + let env' = Q.bind env (x, vz) in + rgs (gsx@[pol,z,pack_ncoll ng]) cx env' os body gs' + end + | [] -> + let nbody = unpack_ncoll (norm in_dedup env body) in + List.map (fun (bbody, bc, bgs, bos) -> + let bgs', bbody' = reduce_for_body (gsx@bgs) bbody in + let os' = List.map (fun o -> norm false env o) (os@bos) in + bbody', reduce_and (cx, bc), bgs', os') nbody + in rgs [] (Q.Constant (Constant.Bool true)) in - reduce_gs env (fun os' -> os@os') u gs + pack_ncoll (reduce_gs env os u gs) | Q.If (c, t, e) -> reduce_if_condition (norm false env c, norm in_dedup env t, norm in_dedup env e) | Q.Case (v, cases, default) -> @@ -700,8 +764,58 @@ struct | Q.Prom v when in_dedup -> norm false env v | Q.Prom v (* when not in_dedup *) -> Q.Prom (norm false env v) + | Q.GroupBy ((x,qx), q) -> + let ql = unpack_ncoll (norm in_dedup env q) in + let qx' = norm false env qx in + (* the following assumes normalised records are eta expanded *) + let rcd_combine = function + | Q.Record rx, Q.Record ry -> + begin + try Q.Record (StringMap.union_disjoint rx ry) + with StringMap.Not_disjoint _ -> Q.query_error "rcd_combine: unnable to merge overlapping grouping criteria (buggy typechecker?)" + end + | Q.Record _, z | z, _ -> Q.query_error "rcd_combine: unexpected non-record argument (buggy normaliser?): %s" (Q.show z) + in + let gc_combine (x,qx) (y,qy) = + let z = Var.fresh_raw_var () in + let tyz = TypeUtils.element_type ~overstep_quantifiers:true 
(Q.type_of_expression q) in + let vz = Q.Var (z, tyz) in + let qx = norm false (Q.bind env (x, vz)) qx in + let qy = norm false (Q.bind env (y, vz)) qy in + z, rcd_combine (qx, qy) + in + let reduce_groupby = function + | Q.Singleton (Q.MapEntry (k, v)) -> Q.Singleton (Q.MapEntry (rcd_combine ((norm false (Q.bind env (x, v)) qx'), k), v)) + | Q.Singleton v (* not MapEntry *) -> Q.Singleton (Q.MapEntry (norm false (Q.bind env (x, v)) qx', v)) + | Q.GroupBy ((y, qy), q') -> Q.GroupBy (gc_combine (x,qx') (y,qy), q') + | qf -> Q.GroupBy ((x, qx'), qf) + in + let ql' = List.map (fun (b, c, gs, os) -> (reduce_groupby b, c, gs, os)) ql in + pack_ncoll ql' + | Q.AggBy (ar, q) -> Q.AggBy (StringMap.map (fun (x,y) -> norm false env x, y) ar, norm in_dedup env q) + | Q.Lookup (q, k) -> + let ql = unpack_ncoll (norm in_dedup env q) in + let k' = norm false env k in + let reduce_lookup = function + | Q.Singleton (Q.MapEntry (kv, v)) -> reduce_where_then (reduce_eq (kv, k'), Q.Singleton v) + | Q.Singleton _ (* not MapEntry *) as qorig -> assert (Q.type_of_expression k' = Types.unit_type); qorig + | qorig -> Q.Lookup (qorig, k) + in + let ql' = List.map (fun (b, c, gs, os) -> (reduce_lookup b, c, gs, os)) ql in + pack_ncoll ql' | v -> retn in_dedup v + and apply_concatMap in_dedup env pol xs = function + | Q.Closure (([x], body), closure_env) -> + (* Debug.print ("Application of ConcatMap(Key)"); + Debug.print ("pol: " ^ Q.show_genkind pol); + Debug.print ("f: " ^ Q.show f); + Debug.print ("xs: " ^ Q.show xs); *) + (fun cenv -> computation cenv body) + |> mk_for_term (env ++ closure_env) (pol,x,xs) + |> norm in_dedup env + | _ -> assert false + and apply in_dedup env : Q.t * Q.t list -> Q.t = function | Q.Closure ((xs, body), closure_env), args -> (* Debug.print ("Applying closure"); *) @@ -717,27 +831,29 @@ struct norm in_dedup env (Q.Concat [Q.Singleton x; xs]) | Q.Primitive "Concat", ([_xs; _ys] as l) -> norm in_dedup env (Q.Concat l) - | Q.Primitive "ConcatMap", [f; xs] -> + | Q.Primitive "ConcatMap", [f; xs] -> apply_concatMap in_dedup env Q.Entries xs f + | Q.Primitive "ConcatMapKey", [f; xs] -> apply_concatMap in_dedup env Q.Keys xs f + | Q.Primitive "Map", [f; xs] -> begin match f with | Q.Closure (([x], body), closure_env) -> - (* Debug.print ("Applying ConcatMap"); - Debug.print ("f: " ^ Q.show f); - Debug.print ("xs: " ^ Q.show xs); *) - (fun cenv -> computation cenv body) - |> mk_for_term (env ++ closure_env) (x,xs) + (fun cenv -> Q.Singleton (computation cenv body)) + |> mk_for_term (env ++ closure_env) (Q.Entries,x,xs) |> norm in_dedup env | _ -> assert false end - | Q.Primitive "Map", [f; xs] -> + | Q.Primitive "GroupBy", [f; q] -> begin match f with - | Q.Closure (([x], body), closure_env) -> - (fun cenv -> Q.Singleton (computation cenv body)) - |> mk_for_term (env ++ closure_env) (x,xs) - |> norm in_dedup env + | Q.Closure (([x], body_c), closure_env) -> + let tyx = TypeUtils.element_type ~overstep_quantifiers:true (Q.type_of_expression q) in + let vx = Q.Var (x, tyx) in + let body_env = Q.bind (env ++ closure_env) (x,vx) in + let body = computation body_env body_c in + norm in_dedup env (Q.GroupBy ((x, body), q)) | _ -> assert false end + | Q.Primitive "Lookup", [v; q] -> norm in_dedup env (Q.Lookup (q, v)) | Q.Primitive "SortBy", [f; xs] -> begin match xs with @@ -791,7 +907,6 @@ struct and retn in_dedup u = if in_dedup then Q.Dedup u else u (* specialize norm_* with in_dedup = false at the start of normalization *) - (* (norm is currently unused outside query.ml, so we comment the 
following) *) let norm = norm false let norm_comp = norm_comp false diff --git a/core/query/mixingQuery.mli b/core/query/mixingQuery.mli index 789daab11..69f07ea24 100644 --- a/core/query/mixingQuery.mli +++ b/core/query/mixingQuery.mli @@ -14,7 +14,7 @@ open CommonTypes val flatfield : string -> string -> string val flattened_pair : QueryLang.t -> QueryLang.t -> QueryLang.t val flattened_pair_ft : QueryLang.t -> QueryLang.t -> Types.datatype stringmap -val type_of_for_var : QueryLang.t -> Types.datatype +val type_of_for_var : QueryLang.genkind -> QueryLang.t -> Types.datatype val reduce_where_then : QueryLang.t * QueryLang.t -> QueryLang.t val reduce_and : QueryLang.t * QueryLang.t -> QueryLang.t diff --git a/core/query/query.ml b/core/query/query.ml index 408425660..45eaffe84 100644 --- a/core/query/query.ml +++ b/core/query/query.ml @@ -22,9 +22,9 @@ let rec freshen_for_bindings : Var.var Env.Int.t -> Q.t -> Q.t = | Q.For (tag, gs, os, b) -> let gs', env' = List.fold_left - (fun (gs', env') (x, source) -> + (fun (gs', env') (genkind, x, source) -> let y = Var.fresh_raw_var () in - ((y, ffb source)::gs', Env.Int.bind x y env')) + ((genkind, y, ffb source)::gs', Env.Int.bind x y env')) ([], env) gs in @@ -32,6 +32,7 @@ let rec freshen_for_bindings : Var.var Env.Int.t -> Q.t -> Q.t = | Q.If (c, t, e) -> Q.If (ffb c, ffb t, ffb e) | Q.Table _ as t -> t | Q.Singleton v -> Q.Singleton (ffb v) + | Q.MapEntry (k,v) -> Q.MapEntry (ffb k, ffb v) | Q.Database db -> Q.Database db | Q.Concat vs -> Q.Concat (List.map ffb vs) | Q.Dedup t -> Q.Dedup (ffb t) @@ -54,6 +55,12 @@ let rec freshen_for_bindings : Var.var Env.Int.t -> Q.t -> Q.t = | Some y -> Q.Var (y, ts) end | Q.Constant c -> Q.Constant c + | Q.GroupBy ((x,k),q) -> + let y = Var.fresh_raw_var () in + let env' = Env.Int.bind x y env in + Q.GroupBy ((y,freshen_for_bindings env' k), ffb q) + | Q.AggBy (ar, q) -> Q.AggBy (ar, ffb q) + | Q.Lookup (q,k) -> Q.Lookup (ffb q, ffb k) (* simple optimisations *) let reduce_and (a, b) = @@ -168,7 +175,7 @@ let rec reduce_for_source : Q.t * (Q.t -> Q.t) -> Q.t = | Current -> let x = Var.fresh_raw_var () in let ty_elem = Types.Record (Types.Row row) in - reduce_for_body ([(x, source)], [], body (Q.Var (x, ty_elem))) + reduce_for_body ([(Q.Entries, x, source)], [], body (Q.Var (x, ty_elem))) | Temporality.Transaction | Temporality.Valid -> let (from_field, to_field) = OptionUtils.val_of temporal_fields in (* Transaction / Valid-time tables: Need to wrap as metadata *) @@ -196,7 +203,7 @@ let rec reduce_for_source : Q.t * (Q.t -> Q.t) -> Q.t = (TemporalField.to_field, Q.Project (table_var, to_field)) ] in - let generators = [ (table_raw_var, source) ] in + let generators = [ (Q.Entries, table_raw_var, source) ] in reduce_for_body (generators, [], body (Q.Record metadata_record)) end | v -> Q.query_error "Bad source in for comprehension: %s" (Q.string_of_t v) diff --git a/core/query/queryLang.ml b/core/query/queryLang.ml index c76f073ec..f49327c9b 100644 --- a/core/query/queryLang.ml +++ b/core/query/queryLang.ml @@ -33,15 +33,22 @@ type base_type = | Bool | Char | Float | Int | String | DateTime type tag = int [@@deriving show] +type genkind = Entries | Keys + [@@deriving show] + type t = - | For of tag option * (Var.var * t) list * t list * t + | For of tag option * (genkind * Var.var * t) list * t list * t | If of t * t * t | Table of Value.table | Database of (Value.database * string) | Singleton of t + | MapEntry of t * t | Concat of t list | Dedup of t | Prom of t + | GroupBy of (Var.var * t) * t + | 
AggBy of (t * string) StringMap.t * t + | Lookup of t * t | Record of t StringMap.t | Project of t * string | Erase of t * StringSet.t @@ -62,13 +69,17 @@ module S = struct (** [pt]: A printable version of [t] *) type pt = - | For of (Var.var * pt) list * pt list * pt + | For of (genkind * Var.var * pt) list * pt list * pt | If of pt * pt * pt | Table of Value.table | Singleton of pt + | MapEntry of pt * pt | Concat of pt list | Dedup of pt | Prom of pt + | GroupBy of (Var.var * pt) * pt + | AggBy of (pt * string) StringMap.t * pt + | Lookup of pt * pt | Record of pt StringMap.t | Project of pt * string | Erase of pt * StringSet.t @@ -87,11 +98,12 @@ let rec pt_of_t : 't -> S.pt = fun v -> let bt = pt_of_t in match v with | For (_, gs, os, b) -> - S.For (List.map (fun (x, source) -> (x, bt source)) gs, + S.For (List.map (fun (genkind, x, source) -> (genkind, x, bt source)) gs, List.map bt os, bt b) | If (c, t, e) -> S.If (bt c, bt t, bt e) | Table t -> S.Table t + | MapEntry (k,v) -> S.MapEntry (bt k, bt v) | Singleton v -> S.Singleton (bt v) | Concat vs -> S.Concat (List.map bt vs) | Dedup q -> S.Dedup (bt q) @@ -107,6 +119,9 @@ let rec pt_of_t : 't -> S.pt = fun v -> | Primitive f -> S.Primitive f | Var (v, t) -> S.Var (v, t) | Constant c -> S.Constant c + | GroupBy ((x,k), q) -> S.GroupBy ((x, bt k), bt q) + | AggBy (ar, q) -> S.AggBy (StringMap.map (fun (x,y) -> bt x, y) ar, bt q) + | Lookup (q,k) -> S.Lookup (bt q, bt k) | Database _ -> assert false let string_of_t = S.show_pt -<- pt_of_t @@ -126,10 +141,13 @@ let default_of_base_type = let rec value_of_expression = fun v -> let ve = value_of_expression in - let value_of_singleton = fun s -> - match s with - | Singleton v -> ve v - | _ -> assert false + let value_of_mapentry = function + | MapEntry (k, v) -> `Entry (ve k, ve v) + | v -> ve v + in + let value_of_singleton = function + | Singleton v -> value_of_mapentry v + | _ -> assert false in match v with | Constant (Constant.Bool b) -> `Bool b @@ -197,6 +215,7 @@ let unbox_pair = x, y | _ -> raise (runtime_type_error "failed to unbox pair") +(* XXX: not updated for grouping: only lists, not maps! *) let rec unbox_list = function | Concat vs -> concat_map unbox_list vs @@ -215,7 +234,8 @@ let unbox_string = implode (List.map (function - | Constant (Constant.Char c) -> c + (* BUG? 
assumes we will only unbox from plain lists, not maps *) + | (Constant (Constant.Char c)) -> c | _ -> raise (runtime_type_error "failed to unbox string")) (unbox_list v)) | _ -> raise (runtime_type_error "failed to unbox string") @@ -229,6 +249,7 @@ let rec subst t x u = | Var (var, _) when var = x -> u | Record fl -> Record (StringMap.map srec fl) | Singleton v -> Singleton (srec v) + | MapEntry (k, v) -> MapEntry (srec k, srec v) | Concat xs -> Concat (List.map srec xs) | Project (r, label) -> Project (srec r, label) | Erase (r, labels) -> Erase (srec r, labels) @@ -236,7 +257,7 @@ let rec subst t x u = | Apply (f, xs) -> Apply (srec f, List.map srec xs) | For (_, gs, os, u) -> (* XXX: assuming fresh x!*) - let gs' = List.map (fun (v,g) -> (v, srec g)) gs in + let gs' = List.map (fun (genkind, v,g) -> (genkind, v, srec g)) gs in let os' = List.map srec os in let u' = srec u in For (None, gs', os', u') @@ -252,6 +273,11 @@ let rec subst t x u = | Closure (c, closure_env) -> let cenv = bind closure_env (x,u) in Closure (c, cenv) + | AggBy (ar, q) -> AggBy (StringMap.map (fun (t0,l) -> srec t0, l) ar, srec q) + | GroupBy ((v,i), q) -> + let i' = if v = x then i else srec i in + let q' = srec q in + GroupBy ((v,i'), q') | v -> v (** Returns (Some ty) if v occurs free with type ty, None otherwise *) @@ -271,24 +297,27 @@ let occurs_free (v : Var.var) = occf bvs' b ||= tryPick (fun _ q -> occf bvs q) e *) failwith "MixingQuery.occurs_free: unexpected Closure in query" | Apply (t, args) -> occf bvs t ||=? list_tryPick (occf bvs) args - | Singleton t + | Singleton t -> occf bvs t + | MapEntry (k, t) -> occf bvs k ||=? occf bvs t | Dedup t | Prom t | Project (t,_) -> occf bvs t | Concat tl -> list_tryPick (occf bvs) tl | For (_, gs, _os, b) -> (* FIXME: do we need to check os as well? *) - let bvs'', res = List.fold_left (fun (bvs',acc) (w,q) -> w::bvs', acc ||=? occf bvs' q) (bvs, None) gs in + let bvs'', res = List.fold_left (fun (bvs',acc) (_genkind,w,q) -> w::bvs', acc ||=? occf bvs' q) (bvs, None) gs in res ||=? occf bvs'' b | Record fl -> map_tryPick (fun _ t -> occf bvs t) fl + | GroupBy ((v,i), q) -> occf (v::bvs) i ||=? occf bvs q + | AggBy (ar, q) -> map_tryPick (fun _ (t, _) -> occf bvs t) ar ||=? 
occf bvs q | _ -> None in occf [] (** Returns Some (x,qx,tyx) for the first generator x <- qx such that x occurs free with type tyx *) -let rec occurs_free_gens (gs : (Var.var * t) list) q = +let rec occurs_free_gens (gs : (genkind * Var.var * t) list) q = match gs with | [] -> None - | (x,qx)::gs' -> + | (_genkind,x,qx)::gs' -> match occurs_free x (For (None, gs', [], q)) with | Some tyx -> Some (x,qx,tyx) | None -> occurs_free_gens gs' q @@ -306,7 +335,25 @@ let rec type_of_expression : t -> Types.datatype = fun v -> | Concat [] -> Types.make_list_type(Types.unit_type) | Concat (v::_) -> te v | For (_, _, _os, body) -> te body + | GroupBy ((_x,i),q) -> + let ity = te i in + let elty = TypeUtils.element_type ~overstep_quantifiers:true (te q) in + Types.make_mapentry_type ity elty + |> Types.make_list_type + | AggBy (aggs,q) -> + let tyk = te q |> Types.unwrap_map_type |> fst in + let ty = StringMap.map (function (Primitive f,_) -> TypeUtils.return_type (Env.String.find f Lib.type_env) | _ -> assert false) aggs + |> Types.make_record_type + in + Types.make_mapentry_type tyk ty |> Types.make_list_type + | Lookup (q, _) -> + Types.unwrap_map_type (te q) + |> snd | Singleton t -> Types.make_list_type (te t) + | MapEntry (k,v) -> + let tyk = te k in + let tyv = te v in + Types.make_mapentry_type tyk tyv | Record fields -> record fields | If (_, t, _) -> te t | Table Value.Table.{ row; _ } -> Types.make_list_type (Types.Record (Types.Row row)) @@ -332,6 +379,16 @@ let rec type_of_expression : t -> Types.datatype = fun v -> (string_of_t w) Types.pp_datatype ty) end | Apply (Primitive "Empty", _) -> Types.bool_type (* HACK *) + | Apply (Primitive "Sum", _) -> Types.int_type + | Apply (Primitive "SumF", _) -> Types.float_type + | Apply (Primitive "Avg", _) -> Types.float_type + | Apply (Primitive "AvgF", _) -> Types.float_type + | Apply (Primitive "Min", _) -> Types.int_type + | Apply (Primitive "MinF", _) -> Types.float_type + | Apply (Primitive "Max", _) -> Types.int_type + | Apply (Primitive "MaxF", _) -> Types.float_type + | Apply (Primitive "length", _) -> Types.int_type + (* XXX: the following might be completely unnecessary if we call type_of_expression only on normalized query *) | Apply (Primitive "Distinct", [q]) -> type_of_expression q | Apply (Primitive f, _) -> TypeUtils.return_type (Env.String.find f Lib.type_env) | e -> Debug.print("Can't deduce type for: " ^ show e); assert false @@ -352,7 +409,8 @@ let eta_expand_list xs = let x = Var.fresh_raw_var () in let ty = TypeUtils.element_type ~overstep_quantifiers:true (type_of_expression xs) in (* Debug.print ("eta_expand_list create: " ^ show (Var (x, ty))); *) - ([x, xs], [], Singleton (eta_expand_var (x, ty))) + (* BUG? this assumes no maps! *) + ([Entries, x, xs], [], Singleton (eta_expand_var (x, ty))) (* takes a normal form expression and returns true iff it has list type *) let is_list = @@ -384,8 +442,9 @@ let used_database : t -> Value.database option = | Prom q -> used q | Dedup q -> used_item q | Table Value.Table.{ database = (db, _); _ } -> Some db - | For (_, gs, _, _body) -> List.map snd gs |> traverse - | Singleton v -> used v + | For (_, gs, _, _body) -> List.map (fun (_,_,src) -> src) gs |> traverse + | Singleton v -> used_item v + | MapEntry (k,v) -> used_item v ||=? 
used_item k | Record v -> StringMap.to_alist v |> List.map snd @@ -400,6 +459,10 @@ let used_database : t -> Value.database option = traverse (scrutinee :: (cases @ default)) | Erase (x, _) -> used x | Variant (_, x) -> used x + | AggBy (aggs, q) -> + let aggs' = StringMap.to_alist aggs |> List.map (fun (_,(x,_)) -> x) in + traverse (q::aggs') + | GroupBy ((_,i), q) -> traverse [q;i] | _ -> None and used = function @@ -445,12 +508,36 @@ let lookup_fun env (f, fvs) = Primitive "Distinct" | "concatMap" -> Primitive "ConcatMap" + | "concatMapKey" -> + Primitive "ConcatMapKey" | "map" -> Primitive "Map" + | "sum" -> + Primitive "Sum" + | "sumF" -> + Primitive "SumF" + | "avg" -> + Primitive "Avg" + | "avgF" -> + Primitive "AvgF" + | "min_list" -> + Primitive "Min" + | "minF_list" -> + Primitive "MinF" + | "max_list" -> + Primitive "Max" + | "maxF_list" -> + Primitive "MaxF" | "empty" -> Primitive "Empty" | "sortByBase" -> Primitive "SortBy" + | "groupBy" | "groupByMap" -> + Primitive "GroupBy" + | "aggBy" -> + Primitive "AggBy" + | "lookupG" -> + Primitive "Lookup" | _ -> begin match location with @@ -486,6 +573,7 @@ let rec expression_of_value : env -> Value.t -> t = fun env v -> | `Database db -> Database db | `List vs -> Concat (List.map (fun v -> Singleton (expression_of_value env v)) vs) + | `Entry (k,v) -> MapEntry (expression_of_value env k, expression_of_value env v) | `Record fields -> Record (List.fold_left @@ -553,6 +641,7 @@ let check_policies_compatible env_policy block_policy = let rec string = function | Constant (Constant.String s) -> Some (str (quote s)) + (* BUGBUG: don't know how to process maps yet *) | Singleton (Constant (Constant.Char c)) -> Some (str (string_of_char c)) | Project (v, field) -> @@ -596,6 +685,7 @@ let check_policies_compatible env_policy block_policy = end end in + (* grouping BUG? what happens if rs is a map? *) seq (unbox_list rs) | Variant ("StartAnchor", _) -> Some (str "") | Variant ("EndAnchor", _) -> Some (str "") @@ -610,22 +700,22 @@ let rec select_clause : Sql.index -> bool -> t -> Sql.select_clause = | Concat _ -> assert false | For (_, [], _, body) -> select_clause index unit_query body - | For (_, (x, Table Value.Table.{ name; _ })::gs, os, body) -> + | For (_, (_genkind, x, Table Value.Table.{ name; _ })::gs, os, body) -> let body = select_clause index unit_query (For (None, gs, [], body)) in let os = List.map (base index) os in begin match body with - | (_, fields, tables, condition, []) -> - (Sql.All, fields, Sql.TableRef(name, x)::tables, condition, os) + | (_, fields, tables, condition, [], []) -> + (Sql.All, fields, Sql.TableRef(name, x)::tables, condition, [], os) | _ -> assert false end | If (c, body, Concat []) -> (* Turn conditionals into where clauses. We might want to do this earlier on. *) let c = base index c in - let (_, fields, tables, c', os) = select_clause index unit_query body in + let (_, fields, tables, c', gbys, os) = select_clause index unit_query body in let c = Sql.smart_and c c' in - (Sql.All, fields, tables, c, os) + (Sql.All, fields, tables, c, gbys, os) | Table Value.Table.{ name = table; row = (fields, _, _); _ } -> (* eta expand tables. We might want to do this earlier on. 
*) (* In fact this should never be necessary as it is impossible @@ -640,14 +730,15 @@ let rec select_clause : Sql.index -> bool -> t -> Sql.select_clause = fields [])) in - (Sql.All, fields, [Sql.TableRef(table, var)], Sql.Constant (Constant.Bool true), []) + (Sql.All, fields, [Sql.TableRef(table, var)], Sql.Constant (Constant.Bool true), [], []) | Singleton _ when unit_query -> (* If we're inside an Sql.Empty or a Sql.Length it's safe to ignore any fields here. *) (* We currently detect this earlier, so the unit_query stuff here is redundant. *) - (Sql.All, Sql.Fields [], [], Sql.Constant (Constant.Bool true), []) + (Sql.All, Sql.Fields [], [], Sql.Constant (Constant.Bool true), [], []) | Singleton (Record fields) -> + (* this code is only used in the non-mixing normalizer and thus doesn't support grouping *) let fields = Sql.Fields (List.rev @@ -657,7 +748,7 @@ let rec select_clause : Sql.index -> bool -> t -> Sql.select_clause = fields [])) in - (Sql.All, fields, [], Sql.Constant (Constant.Bool true), []) + (Sql.All, fields, [], Sql.Constant (Constant.Bool true), [], []) | _ -> assert false and clause : Sql.index -> bool -> t -> Sql.query = fun index unit_query v -> Sql.Select(select_clause index unit_query v) @@ -722,7 +813,7 @@ type let_clause = Var.var * t * Var.var * t type let_query = let_clause list -let gens_index (gs : (Var.var * t) list) = +let gens_index (gs : (genkind * Var.var * t) list) = let open Value.Table in let all_fields t = let field_types = table_field_types t in @@ -734,7 +825,7 @@ let gens_index (gs : (Var.var * t) list) = | (ks::_) -> StringSet.from_list ks | _ -> all_fields t in - let table_index (x, source) = + let table_index (_genkind, x, source) = let t = match source with Table t -> t | _ -> assert false in let labels = key_fields t in List.rev @@ -759,9 +850,9 @@ let let_clause : let_clause -> Sql.query = let gs_out = extract_gens outer in let gs_in = extract_gens inner in let q_outer = clause (outer_index gs_out) false outer in - let (_fDist, result,tables,where,os) = select_clause (inner_index t gs_in) false inner in + let (_fDist, result,tables,where,gbys,os) = select_clause (inner_index t gs_in) false inner in let tablename = Sql.string_of_subquery_var q in - let q_inner = Sql.Select(Sql.All,result,Sql.TableRef(tablename,t)::tables,where,os) in + let q_inner = Sql.Select(Sql.All,result,Sql.TableRef(tablename,t)::tables,where,gbys,os) in Sql.With (tablename, q_outer, [q_inner]) let sql_of_let_query : let_query -> Sql.query = @@ -851,9 +942,9 @@ struct | For (tag_opt, gs, os, body) -> let (o, tag_opt) = o#option (fun o -> o#tag) tag_opt in let (o, gs) = - o#list (fun o (v, t) -> + o#list (fun o (k, v, t) -> let (o, t) = o#query t in - (o, (v, t))) gs in + (o, (k, v, t))) gs in let (o, os) = o#list (fun o -> o#query) os in let (o, body) = o#query body in (o, For (tag_opt, gs, os, body)) @@ -865,6 +956,10 @@ struct | Table t -> (o, Table t) | Database (dt, s) -> (o, Database (dt, s)) | Singleton x -> let (o, x) = o#query x in (o, Singleton x) + | MapEntry (k,x) -> + let (o, k) = o#query k in + let (o, x) = o#query x + in (o, MapEntry (k,x)) | Concat xs -> let (o, xs) = o#list (fun o -> o#query) xs in (o, Concat xs) | Dedup q -> let (o, q) = o#query q in @@ -907,5 +1002,208 @@ struct | Primitive x -> (o, Primitive x) | Var (v, dts) -> (o, Var (v, dts)) | Constant c -> (o, Constant c) + | GroupBy ((v,i),q) -> + let (o,i) = o#query i in + let (o,q) = o#query q in + (o, GroupBy ((v,i),q)) + | AggBy (ar,q) -> + let (o,ar) = StringMap.fold (fun l_in (v, l_out) 
(o, acc) -> + let (o, v) = o#query v in + (o, StringMap.add l_in (v, l_out) acc)) ar (o, StringMap.empty) + in + let (o,q) = o#query q in + (o, AggBy (ar, q)) + | Lookup (q,i) -> + let (o,q) = o#query q in + let (o,i) = o#query i in + (o, Lookup (q,i)) end end + +module FlattenRecords = +struct + + (* this is a lightly generalised version of the flattening used by shredding + * TODO: verify that shredding works well with this version and remove the legacy code *) + let rec flatten_base_type = function + | Types.Primitive _ as t -> t + | Types.Record fields -> + Types.make_record_type + (StringMap.fold + (fun name t fields -> + match flatten_base_type t with + | Types.Record inner_fields -> + StringMap.fold + (fun name' t fields -> + StringMap.add (name ^ "@" ^ name') t fields) + (field_types_of_row inner_fields) + fields + | Types.Primitive _ as t -> + StringMap.add name t fields + | _ -> assert false) + (field_types_of_row fields) + StringMap.empty) + | t (* MapEntry *) -> + let kty, vty = Types.unwrap_mapentry_type t in + let kty' = flatten_base_type kty in + let vty' = flatten_base_type vty in + Types.make_mapentry_type kty' vty' + + let flatten_query_type t = + let t' = Types.unwrap_list_type t |> flatten_base_type in + match t' with + | Types.Record _ -> Types.make_list_type t' + | _ -> StringMap.add "@" t' StringMap.empty |> Types.make_record_type |> Types.make_list_type + + let rec flatten_inner : t -> t = + let is_aggr_primitive = function + | "Sum" | "SumF" | "Avg" | "AvgF" | "Min" | "MinF" | "Max" | "MaxF" | "length" -> true + | _ -> false + in + function + | Constant c -> Constant c + | Primitive p -> Primitive p + | Apply (Primitive "Empty", [e]) -> Apply (Primitive "Empty", [flatten_inner_query e]) + | Apply (Primitive f as p, [e]) when is_aggr_primitive f -> Apply (p, [flatten_inner_query e]) + | Apply (Primitive "tilde", [s; r]) as e -> + Debug.print ("Applying flatten_inner to tilde expression: " ^ show e); + Apply (Primitive "tilde", [flatten_inner s; flatten_inner r]) + | Apply (Primitive f, es) -> Apply (Primitive f, List.map flatten_inner es) + | If (c, t, e) -> + If (flatten_inner c, flatten_inner t, flatten_inner e) + | MapEntry (k,v) -> MapEntry (flatten_inner k, flatten_inner v) + | Project (_,_) as e -> + let rec flatten_projs acc = function + | Project (e', l) -> flatten_projs (l::acc) e' + | Var (_,_) as e' -> + (* HACK: FIXME? 
this keeps z annotated with its original unflattened type *) + (* (we could use the flatten_type above, but we probably don't need the type to be accurate + * as all eta expansions have already happened) *) + let l' = acc |> List.rev |> String.concat "@" + in Project (e', l') + | _ -> assert false + in flatten_projs [] e + | Record fields -> + let extend name name' = name ^ "@" ^ name' in + (* concatenate labels of nested records *) + Record + (StringMap.fold + (fun name body fields -> + match flatten_inner body with + | Record inner_fields -> + StringMap.fold + (fun name' body fields -> + StringMap.add (extend name name') body fields) + inner_fields + fields + | body -> + StringMap.add name body fields) + fields + StringMap.empty) + | Variant ("Simply", x) -> + Variant ("Simply", flatten_inner x) + | Variant ("Seq", Singleton r) -> + Variant ("Seq", Singleton (flatten_inner r)) + | Variant ("Seq", Concat rs) -> + Variant ("Seq", + Concat (List.map ( + function | Singleton x -> Singleton (flatten_inner x) | _ -> assert false) rs)) + | Variant ("Quote", Variant ("Simply", v)) -> + Variant ("Quote", Variant ("Simply", flatten_inner v)) + (* Other regex variants which don't need to be traversed *) + | Variant (s, x) when s = "Repeat" || s = "StartAnchor" || s = "EndAnchor" -> + Variant (s, x) + | e -> + Debug.print ("Can't apply flatten_inner to: " ^ show e); + assert false + + and flatten_inner_query : t -> t = fun e -> flatten_comprehension e + + and flatten_comprehension : t -> t = + function + | For (tag, gs, os, body) -> + (* for heterogeneous and grouping, we need recursion on gs *) + let gs' = List.map (fun (pol,x,g) -> pol, x, flatten_comprehension g) gs in + let body' = flatten_comprehension body in + (* BUG BUG: flattening will render os useless *) + For (tag, gs', os, body') + | GroupBy ((x,kc), v) -> GroupBy ((x, flatten_inner kc), flatten_comprehension v) + | AggBy _ as q -> q (* aggregation is assumed to be flat *) + | Prom q -> Prom (flatten_comprehension q) + | If (c, e, Concat []) -> + If (flatten_inner c, flatten_comprehension e, Concat []) + | Singleton e -> + let e' = + (* lift base expressions to records *) + match flatten_inner e with + | MapEntry (Record _, Record _) + | Record _ as p -> p + | MapEntry (_, _) -> assert false (* we don't want to handle the case of MapEntries not containing records *) + | p -> Record (StringMap.add "@" p StringMap.empty) + in + Singleton e' + (* HACK: not sure if Concat is supposed to appear here... + but it can do inside "Empty" or "Length". 
*) + | Concat es -> + Concat (List.map flatten_comprehension es) + | Table _ | Dedup _ as e -> + (* this is a (possibly deduplicated) table: it must be already flat *) + e + | e -> + Debug.print ("Can't apply flatten_comprehension to: " ^ show e); + assert false + + let flatten_query = flatten_comprehension + + (* unflattens a flattened record according to a given nested record type *) + let rec unflatten_record ?(prefix = "") nty frow : Value.t = + let ur = unflatten_record in + let extend_label l = if prefix = "" then l else prefix ^ "@" ^ l in + let base_label = if prefix = "" then "@" else prefix in + match nty with + | Types.Primitive _ -> List.assoc base_label frow + | Types.Record nrow -> + let nfields = + StringMap.fold + <| (fun k v acc -> (k, ur ~prefix:(extend_label k) v frow)::acc) + <| field_types_of_row nrow + <| [] + in `Record nfields + | _ -> assert false + + let unflatten_query nty fval : Value.t = + let of_list = function `List l -> l | _ -> assert false in + let of_record = function `Record r -> r | _ -> assert false in + (* under the assumption that the given type is a list *) + match Types.unwrap_list_type nty with + (* special reconstruction for finite maps of relations, resulting from grouping *) + (* standard reconstruction of relations over nested records of primitives *) + | Types.Record _ | Types.Primitive _ as vty -> `List (List.map (fun r -> unflatten_record vty (of_record r)) (of_list fval)) + | t' (* assumed to be MapEntry *) -> + let kty, vty = Types.unwrap_mapentry_type t' in + let l = of_list fval in + let tbl = Hashtbl.create (List.length l) in + let insert (k,v) = + try + let vl = Hashtbl.find tbl k + in Hashtbl.replace tbl k (v::vl) + with NotFound _ -> Hashtbl.add tbl k [v] + in + let split r = + unflatten_record ~prefix:"1" kty r, + unflatten_record ~prefix:"2" vty r + in + let pair x y = `Record [("1",x);("2",y)] + in + List.iter (of_record ->- split ->- insert) l; + `List (Hashtbl.fold (fun k v acc -> pair k (`List v)::acc) tbl []) + + (* XXX: (bug?) from the shredding code, it would appear unit fields are not returned by a DB query + * and need to be inferred from the nested type when unflattening -- we're not doing that here + * + * or maybe we are? we proceed by case analysis on the nested type and, from the looks of it, + * the code, not finding any matching attribute in the DB result, should conjure a `Record StringMap.empty + * i.e. the unit value! 
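   *
   * For reference, a purely illustrative example of the flattening convention used in this module
   * (the record below is made up, not taken from the code): a nested record (a = (b = 1, c = 2), d = 3)
   * is flattened by joining labels with "@", giving (a@b = 1, a@c = 2, d = 3), and a singleton of a
   * bare base value v is wrapped as the one-field record (@ = v). For grouped queries, the flat
   * result row is read back with the grouping key under prefix "1" and the group member under
   * prefix "2" (see the split helper above); unflatten_query then gathers members that share a key
   * into a single (key, [members]) pair via the hash table.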
*) + +end + diff --git a/core/query/queryLang.mli b/core/query/queryLang.mli index 96445b59f..4f4c0ea8f 100644 --- a/core/query/queryLang.mli +++ b/core/query/queryLang.mli @@ -17,15 +17,22 @@ type base_type = | Bool | Char | Float | Int | String | DateTime type tag = int [@@deriving show] +type genkind = Entries | Keys + [@@deriving show] + type t = - | For of tag option * (Var.var * t) list * t list * t + | For of tag option * (genkind * Var.var * t) list * t list * t | If of t * t * t | Table of Value.table | Database of (Value.database * string) | Singleton of t + | MapEntry of t * t | Concat of t list | Dedup of t | Prom of t + | GroupBy of (Var.var * t) * t + | AggBy of (t * string) StringMap.t * t + | Lookup of t * t | Record of t StringMap.t | Project of t * string | Erase of t * StringSet.t @@ -78,13 +85,13 @@ val append_env : env -> env -> env val subst : t -> Var.var -> t -> t -val occurs_free_gens : (Var.var * t) list -> t -> (Var.var * t * Types.datatype) option +val occurs_free_gens : (genkind * Var.var * t) list -> t -> (Var.var * t * Types.datatype) option val type_of_expression : t -> Types.datatype val eta_expand_var : Var.var * Types.datatype -> t -val eta_expand_list : t -> (Var.var * t) list * t list * t +val eta_expand_list : t -> (genkind * Var.var * t) list * t list * t val default_of_base_type : Primitive.t -> t @@ -131,3 +138,10 @@ sig end module Transform : QUERY_VISITOR + +module FlattenRecords : + sig + val flatten_query_type : Types.t -> Types.t + val flatten_query : t -> t + val unflatten_query : Types.t -> Value.t -> Value.t + end diff --git a/core/query/sql.ml b/core/query/sql.ml index ebb26304f..58e41cd0d 100644 --- a/core/query/sql.ml +++ b/core/query/sql.ml @@ -29,7 +29,8 @@ and insert_records = | Values of (base list list) | TableQuery of Var.var and select_clause = - multiplicity * select_fields * from_clause list * base * base list + (* ( distinct_opt, fs, ts, cond, groupby, orderby) *) + multiplicity * select_fields * from_clause list * base * base list * base list and select_fields = | Star | Fields of (base * string) list @@ -41,6 +42,7 @@ and base = | Constant of Constant.t | Project of Var.var * string | Apply of string * base list + | Aggr of string * query | Empty of query | Length of query | RowNumber of (Var.var * string) list @@ -128,7 +130,11 @@ struct "toLower", "lower"; "ord", "ord"; "chr", "char"; - "random", "rand" ] + "random", "rand"; + "sum", "sum"; + "avg", "avg"; + "min", "min"; + "max", "max"] let is f = StringMap.mem f funs let name f = StringMap.find f funs @@ -177,9 +183,12 @@ class virtual printer = (* SQL doesn't support empty records, so this is a hack. 
*)
          Format.pp_print_string ppf "0 as \"@unit@\""
-  method pp_select ppf mult fields tables condition os ignore_fields =
+  method pp_select ppf mult fields tables condition gbys os ignore_fields =
     let pp_os_condition ppf a =
       Format.fprintf ppf "%a" (self#pp_base false) a in
+    (* XXX: probably same as above, but come back later *)
+    let pp_gby ppf a =
+       Format.fprintf ppf "%a" (self#pp_base false) a in
     let pr_q = self#pp_query ignore_fields in
     let pp_distinct ppf = function
       | Distinct -> Format.pp_print_string ppf "distinct "
@@ -191,6 +200,10 @@ class virtual printer =
       | _ -> Format.fprintf ppf "\norder by %a"
               (self#pp_comma_separated pp_os_condition) os in
+    let pp_groupby ppf = function
+      | [] -> ()
+      | gbys -> Format.fprintf ppf "\ngroup by %a" (self#pp_comma_separated pp_gby) gbys
+    in
     let pp_from_clause ppf fc =
       match fc with
       | TableRef (t, x) -> Format.fprintf ppf "%a as %s" self#pp_quote t (string_of_table_var x)
@@ -201,11 +214,12 @@
       | Constant (Constant.Bool true) -> ()
       | _ -> Format.fprintf ppf "\nwhere %a" pp_os_condition condition
     in
-    Format.fprintf ppf "select %a%a\nfrom %a%a%a"
+    Format.fprintf ppf "select %a%a\nfrom %a%a%a%a"
       pp_distinct mult
       self#pp_fields fields
       (self#pp_comma_separated pp_from_clause) tables
       pp_where condition
+      pp_groupby gbys
       pp_orderby os
   method private pr_b_ignore_fields = self#pp_base true
@@ -264,7 +278,7 @@
       | "^." ->
           Format.fprintf ppf "pow(%a,%a)" pr_b_one_table l pr_b_one_table r
-      | _ -> Format.fprintf ppf "(%a%s%a)"
+      | _ -> Format.fprintf ppf "((%a)%s(%a))"
              pr_b_one_table l
              (Arithmetic.sql_name op)
             pr_b_one_table r
@@ -307,15 +321,15 @@
        Format.fprintf ppf "%a%a"
          (Format.pp_print_list ~pp_sep:pp_sep_union pp_union_term) qs
          Format.pp_print_string (order_by_clause n)
-    | Select (_, fields, [], Constant (Constant.Bool true), _os) ->
+    | Select (_, fields, [], Constant (Constant.Bool true), _gbys, _os) ->
        Format.fprintf ppf "select %a" pp_fields fields
-    | Select (_, fields, [], condition, _os) ->
+    | Select (_, fields, [], condition, _gbys, _os) ->
        Format.fprintf ppf "select * from (select %a) as %a where %a"
          pp_fields fields
          Format.pp_print_string (fresh_dummy_var ())
          pr_b condition
-    | Select (mult, fields, tables, condition, os) ->
-       self#pp_select ppf mult fields tables condition os ignore_fields
+    | Select (mult, fields, tables, condition, gbys, os) ->
+       self#pp_select ppf mult fields tables condition gbys os ignore_fields
    | Delete { del_table; del_where } ->
        self#pp_delete ppf del_table del_where
    | Update { upd_table; upd_fields; upd_where } ->
@@ -411,6 +425,13 @@
        Format.fprintf ppf "select count(*) from (%a) as %a"
          pr_q_true q
          Format.pp_print_string (fresh_dummy_var ())
+    | Aggr (f, q) ->
+       let v = fresh_table_var () in
+       Format.fprintf ppf "select %a(%a) from (%a) as %a"
+         Format.pp_print_string (SqlFuns.name f)
+         (self#pp_projection true) (v, "@")
+         pr_q_true q
+         Format.pp_print_string (string_of_table_var v)
    | RowNumber [] ->
        Format.fprintf ppf "%a" Format.pp_print_string "1"
    | RowNumber ps ->
@@ -448,7 +469,7 @@ let rec inline_outer_with q =
     | fromclause -> fromclause
   in
   match q with
-  | With (z, q, [Select (fSet, fields, tables, condition, os)]) ->
-      Select(fSet, fields, List.map (replace_subquery z q) tables, condition, os)
+  | With (z, q, [Select (fSet, fields, tables, condition, gbys, os)]) ->
+      Select(fSet, fields, List.map (replace_subquery z q) tables, condition, gbys, os)
  | Union (fSet, qs,n) -> Union (fSet, List.map inline_outer_with
qs,n) | q -> q diff --git a/core/query/temporalQuery.ml b/core/query/temporalQuery.ml index 913fd2e1b..c52ccaef3 100644 --- a/core/query/temporalQuery.ml +++ b/core/query/temporalQuery.ml @@ -115,7 +115,7 @@ module TransactionTime = struct (* And here's the selection query: *) let sel_query = - Sql.Select (Sql.All, select_fields, [TableRef (table, tbl_var)], sel_where, []) in + Sql.Select (Sql.All, select_fields, [TableRef (table, tbl_var)], sel_where, [], []) in (* Generate fresh variable for selection result *) let sel_var = Var.fresh_raw_var () in @@ -335,7 +335,7 @@ module ValidTime = struct in let select = - Sql.Select (All, Fields select_fields, [TableRef (table, tbl_var)], sel_where, []) in + Sql.Select (All, Fields select_fields, [TableRef (table, tbl_var)], sel_where, [], []) in (* Generate fresh variable for selection result *) let sel_var = Var.fresh_raw_var () in @@ -447,7 +447,7 @@ module ValidTime = struct (* Need to swap (col, val) pairs to (val, col) to fit select_clause AST, * which mirrors "SELECT V as K" form in SQL *) |> List.map (fun (k, v) -> (v, k)) in - Sql.Select (All, Fields fields, [TableRef (table, tbl_var)], where, []) in + Sql.Select (All, Fields fields, [TableRef (table, tbl_var)], where, [], []) in let insert_select sel = let var = Var.fresh_raw_var () in @@ -623,7 +623,7 @@ module ValidTime = struct |> and_where in let sel_query = - Sql.Select (All, Fields select_fields, [TableRef (table, tbl_var)], sel_where, []) in + Sql.Select (All, Fields select_fields, [TableRef (table, tbl_var)], sel_where, [], []) in (* Generate fresh variable for selection result *) let sel_var = Var.fresh_raw_var () in @@ -812,17 +812,17 @@ module TemporalJoin = struct let tables = (* Restrict attention to ValidTime or TransactionTime tables *) List.filter_map (function - | (v, Q.Table ({ temporality; _ } as t)) + | (genkind, v, Q.Table ({ temporality; _ } as t)) when temporality = Temporality.Valid || temporality = Temporality.Transaction -> - Some (v, t) + Some (genkind, v, t) | _ -> None) gens in (* Ensure that all tables correspond to the given temporality *) let matches_mode x = x.temporality = temporality in - let () = List.iter (fun x -> - if matches_mode (snd x) then () else + let () = List.iter (fun (_,_,src) -> + if matches_mode src then () else raise (Errors.runtime_error ("All tables in a temporal join must match the " ^ @@ -832,7 +832,7 @@ module TemporalJoin = struct (* Create a Var for each variable -- requires creating a type from the row and field names. *) let tables = - List.map (fun (v, x) -> + List.map (fun (_genkind, v, x) -> (* Always defined for Valid / Transaction time *) (* Might want a better representation -- this screams bad design. 
*) let (from_field, to_field) = diff --git a/core/resolveJsonState.ml b/core/resolveJsonState.ml index 0e4aecc30..449a3accd 100644 --- a/core/resolveJsonState.ml +++ b/core/resolveJsonState.ml @@ -39,6 +39,7 @@ let rec extract_json_values : Value.t -> (handler_id_set * (Value.chan list)) = | `Record fields -> let _ls, vs = List.split fields in extract_from_values vs + | `Entry (k, v) -> (* WR: HACK for grouping *) extract_from_values [k;v] | `List (elems) -> extract_from_values elems and extract_from_primitive : Value.primitive_value -> handler_id_set = function (* Everything is empty except XML items *) diff --git a/core/serialisation.ml b/core/serialisation.ml index a9bfd74cd..b570909cf 100644 --- a/core/serialisation.ml +++ b/core/serialisation.ml @@ -126,6 +126,7 @@ module Compressible = struct | `List of compressed_t list | `Record of (string * compressed_t) list | `Variant of string * compressed_t + | `Entry of compressed_t * compressed_t | `FunctionPtr of (Ir.var * compressed_t option) | `PrimitiveFunction of string | `ClientDomRef of int @@ -163,6 +164,7 @@ module Compressible = struct | `List vs -> `List (List.map compress vs) | `Record fields -> `Record (List.map (fun (name, v) -> (name, compress v)) fields) | `Variant (name, v) -> `Variant (name, compress v) + | `Entry (k, v) -> `Entry (compress k, compress v) | `FunctionPtr(x, fvs) -> `FunctionPtr (x, Utility.opt_map compress fvs) | `PrimitiveFunction (f, _op) -> `PrimitiveFunction f @@ -208,6 +210,7 @@ module Compressible = struct | `List vs -> `List (List.map decompress vs) | `Record fields -> `Record (List.map (fun (name, v) -> (name, decompress v)) fields) | `Variant (name, v) -> `Variant (name, decompress v) + | `Entry (k, v) -> `Entry (decompress k, decompress v) | `FunctionPtr (x, fvs) -> `FunctionPtr (x, Utility.opt_map decompress fvs) | `PrimitiveFunction f -> `PrimitiveFunction (f,None) | `ClientDomRef i -> `ClientDomRef i diff --git a/core/typeSugar.ml b/core/typeSugar.ml index e9d9fcaf8..f9f5225b7 100644 --- a/core/typeSugar.ml +++ b/core/typeSugar.ml @@ -3548,9 +3548,10 @@ let rec type_check : context -> phrase -> phrase * Types.datatype * Usage.t = let p = type_check (bind_effects context inner_effects) p in let () = match policy with - | Nested -> () + (* XXX: relaxed (perhaps too much!) to allow nested record flattening in the Mixing normalizer *) | Mixing | Delat + | Nested -> () | Flat -> let shape = Types.make_list_type diff --git a/core/types.ml b/core/types.ml index 6449adac9..6a8690b24 100644 --- a/core/types.ml +++ b/core/types.ml @@ -61,6 +61,14 @@ let list = { arity = [pk_type, (lin_unl, res_any)] ; } +(* WR: used by DB queries *) +let mapentry = { + Abstype.id = "MapEntry" ; + name = "MapEntry" ; + arity = [pk_type, (lin_unl, res_any) + ;pk_type, (lin_unl, res_any)] ; +} + let event = { Abstype.id = "Event" ; name = "Event" ; @@ -102,6 +110,15 @@ let valid_time_data = { arity = [pk_type, (lin_any, res_any)] } +(* Not a real type! + * Used when a type is needed, but none is known. + * You need to make sure any code using this type will not be executed. *) +let wrong = { + Abstype.id = "Wrong"; + name = "Wrong"; + arity = [] +} + (* When unifying, we need to keep track of both mu-bound recursive variables * and names of recursive type applications. The `rec_id` type allows us to * abstract this and keep both in the same environment. 
*) @@ -1701,6 +1718,7 @@ let float_type = Primitive Primitive.Float let datetime_type = Primitive Primitive.DateTime let xml_type = Alias (pk_type, ("Xml", [], [], false), Application (list, [(PrimaryKind.Type, Primitive Primitive.XmlItem)])) let database_type = Primitive Primitive.DB +let wrong_type = Application (wrong, []) (* Empty type, used for exceptions *) let empty_type = Variant (make_empty_closed_row ()) let wild = "wild" @@ -4676,6 +4694,8 @@ let make_tuple_type (ts : datatype list) : datatype = ts)) let make_list_type t = Application (list, [PrimaryKind.Type, t]) +let make_mapentry_type t t' = + Application (mapentry, [PrimaryKind.Type, t; PrimaryKind.Type, t']) let make_process_type r = Application (process, [PrimaryKind.Row, r]) let make_transaction_time_data_type typ = @@ -4819,3 +4839,9 @@ let pp_tycon_spec : Format.formatter -> tycon_spec -> unit = fun fmt t -> let unwrap_list_type = function | Application ({Abstype.id = "List"; _}, [PrimaryKind.Type, t]) -> t | _ -> assert false + +let unwrap_mapentry_type = function + | Application ({Abstype.id = "MapEntry"; _}, [PrimaryKind.Type, t; PrimaryKind.Type, t']) -> t, t' + | _ -> assert false + +let unwrap_map_type = unwrap_list_type ->- unwrap_mapentry_type diff --git a/core/types.mli b/core/types.mli index 88c0fa19a..5ef8b3f0a 100644 --- a/core/types.mli +++ b/core/types.mli @@ -86,6 +86,7 @@ end val process : Abstype.t val list : Abstype.t +val mapentry : Abstype.t val event : Abstype.t val dom_node : Abstype.t val access_point : Abstype.t @@ -247,6 +248,7 @@ val datetime_type : datatype val database_type : datatype val xml_type : datatype val empty_type : datatype +val wrong_type : datatype val wild : Label.t val hear : Label.t val wild_present : Label.t * datatype @@ -370,12 +372,15 @@ or None otherwise. 
*) val unwrap_row : row -> (row * row_var option) val unwrap_list_type : typ -> typ +val unwrap_mapentry_type : typ -> typ * typ +val unwrap_map_type : typ -> typ * typ val extract_tuple : row -> datatype list (** type constructors *) val make_tuple_type : datatype list -> datatype val make_list_type : datatype -> datatype +val make_mapentry_type : datatype -> datatype -> datatype val make_process_type : row -> datatype val make_record_type : datatype field_env -> datatype val make_variant_type : datatype field_env -> datatype diff --git a/core/value.ml b/core/value.ml index 8308c07ed..e05565d30 100644 --- a/core/value.ml +++ b/core/value.ml @@ -735,6 +735,7 @@ type t = [ | primitive_value | `Lens of Lens.Database.t * Lens.Value.t | `List of t list +| `Entry of t * t | `Record of (string * t) list | `Variant of string * t | `FunctionPtr of (Ir.var * t option) @@ -779,6 +780,7 @@ let rec p_value (ppf : formatter) : t -> 'a = function try p_tuple ppf fields with Not_tuple -> fprintf ppf "(@[%a@])" p_record_fields (List.sort (fun (l,_) (r, _) -> compare l r) fields) end + | `Entry (k,v) -> fprintf ppf "@{%a@}(@[%a)@]" p_value k p_value v | `List [] -> fprintf ppf "[]" | `List ((`XML _)::_ as elems) -> fprintf ppf "@[%a@]" (pp_print_list p_value) elems diff --git a/core/value.mli b/core/value.mli index e2f85464b..6d4dc430a 100644 --- a/core/value.mli +++ b/core/value.mli @@ -238,6 +238,7 @@ module Continuation : CONTINUATION type t = [ | primitive_value | `List of t list +| `Entry of t * t | `Record of (string * t) list | `Lens of Lens.Database.t * Lens.Value.t | `Variant of string * t diff --git a/prelude.links b/prelude.links index de52f6234..1f53a2b87 100644 --- a/prelude.links +++ b/prelude.links @@ -208,10 +208,6 @@ fun eighth(p) { p.8 } fun ninth(p) { p.9 } fun tenth(p) { p.10 } -fun sum(ns) { - fold_left((+), 0, ns) -} - fun product(ns) { fold_left((*), 1, ns) } @@ -434,6 +430,80 @@ fun empty(l) { } } +unsafe sig sum : ([Int]) -> Int +fun sum(ns) { + switch (ns) { + case [] -> 0 + case n::ns -> n + sum(ns) + } +} + +unsafe sig sumF : ([Float]) -> Float +fun sumF(ns) { + switch (ns) { + case [] -> 0.0 + case n::ns -> n +. sumF(ns) + } +} + +fun avg(l) { + switch (l) { + case [] -> 0.0 + case n::ns -> intToFloat(sum(l)) /. intToFloat(length(l)) + } +} + +fun avgF(l) { + switch (l) { + case [] -> 0.0 + case n::ns -> sumF(l) /. intToFloat(length(l)) + } +} + +# Returns the minimum of a and b. +fun minimum(a,b) { + if (a < b) { a } + else { b } +} + +# Returns the maximum of a and b. +fun maximum(a,b) { + if (a > b) { a } + else { b } +} + +unsafe sig max_list : ([Int]) -> Int +fun max_list(ns) { + switch (ns) { + case [] -> min_int + case n::ns -> maximum(n, max_list(ns)) + } +} + +unsafe sig maxF_list : ([Float]) -> Float +fun maxF_list(ns) { + switch (ns) { + case [] -> neg_infinity + case n::ns -> maximum(n, maxF_list(ns)) + } +} + +unsafe sig min_list : ([Int]) -> Int +fun min_list(ns) { + switch (ns) { + case [] -> max_int + case n::ns -> minimum(n, min_list(ns)) + } +} + +unsafe sig minF_list : ([Float]) -> Float +fun minF_list(ns) { + switch (ns) { + case [] -> infinity + case n::ns -> minimum(n, minF_list(ns)) + } +} + # association lists # TBD: memassoc is an old LISP word. Perhaps a more modern name would fit better? 
sig memassoc : (a,[(a,b)]) ~> Bool @@ -530,6 +600,41 @@ fun distinct(t) server { dedup(asList(t)) } +sig concatMapKey : ((a) -b-> [c], [(a,[d])]) -b-> [c] +fun concatMapKey(f, l) { + for (x <- l) f(x.1) +} + +# XXX: I believe the arguments are in the wrong order, but see sortBy +sig groupBy : ((a) -b-> c, [a]) -b-> [(c, [a])] +fun groupBy(f, l) { + var is = dedup(map(f, l)); + for (i <- is) [(i, filter(fun (x) { f(x) == i }, l))] +} + +sig groupByMap : ((a) -b-> c, [(d,[a])]) -b-> [((c,d), [a])] +fun groupByMap(f, xs) { + for (x <- xs) + for (k <- dedup(map(f,x.2))) + [((k,x.1), filter(fun (v) { f(v) == k }, x.2))] +} + +unsafe sig lookupG : (a,[(a,[b])]) -> [b] +fun lookupG(x,l) { + switch (l) { + case [] -> [] + case (a,b)::xs -> if (a == x) b + else lookupG(x,xs) + } +} + +sig aggBy : ([(a, [b])], ([b]) -c-> d) -c-> [(a, d)] +fun aggBy(q, agg) { + concatMapKey(fun (k) { + [(k, agg(lookupG(k, q)))] + }, q) +} + # Gets children of elements in an XML forest, concatenated in order # @param {Xml} xml # @returns {Xml} @@ -1418,12 +1523,6 @@ fun elemIndex(x, xs) { second(n) } -# Returns the maximum of a and b. -fun maximum(a,b) { - if (a > b) { a } - else { b } -} - #### sig partition : ((a) ~b~> Bool, [a]) ~b~> ([a], [a]) diff --git a/tests/database/grouping-create.links b/tests/database/grouping-create.links new file mode 100644 index 000000000..97668ef20 --- /dev/null +++ b/tests/database/grouping-create.links @@ -0,0 +1,122 @@ +var db = database "links"; +var factorials = table "factorials" with (i : Int, f : Int) from db; + +var clients = table "cclients" with (cid : Int, name : String, address : String) from db; +var products = table "products" with (pid : Int, name : String, category : Int, price : Int) from db; +var orders = table "orders" with (dates : String, cid : Int, pid : Int, qty : Int) from db; +var classes = table "classes" with (class : String) from db; +var students = table "students" with (sid : Int, name : String, class : String) from db; +var tests = table "tests" with (id : Int, sid : Int, course : String, score: Int) from db; +var degrees = table "degrees" with (sid : Int, degree : String) from db; + +fun exterminate() { + delete (x <-- clients); + delete (x <-- products); + delete (x <-- orders); + delete (x <-- classes); + delete (x <-- students); + delete (x <-- tests); + delete (x <-- degrees) +} + +fun rndInt(i) { + floatToInt(random() *. 
intToFloat(i)) + 1 +} + +fun rndName() { + "name" ^^ intToString(rndInt(353)) +} + +fun rndCity() { + "city" ^^ intToString(rndInt(23)) +} + +fun rndDate() { + var i = rndInt(31); + if (i < 10) + { "2020-01-0" ^^ intToString(i) } + else + { "2020-01-" ^^ intToString(i) } +} + +fun intToClass(i) { + "class" ^^ intToString(i) +} + +fun rndClass() { + intToClass(rndInt(5)) +} + +fun intToCourse(i) { + "course" ^^ intToString(i) +} + +fun rndCourse() { + intToCourse(rndInt(3)) +} + +fun rndDegree() { + if (random() < 0.8) + { "BSc" } + else + { "MSc" } +} + +fun populate() { + ignore(for (i <- [1..1000]) { + insert clients + values (cid, name, address) + [(cid = i, name = rndName(), address = rndCity())]; + [] + }); + + ignore(for (i <- [1..5000]) { + insert products + values (pid, name, category, price) + [(pid = i, name = rndName(), category = rndInt(11), price = rndInt(1000))]; + [] + }); + + ignore(for (i <- [1..10000]) { + insert orders + values (dates, cid, pid, qty) + [(dates = rndDate(), cid = rndInt(1000), pid = rndInt(5000), qty = rndInt(20))]; + [] + }); + + ignore(for (i <- [1..5]) { + insert classes + values (class) + [(class = intToClass(i))]; + [] + }); + + ignore(for (i <- [1..1000]) { + insert students + values (sid, name, class) + [(sid = i, name = rndName(), class = rndClass())]; + [] + }); + + ignore(for (i <- [1..2000]) { + insert tests + values (id, sid, course, score) + [(id = i, sid = rndInt(1000), course = rndCourse(), score = rndInt(100))]; + [] + }); + + ignore(for (i <- [1..1000]) { + insert degrees + values (sid, degree) + [(sid = i, degree = rndDegree())]; + [] + }) +} + +fun test() { + exterminate(); + populate() +} + +test() + diff --git a/tests/database/grouping.links b/tests/database/grouping.links new file mode 100644 index 000000000..feded8499 --- /dev/null +++ b/tests/database/grouping.links @@ -0,0 +1,63 @@ +var db = database "links"; +var factorials = table "factorials" with (i : Int, f : Int) from db; + +var clients = table "cclients" with (cid : Int, name : String, address : String) from db; +var products = table "products" with (pid : Int, name : String, category : Int, price : Int) from db; +var orders = table "orders" with (dates : String, cid : Int, pid : Int, qty : Int) from db; +var classes = table "classes" with (class : String) from db; +var students = table "students" with (sid : Int, name : String, class : String) from db; +var tests = table "tests" with (id : Int, sid : Int, course : String, score: Int) from db; +var degrees = table "degrees" with (sid : Int, degree : String) from db; + +fun test() { + # 1. + ignore(query mixing { + var m = groupBy(fun (o) { (pid = o.pid) }, for (o <-- orders) where (o.dates == "2020-01-01") [o]); + concatMapKey(fun (k) { [(pid = k.pid, qty_sum = sum(for (o <- lookupG(k,m)) [o.qty]))] }, m) + }); + + # 2. + ignore(query mixing { + # var t = for (o <-- orders, p <-- products) where (o.pid == p.pid) [(ord = o, prod = p)]; + var t = for (o <-- orders, p <-- products) where (o.pid == p.pid) + [(odates = o.dates, ocid = o.cid, opid = o.pid, oqty = o.qty + , ppid = p.pid, pname = p.name, pcategory = p.category, pprice = p.price)]; + var m = groupBy( + fun (x) { + (dates = x.odates, opid = x.opid, + ppid = x.ppid, name = x.pname, category = x.pcategory, price = x.pprice) + }, t); + var t' = concatMapKey( + fun (k) { + [(category = k.category + ,sale = intToFloat(k.price * sum(for (x <- lookupG(k,m)) [x.oqty])) *. 
0.8)] + }, m); + var m' = groupBy (fun (z) { (category = z.category) }, t'); + concatMapKey(fun (k) { [(result = sumF(for (z <- lookupG(k,m')) [z.sale]) *. 100.0)] }, m') + }); + + # 1a. + ignore(query mixing { + var m = groupBy(fun (o) { (pid = o.pid) }, for (o <-- orders) where (o.dates == "2020-01-01") [o]); + # the query generation code is too dumb to reduce projqty and convert the query to an aggregation query... + # var projqty = fun (t) { for (o <- t) [o.qty] }; + aggBy(m, fun(t) { (qty_sum = sum(for (o <- t) [o.qty])) }) + }); + + # 3. + query mixing { + # var t = for (p <-- products, o <-- orders) where (p.pid == o.pid) [(prod = p, ord = o)]; + var t = for (o <-- orders, p <-- products) where (p.pid == o.pid) + [(odates = o.dates, ocid = o.cid, opid = o.pid, oqty = o.qty + , ppid = p.pid, pname = p.name, pcategory = p.category, pprice = p.price)]; + var m = groupBy (fun (x) { (pid = x.opid) }, t); + concatMapKey( + fun (k) { + [(pid = k.pid, sales_avg = avg(for (z <- lookupG(k,m)) [ z.pprice * z.oqty ]))] + }, m) + } + +} + +test() + diff --git a/tools/rules/test_files_registered.py b/tools/rules/test_files_registered.py index 81ccfcb6f..cc4bfd4ba 100755 --- a/tools/rules/test_files_registered.py +++ b/tools/rules/test_files_registered.py @@ -7,6 +7,8 @@ # The following files are excluded from being tested, # because they are not entry points to some test BLACKLIST = { + "tests/database/grouping-create.links", + "tests/database/grouping.links", "tests/empty_prelude.links", "tests/freezeml_prelude.links", "tests/presence_type_arg_typename.links",
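
As a quick in-memory sanity check of the prelude combinators added above (groupBy, lookupG, aggBy and the new aggregates), here is a small illustrative sketch; the list os, its field names and the expected results are made up for illustration, assume only the prelude definitions in this patch, and result order is unspecified:

    var os = [(pid = 1, qty = 2), (pid = 2, qty = 5), (pid = 1, qty = 4)];
    var m = groupBy(fun (o) { (pid = o.pid) }, os);
    # m == [((pid = 1), [(pid = 1, qty = 2), (pid = 1, qty = 4)]), ((pid = 2), [(pid = 2, qty = 5)])]
    var r = aggBy(m, fun (g) { (qty_sum = sum(for (o <- g) [o.qty])) });
    # r == [((pid = 1), (qty_sum = 6)), ((pid = 2), (qty_sum = 5))]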