Skip to content

Commit

Permalink
Deserialize records on the fly.
Browse files Browse the repository at this point in the history
Consider a scenario where input relations contain JSON strings.  These
strings are parsed using the recently added JSON lib and then DDlog goes
on to compute on the parsed data.  When working with large JSON
documents where only a handful of attributes are actually used by DDlog,
DDlog's memory footprint is dominated by unparsed JSON inputs.

To avoid this waste, we introduce a new record type that contains
serialized data represented as a string.  DDlog parses this string using
the appropriate deserializer (currently, only JSON is supported) and drops
the string on the floor, keeping only the extracted data in memory.

Syntactically, the new type of record is written down as
`@<format><string_token>`, where `<string_token>` can be either a quoted
string literal or a reference to a file, e.g.:

```
insert json_test.Deserialized[@json"{\"@type\": \"t.V1\", \"b\": true}"]
```
  • Loading branch information
ryzhyk committed Mar 2, 2020
1 parent 3103de6 commit 3a575b4
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 39 deletions.
2 changes: 0 additions & 2 deletions lib/json.toml

This file was deleted.

40 changes: 35 additions & 5 deletions rust/template/cmd_parser/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ named!(rel_key<&[u8], (Name, Record)>,
);

named!(record<&[u8], Record>,
alt!(bool_val | string_val | string_val_from_file | tuple_val | array_val | struct_val | int_val )
alt!(bool_val | string_val | serialized_val | tuple_val | array_val | struct_val | int_val )
);

named!(named_record<&[u8], (Name, Record)>,
Expand Down Expand Up @@ -368,24 +368,32 @@ named!(string_literal<&[u8], String>,
)
);

named!(string_val_from_file<&[u8], Record>,
named!(string_from_file<&[u8], String>,
do_parse!(
tag!("%")
>>
fname: string_literal
>>
(Record::String(std::fs::read_to_string(std::path::Path::new(&fname)).map_err(|e|format!("Failed to read string from file {}: {}", fname, e)).unwrap()))
(std::fs::read_to_string(std::path::Path::new(&fname)).map_err(|e|format!("Failed to read string from file {}: {}", fname, e)).unwrap())
)
);

named!(string_val<&[u8], Record>,
named!(string_inline<&[u8], String>,
do_parse!(
str: string_literal
>>
(Record::String(str))
(str)
)
);

// Parses a string given in either of two ways and yields its contents as a
// `String`: an inline string (`string_inline`) or a reference to a file whose
// contents are read in (`string_from_file`). The inline form is tried first.
named!(string_token<&[u8], String>,
alt!(string_inline | string_from_file)
);

// Parses a string token (inline or read from a file, see `string_token`) and
// wraps the resulting `String` in `Record::String`.
named!(string_val<&[u8], Record>,
map!(string_token, Record::String)
);

#[test]
fn test_string() {
assert_eq!(
Expand All @@ -406,6 +414,28 @@ fn test_string() {
);
}

// Parses a serialized value written as `@<format><string_token>`, e.g.
// `@json"foo"`, into `Record::Serialized(format, data)`.
//
// The payload is stored as the raw string; no deserialization happens in the
// parser. The format name is accepted as any identifier and is not validated
// here.
named!(serialized_val<&[u8], Record>,
do_parse!(
// Marker that introduces a serialized value.
tag!("@")
>>
// Name of the serialization format, e.g. `json`.
format_name: identifier
>>
// Serialized payload: an inline string or the contents of a file.
data: string_token
>>
(Record::Serialized(Cow::from(format_name), data))
)
);

// Checks that `@json"foo"` parses into `Record::Serialized("json", "foo")`
// and that the parser consumes the whole input (empty remainder).
#[test]
fn test_serialized() {
assert_eq!(
serialized_val(br###"@json"foo""###),
Ok((
&br""[..],
Record::Serialized(Cow::from("json"), "foo".to_string())
))
);
}
named!(tuple_val<&[u8], Record>,
delimited!(apply!(sym,"("),
map!(separated_list!(apply!(sym,","), record), Record::Tuple),
Expand Down
38 changes: 8 additions & 30 deletions rust/template/differential_datalog/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ pub enum Record {
Bool(bool),
Int(BigInt),
String(String),
/// Value serialized in a string. The first field stores the name of the
/// serialization format, e.g., "json".
Serialized(Name, String),
Tuple(Vec<Record>),
Array(CollectionKind, Vec<Record>),
PosStruct(Name, Vec<Record>),
Expand All @@ -64,6 +67,10 @@ impl fmt::Display for Record {
Record::Bool(false) => write!(f, "false"),
Record::Int(i) => i.fmt(f),
Record::String(s) => format_ddlog_str(s.as_ref(), f),
Record::Serialized(n, s) => {
write!(f, "#{}", n)?;
format_ddlog_str(s.as_ref(), f)
}
Record::Tuple(recs) => {
write!(f, "(")?;
let len = recs.len();
Expand Down Expand Up @@ -1454,7 +1461,7 @@ macro_rules! decl_record_mutator_struct {
#[macro_export]
macro_rules! decl_record_mutator_enum {
( $n:ident, <$( $targ:ident),*>, $($cons:ident {$( $arg:ident : $type:ty),*}),* ) => {
impl<$($targ: $crate::record::FromRecord+Default),*> $crate::record::Mutator<$n<$($targ),*>> for $crate::record::Record
impl<$($targ: $crate::record::FromRecord+serde::de::DeserializeOwned+Default),*> $crate::record::Mutator<$n<$($targ),*>> for $crate::record::Record
where $($crate::record::Record: $crate::record::Mutator<$targ>),*
{
fn mutate(&self, x: &mut $n<$($targ),*>) -> Result<(), String> {
Expand Down Expand Up @@ -1533,35 +1540,6 @@ macro_rules! decl_val_enum_into_record {
};
}

// Generates an `impl Mutator<$n<..>> for Record` for an enum `$n` whose
// constructors each wrap a single value of type `$type`.
//
// Arguments:
//   $n     - name of the enum type;
//   $targ  - its type parameters (each bounded by `FromRecord + Default`);
//   $cons ($type) - each constructor and the type of the value it wraps.
//
// Generated `mutate` behavior, by the shape of the record (`self`):
//   * `PosStruct`   - rejected with "Cannot use positional struct as mutator";
//   * `NamedStruct` - the whole record is applied as a mutator to the value
//                     held by whichever constructor `x` currently has;
//   * anything else - rejected with a "not a struct" error.
#[macro_export]
macro_rules! decl_record_mutator_val_enum {
( $n:ident, <$( $targ:ident),*>, $($cons:ident ($type:ty)),* ) => {
impl<$($targ: $crate::record::FromRecord+Default),*> $crate::record::Mutator<$n<$($targ),*>> for $crate::record::Record
where $($crate::record::Record: $crate::record::Mutator<$targ>),*
{
fn mutate(&self, x: &mut $n<$($targ),*>) -> Result<(), String> {
match self {
$crate::record::Record::PosStruct(..) => {
Err(format!("Cannot use positional struct as mutator"))
},
// NOTE(review): `args` is bound but not used; the full record
// (`self`) is forwarded to the inner value's mutator instead.
$crate::record::Record::NamedStruct(_, args) => {
match x {
$(
$n::$cons(v) => {
<Mutator<$type>>::mutate(self, v)
}
),*
}
},
_ => {
Result::Err(format!("not a struct {:?}", *self))
}
}
}
}
};
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
1 change: 1 addition & 0 deletions rust/template/types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ lazy_static = "1.3"
libc = "0.2"
num-traits = "0.2"
serde = {version = "1.0", features = ["derive"]}
serde_json = "1.0"
timely = "0.11"
twox-hash = "1.1"

Expand Down
11 changes: 9 additions & 2 deletions src/Language/DifferentialDatalog/Compile.hs
Original file line number Diff line number Diff line change
Expand Up @@ -713,15 +713,22 @@ mkFromRecord t@TypeDef{..} =
" c => result::Result::Err(format!(\"unknown constructor {} of type" <+> rname (name t) <+> "in {:?}\", c, *val))" $$
" }" $$
" }," $$
" record::Record::Serialized(format, s) => {" $$
" if format == \"json\" {" $$
" serde_json::from_str(&*s).map_err(|e|format!(\"{}\", e))" $$
" } else {" $$
" result::Result::Err(format!(\"unsupported serialization format '{}'\", format))" $$
" }" $$
" }," $$
" v => {" $$
" result::Result::Err(format!(\"not a struct {:?}\", *v))" $$
" result::Result::Err(format!(\"not a struct {:?}\", *v))" $$
" }" $$
" }" $$
" }" $$
"}"
where
targs = "<" <> (hcat $ punctuate comma $ map pp tdefArgs) <> ">"
targs_bounds = "<" <> (hcat $ punctuate comma $ map ((<> ": record::FromRecord + Default") . pp) tdefArgs) <> ">"
targs_bounds = "<" <> (hcat $ punctuate comma $ map ((<> ": record::FromRecord + serde::de::DeserializeOwned + Default") . pp) tdefArgs) <> ">"
pos_constructors = vcat $ map mkposcons $ typeCons $ fromJust tdefType
mkposcons :: Constructor -> Doc
mkposcons c@Constructor{..} =
Expand Down
6 changes: 6 additions & 0 deletions test/datalog_tests/json_test.dat
Original file line number Diff line number Diff line change
@@ -1 +1,7 @@
dump json_test.JsonTest;

start;
insert json_test.Deserialized[@json"{\"@type\": \"t.V1\", \"b\": true}"],
commit;

dump json_test.ODeserialized;
4 changes: 4 additions & 0 deletions test/datalog_tests/json_test.dl
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,7 @@ JsonTest(scalar3(),
to_json_string_or_default(from_json_string(scalar3()): Result<JsonValue, string>)).
JsonTest(scalar4(),
to_json_string_or_default(from_json_string(scalar4()): Result<JsonValue, string>)).

input relation Deserialized[TaggedEnum]
output relation ODeserialized[TaggedEnum]
ODeserialized[x] :- Deserialized[x].
1 change: 1 addition & 0 deletions test/datalog_tests/json_test.dump.expected
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ json_test.JsonTest{.description = "{\"b\":true}", .value = "{\"std_Ok\":{\"res\"
json_test.JsonTest{.description = "{\"foo\":\"bar\"}", .value = "{\"std_Err\":{\"err\":\"missing field `b` at line 1 column 13\"}}"}
json_test.JsonTest{.description = "{\"t\":\"foo\", \"@id\":\"1001001001\", \"x\": \"x\", \"z\": 100000}", .value = "{\"std_Ok\":{\"res\":{\"t\":\"foo\",\"@id\":\"1001001001\",\"x\":\"x\",\"z\":100000}}}"}
json_test.JsonTest{.description = "{\"t\":\"foo\", \"id\":\"1001001001\", \"nested\": {\"x\": \"x\", \"z\": 100000}}", .value = "{\"std_Ok\":{\"res\":{\"t\":\"foo\",\"id\":\"1001001001\",\"nested\":{\"x\":\"x\",\"z\":100000}}}}"}
json_test.TVariant1{.b = true}

0 comments on commit 3a575b4

Please sign in to comment.