From cb1651eafe06e442051b69299836b2acfbc5f27d Mon Sep 17 00:00:00 2001 From: glendc Date: Wed, 10 Apr 2024 00:27:25 +0200 Subject: [PATCH 1/7] set venndb-macros dep to 0.1.1 in venndb crate dep list --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 59e4497..2e89d2b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,7 @@ rustdoc-args = ["--cfg", "docsrs"] bitvec = "1.0.1" hashbrown = "0.14.3" rand = "0.8.5" -venndb-macros = { version = "0.1.0", path = "venndb-macros" } +venndb-macros = { version = "0.1.1", path = "venndb-macros" } [dev-dependencies] divan = "0.1.14" From 7cf68a5a6f3e9f886de0b5722606d017953444ab Mon Sep 17 00:00:00 2001 From: glendc Date: Fri, 12 Apr 2024 23:06:12 +0200 Subject: [PATCH 2/7] support Option for filters and ban it for keys --- CHANGELOG.md | 14 ++++ Cargo.toml | 4 +- venndb-macros/Cargo.toml | 2 +- venndb-macros/src/field.rs | 16 +++-- venndb-macros/src/generate_db.rs | 65 ++++++++++++++----- venndb-macros/src/parse_attrs.rs | 48 ++++++++++++-- venndb-usage/src/main.rs | 50 ++++++++++++++ .../compiles/derive_struct_all_the_things.rs | 7 ++ venndb-usage/tests/fails/option_key.rs | 9 +++ venndb-usage/tests/fails/option_key.stderr | 6 ++ 10 files changed, 193 insertions(+), 28 deletions(-) create mode 100644 venndb-usage/tests/fails/option_key.rs create mode 100644 venndb-usage/tests/fails/option_key.stderr diff --git a/CHANGELOG.md b/CHANGELOG.md index 593468f..81ae6f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+# 0.2.0 (2024-04-12) + +Breaking Changes: + +* Support Option in a special way: + * for filters it means that both positive and negative bits will be set to false if the value is `None`; + * for filter maps this means that the filter is not even registered; + * keys cannot be optional; + * this is potentially breaking as some implementations from `0.1*` might have already used `Option` in a different way; + +Non-Breaking Changes: + +* improve documentaton; + # 0.1.1 (2024-04-10) Non-Breaking Changes: diff --git a/Cargo.toml b/Cargo.toml index 2e89d2b..7f1e9ce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ repository = "https://github.com/plabayo/venndb" keywords = ["database", "db", "memory", "bits"] categories = ["database", "db"] authors = ["Glen De Cauwsemaecker "] -version = "0.1.1" +version = "0.2.0" rust-version = "1.75.0" [package.metadata.docs.rs] @@ -23,7 +23,7 @@ rustdoc-args = ["--cfg", "docsrs"] bitvec = "1.0.1" hashbrown = "0.14.3" rand = "0.8.5" -venndb-macros = { version = "0.1.1", path = "venndb-macros" } +venndb-macros = { version = "0.2.0", path = "venndb-macros" } [dev-dependencies] divan = "0.1.14" diff --git a/venndb-macros/Cargo.toml b/venndb-macros/Cargo.toml index be10a79..49a127f 100644 --- a/venndb-macros/Cargo.toml +++ b/venndb-macros/Cargo.toml @@ -9,7 +9,7 @@ repository = "https://github.com/plabayo/venndb" keywords = ["database", "db", "memory", "bits"] categories = ["database", "db"] authors = ["Glen De Cauwsemaecker "] -version = "0.1.1" +version = "0.2.0" rust-version = "1.75.0" [package.metadata.docs.rs] diff --git a/venndb-macros/src/field.rs b/venndb-macros/src/field.rs index fcc1107..40b6637 100644 --- a/venndb-macros/src/field.rs +++ b/venndb-macros/src/field.rs @@ -13,7 +13,7 @@ pub struct StructField<'a> { /// The original parsed field field: &'a syn::Field, /// The parsed attributes of the field - attrs: FieldAttrs, + attrs: FieldAttrs<'a>, /// The field name. 
This is contained optionally inside `field`, /// but is duplicated non-optionally here to indicate that all field that /// have reached this point must have a field name, and it no longer @@ -52,6 +52,7 @@ impl<'a> KeyField<'a> { pub struct FilterField<'a> { pub name: &'a Ident, + pub optional: bool, } impl<'a> FilterField<'a> { @@ -71,7 +72,7 @@ impl<'a> FilterField<'a> { impl<'a> StructField<'a> { /// Attempts to parse a field of a `#[derive(VennDB)]` struct, pulling out the /// fields required for code generation. - pub fn new(_errors: &Errors, field: &'a syn::Field, attrs: FieldAttrs) -> Option { + pub fn new(_errors: &Errors, field: &'a syn::Field, attrs: FieldAttrs<'a>) -> Option { let name = field.ident.as_ref().expect("missing ident for named field"); Some(StructField { field, attrs, name }) } @@ -81,12 +82,16 @@ impl<'a> StructField<'a> { self.attrs.kind.as_ref().map(|kind| match kind { FieldKind::Key => FieldInfo::Key(KeyField { name: self.name, - ty: &self.field.ty, + ty: self.attrs.option_ty.unwrap_or(&self.field.ty), + }), + FieldKind::Filter => FieldInfo::Filter(FilterField { + name: self.name, + optional: self.attrs.option_ty.is_some(), }), - FieldKind::Filter => FieldInfo::Filter(FilterField { name: self.name }), FieldKind::FilterMap => FieldInfo::FilterMap(FilterMapField { name: self.name, - ty: &self.field.ty, + ty: self.attrs.option_ty.unwrap_or(&self.field.ty), + optional: self.attrs.option_ty.is_some(), }), }) } @@ -95,6 +100,7 @@ impl<'a> StructField<'a> { pub struct FilterMapField<'a> { pub name: &'a Ident, pub ty: &'a syn::Type, + pub optional: bool, } impl<'a> FilterMapField<'a> { diff --git a/venndb-macros/src/generate_db.rs b/venndb-macros/src/generate_db.rs index 24edddb..0fbf1a2 100644 --- a/venndb-macros/src/generate_db.rs +++ b/venndb-macros/src/generate_db.rs @@ -342,9 +342,24 @@ fn generate_db_struct_method_append( let name = field.name(); let field_name = field.filter_name(); let field_name_not = field.filter_not_name(); - quote! 
{ - self.#field_name.push(data.#name); - self.#field_name_not.push(!data.#name); + if field.optional { + quote! { + match data.#name { + Some(value) => { + self.#field_name.push(value); + self.#field_name_not.push(!value); + } + None => { + self.#field_name.push(false); + self.#field_name_not.push(false); + } + } + } + } else { + quote! { + self.#field_name.push(data.#name); + self.#field_name_not.push(!data.#name); + } } } FieldInfo::FilterMap(field) => { @@ -352,19 +367,39 @@ fn generate_db_struct_method_append( let filter_map_name = field.filter_map_name(); let filter_vec_name = field.filter_vec_name(); let filter_index = format_ident!("{}_index", filter_vec_name); - quote! { - let #filter_index = match self.#filter_map_name.entry(data.#name.clone()) { - ::venndb::__internal::hash_map::Entry::Occupied(entry) => *entry.get(), - ::venndb::__internal::hash_map::Entry::Vacant(entry) => { - let vec_index = self.#filter_vec_name.len(); - entry.insert(vec_index); - let bv = ::venndb::__internal::BitVec::repeat(false, index); - self.#filter_vec_name.push(bv); - vec_index + if field.optional { + quote! { + if let Some(value) = data.#name.clone() { + let #filter_index = match self.#filter_map_name.entry(value) { + ::venndb::__internal::hash_map::Entry::Occupied(entry) => *entry.get(), + ::venndb::__internal::hash_map::Entry::Vacant(entry) => { + let vec_index = self.#filter_vec_name.len(); + entry.insert(vec_index); + let bv = ::venndb::__internal::BitVec::repeat(false, index); + self.#filter_vec_name.push(bv); + vec_index + } + }; + for (i, row) in self.#filter_vec_name.iter_mut().enumerate() { + row.push(i == #filter_index); + } + } + } + } else { + quote! 
{ + let #filter_index = match self.#filter_map_name.entry(data.#name.clone()) { + ::venndb::__internal::hash_map::Entry::Occupied(entry) => *entry.get(), + ::venndb::__internal::hash_map::Entry::Vacant(entry) => { + let vec_index = self.#filter_vec_name.len(); + entry.insert(vec_index); + let bv = ::venndb::__internal::BitVec::repeat(false, index); + self.#filter_vec_name.push(bv); + vec_index + } + }; + for (i, row) in self.#filter_vec_name.iter_mut().enumerate() { + row.push(i == #filter_index); } - }; - for (i, row) in self.#filter_vec_name.iter_mut().enumerate() { - row.push(i == #filter_index); } } } diff --git a/venndb-macros/src/parse_attrs.rs b/venndb-macros/src/parse_attrs.rs index 9e5af75..96fcfe0 100644 --- a/venndb-macros/src/parse_attrs.rs +++ b/venndb-macros/src/parse_attrs.rs @@ -4,8 +4,9 @@ use crate::errors::Errors; /// Attributes applied to a field of a `#![derive(VennDB)]` struct. #[derive(Default)] -pub struct FieldAttrs { +pub struct FieldAttrs<'a> { pub kind: Option, + pub option_ty: Option<&'a syn::Type>, } pub enum FieldKind { @@ -14,8 +15,8 @@ pub enum FieldKind { FilterMap, } -impl FieldAttrs { - pub fn parse(errors: &Errors, field: &syn::Field) -> Self { +impl<'a> FieldAttrs<'a> { + pub fn parse(errors: &Errors, field: &'a syn::Field) -> Self { let mut this = Self::default(); let mut skipped = false; @@ -72,11 +73,23 @@ impl FieldAttrs { } } + this.option_ty = ty_inner(&["Option"], &field.ty); + if skipped { this.kind = None; } else if is_key { - this.kind = Some(FieldKind::Key); - } else if is_bool(&field.ty) { + if this.option_ty.is_some() { + errors.err( + &field.ty, + concat!( + "Invalid field-level `venndb` attribute\n", + "`key` fields cannot be `Option`", + ), + ); + } else { + this.kind = Some(FieldKind::Key); + } + } else if is_bool(this.option_ty.unwrap_or(&field.ty)) { this.kind = Some(FieldKind::Filter); } else if is_filter { // bool filters are to be seen as regular filters, even when made explicitly so! 
@@ -164,3 +177,28 @@ fn is_matching_attr(name: &str, attr: &syn::Attribute) -> bool { fn is_venndb_attr(attr: &syn::Attribute) -> bool { is_matching_attr("venndb", attr) } + +/// Returns `Some(T)` if a type is `wrapper_name` for any `wrapper_name` in `wrapper_names`. +fn ty_inner<'a>(wrapper_names: &[&str], ty: &'a syn::Type) -> Option<&'a syn::Type> { + if let syn::Type::Path(path) = ty { + if path.qself.is_some() { + return None; + } + // Since we only check the last path segment, it isn't necessarily the case that + // we're referring to `std::vec::Vec` or `std::option::Option`, but there isn't + // a fool proof way to check these since name resolution happens after macro expansion, + // so this is likely "good enough" (so long as people don't have their own types called + // `Option` or `Vec` that take one generic parameter they're looking to parse). + let last_segment = path.path.segments.last()?; + if !wrapper_names.iter().any(|name| last_segment.ident == *name) { + return None; + } + if let syn::PathArguments::AngleBracketed(gen_args) = &last_segment.arguments { + let generic_arg = gen_args.args.first()?; + if let syn::GenericArgument::Type(ty) = &generic_arg { + return Some(ty); + } + } + } + None +} diff --git a/venndb-usage/src/main.rs b/venndb-usage/src/main.rs index e3dd3ae..6deee29 100644 --- a/venndb-usage/src/main.rs +++ b/venndb-usage/src/main.rs @@ -717,4 +717,54 @@ mod tests { assert_eq!(results.len(), 1); assert_eq!(results[0].id, 1); } + + // v0.2 — Optional Filters + + #[derive(Debug, VennDB)] + pub struct Worker { + #[venndb(key)] + id: u32, + is_admin: bool, + is_active: Option, + #[venndb(filter)] + department: Option, + } + #[test] + fn test_optional_filter() { + let db = WorkerDB::from_rows(vec![ + Worker { + id: 1, + is_admin: false, + is_active: Some(true), + department: Some(Department::Engineering), + }, + Worker { + id: 2, + is_admin: false, + is_active: None, + department: None, + }, + ]) + .unwrap(); + + let mut query = db.query(); 
+ query.is_active(true); + let results = query.execute().unwrap().iter().collect::>(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].id, 1); + + let mut query = db.query(); + query.is_active(false); + assert!(query.execute().is_none()); + + let mut query = db.query(); + query.department(Department::Engineering); + let results = query.execute().unwrap().iter().collect::>(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].id, 1); + + let mut query = db.query(); + query.department(Department::HR); + assert!(query.execute().is_none()); + } } diff --git a/venndb-usage/tests/compiles/derive_struct_all_the_things.rs b/venndb-usage/tests/compiles/derive_struct_all_the_things.rs index 2222872..e93696e 100644 --- a/venndb-usage/tests/compiles/derive_struct_all_the_things.rs +++ b/venndb-usage/tests/compiles/derive_struct_all_the_things.rs @@ -9,10 +9,13 @@ struct Employee { #[venndb(filter)] // explicit bool filter == regular bool is_manager: bool, is_admin: bool, + is_something: Option, #[venndb(skip)] is_active: bool, #[venndb(filter)] department: Department, + #[venndb(filter)] + country: Option, } #[derive(Debug, PartialEq, Eq, Clone, Hash)] @@ -30,14 +33,18 @@ fn main() { name: "Alice".to_string(), is_manager: true, is_admin: false, + is_something: None, is_active: true, department: Department::Engineering, + country: None, }) .unwrap(); let employee_ref = db.get_by_id(&1).unwrap(); assert_eq!(employee_ref.id, 1); assert_eq!(employee_ref.name, "Alice"); + assert_eq!(employee_ref.is_something, None); + assert_eq!(employee_ref.country, None); let mut query = db.query(); query.is_manager(true).is_admin(true); diff --git a/venndb-usage/tests/fails/option_key.rs b/venndb-usage/tests/fails/option_key.rs new file mode 100644 index 0000000..2a33e27 --- /dev/null +++ b/venndb-usage/tests/fails/option_key.rs @@ -0,0 +1,9 @@ +use venndb::VennDB; + +#[derive(Debug, VennDB)] +struct Employee { + #[venndb(key)] + id: Option, +} + +fn main() {} diff --git 
a/venndb-usage/tests/fails/option_key.stderr b/venndb-usage/tests/fails/option_key.stderr new file mode 100644 index 0000000..46e2ab2 --- /dev/null +++ b/venndb-usage/tests/fails/option_key.stderr @@ -0,0 +1,6 @@ +error: Invalid field-level `venndb` attribute + `key` fields cannot be `Option` + --> tests/fails/option_key.rs:6:9 + | +6 | id: Option, + | ^^^^^^ From 8fe6a3d65c95bfba0e34bcf6cb93b174cfbef889 Mon Sep 17 00:00:00 2001 From: glendc Date: Fri, 12 Apr 2024 23:17:19 +0200 Subject: [PATCH 3/7] start to improve docs --- README.md | 21 +++++++++++++++++---- www/index.html | 45 ++++++++++++++++++++++++++++++++++++++++++++- www/style.css | 5 +++++ 3 files changed, 66 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index cb81816..7ed5c65 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,8 @@ This database is designed for a very specific use case where you have mostly sta like these can be large and should be both fast and compact. For the limited usecases where `venndb` can be applied to, -ithas less dependencies and is faster then traditional choices, -such as a naive implementation or a more heavy lifted dependency then _Sqlite_. +it has less dependencies and is faster then traditional choices, +such as a naive implementation or a more heavy lifted dependency such as _Sqlite_. > See [the benchmarks](#benchmarks) for more information on this topic. 
@@ -90,12 +90,14 @@ pub struct Employee { #[venndb(key)] id: u32, name: String, - is_manager: bool, + is_manager: Option, is_admin: bool, #[venndb(skip)] foo: bool, #[venndb(filter)] department: Department, + #[venndb(filter)] + country: Option, } fn main() { @@ -254,7 +256,10 @@ pub struct Employee { name: String, is_manager: bool, is_admin: bool, - is_active: bool, + // filter (booleans) can be made optional, + // meaning that the row will not be able to be filtered (found) + // on this column when the row has a `None` value for it + is_active: Option, // booleans are automatically turned into (query) filters, // use the `skip` arg to stop this. As such it is only really needed for // bool properties :) @@ -272,8 +277,16 @@ pub struct Employee { // trying to do so will result in a compile-team failure. #[venndb(filter)] department: Department, + // similar to regular bool filters, + // filter maps can also be optional. + // When a filter map is optional and the row's property for that filter is None, + // it will not be registered and thus not be able to filtered (found) on that property + #[venndb(filter)] + country: Option, } +// TODO: adapt the example to make this work + fn main() { let db = EmployeeInMemDB::from_iter([ RawCsvRow("1,John Doe,true,false,true,false,Engineering"), diff --git a/www/index.html b/www/index.html index 6ec5a0a..a41402c 100644 --- a/www/index.html +++ b/www/index.html @@ -48,7 +48,7 @@
venndb banner

- An in-memory database in Rust for rows queried using bit (flag) columns. + An append-only in-memory database in Rust for rows queried using bit (flag) columns. This database is designed for a very specific use case where you have mostly static data that you typically load at startup and have to query constantly using very simple filters. Datasets like these can be large and should be both fast and compact. @@ -91,6 +91,49 @@

+
+

Example

+
+
+ +
+use venndb::VennDB;
+
+#[derive(Debug, VennDB)]
+pub struct Employee {
+    #[venndb(key)]
+    id: u32,
+    name: String,
+    is_manager: Option<bool>,
+    is_admin: bool,
+    #[venndb(skip)]
+    foo: bool,
+    #[venndb(filter)]
+    department: Department,
+    #[venndb(filter)]
+    country: Option<String>,
+}
+
+fn main() {
+    let db = EmployeeDB::from_iter(/* .. */);
+
+    let mut query = db.query();
+    let employee = query
+        .is_admin(true)
+        .is_manager(false)
+        .department(Department::Engineering)
+        .execute()
+        .expect("to have found at least one")
+        .any();
+
+    println!("non-manager admin engineer: {:?}", employee);
+}
+                    
+
+
+

Learn more at https://crates.io/crates/venndb. +
+

diff --git a/www/style.css b/www/style.css index 4d90c21..fd79a25 100644 --- a/www/style.css +++ b/www/style.css @@ -106,4 +106,9 @@ main { #menu img { height: 25px; +} + +#code-block { + padding: 10px; + border: 1px solid grey; } \ No newline at end of file From ef8498d2c6cd2e2e6fa70692170b0e92fee62e74 Mon Sep 17 00:00:00 2001 From: glendc Date: Fri, 12 Apr 2024 23:20:11 +0200 Subject: [PATCH 4/7] add FAQ item about key requirements --- CHANGELOG.md | 2 +- README.md | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 81ae6f2..44193ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ Breaking Changes: Non-Breaking Changes: -* improve documentaton; +* improve documentation; # 0.1.1 (2024-04-10) diff --git a/README.md b/README.md index 7ed5c65..43beba5 100644 --- a/README.md +++ b/README.md @@ -226,6 +226,16 @@ That said, we do recommend that you use `enum` values if you can, or some other Using for example a `String` directly is a bad idea as that would mean that `bE` != `Be` != `BE` != `Belgium` != `Belgique` != `België`. Even though these are really referring all to the same country. In such cases a much better idea is to at the very least create a wrapper type such as `struct Country(String)`, to allow you to enforce sanitization/validation when creating the value and ensuring the hashes will be the same for those values that are conceptually the same. +> ❓ Why do keys have to be unique and non-optional? + +Within `venndb` keys are meant to be used to look up +a row which was previously received via filters. + +As such it makes no sense for such keys to be: + +- duplicate: as that could result in multiple rows or the wrong row being returned; +- optional: as that would mean the row cannot be looked up when the key is not defined; + ## Example Here follows an example demonstrating all the features of `VennDB`. 
From a90420a8cf3d1dec7edec5b3fe70e0393ed2f4a8 Mon Sep 17 00:00:00 2001 From: glendc Date: Mon, 15 Apr 2024 10:48:13 +0200 Subject: [PATCH 5/7] fix README + update docs, fuzz optionals and bugfix filter bug --- CHANGELOG.md | 7 ++ README.md | 101 +++++++++++++++++++------- fuzz/fuzz_targets/fuzz_employee_db.rs | 9 +++ venndb-macros/src/generate_db.rs | 31 ++++---- venndb-usage/src/main.rs | 92 +++++++++++++++++++++-- 5 files changed, 194 insertions(+), 46 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44193ea..263a15b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,8 +13,15 @@ Breaking Changes: * for filters it means that both positive and negative bits will be set to false if the value is `None`; * for filter maps this means that the filter is not even registered; * keys cannot be optional; + * While technically this is a breaking change it is not expected to actually break someone, + as keys always had to be unique already and two times `None` will result in same hash... so it is unlikely + that there was an `Option` already used by someone; * this is potentially breaking as some implementations from `0.1*` might have already used `Option` in a different way; +While this changes behaviour of `filters` and `filter maps` it is unlikely that someone was already using +`Option` for these types before, as their ergonomics have been a bit weird prior to this version. +Even more so for `filter maps` it could have resulted in panics. 
+ Non-Breaking Changes: * improve documentation; diff --git a/README.md b/README.md index 43beba5..e989c5a 100644 --- a/README.md +++ b/README.md @@ -295,18 +295,16 @@ pub struct Employee { country: Option, } -// TODO: adapt the example to make this work - fn main() { let db = EmployeeInMemDB::from_iter([ - RawCsvRow("1,John Doe,true,false,true,false,Engineering"), - RawCsvRow("2,Jane Doe,false,true,true,true,Sales"), - RawCsvRow("3,John Smith,false,false,true,false,Marketing"), - RawCsvRow("4,Jane Smith,true,true,false,true,HR"), - RawCsvRow("5,John Johnson,true,true,true,true,Engineering"), - RawCsvRow("6,Jane Johnson,false,false,false,false,Sales"), - RawCsvRow("7,John Brown,true,false,true,false,Marketing"), - RawCsvRow("8,Jane Brown,false,true,true,true,HR"), + RawCsvRow("1,John Doe,true,false,true,false,Engineering,USA"), + RawCsvRow("2,Jane Doe,false,true,true,true,Sales,"), + RawCsvRow("3,John Smith,false,false,,false,Marketing,"), + RawCsvRow("4,Jane Smith,true,true,false,true,HR,"), + RawCsvRow("5,John Johnson,true,true,true,true,Engineering,"), + RawCsvRow("6,Jane Johnson,false,false,,false,Sales,BE"), + RawCsvRow("7,John Brown,true,false,true,false,Marketing,BE"), + RawCsvRow("8,Jane Brown,false,true,true,true,HR,BR"), ]) .expect("MemDB created without errors (e.g. no duplicate keys)"); @@ -360,9 +358,22 @@ fn main() { .any(); assert!(manager.id == 1 || manager.id == 5); + println!(">>> Optional bool filters have three possible values, where None != false. 
An important distinction to make..."); + let mut query = db.query(); + query.is_active(false); + let inactive_employees: Vec<_> = query + .execute() + .expect("to have found at least one") + .iter() + .collect(); + assert_eq!(inactive_employees.len(), 1); + assert_eq!(inactive_employees[0].id, 4); + println!(">>> If you want you can also get the Employees back as a Vec, dropping the DB data all together..."); let employees = db.into_rows(); assert_eq!(employees.len(), 8); + assert!(employees[1].foo); + println!("All employees: {:?}", employees); println!(">>> You can also get the DB back from the Vec, if you want start to query again..."); // of course better to just keep it as a DB to begin with, but let's pretend this is ok in this example @@ -381,13 +392,23 @@ fn main() { assert_eq!(sales_employees.len(), 1); assert_eq!(sales_employees[0].name, "Jane Doe"); + println!(">>> Filter maps that are optional work as well, e.g. you can query for all employees from USA..."); + query.reset().country("USA".to_owned()); + let usa_employees: Vec<_> = query + .execute() + .expect("to have found at least one") + .iter() + .collect(); + assert_eq!(usa_employees.len(), 1); + assert_eq!(usa_employees[0].id, 1); + println!(">>> At any time you can also append new employees to the DB..."); assert!(db - .append(RawCsvRow("8,John Doe,true,false,true,false,Engineering")) + .append(RawCsvRow("8,John Doe,true,false,true,false,Engineering,")) .is_err()); println!(">>> This will fail however if a property is not correct (e.g. 
ID (key) is not unique in this case), let's try this again..."); assert!(db - .append(RawCsvRow("9,John Doe,false,true,true,false,Engineering")) + .append(RawCsvRow("9,John Doe,false,true,true,false,Engineering,")) .is_ok()); assert_eq!(db.len(), 9); @@ -404,9 +425,10 @@ fn main() { println!(">>> You can also extend it using an IntoIterator..."); db.extend([ - RawCsvRow("10,Glenn Doe,false,true,true,true,Engineering"), - RawCsvRow("11,Peter Miss,true,true,true,true,HR"), - ]).unwrap(); + RawCsvRow("10,Glenn Doe,false,true,true,true,Engineering,"), + RawCsvRow("11,Peter Miss,true,true,true,true,HR,USA"), + ]) + .unwrap(); let mut query = db.query(); query .department(Department::HR) @@ -421,6 +443,19 @@ fn main() { assert_eq!(employees.len(), 1); assert_eq!(employees[0].id, 11); + println!(">>> There are now 2 employees from USA..."); + query.reset().country("USA".to_owned()); + let employees: Vec<_> = query + .execute() + .expect("to have found at least one") + .iter() + .collect(); + assert_eq!(employees.len(), 2); + assert_eq!( + employees.iter().map(|e| e.id).sorted().collect::>(), + [1, 11] + ); + println!(">>> All previously data is still there as well of course..."); query .reset() @@ -448,14 +483,29 @@ where { fn from(RawCsvRow(s): RawCsvRow) -> Employee { let mut parts = s.as_ref().split(','); + let id = parts.next().unwrap().parse().unwrap(); + let name = parts.next().unwrap().to_string(); + let is_manager = parts.next().unwrap().parse().unwrap(); + let is_admin = parts.next().unwrap().parse().unwrap(); + let is_active = match parts.next().unwrap() { + "" => None, + s => Some(s.parse().unwrap()), + }; + let foo = parts.next().unwrap().parse().unwrap(); + let department = parts.next().unwrap().parse().unwrap(); + let country = match parts.next().unwrap() { + "" => None, + s => Some(s.to_string()), + }; Employee { - id: parts.next().unwrap().parse().unwrap(), - name: parts.next().unwrap().to_string(), - is_manager: parts.next().unwrap().parse().unwrap(), - 
is_admin: parts.next().unwrap().parse().unwrap(), - is_active: parts.next().unwrap().parse().unwrap(), - foo: parts.next().unwrap().parse().unwrap(), - department: parts.next().unwrap().parse().unwrap(), + id, + name, + is_manager, + is_admin, + is_active, + foo, + department, + country, } } } @@ -496,11 +546,12 @@ pub struct Employee { name: String, is_manager: bool, is_admin: bool, - is_active: bool, + is_active: Option, #[venndb(skip)] foo: bool, #[venndb(filter)] department: Department, + country: Option, } ``` @@ -542,8 +593,8 @@ Query (e.g. `EmployeeInMemDBQuery`) | - | - | | `EmployeeInMemDBQuery::reset(&mut self) -> &mut Self` | reset the query, bringing it back to the clean state it has on creation | | `EmployeeInMemDBQuery::execute(&self) -> Option>` | return the result of the query using the set filters. It will be `None` in case no rows matched the defined filters. Or put otherwise, the result will contain at least one row when `Some(_)` is returned. | -| `EmployeeInMemDBQuery::is_manager(&mut self, value: bool) -> &mut Self` | a filter setter for a `bool` filter. One such method per `bool` filter (that isn't `skip`ped) will be available. E.g. if you have ` foo` filter then there will be a `EmployeeInMemDBQuery:foo` method. | -| `EmployeeInMemDBQuery::department(&mut self, value: Department) -> &mut Self` | a filter (map) setter for a non-`bool` filter. One such method per non-`bool` filter will be available. You can also `skip` these, but that's of course a bit pointless. The type will be equal to the actual field type. And the name will once again be equal to the original field name. | +| `EmployeeInMemDBQuery::is_manager(&mut self, value: bool) -> &mut Self` | a filter setter for a `bool` filter. One such method per `bool` filter (that isn't `skip`ped) will be available. E.g. if you have ` foo` filter then there will be a `EmployeeInMemDBQuery:foo` method. For _bool_ filters that are optional (`Option`) this method is also generated just the same. 
| +| `EmployeeInMemDBQuery::department(&mut self, value: Department) -> &mut Self` | a filter (map) setter for a non-`bool` filter. One such method per non-`bool` filter will be available. You can also `skip` these, but that's of course a bit pointless. The type will be equal to the actual field type. And the name will once again be equal to the original field name. Filter maps that have a `Option` type have exactly the same signature. | Query Result (e.g. `EmployeeInMemDBQueryResult`) diff --git a/fuzz/fuzz_targets/fuzz_employee_db.rs b/fuzz/fuzz_targets/fuzz_employee_db.rs index 6146015..e4f94a9 100644 --- a/fuzz/fuzz_targets/fuzz_employee_db.rs +++ b/fuzz/fuzz_targets/fuzz_employee_db.rs @@ -10,8 +10,11 @@ pub struct Employee { id: u16, _name: String, earth: bool, + alive: Option, #[venndb(filter)] faction: Faction, + #[venndb(filter)] + planet: Option, } #[derive(Clone, Debug, Arbitrary, PartialEq, Eq, Hash)] @@ -20,6 +23,12 @@ pub enum Faction { Empire, } +#[derive(Clone, Debug, Arbitrary, PartialEq, Eq, Hash)] +pub enum Planet { + Earth, + Mars, +} + fuzz_target!(|rows: Vec| { let _ = EmployeeDB::from_rows(rows); }); diff --git a/venndb-macros/src/generate_db.rs b/venndb-macros/src/generate_db.rs index 0fbf1a2..eb184e8 100644 --- a/venndb-macros/src/generate_db.rs +++ b/venndb-macros/src/generate_db.rs @@ -369,20 +369,23 @@ fn generate_db_struct_method_append( let filter_index = format_ident!("{}_index", filter_vec_name); if field.optional { quote! 
{ - if let Some(value) = data.#name.clone() { - let #filter_index = match self.#filter_map_name.entry(value) { - ::venndb::__internal::hash_map::Entry::Occupied(entry) => *entry.get(), - ::venndb::__internal::hash_map::Entry::Vacant(entry) => { - let vec_index = self.#filter_vec_name.len(); - entry.insert(vec_index); - let bv = ::venndb::__internal::BitVec::repeat(false, index); - self.#filter_vec_name.push(bv); - vec_index - } - }; - for (i, row) in self.#filter_vec_name.iter_mut().enumerate() { - row.push(i == #filter_index); - } + let #filter_index = match data.#name.clone() { + Some(value) => { + Some(match self.#filter_map_name.entry(value) { + ::venndb::__internal::hash_map::Entry::Occupied(entry) => *entry.get(), + ::venndb::__internal::hash_map::Entry::Vacant(entry) => { + let vec_index = self.#filter_vec_name.len(); + entry.insert(vec_index); + let bv = ::venndb::__internal::BitVec::repeat(false, index); + self.#filter_vec_name.push(bv); + vec_index + } + }) + }, + None => None, + }; + for (i, row) in self.#filter_vec_name.iter_mut().enumerate() { + row.push(Some(i) == #filter_index); } } } else { diff --git a/venndb-usage/src/main.rs b/venndb-usage/src/main.rs index 6deee29..3e5fb00 100644 --- a/venndb-usage/src/main.rs +++ b/venndb-usage/src/main.rs @@ -717,8 +717,11 @@ mod tests { assert_eq!(results.len(), 1); assert_eq!(results[0].id, 1); } +} - // v0.2 — Optional Filters +#[cfg(test)] +mod tests_v0_2 { + use super::*; #[derive(Debug, VennDB)] pub struct Worker { @@ -729,8 +732,9 @@ mod tests { #[venndb(filter)] department: Option, } + #[test] - fn test_optional_filter() { + fn test_optional_bool_filter() { let db = WorkerDB::from_rows(vec![ Worker { id: 1, @@ -752,19 +756,93 @@ mod tests { let results = query.execute().unwrap().iter().collect::>(); assert_eq!(results.len(), 1); assert_eq!(results[0].id, 1); + } - let mut query = db.query(); - query.is_active(false); - assert!(query.execute().is_none()); + #[test] + fn test_optional_map_filter() { + 
let db = WorkerDB::from_rows(vec![ + Worker { + id: 1, + is_admin: false, + is_active: Some(true), + department: Some(Department::Engineering), + }, + Worker { + id: 2, + is_admin: false, + is_active: None, + department: None, + }, + ]) + .unwrap(); let mut query = db.query(); query.department(Department::Engineering); let results = query.execute().unwrap().iter().collect::>(); assert_eq!(results.len(), 1); assert_eq!(results[0].id, 1); + } + + #[test] + fn test_db_with_optional_properties_default_filter() { + let db = WorkerDB::from_rows(vec![ + Worker { + id: 1, + is_admin: false, + is_active: Some(true), + department: Some(Department::Engineering), + }, + Worker { + id: 2, + is_admin: false, + is_active: None, + department: None, + }, + ]) + .unwrap(); + + let query = db.query(); + let results = query.execute().unwrap().iter().collect::>(); + assert_eq!(results.len(), 2); + assert_eq!(results[0].id, 1); + assert_eq!(results[1].id, 2); + } + + #[test] + fn test_optional_map_filter_specific() { + let db = WorkerDB::from_rows(vec![ + Worker { + id: 1, + is_admin: false, + is_active: None, + department: Some(Department::Engineering), + }, + Worker { + id: 2, + is_admin: false, + is_active: None, + department: Some(Department::HR), + }, + Worker { + id: 3, + is_admin: false, + is_active: None, + department: None, + }, + Worker { + id: 4, + is_admin: false, + is_active: None, + department: Some(Department::Engineering), + }, + ]) + .unwrap(); let mut query = db.query(); - query.department(Department::HR); - assert!(query.execute().is_none()); + query.department(Department::Engineering); + let results = query.execute().unwrap().iter().collect::>(); + assert_eq!(results.len(), 2); + assert_eq!(results[0].id, 1); + assert_eq!(results[1].id, 4); } } From d046215c0239258443d73cdb4eb4335f64a19404 Mon Sep 17 00:00:00 2001 From: glendc Date: Mon, 15 Apr 2024 10:52:23 +0200 Subject: [PATCH 6/7] improve styling example block on website --- www/index.html | 5 +++-- www/style.css 
| 12 ++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/www/index.html b/www/index.html index a41402c..86d0af9 100644 --- a/www/index.html +++ b/www/index.html @@ -131,8 +131,9 @@

Example

-

Learn more at https://crates.io/crates/venndb. -
+ +

Learn more at https://github.com/plabayo/venndb. + diff --git a/www/style.css b/www/style.css index fd79a25..9c644bd 100644 --- a/www/style.css +++ b/www/style.css @@ -111,4 +111,16 @@ main { #code-block { padding: 10px; border: 1px solid grey; + background-color: black; + color: lightgreen; + font-family: monospace; + font-size: 0.9em; +} + +quote { + display: block; + margin: 10px auto; + padding: 10px; + font-style: italic; + text-align: center; } \ No newline at end of file From a06906ecdbd088d7c0dd7b757345abbd4879694c Mon Sep 17 00:00:00 2001 From: glendc Date: Mon, 15 Apr 2024 10:57:07 +0200 Subject: [PATCH 7/7] add more content to about section of site --- www/index.html | 28 ++++++++++++++++++++++------ www/style.css | 2 +- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/www/index.html b/www/index.html index 86d0af9..b43c01d 100644 --- a/www/index.html +++ b/www/index.html @@ -47,12 +47,28 @@

venndb banner -

- An append-only in-memory database in Rust for rows queried using bit (flag) columns. - This database is designed for a very specific use case where you have mostly static data that you - typically load at startup and have to query constantly using very simple filters. Datasets - like these can be large and should be both fast and compact. -

+
+

+ An append-only in-memory database in Rust for rows queried using bit (flag) columns. + This database is designed for a very specific use case where you have mostly static data that you + typically load at startup and have to query constantly using very simple filters. Datasets + like these can be large and should be both fast and compact. +

+ +

+ For the limited use cases to which venndb can be applied, it has fewer dependencies and is faster than traditional choices, + such as a naive implementation or a heavier dependency such as Sqlite.

+ + + See the benchmarks for more information on this topic. + + +

+ This project was originally developed for use in rama, where you can see it being used for example to provide an in-memory (upstream) proxy database. + Do let us know if you use it in your project as well, so that we can assemble a showcase list.

+