// Patch summary: adds a `SET` statement to the ahnlich DSL (files under ahnlich/dsl/src).
//
//   array.rs (new file)
//     Holds `parse_multi_f32_array` and `parse_f32_array`, which this patch removes from db.rs
//     and re-adds here as `pub(crate)`. `parse_f32_array` wraps
//     `Array1::from_iter(..)` in a `StoreKey`, parsing each inner pair's text with `.parse()`
//     and `.expect("Cannot parse single f32 num")`, i.e. it panics rather than returning an
//     error if a token does not parse.
//   db.rs
//     Imports the helpers from `array` and `parse_store_keys_to_store_value` from `metadata`,
//     replaces the "#TODO SET" comment with a SET example, and adds a `Rule::set_in_store` arm
//     that takes two inner pairs (key/value list, then store name) and builds
//     `DBQuery::Set { store, inputs }`. A missing inner pair yields `DslError::UnexpectedSpan`.
//   lib.rs
//     Declares the private module `array`.
//   metadata.rs
//     Adds `parse_into_store_key_and_value` (one f32 array plus a `HashMap` of
//     `MetadataKey` -> value built with `HashMap::insert`, so a key repeated inside one `{...}`
//     keeps only its last value) and `parse_store_keys_to_store_value` (pushes each parsed
//     entry into a `Vec`, returning early via `?` on the first error).
//   syntax/syntax.pest
//     Adds `set_in_store` to `db_statement`; adds the rules `store_value_single`, `store_value`,
//     `store_key_to_store_value` and `store_keys_to_store_value`; lets `f32_array` accept
//     whitespace after "[" and before "]"; makes `raw_string` additionally stop at "}".
//   tests/db.rs
//     Adds `test_set_in_store_parse`: `set 2134 in store` must fail with
//     `DslError::UnexpectedSpan((0, 17))`, and a two-entry SET query must parse into the
//     expected `DBQuery::Set`.
//
// NOTE(review): this copy of the patch has no line breaks or indentation inside hunks and is
//   missing angle-bracket generic arguments (e.g. `Pair`, `Vec`, `.parse::()`,
//   `Result, DslError>`) -- presumably lost during extraction. It will not apply as-is;
//   regenerate it from the original commit. TODO confirm against the repository.
// NOTE(review): the second metadata.rs hunk header (`@@ -34,3 +36,46 @@`) announces three
//   context lines, but they and the start of the `parse_into_store_key_and_value` signature
//   are absent from this text.
// NOTE(review): `set_in_store` matches `^"in"` directly, while `get_sim_n` uses `in_ignored`
//   (definition not shown here) -- confirm whether the difference is intentional.
// NOTE(review): the updated `raw_string` comment mentions "closing braces" only, but the rule
//   also stops at "," and ")".
diff --git a/ahnlich/dsl/src/array.rs b/ahnlich/dsl/src/array.rs new file mode 100644 index 00000000..7dae7277 --- /dev/null +++ b/ahnlich/dsl/src/array.rs @@ -0,0 +1,17 @@ +use crate::parser::Rule; +use ahnlich_types::keyval::StoreKey; +use ndarray::Array1; +use pest::iterators::Pair; + +pub(crate) fn parse_multi_f32_array(f32_arrays_pair: Pair) -> Vec { + f32_arrays_pair.into_inner().map(parse_f32_array).collect() +} + +pub(crate) fn parse_f32_array(pair: Pair) -> StoreKey { + StoreKey(Array1::from_iter(pair.into_inner().map(|f32_pair| { + f32_pair + .as_str() + .parse::() + .expect("Cannot parse single f32 num") + }))) +} diff --git a/ahnlich/dsl/src/db.rs b/ahnlich/dsl/src/db.rs index 2bea33c9..86d0abc6 100644 --- a/ahnlich/dsl/src/db.rs +++ b/ahnlich/dsl/src/db.rs @@ -2,32 +2,15 @@ use std::{collections::HashSet, num::NonZeroUsize}; use crate::{ algorithm::{to_algorithm, to_non_linear}, + array::{parse_f32_array, parse_multi_f32_array}, + metadata::parse_store_keys_to_store_value, parser::{QueryParser, Rule}, }; -use ahnlich_types::{ - db::DBQuery, - keyval::{StoreKey, StoreName}, - metadata::MetadataKey, -}; -use ndarray::Array1; -use pest::iterators::Pair; +use ahnlich_types::{db::DBQuery, keyval::StoreName, metadata::MetadataKey}; use pest::Parser; use crate::{error::DslError, predicate::parse_predicate_expression}; -fn parse_multi_f32_array(f32_arrays_pair: Pair) -> Vec { - f32_arrays_pair.into_inner().map(parse_f32_array).collect() -} - -fn parse_f32_array(pair: Pair) -> StoreKey { - StoreKey(Array1::from_iter(pair.into_inner().map(|f32_pair| { - f32_pair - .as_str() - .parse::() - .expect("Cannot parse single f32 num") - }))) -} - // Parse raw strings separated by ; into a Vec. 
Examples include but are not restricted // to // @@ -45,9 +28,7 @@ fn parse_f32_array(pair: Pair) -> StoreKey { // GETPRED ((author = dickens) OR (country != Nigeria)) IN my_store // GETSIMN 4 WITH [0.65, 2.78] USING cosinesimilarity IN my_store WHERE (author = dickens) // CREATESTORE IF NOT EXISTS my_store DIMENSION 21 PREDICATES (author, country) NONLINEARALGORITHMINDEX (kdtree) -// -// #TODO -// SET +// SET (([1.0, 2.1, 3.2], {name: Haks, category: dev}), ([3.1, 4.8, 5.0], {name: Deven, category: dev})) in store pub fn parse_db_query(input: &str) -> Result, DslError> { let pairs = QueryParser::parse(Rule::db_query, input).map_err(Box::new)?; let statements = pairs.into_iter().collect::>(); @@ -60,6 +41,21 @@ pub fn parse_db_query(input: &str) -> Result, DslError> { Rule::list_clients => DBQuery::ListClients, Rule::list_stores => DBQuery::ListStores, Rule::info_server => DBQuery::InfoServer, + Rule::set_in_store => { + let mut inner_pairs = statement.into_inner(); + let store_keys_to_store_values = inner_pairs + .next() + .ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?; + let store = inner_pairs + .next() + .ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))? 
+ .as_str(); + + DBQuery::Set { + store: StoreName(store.to_string()), + inputs: parse_store_keys_to_store_value(store_keys_to_store_values)?, + } + } Rule::create_store => { let mut inner_pairs = statement.into_inner().peekable(); let mut error_if_exists = true; diff --git a/ahnlich/dsl/src/lib.rs b/ahnlich/dsl/src/lib.rs index ca18af74..700e4f38 100644 --- a/ahnlich/dsl/src/lib.rs +++ b/ahnlich/dsl/src/lib.rs @@ -1,4 +1,5 @@ mod algorithm; +mod array; pub mod db; pub mod error; mod metadata; diff --git a/ahnlich/dsl/src/metadata.rs b/ahnlich/dsl/src/metadata.rs index ad660c42..990c15eb 100644 --- a/ahnlich/dsl/src/metadata.rs +++ b/ahnlich/dsl/src/metadata.rs @@ -1,8 +1,10 @@ +use crate::array::parse_f32_array; use crate::error::DslError; use crate::parser::Rule; -use ahnlich_types::metadata::MetadataValue; +use ahnlich_types::keyval::{StoreKey, StoreValue}; +use ahnlich_types::metadata::{MetadataKey, MetadataValue}; use pest::iterators::Pair; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; pub(crate) fn parse_metadata_value(pair: Pair) -> Result { match pair.as_rule() { @@ -34,3 +36,46 @@ pub(crate) fn parse_metadata_values(pair: Pair) -> Result) -> Result<(StoreKey, StoreValue), DslError> { + let start_pos = pair.as_span().start_pos().pos(); + let end_pos = pair.as_span().end_pos().pos(); + + let mut inner_pairs = pair.into_inner(); + let f32_array = parse_f32_array( + inner_pairs + .next() + .ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?, + ); + let store_value = inner_pairs + .next() + .ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?; + let mut store_value_map = HashMap::new(); + for store_value_single in store_value.into_inner() { + let start_pos = store_value_single.as_span().start_pos().pos(); + let end_pos = store_value_single.as_span().end_pos().pos(); + let mut v = store_value_single.into_inner(); + let key = MetadataKey::new( + v.next() + .ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))? 
+ .as_str() + .to_string(), + ); + let value = parse_metadata_value( + v.next() + .ok_or(DslError::UnexpectedSpan((start_pos, end_pos)))?, + )?; + store_value_map.insert(key, value); + } + Ok((f32_array, store_value_map)) +} + +pub(crate) fn parse_store_keys_to_store_value( + pair: Pair, +) -> Result, DslError> { + let mut values = vec![]; + for value_pair in pair.into_inner() { + values.push(parse_into_store_key_and_value(value_pair)?); + } + Ok(values) +} diff --git a/ahnlich/dsl/src/syntax/syntax.pest b/ahnlich/dsl/src/syntax/syntax.pest index 1e039618..775da6f0 100644 --- a/ahnlich/dsl/src/syntax/syntax.pest +++ b/ahnlich/dsl/src/syntax/syntax.pest @@ -17,6 +17,7 @@ db_statement = _{ get_pred | get_sim_n | create_store | + set_in_store | invalid_statement } @@ -36,6 +37,7 @@ get_pred = { whitespace* ~ ^"getpred" ~ whitespace* ~ predicate_condition ~ in_i get_sim_n = { whitespace* ~ ^"getsimn" ~ whitespace* ~ non_zero ~ whitespace* ~ ^"with" ~ whitespace* ~ f32_array ~ whitespace* ~ ^"using" ~ whitespace* ~ algorithm ~ whitespace* ~ in_ignored ~ whitespace* ~ store_name ~ whitespace* ~ (^"where" ~ whitespace* ~ predicate_condition)? } // CREATESTORE IF NOT EXISTS store-name DIMENSION non-zero-size PREDICATES (key1, key2) NONLINEARALGORITHMINDEX (kdtree) create_store = { whitespace* ~ ^"createstore" ~ whitespace* ~ (if_not_exists)? ~ whitespace* ~ store_name ~ whitespace* ~ ^"dimension" ~ whitespace* ~ non_zero ~ whitespace* ~ (^"predicates" ~ whitespace* ~ "(" ~ whitespace* ~ metadata_keys ~ whitespace* ~ ")" )? ~ (whitespace* ~ ^"nonlinearalgorithmindex" ~ whitespace* ~ "(" ~ whitespace* ~ non_linear_algorithms ~ whitespace* ~ ")")? 
} +set_in_store = { whitespace* ~ ^"set" ~ whitespace* ~ store_keys_to_store_value ~ whitespace* ~ ^"in" ~ whitespace* ~ store_name } if_exists = { whitespace* ~ ^"if" ~ whitespace* ~ ^"exists" ~ whitespace* } if_not_exists = { whitespace* ~ ^"if" ~ whitespace* ~ ^"not" ~ whitespace* ~ ^"exists" ~ whitespace* } @@ -45,6 +47,10 @@ store_name = { (ASCII_ALPHANUMERIC | "_" | "-")+ } index_name = { (ASCII_ALPHANUMERIC | "_" | "-")+ } metadata_key = { (ASCII_ALPHANUMERIC | "_" | "-")+ } metadata_keys = { metadata_key ~ (whitespace* ~ "," ~ whitespace* ~ metadata_key)* } +store_value_single = { metadata_key ~ whitespace* ~ ":" ~ whitespace* ~ metadata_value } +store_value = { "{" ~ whitespace* ~ store_value_single ~ (whitespace* ~ "," ~ whitespace* ~ store_value_single)* ~ whitespace* ~ "}" } +store_key_to_store_value = { "(" ~ whitespace* ~ f32_array ~ whitespace* ~ "," ~ whitespace* ~ store_value ~ whitespace* ~ ")" } +store_keys_to_store_value = { "(" ~ whitespace* ~ store_key_to_store_value ~ (whitespace* ~ "," ~ whitespace* ~ store_key_to_store_value)* ~ whitespace* ~ ")" } non_linear_algorithm = { ^"kdtree" } algorithm = { ^"kdtree" | @@ -59,14 +65,14 @@ index_names = { index_name ~ (whitespace* ~ "," ~ whitespace* ~ index_name)* } non_zero = { '1'..'9' ~ ASCII_DIGIT* } f32 = { ASCII_DIGIT+ ~ ("." ~ ASCII_DIGIT+)? } // Array of floating-point numbers -f32_array = { "[" ~ f32 ~ (whitespace* ~ "," ~ whitespace* ~ f32)* ~ "]"} +f32_array = { "[" ~ whitespace* ~ f32 ~ (whitespace* ~ "," ~ whitespace* ~ f32)* ~ whitespace* ~ "]"} // List of f32 arrays (comma-separated) f32_arrays = { f32_array ~ (whitespace* ~ "," ~ whitespace* ~ f32_array)* } -// raw string. For simplicity no quotes in the string -raw_string = { (!("," | ")") ~ ANY)+ } +// raw string. 
For simplicity no quotes, commas or closing braces in the string +raw_string = { (!("," | ")" | "}" ) ~ ANY)+ } ASCII_HEX = { '0'..'9' | 'A'..'F' | 'a'..'f' } // image contains all possible ascii hex image = { "/x" ~ ASCII_HEX+ } diff --git a/ahnlich/dsl/src/tests/db.rs b/ahnlich/dsl/src/tests/db.rs index b27f9b26..f38db822 100644 --- a/ahnlich/dsl/src/tests/db.rs +++ b/ahnlich/dsl/src/tests/db.rs @@ -5,7 +5,10 @@ use ahnlich_types::{ metadata::MetadataKey, }; use ndarray::Array1; -use std::{collections::HashSet, num::NonZeroUsize}; +use std::{ + collections::{HashMap, HashSet}, + num::NonZeroUsize, +}; use ahnlich_types::{ metadata::MetadataValue, @@ -298,6 +301,44 @@ fn test_get_key_parse() { ); } +#[test] +fn test_set_in_store_parse() { + let input = r#"set 2134 in store"#; + let DslError::UnexpectedSpan((start, end)) = parse_db_query(input).unwrap_err() else { + panic!("Unexpected error pattern found") + }; + assert_eq!((start, end), (0, 17)); + let input = r#"SET (([1,2,3], {state: Munich, country: Germany}), ([3.2, 4.5, 9.4], {country: USA})) in geo"#; + assert_eq!( + parse_db_query(input).expect("Could not parse query input"), + vec![DBQuery::Set { + store: StoreName("geo".to_string()), + inputs: vec![ + ( + StoreKey(Array1::from_iter([1.0, 2.0, 3.0])), + HashMap::from_iter([ + ( + MetadataKey::new("state".to_string()), + MetadataValue::RawString("Munich".to_string()) + ), + ( + MetadataKey::new("country".to_string()), + MetadataValue::RawString("Germany".to_string()) + ), + ]) + ), + ( + StoreKey(Array1::from_iter([3.2, 4.5, 9.4])), + HashMap::from_iter([( + MetadataKey::new("country".to_string()), + MetadataValue::RawString("USA".to_string()) + ),]) + ) + ], + }] + ); +} + #[test] fn test_del_key_parse() { let input = r#"DELKEY ([a, b, c], [3.0, 4.0]) in 1234"#;