From fd76c2c95ad9e0b02d368b29ab1650d94d856a52 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Tue, 30 Jan 2024 15:07:30 -0500 Subject: [PATCH] reimplement specials config (WIP) --- README.md | 2 +- src/api_test.rs | 2 + src/ast.rs | 2 + src/lib.rs | 103 +++++++++++++---------- src/validate.rs | 2 + src/valve.rs | 218 +++++++++++++++++++++++++++++++++++++++--------- 6 files changed, 244 insertions(+), 85 deletions(-) diff --git a/README.md b/README.md index 183c8a1d..df8d64d6 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ valve --help to see command line options. ### API -See [Valve] +See [valve] ### Python bindings See [valve.py](https://github.com/ontodev/valve.py) diff --git a/src/api_test.rs b/src/api_test.rs index e551a1da..8ceddcaf 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -1,3 +1,5 @@ +//! API tests + use ontodev_valve::{ valve::{Valve, ValveError}, SerdeMap, diff --git a/src/ast.rs b/src/ast.rs index 18ba841a..07639206 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,3 +1,5 @@ +//! Valve expressions + use std::fmt; /// Represents an expression as parsed using [Valve's grammar](../valve_grammar/index.html). diff --git a/src/lib.rs b/src/lib.rs index e498c14a..562c4072 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,7 @@ //! to see command line options. //! //! ## API -//! See [Valve] +//! See [valve] //! //! ## Python bindings //! See [valve.py](https://github.com/ontodev/valve.py) @@ -33,8 +33,7 @@ use crate::{ validate_row_tx, validate_rows_constraints, validate_rows_intra, validate_rows_trees, QueryAsIf, QueryAsIfKind, ResultRow, }, - valve::ValveError, - valve::ValveRow, + valve::{ValveError, ValveRow, ValveSpecialConfig}, valve_grammar::StartParser, }; use async_recursion::async_recursion; @@ -212,7 +211,7 @@ pub fn read_config_files( parser: &StartParser, pool: &AnyPool, ) -> ( - SerdeMap, + ValveSpecialConfig, SerdeMap, SerdeMap, SerdeMap, @@ -221,19 +220,9 @@ pub fn read_config_files( HashMap>, HashMap>, ) { - let special_table_types = json!({ - "table": {"required": true}, - "column": {"required": true}, - "datatype": {"required": true}, - "rule": {"required": false}, - }); - let special_table_types = special_table_types.as_object().unwrap(); - // Initialize the special table entries in the specials config map: - let mut specials_config = SerdeMap::new(); - for t in special_table_types.keys() { - specials_config.insert(t.to_string(), SerdeValue::Null); - } + let specials_config_old = SerdeMap::new(); + let mut specials_config = ValveSpecialConfig::default(); // Load the table table from the given path: let mut tables_config = SerdeMap::new(); @@ -277,22 +266,38 @@ pub fn read_config_files( } } - if special_table_types.contains_key(row_type) { - match specials_config.get(row_type) { - Some(SerdeValue::Null) => (), - _ => panic!( - "Multiple tables with type '{}' declared in '{}'", - row_type, path - ), + let row_table = row.get("table").and_then(|t| t.as_str()).unwrap(); + let duplicate_err = format!( + "Multiple tables with type '{}' declared in '{}'", + row_type, path + ); + match row_type.as_str() { + "column" => { + if specials_config.column != "" { + panic!("{}", duplicate_err); + } + specials_config.column = row_table.to_string(); } - let row_table = row.get("table").and_then(|t| t.as_str()).unwrap(); - specials_config.insert( - row_type.to_string(), - SerdeValue::String(row_table.to_string()), - ); - } else { - panic!("Unrecognized table type '{}' in '{}'", row_type, path); - } + "datatype" => { + if specials_config.datatype != "" { + panic!("{}", duplicate_err); + } + specials_config.datatype = row_table.to_string(); + } + "rule" => { + if let Some(_) = &specials_config.rule { + panic!("{}", duplicate_err); + } + specials_config.rule = Some(row_table.to_string()); + } + "table" => { + if specials_config.table != "" { + panic!("{}", duplicate_err); + } + specials_config.table = row_table.to_string(); + } + _ => panic!("Unrecognized table type '{}' in '{}'", row_type, path), + }; } row.insert(String::from("column"), SerdeValue::Object(SerdeMap::new())); @@ -301,13 +306,17 @@ pub fn read_config_files( } // Check that all the required special tables are present - for (table_type, table_spec) in special_table_types.iter() { - if let Some(SerdeValue::Bool(true)) = table_spec.get("required") { - if let Some(SerdeValue::Null) = specials_config.get(table_type) { - panic!("Missing required '{}' table in '{}'", table_type, path); - } - } + if specials_config.column == "" { + panic!("Missing required 'column' table in '{}'", path); + } + if specials_config.datatype == "" { + panic!("Missing required 'datatype' table in '{}'", path); } + if specials_config.table == "" { + panic!("Missing required 'table' table in '{}'", path); + } + + println!("SPECIALS CONFIG: {:#?}", specials_config); // Helper function for extracting special configuration (other than the main 'table' // configuration) from either a file or a table in the database, depending on the value of @@ -317,12 +326,12 @@ pub fn read_config_files( // indicated by `path`, the table is read, and the rows are returned. fn get_special_config( table_type: &str, - specials_config: &SerdeMap, + specials_config_old: &SerdeMap, tables_config: &SerdeMap, path: &str, ) -> Vec { if path.to_lowercase().ends_with(".tsv") { - let table_name = specials_config + let table_name = specials_config_old .get(table_type) .and_then(|d| d.as_str()) .unwrap(); @@ -359,7 +368,7 @@ pub fn read_config_files( // Load datatype table let mut datatypes_config = SerdeMap::new(); - let rows = get_special_config("datatype", &specials_config, &tables_config, path); + let rows = get_special_config("datatype", &specials_config_old, &tables_config, path); for mut row in rows { for column in vec!["datatype", "parent", "condition", "SQL type"] { if !row.contains_key(column) || row.get(column) == None { @@ -390,7 +399,7 @@ pub fn read_config_files( } // Load column table - let rows = get_special_config("column", &specials_config, &tables_config, path); + let rows = get_special_config("column", &specials_config_old, &tables_config, path); for mut row in rows { for column in vec!["table", "column", "label", "nulltype", "datatype"] { if !row.contains_key(column) || row.get(column) == None { @@ -439,8 +448,8 @@ pub fn read_config_files( // Load rule table if it exists let mut rules_config = SerdeMap::new(); - if let Some(SerdeValue::String(table_name)) = specials_config.get("rule") { - let rows = get_special_config(table_name, &specials_config, &tables_config, path); + if let Some(SerdeValue::String(table_name)) = specials_config_old.get("rule") { + let rows = get_special_config(table_name, &specials_config_old, &tables_config, path); for row in rows { for column in vec![ "table", @@ -1275,6 +1284,7 @@ pub async fn get_affected_rows( let cell = json!({ "value": value, "valid": true, + // TODO: Here? "messages": json!([]), }); table_row.insert(cname.to_string(), json!(cell)); @@ -1321,6 +1331,7 @@ pub async fn get_row_from_db( if raw_messages.is_null() { vec![] } else { + // TODO: Here? let messages: &str = sql_row.get("message"); match serde_json::from_str::(messages) { Err(e) => return Err(ValveError::SerdeJsonError(e.into())), @@ -1347,6 +1358,7 @@ pub async fn get_row_from_db( } else { value = String::from(""); } + // TODO: Here? let column_messages = messages .iter() .filter(|m| m.get("column").unwrap().as_str() == Some(cname)) @@ -2064,6 +2076,7 @@ pub async fn insert_new_row_tx( .ok_or(ValveError::InputError( format!("No string named 'value' in {:?}", cell).into(), ))?; + // TODO: Here? let messages = sort_messages( &sorted_datatypes, cell.get("messages") @@ -2150,6 +2163,7 @@ pub async fn insert_new_row_tx( } query.execute(tx.acquire().await?).await?; + // TODO: Here? // Next add any validation messages to the message table: for m in all_messages { let column = m.get("column").and_then(|c| c.as_str()).unwrap(); @@ -3326,6 +3340,7 @@ pub fn get_table_ddl( /// message types, count the various message types encountered in the list and increment the counts /// in messages_stats accordingly. pub fn add_message_counts(messages: &Vec, messages_stats: &mut HashMap) { + // TODO: Here. for message in messages { let message = message.as_object().unwrap(); let level = message.get("level").unwrap(); @@ -3403,6 +3418,7 @@ pub fn sort_messages( let mut datatype_messages = vec![]; let mut structure_messages = vec![]; let mut rule_messages = vec![]; + // TODO: Here. for message in cell_messages { let rule = message .get("rule") @@ -3528,6 +3544,7 @@ pub async fn make_inserts( add_message_counts(&cell.messages, messages_stats); } + // TODO: Here. for message in sort_messages(&sorted_datatypes, &cell.messages) { let row = row.row_number.unwrap().to_string(); let message_values = vec![ diff --git a/src/validate.rs b/src/validate.rs index 26c70f53..7268c93e 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -1,3 +1,5 @@ +//! Low-level validation functions + use crate::{ cast_sql_param_from_text, error, get_column_value, get_sql_type_from_global_config, is_sql_type_error, local_sql_syntax, diff --git a/src/valve.rs b/src/valve.rs index 9866390a..493f31ec 100644 --- a/src/valve.rs +++ b/src/valve.rs @@ -1,3 +1,5 @@ +//! The Valve API + use crate::{ add_message_counts, ast::Expression, @@ -32,6 +34,150 @@ use std::{collections::HashMap, fs::File, path::Path}; // [backed by a BTreeMap by default](https://docs.serde.rs/serde_json/map/index.html) pub type ValveRow = serde_json::Map; +#[derive(Debug)] +pub enum ValveError { + /// An error in the Valve configuration: + ConfigError(String), + /// An error that occurred while reading or writing to a CSV/TSV: + CsvError(csv::Error), + /// An error involving the data: + DataError(String), + /// An error generated by the underlying database: + DatabaseError(sqlx::Error), + /// An error in the inputs to a function: + InputError(String), + /// An error that occurred while reading/writing to stdio: + IOError(std::io::Error), + /// An error that occurred while serialising or deserialising to/from JSON: + SerdeJsonError(serde_json::Error), +} + +// TODO: Make this struct public. +#[derive(Debug)] +struct _ValveMessage { + pub _column: String, + pub _value: String, + pub _rule: String, + pub _level: String, + pub _message: String, +} + +// TODO: Make this struct public. +#[derive(Debug)] +struct _ValveChange { + pub _column: String, + pub _level: String, + pub _old_value: String, + pub _value: String, + pub _message: String, +} + +// TODO: Make this struct public. +#[derive(Debug, Default)] +pub struct ValveSpecialConfig { + pub column: String, + pub datatype: String, + pub rule: Option, + pub table: String, +} + +// TODO: Make this struct public. +#[derive(Debug)] +struct _ValveTableConfig { + pub _table: String, + pub _column: HashMap, + pub _column_order: Vec, + pub _descrtiption: String, + pub _path: String, +} + +// TODO: Make this struct public. +#[derive(Debug)] +struct _ValveColumnConfig { + pub _table: String, + pub _column: String, + pub _datatype: String, + pub _description: String, + pub _label: String, + pub _structure: String, +} + +// TODO: Make this struct public. +#[derive(Debug)] +struct _ValveDatatypeConfig { + pub _html_type: String, + pub _sql_type: String, + pub _condition: String, + pub _datatype: String, + pub _description: String, + pub _parent: String, + pub _structure: String, + pub _transform: String, +} + +// TODO: Make this struct public. +#[derive(Debug)] +struct _ValveRuleConfig { + pub _description: String, + pub _level: String, + pub _table: String, + pub _then_column: String, + pub _then_condition: String, + pub _when_column: String, + pub _when_condition: String, +} + +// TODO: Make this struct public. +#[derive(Debug)] +struct _ValveTreeConstraint { + pub _child: String, + pub _parent: String, +} + +// TODO: Make this struct public. +#[derive(Debug)] +struct _ValveUnderConstraint { + pub _column: String, + pub _ttable: String, + pub _tcolumn: String, + pub _value: SerdeValue, +} + +// TODO: Make this struct public. +#[derive(Debug)] +struct _ValveForeignConstraint { + pub _column: String, + pub _ftable: String, + pub _fcolumn: String, +} + +// TODO: Make this struct public. +#[derive(Debug)] +struct _ValveTableConstraints { + // Note that primary would be better as HashMap, since it is not possible to + // have more than one primary key per table, but the below reflects the current implementation + // which in principle allows for more than one. + pub _primary: HashMap>, + pub _unique: HashMap>, + pub _foreign: HashMap>, + pub _tree: HashMap>, + pub _under: HashMap>, +} + +// TODO: Make this struct public. +#[derive(Debug)] +struct ValveConfig { + pub special: ValveSpecialConfig, + //pub table: HashMap, + //pub datatype: HashMap, + //pub rule: HashMap>>, + //pub table_constraints: ValveTableConstraints, + //pub datatype_conditions: HashMap, + //pub rule_conditions: HashMap>>, + //pub structure_conditions: HashMap, + //pub sorted_table_list: Vec, +} + /// Main entrypoint for the Valve API. #[derive(Clone, Debug)] pub struct Valve { @@ -55,24 +201,6 @@ pub struct Valve { pub verbose: bool, } -#[derive(Debug)] -pub enum ValveError { - /// An error in the Valve configuration: - ConfigError(String), - /// An error that occurred while reading or writing to a CSV/TSV: - CsvError(csv::Error), - /// An error involving the data: - DataError(String), - /// An error generated by the underlying database: - DatabaseError(sqlx::Error), - /// An error in the inputs to a function: - InputError(String), - /// An error that occurred while reading/writing to stdio: - IOError(std::io::Error), - /// An error that occurred while serialising or deserialising to/from JSON: - SerdeJsonError(serde_json::Error), -} - impl From for ValveError { fn from(e: csv::Error) -> Self { Self::CsvError(e) @@ -131,52 +259,60 @@ impl Valve { let parser = StartParser::new(); let ( specials_config, - tables_config, - datatypes_config, - rules_config, - constraints_config, + tables_config_old, + datatypes_config_old, + rules_config_old, + constraints_config_old, sorted_table_list, table_dependencies_in, table_dependencies_out, ) = read_config_files(table_path, &parser, &pool); - let mut config = SerdeMap::new(); - config.insert( - String::from("special"), - SerdeValue::Object(specials_config.clone()), - ); - config.insert( + // TODO: Obviously remove this later. + if 1 == 1 { + todo!(); + } + + let mut config_old = SerdeMap::new(); + //config_old.insert( + // String::from("special"), + // SerdeValue::Object(specials_config_old.clone()), + //); + config_old.insert( String::from("table"), - SerdeValue::Object(tables_config.clone()), + SerdeValue::Object(tables_config_old.clone()), ); - config.insert( + config_old.insert( String::from("datatype"), - SerdeValue::Object(datatypes_config.clone()), + SerdeValue::Object(datatypes_config_old.clone()), ); - config.insert( + config_old.insert( String::from("rule"), - SerdeValue::Object(rules_config.clone()), + SerdeValue::Object(rules_config_old.clone()), ); - config.insert( + config_old.insert( String::from("constraints"), - SerdeValue::Object(constraints_config.clone()), + SerdeValue::Object(constraints_config_old.clone()), ); let mut sorted_table_serdevalue_list: Vec = vec![]; for table in &sorted_table_list { sorted_table_serdevalue_list.push(SerdeValue::String(table.to_string())); } - config.insert( + config_old.insert( String::from("sorted_table_list"), SerdeValue::Array(sorted_table_serdevalue_list), ); - let compiled_datatype_conditions = get_compiled_datatype_conditions(&config, &parser); - let compiled_rule_conditions = - get_compiled_rule_conditions(&config, compiled_datatype_conditions.clone(), &parser); - let parsed_structure_conditions = get_parsed_structure_conditions(&config, &parser); + let compiled_datatype_conditions = get_compiled_datatype_conditions(&config_old, &parser); + let compiled_rule_conditions = get_compiled_rule_conditions( + &config_old, + compiled_datatype_conditions.clone(), + &parser, + ); + let parsed_structure_conditions = get_parsed_structure_conditions(&config_old, &parser); Ok(Self { - config: config, + config: config_old, compiled_datatype_conditions: compiled_datatype_conditions, compiled_rule_conditions: compiled_rule_conditions, parsed_structure_conditions: parsed_structure_conditions,