From 70d035937d2a8effbfc1d6803542d2522036e9d3 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 26 Nov 2023 14:30:17 -0500 Subject: [PATCH 01/57] warn but do not panic, during load, if table file doesn't exist --- src/lib.rs | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 296fd7cf..638770a0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4403,6 +4403,27 @@ async fn load_db( let mut total_infos = 0; let mut table_num = 1; for table_name in table_list { + let path = String::from( + config + .get("table") + .and_then(|t| t.as_object()) + .and_then(|o| o.get(&table_name)) + .and_then(|n| n.get("path")) + .and_then(|p| p.as_str()) + .unwrap(), + ); + let mut rdr = { + match File::open(path.clone()) { + Err(e) => { + eprintln!("WARN: Unable to open '{}': {}", path.clone(), e); + continue; + } + Ok(table_file) => csv::ReaderBuilder::new() + .has_headers(false) + .delimiter(b'\t') + .from_reader(table_file), + } + }; if verbose { eprintln!( "{} - Loading table {}/{}: {}", @@ -4413,21 +4434,6 @@ async fn load_db( ); } table_num += 1; - let path = String::from( - config - .get("table") - .and_then(|t| t.as_object()) - .and_then(|o| o.get(&table_name)) - .and_then(|n| n.get("path")) - .and_then(|p| p.as_str()) - .unwrap(), - ); - let mut rdr = csv::ReaderBuilder::new() - .has_headers(false) - .delimiter(b'\t') - .from_reader(File::open(path.clone()).unwrap_or_else(|err| { - panic!("Unable to open '{}': {}", path.clone(), err); - })); // Extract the headers, which we will need later: let mut records = rdr.records(); From 0bd3ad3a8de27cf49895298e87414682602404b4 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 26 Nov 2023 14:48:15 -0500 Subject: [PATCH 02/57] add ValveRow alias --- src/lib.rs | 43 ++++++++++++++++++++++--------------------- src/validate.rs | 22 +++++++++++----------- 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 
638770a0..ba4b2167 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -77,10 +77,11 @@ lazy_static! { static ref SL_SQL_TYPES: Vec<&'static str> = vec!["text", "numeric", "integer", "real"]; } -/// An alias for [serde_json::Map](..//serde_json/struct.Map.html). +/// Aliases for [serde_json::Map](..//serde_json/struct.Map.html). // Note: serde_json::Map is // [backed by a BTreeMap by default](https://docs.serde.rs/serde_json/map/index.html) pub type SerdeMap = serde_json::Map; +pub type ValveRow = serde_json::Map; /// Represents a structure such as those found in the `structure` column of the `column` table in /// both its parsed format (i.e., as an [Expression](ast/enum.Expression.html)) as well as in its @@ -1444,7 +1445,7 @@ pub async fn get_affected_rows( global_config: &SerdeMap, pool: &AnyPool, tx: &mut Transaction<'_, sqlx::Any>, -) -> Result, String> { +) -> Result, String> { // Since the consequence of an update could involve currently invalid rows // (in the conflict table) becoming valid or vice versa, we need to check rows for // which the value of the column is the same as `value` @@ -1470,7 +1471,7 @@ pub async fn get_affected_rows( .await .map_err(|e| e.to_string())? 
{ - let mut table_row = SerdeMap::new(); + let mut table_row = ValveRow::new(); let mut row_number: Option = None; for column in row.columns() { let cname = column.name(); @@ -1508,7 +1509,7 @@ pub async fn get_row_from_db( tx: &mut Transaction<'_, sqlx::Any>, table: &str, row_number: &u32, -) -> Result { +) -> Result { let sql = format!( "{} WHERE row_number = {}", query_with_message_values(table, global_config, pool), @@ -1541,7 +1542,7 @@ pub async fn get_row_from_db( } }; - let mut row = SerdeMap::new(); + let mut row = ValveRow::new(); for column in sql_row.columns() { let cname = column.name(); if !vec!["row_number", "message"].contains(&cname) { @@ -1649,7 +1650,7 @@ pub async fn get_rows_to_update( ), String, > { - fn get_cell_value(row: &SerdeMap, column: &str) -> Result { + fn get_cell_value(row: &ValveRow, column: &str) -> Result { match row.get(column).and_then(|cell| cell.get("value")) { Some(SerdeValue::String(s)) => Ok(format!("{}", s)), Some(SerdeValue::Number(n)) => Ok(format!("{}", n)), @@ -1900,8 +1901,8 @@ pub async fn record_row_change( tx: &mut Transaction<'_, sqlx::Any>, table: &str, row_number: &u32, - from: Option<&SerdeMap>, - to: Option<&SerdeMap>, + from: Option<&ValveRow>, + to: Option<&ValveRow>, user: &str, ) -> Result<(), sqlx::Error> { if let (None, None) = (from, to) { @@ -1910,8 +1911,8 @@ pub async fn record_row_change( )); } - fn to_text(smap: Option<&SerdeMap>, quoted: bool) -> String { - match smap { + fn to_text(row: Option<&ValveRow>, quoted: bool) -> String { + match row { None => "NULL".to_string(), Some(r) => { let inner = format!("{}", json!(r)).replace("'", "''"); @@ -1932,7 +1933,7 @@ pub async fn record_row_change( } } - fn summarize(from: Option<&SerdeMap>, to: Option<&SerdeMap>) -> Result { + fn summarize(from: Option<&ValveRow>, to: Option<&ValveRow>) -> Result { // Constructs a summary of the form: // { // "column":"bar", @@ -2420,7 +2421,7 @@ pub async fn insert_new_row( compiled_rule_conditions: &HashMap>>, 
pool: &AnyPool, table: &str, - row: &SerdeMap, + row: &ValveRow, new_row_number: Option, user: &str, ) -> Result { @@ -2469,7 +2470,7 @@ pub async fn insert_new_row_tx( pool: &AnyPool, tx: &mut Transaction, table: &str, - row: &SerdeMap, + row: &ValveRow, new_row_number: Option, skip_validation: bool, ) -> Result { @@ -2789,7 +2790,7 @@ pub async fn update_row( compiled_rule_conditions: &HashMap>>, pool: &AnyPool, table_name: &str, - row: &SerdeMap, + row: &ValveRow, row_number: &u32, user: &str, ) -> Result<(), sqlx::Error> { @@ -2854,7 +2855,7 @@ pub async fn update_row_tx( pool: &AnyPool, tx: &mut Transaction, table: &str, - row: &SerdeMap, + row: &ValveRow, row_number: &u32, skip_validation: bool, do_not_recurse: bool, @@ -2967,10 +2968,10 @@ pub async fn update_row_tx( Ok(()) } -/// Given a path, read a TSV file and return a vector of rows represented as SerdeMaps. +/// Given a path, read a TSV file and return a vector of rows represented as ValveRows. /// Note: Use this function to read "small" TSVs only. In particular, use this for the special /// configuration tables. -fn read_tsv_into_vector(path: &str) -> Vec { +fn read_tsv_into_vector(path: &str) -> Vec { let mut rdr = csv::ReaderBuilder::new() .delimiter(b'\t') @@ -2981,7 +2982,7 @@ fn read_tsv_into_vector(path: &str) -> Vec { let rows: Vec<_> = rdr .deserialize() .map(|result| { - let row: SerdeMap = result.expect(format!("Error reading: {}", path).as_str()); + let row: ValveRow = result.expect(format!("Error reading: {}", path).as_str()); row }) .collect(); @@ -3010,8 +3011,8 @@ fn read_tsv_into_vector(path: &str) -> Vec { } /// Given a database at the specified location, query the "table" table and return a vector of rows -/// represented as SerdeMaps. -fn read_db_table_into_vector(database: &str, config_table: &str) -> Vec { +/// represented as ValveRows. 
+fn read_db_table_into_vector(database: &str, config_table: &str) -> Vec { let connection_options; if database.starts_with("postgresql://") { connection_options = AnyConnectOptions::from_str(database).unwrap(); @@ -3036,7 +3037,7 @@ fn read_db_table_into_vector(database: &str, config_table: &str) -> Vec, + pub row: Option, } /// Given a config map, maps of compiled datatype and rule conditions, a database connection @@ -62,10 +62,10 @@ pub async fn validate_row( pool: &AnyPool, tx: Option<&mut Transaction<'_, sqlx::Any>>, table_name: &str, - row: &SerdeMap, + row: &ValveRow, row_number: Option, query_as_if: Option<&QueryAsIf>, -) -> Result { +) -> Result { // Fallback to a default transaction if it is not given. Since we do not commit before it falls // out of scope the transaction will be rolled back at the end of this function. And since this // function is read-only the rollback is trivial and therefore inconsequential. @@ -944,10 +944,10 @@ pub fn validate_rows_intra( result_rows } -/// Given a row represented as a SerdeMap, remove any duplicate messages from the row's cells, so +/// Given a row represented as a ValveRow, remove any duplicate messages from the row's cells, so /// that no cell has messages with the same level, rule, and message text. -fn remove_duplicate_messages(row: &SerdeMap) -> Result { - let mut deduped_row = SerdeMap::new(); +fn remove_duplicate_messages(row: &ValveRow) -> Result { + let mut deduped_row = ValveRow::new(); for (column_name, cell) in row.iter() { let mut messages = cell .get("messages") @@ -981,12 +981,12 @@ fn remove_duplicate_messages(row: &SerdeMap) -> Result { Ok(deduped_row) } -/// Given a result row, convert it to a SerdeMap and return it. +/// Given a result row, convert it to a ValveRow and return it. /// Note that if the incoming result row has an associated row_number, this is ignored. 
-fn result_row_to_config_map(incoming: &ResultRow) -> SerdeMap { - let mut outgoing = SerdeMap::new(); +fn result_row_to_config_map(incoming: &ResultRow) -> ValveRow { + let mut outgoing = ValveRow::new(); for (column, cell) in incoming.contents.iter() { - let mut cell_map = SerdeMap::new(); + let mut cell_map = ValveRow::new(); if let Some(nulltype) = &cell.nulltype { cell_map.insert( "nulltype".to_string(), From c20bca7e7c094ce70b4e448dd83d25adbde7cb9b Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 26 Nov 2023 15:15:25 -0500 Subject: [PATCH 03/57] add stubs for new API --- src/lib.rs | 192 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index ba4b2167..1d056e99 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -83,6 +83,198 @@ lazy_static! { pub type SerdeMap = serde_json::Map; pub type ValveRow = serde_json::Map; +pub struct Valve { + global_config: SerdeMap, + compiled_datatype_conditions: HashMap, + compiled_rule_conditions: HashMap>>, + pool: AnyPool, + user: String, +} + +impl Valve { + /// Given a path to a table table, + /// read it, configure VALVE, and return a new Valve struct. + /// Return an error if reading or configuration fails. + pub fn build(mut self, table_path: &str) -> Result { + // Should be ConfigError + todo!(); + Ok(self) + } + + /// Set the user name for this instance. + /// The username must be a short string without newlines. + /// Return an error on invalid username. + pub fn set_user(mut self, user: &str) -> Result { + // ConfigError + todo!(); + Ok(self) + } + + /// Given a database connection string, + /// create a database connection for VALVE to use. + /// Drop and replace any current database connection. + /// Return an error if the connection cannot be created. 
+ pub fn connect(mut self, connection: &str) -> Result { + // DatabaseError + todo!(); + Ok(self) + } + + /// Create all configured database tables and views + /// if they do not already exist as configured. + /// Return an error on database problems. + pub fn create_all_tables(mut self) -> Result { + // DatabaseError + todo!(); + Ok(self) + } + + /// Drop all configured tables, in reverse dependency order. + /// Return an error on database problem. + pub fn drop_all_tables(self) -> Result { + // DatabaseError + todo!(); + Ok(self) + } + + /// Given a vector of table names, + /// drop those tables, in the given order. + /// Return an error on invalid table name or database problem. + pub fn drop_tables(self, tables: Vec<&str>) -> Result { + // DatabaseError + todo!(); + Ok(self) + } + + /// Truncate all configured tables, in reverse dependency order. + /// Return an error on database problem. + pub fn truncate_all_tables(self) -> Result { + // DatabaseError + todo!(); + Ok(self) + } + + /// Given a vector of table names, + /// truncate those tables, in the given order. + /// Return an error on invalid table name or database problem. + pub fn truncate_tables(self, tables: Vec<&str>) -> Result { + // ConfigOrDatabaseError + //self.create_all_tables(); + todo!(); + Ok(self) + } + + /// Load all configured tables in dependency order. + /// If `validate` is false, just try to insert all rows. + /// Return an error on database problem, + /// including database conflicts that prevent rows being inserted. + pub fn load_all_tables(self, validate: bool) -> Result { + // DatabaseError + //self.create_all_tables(); + //self.truncate_all_tables(); + todo!(); + Ok(self) + } + + /// Given a vector of table names, + /// load those tables in the given order. + /// If `validate` is false, just try to insert all rows. + /// Return an error on invalid table name or database problem. 
+ pub fn load_tables(self, tables: Vec<&str>, validate: bool) -> Result { + // ConfigOrDatabaseError + //self.create_all_tables(); + //self.truncate_tables(tables); + todo!(); + Ok(self) + } + + /// Save all configured tables to their 'path's. + /// Return an error on writing or database problem. + pub fn save_all_tables(self) -> Result { + // WriteOrDatabaseError + todo!(); + Ok(self) + } + + /// Given a vector of table names, + /// Save thosee tables to their 'path's, in the given order. + /// Return an error on writing or database problem. + pub fn save_tables(self, tables: Vec<&str>) -> Result { + // WriteOrDatabaseError + todo!(); + Ok(self) + } + + /// Given a table name and a row as JSON, + /// return the validated row. + /// Return an error on database problem. + pub fn validate_row(self, table_name: &str, row: &ValveRow) -> Result { + // DatabaseError + todo!(); + } + + /// Given a table name and a row as JSON, + /// add the row to the table in the database, + /// and return the validated row, including its new row_number. + /// Return an error invalid table name or database problem. + pub fn insert_row(self, table_name: &str, row: &ValveRow) -> Result { + // ConfigOrDatabaseError + todo!(); + } + + /// Given a table name, a row number, and a row as JSON, + /// update the row in the database, + /// and return the validated row. + /// Return an error invalid table name or row number or database problem. + pub fn update_row( + self, + table_name: &str, + row_number: usize, + row: &ValveRow, + ) -> Result { + // ConfigOrDatabaseError + todo!(); + } + + /// Given a table name and a row number, + /// delete that row from the table. + /// Return an error invalid table name or row number or database problem. + pub fn delete_row(self, table_name: &str, row_number: usize) -> Result<(), sqlx::Error> { + // ConfigOrDatabaseError + todo!(); + } + + /// Return the next change to undo, or None. + /// Return an error on database problem. 
+ pub fn get_record_to_undo(self) -> Result, sqlx::Error> { + // DatabaseError + todo!(); + } + + /// Return the next change to redo, or None. + /// Return an error on database problem. + pub fn get_record_to_redo(self) -> Result, sqlx::Error> { + // DatabaseError + todo!(); + } + + /// Undo one change and return the change record + /// or None if there was no change to undo. + /// Return an error on database problem. + pub fn undo(self) -> Result, sqlx::Error> { + // DatabaseError + todo!(); + } + + /// Redo one change and return the change record + /// or None if there was no change to redo. + /// Return an error on database problem. + pub fn redo(self) -> Result, sqlx::Error> { + // DatabaseError + todo!(); + } +} + /// Represents a structure such as those found in the `structure` column of the `column` table in /// both its parsed format (i.e., as an [Expression](ast/enum.Expression.html)) as well as in its /// original format (i.e., as a plain String). From 2a6eabcf928ffbdde709c4c5d3b80a99cd6ce2e0 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 26 Nov 2023 19:27:17 -0500 Subject: [PATCH 04/57] implement Valve::build() --- src/lib.rs | 116 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 105 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 1d056e99..8407b621 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -83,22 +83,116 @@ lazy_static! 
{ pub type SerdeMap = serde_json::Map; pub type ValveRow = serde_json::Map; +#[derive(Debug)] pub struct Valve { - global_config: SerdeMap, - compiled_datatype_conditions: HashMap, - compiled_rule_conditions: HashMap>>, - pool: AnyPool, - user: String, + pub global_config: SerdeMap, + pub compiled_datatype_conditions: HashMap, + pub compiled_rule_conditions: HashMap>>, + pub pool: Option, + pub user: String, } +// TODO NEXT: Move the existing public functions into this interface: impl Valve { - /// Given a path to a table table, - /// read it, configure VALVE, and return a new Valve struct. + /// Given a path to a table table and its name, read the table table, configure VALVE + /// partially ... TODO: finish this. + /// , and return a new Valve struct. /// Return an error if reading or configuration fails. - pub fn build(mut self, table_path: &str) -> Result { + pub async fn build( + table_path: &str, + config_table: &str, + // TODO: We need to refactor configure_db() so that it no longer collects the constraints + // configuration. We will do that in read_config_files() instead. + // Once this is implemented, the code below to construct the AnyPool which is used to + // call configure_db() should be removed. + // We will also remove the `database`, `initial_load` and `verbose` parameters. 
+ database: &str, + initial_load: bool, + verbose: bool, + ) -> Result { // Should be ConfigError - todo!(); - Ok(self) + + let parser = StartParser::new(); + + let (specials_config, mut tables_config, mut datatypes_config, rules_config) = + read_config_files(table_path, config_table); + + //////////////////////////////////////////////////////////////////////////////////////// + // TODO: Remove this block of code later (see comment above) + let connection_options; + if database.starts_with("postgresql://") { + connection_options = AnyConnectOptions::from_str(database)?; + } else { + let connection_string; + if !database.starts_with("sqlite://") { + connection_string = format!("sqlite://{}?mode=rwc", database); + } else { + connection_string = database.to_string(); + } + connection_options = AnyConnectOptions::from_str(connection_string.as_str()).unwrap(); + } + + let pool = AnyPoolOptions::new() + .max_connections(5) + .connect_with(connection_options) + .await?; + + let (sorted_table_list, constraints_config) = configure_db( + &mut tables_config, + &mut datatypes_config, + &pool, + &parser, + verbose, + &ValveCommand::Config, + ) + .await?; + //////////////////////////////////////////////////////////////////////////////////////// + + let mut global_config = SerdeMap::new(); + global_config.insert( + String::from("special"), + SerdeValue::Object(specials_config.clone()), + ); + global_config.insert( + String::from("table"), + SerdeValue::Object(tables_config.clone()), + ); + global_config.insert( + String::from("datatype"), + SerdeValue::Object(datatypes_config.clone()), + ); + global_config.insert( + String::from("rule"), + SerdeValue::Object(rules_config.clone()), + ); + global_config.insert( + String::from("constraints"), + SerdeValue::Object(constraints_config.clone()), + ); + let mut sorted_table_serdevalue_list: Vec = vec![]; + for table in &sorted_table_list { + sorted_table_serdevalue_list.push(SerdeValue::String(table.to_string())); + } + 
global_config.insert( + String::from("sorted_table_list"), + SerdeValue::Array(sorted_table_serdevalue_list), + ); + + let compiled_datatype_conditions = + get_compiled_datatype_conditions(&global_config, &parser); + let compiled_rule_conditions = get_compiled_rule_conditions( + &global_config, + compiled_datatype_conditions.clone(), + &parser, + ); + + Ok(Self { + global_config: global_config, + compiled_datatype_conditions: compiled_datatype_conditions, + compiled_rule_conditions: compiled_rule_conditions, + pool: None, + user: String::from("Valve"), + }) } /// Set the user name for this instance. @@ -106,7 +200,7 @@ impl Valve { /// Return an error on invalid username. pub fn set_user(mut self, user: &str) -> Result { // ConfigError - todo!(); + self.user = user.to_string(); Ok(self) } From 29a1be894a811c417841a3e2a2a39becaa5aaddb Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 27 Nov 2023 08:23:25 -0500 Subject: [PATCH 05/57] refactor, fix api sigs, implement Valve::connect() and Valve::create_tables() --- src/lib.rs | 112 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 71 insertions(+), 41 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8407b621..fd14adf4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -95,7 +95,7 @@ pub struct Valve { // TODO NEXT: Move the existing public functions into this interface: impl Valve { /// Given a path to a table table and its name, read the table table, configure VALVE - /// partially ... TODO: finish this. + /// partially ... TODO: finish rewriting this doc string. /// , and return a new Valve struct. /// Return an error if reading or configuration fails. 
pub async fn build( @@ -110,7 +110,7 @@ impl Valve { initial_load: bool, verbose: bool, ) -> Result { - // Should be ConfigError + // TODO: Error type should be ConfigError let parser = StartParser::new(); @@ -119,24 +119,7 @@ impl Valve { //////////////////////////////////////////////////////////////////////////////////////// // TODO: Remove this block of code later (see comment above) - let connection_options; - if database.starts_with("postgresql://") { - connection_options = AnyConnectOptions::from_str(database)?; - } else { - let connection_string; - if !database.starts_with("sqlite://") { - connection_string = format!("sqlite://{}?mode=rwc", database); - } else { - connection_string = database.to_string(); - } - connection_options = AnyConnectOptions::from_str(connection_string.as_str()).unwrap(); - } - - let pool = AnyPoolOptions::new() - .max_connections(5) - .connect_with(connection_options) - .await?; - + let pool = get_pool_from_connection_string(database).await?; let (sorted_table_list, constraints_config) = configure_db( &mut tables_config, &mut datatypes_config, @@ -198,7 +181,7 @@ impl Valve { /// Set the user name for this instance. /// The username must be a short string without newlines. /// Return an error on invalid username. - pub fn set_user(mut self, user: &str) -> Result { + pub fn set_user(&mut self, user: &str) -> Result<&mut Self, sqlx::Error> { // ConfigError self.user = user.to_string(); Ok(self) @@ -208,24 +191,48 @@ impl Valve { /// create a database connection for VALVE to use. /// Drop and replace any current database connection. /// Return an error if the connection cannot be created. 
- pub fn connect(mut self, connection: &str) -> Result { + pub async fn connect(&mut self, connection: &str) -> Result<&mut Self, sqlx::Error> { // DatabaseError - todo!(); + self.pool = Some(get_pool_from_connection_string(connection).await?); Ok(self) } /// Create all configured database tables and views /// if they do not already exist as configured. /// Return an error on database problems. - pub fn create_all_tables(mut self) -> Result { + pub async fn create_all_tables(&mut self, verbose: bool) -> Result<&mut Self, sqlx::Error> { // DatabaseError - todo!(); + let mut tables_config = self + .global_config + .get_mut("table") + .and_then(|t| t.as_object_mut()) + .unwrap(); + let mut tables_config = tables_config.clone(); + let mut datatypes_config = self + .global_config + .get_mut("datatype") + .and_then(|d| d.as_object_mut()) + .unwrap(); + let mut datatypes_config = datatypes_config.clone(); + let pool = self.pool.as_ref().unwrap(); + let parser = StartParser::new(); + + // TODO: Revisit this once te configure_db() function has been refactored: + let (_, _) = configure_db( + &mut tables_config, + &mut datatypes_config, + &pool, + &parser, + verbose, + &ValveCommand::Create, + ) + .await?; Ok(self) } /// Drop all configured tables, in reverse dependency order. /// Return an error on database problem. - pub fn drop_all_tables(self) -> Result { + pub fn drop_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError todo!(); Ok(self) @@ -234,7 +241,7 @@ impl Valve { /// Given a vector of table names, /// drop those tables, in the given order. /// Return an error on invalid table name or database problem. - pub fn drop_tables(self, tables: Vec<&str>) -> Result { + pub fn drop_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { // DatabaseError todo!(); Ok(self) @@ -242,7 +249,7 @@ impl Valve { /// Truncate all configured tables, in reverse dependency order. /// Return an error on database problem. 
- pub fn truncate_all_tables(self) -> Result { + pub fn truncate_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError todo!(); Ok(self) @@ -251,7 +258,7 @@ impl Valve { /// Given a vector of table names, /// truncate those tables, in the given order. /// Return an error on invalid table name or database problem. - pub fn truncate_tables(self, tables: Vec<&str>) -> Result { + pub fn truncate_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError //self.create_all_tables(); todo!(); @@ -262,7 +269,7 @@ impl Valve { /// If `validate` is false, just try to insert all rows. /// Return an error on database problem, /// including database conflicts that prevent rows being inserted. - pub fn load_all_tables(self, validate: bool) -> Result { + pub fn load_all_tables(&self, validate: bool) -> Result<&Self, sqlx::Error> { // DatabaseError //self.create_all_tables(); //self.truncate_all_tables(); @@ -274,7 +281,7 @@ impl Valve { /// load those tables in the given order. /// If `validate` is false, just try to insert all rows. /// Return an error on invalid table name or database problem. - pub fn load_tables(self, tables: Vec<&str>, validate: bool) -> Result { + pub fn load_tables(&self, tables: Vec<&str>, validate: bool) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError //self.create_all_tables(); //self.truncate_tables(tables); @@ -284,7 +291,7 @@ impl Valve { /// Save all configured tables to their 'path's. /// Return an error on writing or database problem. - pub fn save_all_tables(self) -> Result { + pub fn save_all_tables(&self) -> Result<&Self, sqlx::Error> { // WriteOrDatabaseError todo!(); Ok(self) @@ -293,7 +300,7 @@ impl Valve { /// Given a vector of table names, /// Save thosee tables to their 'path's, in the given order. /// Return an error on writing or database problem. 
- pub fn save_tables(self, tables: Vec<&str>) -> Result { + pub fn save_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { // WriteOrDatabaseError todo!(); Ok(self) @@ -302,7 +309,7 @@ impl Valve { /// Given a table name and a row as JSON, /// return the validated row. /// Return an error on database problem. - pub fn validate_row(self, table_name: &str, row: &ValveRow) -> Result { + pub fn validate_row(&self, table_name: &str, row: &ValveRow) -> Result { // DatabaseError todo!(); } @@ -311,7 +318,7 @@ impl Valve { /// add the row to the table in the database, /// and return the validated row, including its new row_number. /// Return an error invalid table name or database problem. - pub fn insert_row(self, table_name: &str, row: &ValveRow) -> Result { + pub fn insert_row(&self, table_name: &str, row: &ValveRow) -> Result { // ConfigOrDatabaseError todo!(); } @@ -321,7 +328,7 @@ impl Valve { /// and return the validated row. /// Return an error invalid table name or row number or database problem. pub fn update_row( - self, + &self, table_name: &str, row_number: usize, row: &ValveRow, @@ -333,21 +340,21 @@ impl Valve { /// Given a table name and a row number, /// delete that row from the table. /// Return an error invalid table name or row number or database problem. - pub fn delete_row(self, table_name: &str, row_number: usize) -> Result<(), sqlx::Error> { + pub fn delete_row(&self, table_name: &str, row_number: usize) -> Result<(), sqlx::Error> { // ConfigOrDatabaseError todo!(); } /// Return the next change to undo, or None. /// Return an error on database problem. - pub fn get_record_to_undo(self) -> Result, sqlx::Error> { + pub fn get_record_to_undo(&self) -> Result, sqlx::Error> { // DatabaseError todo!(); } /// Return the next change to redo, or None. /// Return an error on database problem. 
- pub fn get_record_to_redo(self) -> Result, sqlx::Error> { + pub fn get_record_to_redo(&self) -> Result, sqlx::Error> { // DatabaseError todo!(); } @@ -355,7 +362,7 @@ impl Valve { /// Undo one change and return the change record /// or None if there was no change to undo. /// Return an error on database problem. - pub fn undo(self) -> Result, sqlx::Error> { + pub fn undo(&self) -> Result, sqlx::Error> { // DatabaseError todo!(); } @@ -363,7 +370,7 @@ impl Valve { /// Redo one change and return the change record /// or None if there was no change to redo. /// Return an error on database problem. - pub fn redo(self) -> Result, sqlx::Error> { + pub fn redo(&self) -> Result, sqlx::Error> { // DatabaseError todo!(); } @@ -432,6 +439,29 @@ impl std::fmt::Debug for ColumnRule { } } +/// TODO: Add docstring here. Note that once we have refactored configure_db() (see above) it may +/// make more sense for this function to be an inner function of Valve. +pub async fn get_pool_from_connection_string(database: &str) -> Result { + let connection_options; + if database.starts_with("postgresql://") { + connection_options = AnyConnectOptions::from_str(database)?; + } else { + let connection_string; + if !database.starts_with("sqlite://") { + connection_string = format!("sqlite://{}?mode=rwc", database); + } else { + connection_string = database.to_string(); + } + connection_options = AnyConnectOptions::from_str(connection_string.as_str()).unwrap(); + } + + let pool = AnyPoolOptions::new() + .max_connections(5) + .connect_with(connection_options) + .await?; + Ok(pool) +} + /// Given the path to a configuration table (either a table.tsv file or a database containing a /// table named "table"), load and check the 'table', 'column', and 'datatype' tables, and return /// SerdeMaps corresponding to specials, tables, datatypes, and rules. 
From c6f96fc4598e54ae86ebeeae45b89cac65f6b380 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 27 Nov 2023 08:25:36 -0500 Subject: [PATCH 06/57] rename create_all_tables to create_missing_tables --- src/lib.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fd14adf4..73721b07 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -200,7 +200,7 @@ impl Valve { /// Create all configured database tables and views /// if they do not already exist as configured. /// Return an error on database problems. - pub async fn create_all_tables(&mut self, verbose: bool) -> Result<&mut Self, sqlx::Error> { + pub async fn create_missing_tables(&mut self, verbose: bool) -> Result<&mut Self, sqlx::Error> { // DatabaseError let mut tables_config = self .global_config @@ -260,7 +260,7 @@ impl Valve { /// Return an error on invalid table name or database problem. pub fn truncate_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError - //self.create_all_tables(); + //self.create_missing_tables(); todo!(); Ok(self) } @@ -270,8 +270,10 @@ impl Valve { /// Return an error on database problem, /// including database conflicts that prevent rows being inserted. pub fn load_all_tables(&self, validate: bool) -> Result<&Self, sqlx::Error> { + // YOU ARE HERE. + // DatabaseError - //self.create_all_tables(); + //self.create_missing_tables(); //self.truncate_all_tables(); todo!(); Ok(self) @@ -283,7 +285,7 @@ impl Valve { /// Return an error on invalid table name or database problem. 
pub fn load_tables(&self, tables: Vec<&str>, validate: bool) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError - //self.create_all_tables(); + //self.create_missing_tables(); //self.truncate_tables(tables); todo!(); Ok(self) From 83e741ed51030529f98d3885874749e260b25099 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 27 Nov 2023 09:07:17 -0500 Subject: [PATCH 07/57] implement (rough) load_all_tables() --- src/lib.rs | 63 ++++++++++++++++++++++++++++++++++++++++++++++------- src/main.rs | 23 +++++++++++-------- 2 files changed, 69 insertions(+), 17 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 73721b07..4eefe7e9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -105,9 +105,8 @@ impl Valve { // configuration. We will do that in read_config_files() instead. // Once this is implemented, the code below to construct the AnyPool which is used to // call configure_db() should be removed. - // We will also remove the `database`, `initial_load` and `verbose` parameters. + // We will also remove the `database` and `verbose` parameters. database: &str, - initial_load: bool, verbose: bool, ) -> Result { // TODO: Error type should be ConfigError @@ -202,6 +201,10 @@ impl Valve { /// Return an error on database problems. pub async fn create_missing_tables(&mut self, verbose: bool) -> Result<&mut Self, sqlx::Error> { // DatabaseError + + // TODO: Revisit the implementation of this once te configure_db() function has been + // refactored. Currently it implicitly drops and recreates _all_ tables but eventually this + // function needs to do this only for _missing_ tables. 
let mut tables_config = self .global_config .get_mut("table") @@ -217,7 +220,6 @@ impl Valve { let pool = self.pool.as_ref().unwrap(); let parser = StartParser::new(); - // TODO: Revisit this once te configure_db() function has been refactored: let (_, _) = configure_db( &mut tables_config, &mut datatypes_config, @@ -269,13 +271,58 @@ impl Valve { /// If `validate` is false, just try to insert all rows. /// Return an error on database problem, /// including database conflicts that prevent rows being inserted. - pub fn load_all_tables(&self, validate: bool) -> Result<&Self, sqlx::Error> { - // YOU ARE HERE. - + pub async fn load_all_tables( + &mut self, + validate: bool, + verbose: bool, + initial_load: bool, + ) -> Result<&mut Self, sqlx::Error> { // DatabaseError - //self.create_missing_tables(); + + self.create_missing_tables(verbose); //self.truncate_all_tables(); - todo!(); + if let Some(pool) = &self.pool { + if pool.any_kind() == AnyKind::Sqlite { + sqlx_query("PRAGMA foreign_keys = ON").execute(pool).await?; + if initial_load { + // These pragmas are unsafe but they are used during initial loading since data + // integrity is not a priority in this case. 
+ sqlx_query("PRAGMA journal_mode = OFF") + .execute(pool) + .await?; + sqlx_query("PRAGMA synchronous = 0").execute(pool).await?; + sqlx_query("PRAGMA cache_size = 1000000") + .execute(pool) + .await?; + sqlx_query("PRAGMA temp_store = MEMORY") + .execute(pool) + .await?; + } + } + + if verbose { + eprintln!( + "{} - Processing {} tables.", + Utc::now(), + self.global_config + .get("sorted_table_list") + .and_then(|l| l.as_array()) + .unwrap() + .len() + ); + } + load_db( + &self.global_config, + &pool, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + verbose, + ) + .await?; + } else { + eprintln!("WARN: Attempt to load tables but Valve is not connected to a database."); + } + Ok(self) } diff --git a/src/main.rs b/src/main.rs index 7e61aba4..4c919167 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,6 +7,7 @@ use argparse::{ArgumentParser, Store, StoreTrue}; use ontodev_valve::{ get_compiled_datatype_conditions, get_compiled_rule_conditions, get_parsed_structure_conditions, valve, valve_grammar::StartParser, ValveCommand, + Valve }; use serde_json::{from_str, Value as SerdeValue}; use std::{env, process}; @@ -156,15 +157,19 @@ async fn main() -> Result<(), sqlx::Error> { ) .await?; } else { - valve( - &source, - &destination, - &ValveCommand::Load, - verbose, - initial_load, - &config_table, - ) - .await?; + let mut valve = Valve::build(&source, &config_table, &destination, verbose).await?; + valve.connect(&destination).await?; + valve.create_missing_tables(verbose).await?; + valve.load_all_tables(true, verbose, initial_load).await?; + // valve( + // &source, + // &destination, + // &ValveCommand::Load, + // verbose, + // initial_load, + // &config_table, + // ) + // .await?; } Ok(()) From 8dceabf780e8f1943f20baa00b13dde97eb1ba54 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 27 Nov 2023 09:14:36 -0500 Subject: [PATCH 08/57] fix small bug in call to create_all_tables() --- src/lib.rs | 2 +- src/main.rs | 1 - 2 files changed, 1 
insertion(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 4eefe7e9..5bc104bf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -279,7 +279,7 @@ impl Valve { ) -> Result<&mut Self, sqlx::Error> { // DatabaseError - self.create_missing_tables(verbose); + self.create_missing_tables(verbose).await?; //self.truncate_all_tables(); if let Some(pool) = &self.pool { if pool.any_kind() == AnyKind::Sqlite { diff --git a/src/main.rs b/src/main.rs index 4c919167..486cb522 100644 --- a/src/main.rs +++ b/src/main.rs @@ -159,7 +159,6 @@ async fn main() -> Result<(), sqlx::Error> { } else { let mut valve = Valve::build(&source, &config_table, &destination, verbose).await?; valve.connect(&destination).await?; - valve.create_missing_tables(verbose).await?; valve.load_all_tables(true, verbose, initial_load).await?; // valve( // &source, From dca2095bb4953720aa05820567011d60de8dcdd6 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 26 Nov 2023 14:30:17 -0500 Subject: [PATCH 09/57] warn but do not panic, during load, if table file doesn't exist --- src/lib.rs | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 296fd7cf..638770a0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4403,6 +4403,27 @@ async fn load_db( let mut total_infos = 0; let mut table_num = 1; for table_name in table_list { + let path = String::from( + config + .get("table") + .and_then(|t| t.as_object()) + .and_then(|o| o.get(&table_name)) + .and_then(|n| n.get("path")) + .and_then(|p| p.as_str()) + .unwrap(), + ); + let mut rdr = { + match File::open(path.clone()) { + Err(e) => { + eprintln!("WARN: Unable to open '{}': {}", path.clone(), e); + continue; + } + Ok(table_file) => csv::ReaderBuilder::new() + .has_headers(false) + .delimiter(b'\t') + .from_reader(table_file), + } + }; if verbose { eprintln!( "{} - Loading table {}/{}: {}", @@ -4413,21 +4434,6 @@ async fn load_db( ); } table_num += 1; - let path = String::from( - 
config - .get("table") - .and_then(|t| t.as_object()) - .and_then(|o| o.get(&table_name)) - .and_then(|n| n.get("path")) - .and_then(|p| p.as_str()) - .unwrap(), - ); - let mut rdr = csv::ReaderBuilder::new() - .has_headers(false) - .delimiter(b'\t') - .from_reader(File::open(path.clone()).unwrap_or_else(|err| { - panic!("Unable to open '{}': {}", path.clone(), err); - })); // Extract the headers, which we will need later: let mut records = rdr.records(); From 81800669a70d2b416e09904163d547dc00f0e70c Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 26 Nov 2023 14:48:15 -0500 Subject: [PATCH 10/57] add ValveRow alias --- src/lib.rs | 43 ++++++++++++++++++++++--------------------- src/validate.rs | 22 +++++++++++----------- 2 files changed, 33 insertions(+), 32 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 638770a0..ba4b2167 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -77,10 +77,11 @@ lazy_static! { static ref SL_SQL_TYPES: Vec<&'static str> = vec!["text", "numeric", "integer", "real"]; } -/// An alias for [serde_json::Map](..//serde_json/struct.Map.html). +/// Aliases for [serde_json::Map](..//serde_json/struct.Map.html). 
// Note: serde_json::Map is // [backed by a BTreeMap by default](https://docs.serde.rs/serde_json/map/index.html) pub type SerdeMap = serde_json::Map; +pub type ValveRow = serde_json::Map; /// Represents a structure such as those found in the `structure` column of the `column` table in /// both its parsed format (i.e., as an [Expression](ast/enum.Expression.html)) as well as in its @@ -1444,7 +1445,7 @@ pub async fn get_affected_rows( global_config: &SerdeMap, pool: &AnyPool, tx: &mut Transaction<'_, sqlx::Any>, -) -> Result, String> { +) -> Result, String> { // Since the consequence of an update could involve currently invalid rows // (in the conflict table) becoming valid or vice versa, we need to check rows for // which the value of the column is the same as `value` @@ -1470,7 +1471,7 @@ pub async fn get_affected_rows( .await .map_err(|e| e.to_string())? { - let mut table_row = SerdeMap::new(); + let mut table_row = ValveRow::new(); let mut row_number: Option = None; for column in row.columns() { let cname = column.name(); @@ -1508,7 +1509,7 @@ pub async fn get_row_from_db( tx: &mut Transaction<'_, sqlx::Any>, table: &str, row_number: &u32, -) -> Result { +) -> Result { let sql = format!( "{} WHERE row_number = {}", query_with_message_values(table, global_config, pool), @@ -1541,7 +1542,7 @@ pub async fn get_row_from_db( } }; - let mut row = SerdeMap::new(); + let mut row = ValveRow::new(); for column in sql_row.columns() { let cname = column.name(); if !vec!["row_number", "message"].contains(&cname) { @@ -1649,7 +1650,7 @@ pub async fn get_rows_to_update( ), String, > { - fn get_cell_value(row: &SerdeMap, column: &str) -> Result { + fn get_cell_value(row: &ValveRow, column: &str) -> Result { match row.get(column).and_then(|cell| cell.get("value")) { Some(SerdeValue::String(s)) => Ok(format!("{}", s)), Some(SerdeValue::Number(n)) => Ok(format!("{}", n)), @@ -1900,8 +1901,8 @@ pub async fn record_row_change( tx: &mut Transaction<'_, sqlx::Any>, table: &str, 
row_number: &u32, - from: Option<&SerdeMap>, - to: Option<&SerdeMap>, + from: Option<&ValveRow>, + to: Option<&ValveRow>, user: &str, ) -> Result<(), sqlx::Error> { if let (None, None) = (from, to) { @@ -1910,8 +1911,8 @@ pub async fn record_row_change( )); } - fn to_text(smap: Option<&SerdeMap>, quoted: bool) -> String { - match smap { + fn to_text(row: Option<&ValveRow>, quoted: bool) -> String { + match row { None => "NULL".to_string(), Some(r) => { let inner = format!("{}", json!(r)).replace("'", "''"); @@ -1932,7 +1933,7 @@ pub async fn record_row_change( } } - fn summarize(from: Option<&SerdeMap>, to: Option<&SerdeMap>) -> Result { + fn summarize(from: Option<&ValveRow>, to: Option<&ValveRow>) -> Result { // Constructs a summary of the form: // { // "column":"bar", @@ -2420,7 +2421,7 @@ pub async fn insert_new_row( compiled_rule_conditions: &HashMap>>, pool: &AnyPool, table: &str, - row: &SerdeMap, + row: &ValveRow, new_row_number: Option, user: &str, ) -> Result { @@ -2469,7 +2470,7 @@ pub async fn insert_new_row_tx( pool: &AnyPool, tx: &mut Transaction, table: &str, - row: &SerdeMap, + row: &ValveRow, new_row_number: Option, skip_validation: bool, ) -> Result { @@ -2789,7 +2790,7 @@ pub async fn update_row( compiled_rule_conditions: &HashMap>>, pool: &AnyPool, table_name: &str, - row: &SerdeMap, + row: &ValveRow, row_number: &u32, user: &str, ) -> Result<(), sqlx::Error> { @@ -2854,7 +2855,7 @@ pub async fn update_row_tx( pool: &AnyPool, tx: &mut Transaction, table: &str, - row: &SerdeMap, + row: &ValveRow, row_number: &u32, skip_validation: bool, do_not_recurse: bool, @@ -2967,10 +2968,10 @@ pub async fn update_row_tx( Ok(()) } -/// Given a path, read a TSV file and return a vector of rows represented as SerdeMaps. +/// Given a path, read a TSV file and return a vector of rows represented as ValveRows. /// Note: Use this function to read "small" TSVs only. In particular, use this for the special /// configuration tables. 
-fn read_tsv_into_vector(path: &str) -> Vec { +fn read_tsv_into_vector(path: &str) -> Vec { let mut rdr = csv::ReaderBuilder::new() .delimiter(b'\t') @@ -2981,7 +2982,7 @@ fn read_tsv_into_vector(path: &str) -> Vec { let rows: Vec<_> = rdr .deserialize() .map(|result| { - let row: SerdeMap = result.expect(format!("Error reading: {}", path).as_str()); + let row: ValveRow = result.expect(format!("Error reading: {}", path).as_str()); row }) .collect(); @@ -3010,8 +3011,8 @@ fn read_tsv_into_vector(path: &str) -> Vec { } /// Given a database at the specified location, query the "table" table and return a vector of rows -/// represented as SerdeMaps. -fn read_db_table_into_vector(database: &str, config_table: &str) -> Vec { +/// represented as ValveRows. +fn read_db_table_into_vector(database: &str, config_table: &str) -> Vec { let connection_options; if database.starts_with("postgresql://") { connection_options = AnyConnectOptions::from_str(database).unwrap(); @@ -3036,7 +3037,7 @@ fn read_db_table_into_vector(database: &str, config_table: &str) -> Vec, + pub row: Option, } /// Given a config map, maps of compiled datatype and rule conditions, a database connection @@ -62,10 +62,10 @@ pub async fn validate_row( pool: &AnyPool, tx: Option<&mut Transaction<'_, sqlx::Any>>, table_name: &str, - row: &SerdeMap, + row: &ValveRow, row_number: Option, query_as_if: Option<&QueryAsIf>, -) -> Result { +) -> Result { // Fallback to a default transaction if it is not given. Since we do not commit before it falls // out of scope the transaction will be rolled back at the end of this function. And since this // function is read-only the rollback is trivial and therefore inconsequential. 
@@ -944,10 +944,10 @@ pub fn validate_rows_intra( result_rows } -/// Given a row represented as a SerdeMap, remove any duplicate messages from the row's cells, so +/// Given a row represented as a ValveRow, remove any duplicate messages from the row's cells, so /// that no cell has messages with the same level, rule, and message text. -fn remove_duplicate_messages(row: &SerdeMap) -> Result { - let mut deduped_row = SerdeMap::new(); +fn remove_duplicate_messages(row: &ValveRow) -> Result { + let mut deduped_row = ValveRow::new(); for (column_name, cell) in row.iter() { let mut messages = cell .get("messages") @@ -981,12 +981,12 @@ fn remove_duplicate_messages(row: &SerdeMap) -> Result { Ok(deduped_row) } -/// Given a result row, convert it to a SerdeMap and return it. +/// Given a result row, convert it to a ValveRow and return it. /// Note that if the incoming result row has an associated row_number, this is ignored. -fn result_row_to_config_map(incoming: &ResultRow) -> SerdeMap { - let mut outgoing = SerdeMap::new(); +fn result_row_to_config_map(incoming: &ResultRow) -> ValveRow { + let mut outgoing = ValveRow::new(); for (column, cell) in incoming.contents.iter() { - let mut cell_map = SerdeMap::new(); + let mut cell_map = ValveRow::new(); if let Some(nulltype) = &cell.nulltype { cell_map.insert( "nulltype".to_string(), From 529dd28ebc03c3958dbb40291600052c0742c535 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 26 Nov 2023 15:15:25 -0500 Subject: [PATCH 11/57] add stubs for new API --- src/lib.rs | 192 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index ba4b2167..1d056e99 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -83,6 +83,198 @@ lazy_static! 
{ pub type SerdeMap = serde_json::Map; pub type ValveRow = serde_json::Map; +pub struct Valve { + global_config: SerdeMap, + compiled_datatype_conditions: HashMap, + compiled_rule_conditions: HashMap>>, + pool: AnyPool, + user: String, +} + +impl Valve { + /// Given a path to a table table, + /// read it, configure VALVE, and return a new Valve struct. + /// Return an error if reading or configuration fails. + pub fn build(mut self, table_path: &str) -> Result { + // Should be ConfigError + todo!(); + Ok(self) + } + + /// Set the user name for this instance. + /// The username must be a short string without newlines. + /// Return an error on invalid username. + pub fn set_user(mut self, user: &str) -> Result { + // ConfigError + todo!(); + Ok(self) + } + + /// Given a database connection string, + /// create a database connection for VALVE to use. + /// Drop and replace any current database connection. + /// Return an error if the connection cannot be created. + pub fn connect(mut self, connection: &str) -> Result { + // DatabaseError + todo!(); + Ok(self) + } + + /// Create all configured database tables and views + /// if they do not already exist as configured. + /// Return an error on database problems. + pub fn create_all_tables(mut self) -> Result { + // DatabaseError + todo!(); + Ok(self) + } + + /// Drop all configured tables, in reverse dependency order. + /// Return an error on database problem. + pub fn drop_all_tables(self) -> Result { + // DatabaseError + todo!(); + Ok(self) + } + + /// Given a vector of table names, + /// drop those tables, in the given order. + /// Return an error on invalid table name or database problem. + pub fn drop_tables(self, tables: Vec<&str>) -> Result { + // DatabaseError + todo!(); + Ok(self) + } + + /// Truncate all configured tables, in reverse dependency order. + /// Return an error on database problem. 
+ pub fn truncate_all_tables(self) -> Result { + // DatabaseError + todo!(); + Ok(self) + } + + /// Given a vector of table names, + /// truncate those tables, in the given order. + /// Return an error on invalid table name or database problem. + pub fn truncate_tables(self, tables: Vec<&str>) -> Result { + // ConfigOrDatabaseError + //self.create_all_tables(); + todo!(); + Ok(self) + } + + /// Load all configured tables in dependency order. + /// If `validate` is false, just try to insert all rows. + /// Return an error on database problem, + /// including database conflicts that prevent rows being inserted. + pub fn load_all_tables(self, validate: bool) -> Result { + // DatabaseError + //self.create_all_tables(); + //self.truncate_all_tables(); + todo!(); + Ok(self) + } + + /// Given a vector of table names, + /// load those tables in the given order. + /// If `validate` is false, just try to insert all rows. + /// Return an error on invalid table name or database problem. + pub fn load_tables(self, tables: Vec<&str>, validate: bool) -> Result { + // ConfigOrDatabaseError + //self.create_all_tables(); + //self.truncate_tables(tables); + todo!(); + Ok(self) + } + + /// Save all configured tables to their 'path's. + /// Return an error on writing or database problem. + pub fn save_all_tables(self) -> Result { + // WriteOrDatabaseError + todo!(); + Ok(self) + } + + /// Given a vector of table names, + /// Save thosee tables to their 'path's, in the given order. + /// Return an error on writing or database problem. + pub fn save_tables(self, tables: Vec<&str>) -> Result { + // WriteOrDatabaseError + todo!(); + Ok(self) + } + + /// Given a table name and a row as JSON, + /// return the validated row. + /// Return an error on database problem. 
+ pub fn validate_row(self, table_name: &str, row: &ValveRow) -> Result { + // DatabaseError + todo!(); + } + + /// Given a table name and a row as JSON, + /// add the row to the table in the database, + /// and return the validated row, including its new row_number. + /// Return an error invalid table name or database problem. + pub fn insert_row(self, table_name: &str, row: &ValveRow) -> Result { + // ConfigOrDatabaseError + todo!(); + } + + /// Given a table name, a row number, and a row as JSON, + /// update the row in the database, + /// and return the validated row. + /// Return an error invalid table name or row number or database problem. + pub fn update_row( + self, + table_name: &str, + row_number: usize, + row: &ValveRow, + ) -> Result { + // ConfigOrDatabaseError + todo!(); + } + + /// Given a table name and a row number, + /// delete that row from the table. + /// Return an error invalid table name or row number or database problem. + pub fn delete_row(self, table_name: &str, row_number: usize) -> Result<(), sqlx::Error> { + // ConfigOrDatabaseError + todo!(); + } + + /// Return the next change to undo, or None. + /// Return an error on database problem. + pub fn get_record_to_undo(self) -> Result, sqlx::Error> { + // DatabaseError + todo!(); + } + + /// Return the next change to redo, or None. + /// Return an error on database problem. + pub fn get_record_to_redo(self) -> Result, sqlx::Error> { + // DatabaseError + todo!(); + } + + /// Undo one change and return the change record + /// or None if there was no change to undo. + /// Return an error on database problem. + pub fn undo(self) -> Result, sqlx::Error> { + // DatabaseError + todo!(); + } + + /// Redo one change and return the change record + /// or None if there was no change to redo. + /// Return an error on database problem. 
+ pub fn redo(self) -> Result, sqlx::Error> { + // DatabaseError + todo!(); + } +} + /// Represents a structure such as those found in the `structure` column of the `column` table in /// both its parsed format (i.e., as an [Expression](ast/enum.Expression.html)) as well as in its /// original format (i.e., as a plain String). From efeb611eac7fc4e0a93fb20c56981713ae1a2595 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 26 Nov 2023 19:27:17 -0500 Subject: [PATCH 12/57] implement Valve::build() --- src/lib.rs | 116 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 105 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 1d056e99..8407b621 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -83,22 +83,116 @@ lazy_static! { pub type SerdeMap = serde_json::Map; pub type ValveRow = serde_json::Map; +#[derive(Debug)] pub struct Valve { - global_config: SerdeMap, - compiled_datatype_conditions: HashMap, - compiled_rule_conditions: HashMap>>, - pool: AnyPool, - user: String, + pub global_config: SerdeMap, + pub compiled_datatype_conditions: HashMap, + pub compiled_rule_conditions: HashMap>>, + pub pool: Option, + pub user: String, } +// TODO NEXT: Move the existing public functions into this interface: impl Valve { - /// Given a path to a table table, - /// read it, configure VALVE, and return a new Valve struct. + /// Given a path to a table table and its name, read the table table, configure VALVE + /// partially ... TODO: finish this. + /// , and return a new Valve struct. /// Return an error if reading or configuration fails. - pub fn build(mut self, table_path: &str) -> Result { + pub async fn build( + table_path: &str, + config_table: &str, + // TODO: We need to refactor configure_db() so that it no longer collects the constraints + // configuration. We will do that in read_config_files() instead. + // Once this is implemented, the code below to construct the AnyPool which is used to + // call configure_db() should be removed. 
+ // We will also remove the `database`, `initial_load` and `verbose` parameters. + database: &str, + initial_load: bool, + verbose: bool, + ) -> Result { // Should be ConfigError - todo!(); - Ok(self) + + let parser = StartParser::new(); + + let (specials_config, mut tables_config, mut datatypes_config, rules_config) = + read_config_files(table_path, config_table); + + //////////////////////////////////////////////////////////////////////////////////////// + // TODO: Remove this block of code later (see comment above) + let connection_options; + if database.starts_with("postgresql://") { + connection_options = AnyConnectOptions::from_str(database)?; + } else { + let connection_string; + if !database.starts_with("sqlite://") { + connection_string = format!("sqlite://{}?mode=rwc", database); + } else { + connection_string = database.to_string(); + } + connection_options = AnyConnectOptions::from_str(connection_string.as_str()).unwrap(); + } + + let pool = AnyPoolOptions::new() + .max_connections(5) + .connect_with(connection_options) + .await?; + + let (sorted_table_list, constraints_config) = configure_db( + &mut tables_config, + &mut datatypes_config, + &pool, + &parser, + verbose, + &ValveCommand::Config, + ) + .await?; + //////////////////////////////////////////////////////////////////////////////////////// + + let mut global_config = SerdeMap::new(); + global_config.insert( + String::from("special"), + SerdeValue::Object(specials_config.clone()), + ); + global_config.insert( + String::from("table"), + SerdeValue::Object(tables_config.clone()), + ); + global_config.insert( + String::from("datatype"), + SerdeValue::Object(datatypes_config.clone()), + ); + global_config.insert( + String::from("rule"), + SerdeValue::Object(rules_config.clone()), + ); + global_config.insert( + String::from("constraints"), + SerdeValue::Object(constraints_config.clone()), + ); + let mut sorted_table_serdevalue_list: Vec = vec![]; + for table in &sorted_table_list { + 
sorted_table_serdevalue_list.push(SerdeValue::String(table.to_string())); + } + global_config.insert( + String::from("sorted_table_list"), + SerdeValue::Array(sorted_table_serdevalue_list), + ); + + let compiled_datatype_conditions = + get_compiled_datatype_conditions(&global_config, &parser); + let compiled_rule_conditions = get_compiled_rule_conditions( + &global_config, + compiled_datatype_conditions.clone(), + &parser, + ); + + Ok(Self { + global_config: global_config, + compiled_datatype_conditions: compiled_datatype_conditions, + compiled_rule_conditions: compiled_rule_conditions, + pool: None, + user: String::from("Valve"), + }) } /// Set the user name for this instance. @@ -106,7 +200,7 @@ impl Valve { /// Return an error on invalid username. pub fn set_user(mut self, user: &str) -> Result { // ConfigError - todo!(); + self.user = user.to_string(); Ok(self) } From 1c4980821e7a93234b49ac7bb26843969a1d5e89 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 27 Nov 2023 08:23:25 -0500 Subject: [PATCH 13/57] refactor, fix api sigs, implement Valve::connect() and Valve::create_tables() --- src/lib.rs | 112 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 71 insertions(+), 41 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8407b621..fd14adf4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -95,7 +95,7 @@ pub struct Valve { // TODO NEXT: Move the existing public functions into this interface: impl Valve { /// Given a path to a table table and its name, read the table table, configure VALVE - /// partially ... TODO: finish this. + /// partially ... TODO: finish rewriting this doc string. /// , and return a new Valve struct. /// Return an error if reading or configuration fails. 
pub async fn build( @@ -110,7 +110,7 @@ impl Valve { initial_load: bool, verbose: bool, ) -> Result { - // Should be ConfigError + // TODO: Error type should be ConfigError let parser = StartParser::new(); @@ -119,24 +119,7 @@ impl Valve { //////////////////////////////////////////////////////////////////////////////////////// // TODO: Remove this block of code later (see comment above) - let connection_options; - if database.starts_with("postgresql://") { - connection_options = AnyConnectOptions::from_str(database)?; - } else { - let connection_string; - if !database.starts_with("sqlite://") { - connection_string = format!("sqlite://{}?mode=rwc", database); - } else { - connection_string = database.to_string(); - } - connection_options = AnyConnectOptions::from_str(connection_string.as_str()).unwrap(); - } - - let pool = AnyPoolOptions::new() - .max_connections(5) - .connect_with(connection_options) - .await?; - + let pool = get_pool_from_connection_string(database).await?; let (sorted_table_list, constraints_config) = configure_db( &mut tables_config, &mut datatypes_config, @@ -198,7 +181,7 @@ impl Valve { /// Set the user name for this instance. /// The username must be a short string without newlines. /// Return an error on invalid username. - pub fn set_user(mut self, user: &str) -> Result { + pub fn set_user(&mut self, user: &str) -> Result<&mut Self, sqlx::Error> { // ConfigError self.user = user.to_string(); Ok(self) @@ -208,24 +191,48 @@ impl Valve { /// create a database connection for VALVE to use. /// Drop and replace any current database connection. /// Return an error if the connection cannot be created. 
- pub fn connect(mut self, connection: &str) -> Result { + pub async fn connect(&mut self, connection: &str) -> Result<&mut Self, sqlx::Error> { // DatabaseError - todo!(); + self.pool = Some(get_pool_from_connection_string(connection).await?); Ok(self) } /// Create all configured database tables and views /// if they do not already exist as configured. /// Return an error on database problems. - pub fn create_all_tables(mut self) -> Result { + pub async fn create_all_tables(&mut self, verbose: bool) -> Result<&mut Self, sqlx::Error> { // DatabaseError - todo!(); + let mut tables_config = self + .global_config + .get_mut("table") + .and_then(|t| t.as_object_mut()) + .unwrap(); + let mut tables_config = tables_config.clone(); + let mut datatypes_config = self + .global_config + .get_mut("datatype") + .and_then(|d| d.as_object_mut()) + .unwrap(); + let mut datatypes_config = datatypes_config.clone(); + let pool = self.pool.as_ref().unwrap(); + let parser = StartParser::new(); + + // TODO: Revisit this once te configure_db() function has been refactored: + let (_, _) = configure_db( + &mut tables_config, + &mut datatypes_config, + &pool, + &parser, + verbose, + &ValveCommand::Create, + ) + .await?; Ok(self) } /// Drop all configured tables, in reverse dependency order. /// Return an error on database problem. - pub fn drop_all_tables(self) -> Result { + pub fn drop_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError todo!(); Ok(self) @@ -234,7 +241,7 @@ impl Valve { /// Given a vector of table names, /// drop those tables, in the given order. /// Return an error on invalid table name or database problem. - pub fn drop_tables(self, tables: Vec<&str>) -> Result { + pub fn drop_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { // DatabaseError todo!(); Ok(self) @@ -242,7 +249,7 @@ impl Valve { /// Truncate all configured tables, in reverse dependency order. /// Return an error on database problem. 
- pub fn truncate_all_tables(self) -> Result { + pub fn truncate_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError todo!(); Ok(self) @@ -251,7 +258,7 @@ impl Valve { /// Given a vector of table names, /// truncate those tables, in the given order. /// Return an error on invalid table name or database problem. - pub fn truncate_tables(self, tables: Vec<&str>) -> Result { + pub fn truncate_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError //self.create_all_tables(); todo!(); @@ -262,7 +269,7 @@ impl Valve { /// If `validate` is false, just try to insert all rows. /// Return an error on database problem, /// including database conflicts that prevent rows being inserted. - pub fn load_all_tables(self, validate: bool) -> Result { + pub fn load_all_tables(&self, validate: bool) -> Result<&Self, sqlx::Error> { // DatabaseError //self.create_all_tables(); //self.truncate_all_tables(); @@ -274,7 +281,7 @@ impl Valve { /// load those tables in the given order. /// If `validate` is false, just try to insert all rows. /// Return an error on invalid table name or database problem. - pub fn load_tables(self, tables: Vec<&str>, validate: bool) -> Result { + pub fn load_tables(&self, tables: Vec<&str>, validate: bool) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError //self.create_all_tables(); //self.truncate_tables(tables); @@ -284,7 +291,7 @@ impl Valve { /// Save all configured tables to their 'path's. /// Return an error on writing or database problem. - pub fn save_all_tables(self) -> Result { + pub fn save_all_tables(&self) -> Result<&Self, sqlx::Error> { // WriteOrDatabaseError todo!(); Ok(self) @@ -293,7 +300,7 @@ impl Valve { /// Given a vector of table names, /// Save thosee tables to their 'path's, in the given order. /// Return an error on writing or database problem. 
- pub fn save_tables(self, tables: Vec<&str>) -> Result { + pub fn save_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { // WriteOrDatabaseError todo!(); Ok(self) @@ -302,7 +309,7 @@ impl Valve { /// Given a table name and a row as JSON, /// return the validated row. /// Return an error on database problem. - pub fn validate_row(self, table_name: &str, row: &ValveRow) -> Result { + pub fn validate_row(&self, table_name: &str, row: &ValveRow) -> Result { // DatabaseError todo!(); } @@ -311,7 +318,7 @@ impl Valve { /// add the row to the table in the database, /// and return the validated row, including its new row_number. /// Return an error invalid table name or database problem. - pub fn insert_row(self, table_name: &str, row: &ValveRow) -> Result { + pub fn insert_row(&self, table_name: &str, row: &ValveRow) -> Result { // ConfigOrDatabaseError todo!(); } @@ -321,7 +328,7 @@ impl Valve { /// and return the validated row. /// Return an error invalid table name or row number or database problem. pub fn update_row( - self, + &self, table_name: &str, row_number: usize, row: &ValveRow, @@ -333,21 +340,21 @@ impl Valve { /// Given a table name and a row number, /// delete that row from the table. /// Return an error invalid table name or row number or database problem. - pub fn delete_row(self, table_name: &str, row_number: usize) -> Result<(), sqlx::Error> { + pub fn delete_row(&self, table_name: &str, row_number: usize) -> Result<(), sqlx::Error> { // ConfigOrDatabaseError todo!(); } /// Return the next change to undo, or None. /// Return an error on database problem. - pub fn get_record_to_undo(self) -> Result, sqlx::Error> { + pub fn get_record_to_undo(&self) -> Result, sqlx::Error> { // DatabaseError todo!(); } /// Return the next change to redo, or None. /// Return an error on database problem. 
- pub fn get_record_to_redo(self) -> Result, sqlx::Error> { + pub fn get_record_to_redo(&self) -> Result, sqlx::Error> { // DatabaseError todo!(); } @@ -355,7 +362,7 @@ impl Valve { /// Undo one change and return the change record /// or None if there was no change to undo. /// Return an error on database problem. - pub fn undo(self) -> Result, sqlx::Error> { + pub fn undo(&self) -> Result, sqlx::Error> { // DatabaseError todo!(); } @@ -363,7 +370,7 @@ impl Valve { /// Redo one change and return the change record /// or None if there was no change to redo. /// Return an error on database problem. - pub fn redo(self) -> Result, sqlx::Error> { + pub fn redo(&self) -> Result, sqlx::Error> { // DatabaseError todo!(); } @@ -432,6 +439,29 @@ impl std::fmt::Debug for ColumnRule { } } +/// TODO: Add docstring here. Note that once we have refactored configure_db() (see above) it may +/// make more sense for this function to be an inner function of Valve. +pub async fn get_pool_from_connection_string(database: &str) -> Result { + let connection_options; + if database.starts_with("postgresql://") { + connection_options = AnyConnectOptions::from_str(database)?; + } else { + let connection_string; + if !database.starts_with("sqlite://") { + connection_string = format!("sqlite://{}?mode=rwc", database); + } else { + connection_string = database.to_string(); + } + connection_options = AnyConnectOptions::from_str(connection_string.as_str()).unwrap(); + } + + let pool = AnyPoolOptions::new() + .max_connections(5) + .connect_with(connection_options) + .await?; + Ok(pool) +} + /// Given the path to a configuration table (either a table.tsv file or a database containing a /// table named "table"), load and check the 'table', 'column', and 'datatype' tables, and return /// SerdeMaps corresponding to specials, tables, datatypes, and rules. 
From b5ea3a811ebe38999e63da46e21f5c209a008f94 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 27 Nov 2023 08:25:36 -0500 Subject: [PATCH 14/57] rename create_all_tables to create_missing_tables --- src/lib.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fd14adf4..73721b07 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -200,7 +200,7 @@ impl Valve { /// Create all configured database tables and views /// if they do not already exist as configured. /// Return an error on database problems. - pub async fn create_all_tables(&mut self, verbose: bool) -> Result<&mut Self, sqlx::Error> { + pub async fn create_missing_tables(&mut self, verbose: bool) -> Result<&mut Self, sqlx::Error> { // DatabaseError let mut tables_config = self .global_config @@ -260,7 +260,7 @@ impl Valve { /// Return an error on invalid table name or database problem. pub fn truncate_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError - //self.create_all_tables(); + //self.create_missing_tables(); todo!(); Ok(self) } @@ -270,8 +270,10 @@ impl Valve { /// Return an error on database problem, /// including database conflicts that prevent rows being inserted. pub fn load_all_tables(&self, validate: bool) -> Result<&Self, sqlx::Error> { + // YOU ARE HERE. + // DatabaseError - //self.create_all_tables(); + //self.create_missing_tables(); //self.truncate_all_tables(); todo!(); Ok(self) @@ -283,7 +285,7 @@ impl Valve { /// Return an error on invalid table name or database problem. 
pub fn load_tables(&self, tables: Vec<&str>, validate: bool) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError - //self.create_all_tables(); + //self.create_missing_tables(); //self.truncate_tables(tables); todo!(); Ok(self) From 268bd2aa171bef1171fd11989fd08a8a4a00103f Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 27 Nov 2023 09:07:17 -0500 Subject: [PATCH 15/57] implement (rough) load_all_tables() --- src/lib.rs | 63 ++++++++++++++++++++++++++++++++++++++++++++++------- src/main.rs | 23 +++++++++++-------- 2 files changed, 69 insertions(+), 17 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 73721b07..4eefe7e9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -105,9 +105,8 @@ impl Valve { // configuration. We will do that in read_config_files() instead. // Once this is implemented, the code below to construct the AnyPool which is used to // call configure_db() should be removed. - // We will also remove the `database`, `initial_load` and `verbose` parameters. + // We will also remove the `database` and `verbose` parameters. database: &str, - initial_load: bool, verbose: bool, ) -> Result { // TODO: Error type should be ConfigError @@ -202,6 +201,10 @@ impl Valve { /// Return an error on database problems. pub async fn create_missing_tables(&mut self, verbose: bool) -> Result<&mut Self, sqlx::Error> { // DatabaseError + + // TODO: Revisit the implementation of this once te configure_db() function has been + // refactored. Currently it implicitly drops and recreates _all_ tables but eventually this + // function needs to do this only for _missing_ tables. 
let mut tables_config = self .global_config .get_mut("table") @@ -217,7 +220,6 @@ impl Valve { let pool = self.pool.as_ref().unwrap(); let parser = StartParser::new(); - // TODO: Revisit this once te configure_db() function has been refactored: let (_, _) = configure_db( &mut tables_config, &mut datatypes_config, @@ -269,13 +271,58 @@ impl Valve { /// If `validate` is false, just try to insert all rows. /// Return an error on database problem, /// including database conflicts that prevent rows being inserted. - pub fn load_all_tables(&self, validate: bool) -> Result<&Self, sqlx::Error> { - // YOU ARE HERE. - + pub async fn load_all_tables( + &mut self, + validate: bool, + verbose: bool, + initial_load: bool, + ) -> Result<&mut Self, sqlx::Error> { // DatabaseError - //self.create_missing_tables(); + + self.create_missing_tables(verbose); //self.truncate_all_tables(); - todo!(); + if let Some(pool) = &self.pool { + if pool.any_kind() == AnyKind::Sqlite { + sqlx_query("PRAGMA foreign_keys = ON").execute(pool).await?; + if initial_load { + // These pragmas are unsafe but they are used during initial loading since data + // integrity is not a priority in this case. 
+ sqlx_query("PRAGMA journal_mode = OFF") + .execute(pool) + .await?; + sqlx_query("PRAGMA synchronous = 0").execute(pool).await?; + sqlx_query("PRAGMA cache_size = 1000000") + .execute(pool) + .await?; + sqlx_query("PRAGMA temp_store = MEMORY") + .execute(pool) + .await?; + } + } + + if verbose { + eprintln!( + "{} - Processing {} tables.", + Utc::now(), + self.global_config + .get("sorted_table_list") + .and_then(|l| l.as_array()) + .unwrap() + .len() + ); + } + load_db( + &self.global_config, + &pool, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + verbose, + ) + .await?; + } else { + eprintln!("WARN: Attempt to load tables but Valve is not connected to a database."); + } + Ok(self) } diff --git a/src/main.rs b/src/main.rs index 7e61aba4..4c919167 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,6 +7,7 @@ use argparse::{ArgumentParser, Store, StoreTrue}; use ontodev_valve::{ get_compiled_datatype_conditions, get_compiled_rule_conditions, get_parsed_structure_conditions, valve, valve_grammar::StartParser, ValveCommand, + Valve }; use serde_json::{from_str, Value as SerdeValue}; use std::{env, process}; @@ -156,15 +157,19 @@ async fn main() -> Result<(), sqlx::Error> { ) .await?; } else { - valve( - &source, - &destination, - &ValveCommand::Load, - verbose, - initial_load, - &config_table, - ) - .await?; + let mut valve = Valve::build(&source, &config_table, &destination, verbose).await?; + valve.connect(&destination).await?; + valve.create_missing_tables(verbose).await?; + valve.load_all_tables(true, verbose, initial_load).await?; + // valve( + // &source, + // &destination, + // &ValveCommand::Load, + // verbose, + // initial_load, + // &config_table, + // ) + // .await?; } Ok(()) From 2b4073070959c839b6514df4b1bb4182da834235 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 27 Nov 2023 09:14:36 -0500 Subject: [PATCH 16/57] fix small bug in call to create_all_tables() --- src/lib.rs | 2 +- src/main.rs | 1 - 2 files changed, 1 
insertion(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 4eefe7e9..5bc104bf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -279,7 +279,7 @@ impl Valve { ) -> Result<&mut Self, sqlx::Error> { // DatabaseError - self.create_missing_tables(verbose); + self.create_missing_tables(verbose).await?; //self.truncate_all_tables(); if let Some(pool) = &self.pool { if pool.any_kind() == AnyKind::Sqlite { diff --git a/src/main.rs b/src/main.rs index 4c919167..486cb522 100644 --- a/src/main.rs +++ b/src/main.rs @@ -159,7 +159,6 @@ async fn main() -> Result<(), sqlx::Error> { } else { let mut valve = Valve::build(&source, &config_table, &destination, verbose).await?; valve.connect(&destination).await?; - valve.create_missing_tables(verbose).await?; valve.load_all_tables(true, verbose, initial_load).await?; // valve( // &source, From 0aa56afa2a5909ffecdc59430972d21e3d584e39 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 3 Dec 2023 15:11:56 -0500 Subject: [PATCH 17/57] finish implementing Valve::build() --- src/lib.rs | 410 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 359 insertions(+), 51 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5bc104bf..f84c2364 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -90,9 +90,10 @@ pub struct Valve { pub compiled_rule_conditions: HashMap>>, pub pool: Option, pub user: String, + pub verbose: bool, + pub initial_load: bool, } -// TODO NEXT: Move the existing public functions into this interface: impl Valve { /// Given a path to a table table and its name, read the table table, configure VALVE /// partially ... TODO: finish rewriting this doc string. @@ -101,34 +102,22 @@ impl Valve { pub async fn build( table_path: &str, config_table: &str, - // TODO: We need to refactor configure_db() so that it no longer collects the constraints - // configuration. We will do that in read_config_files() instead. 
- // Once this is implemented, the code below to construct the AnyPool which is used to - // call configure_db() should be removed. - // We will also remove the `database` and `verbose` parameters. database: &str, verbose: bool, + initial_load: bool, ) -> Result { // TODO: Error type should be ConfigError - let parser = StartParser::new(); - - let (specials_config, mut tables_config, mut datatypes_config, rules_config) = - read_config_files(table_path, config_table); - - //////////////////////////////////////////////////////////////////////////////////////// - // TODO: Remove this block of code later (see comment above) let pool = get_pool_from_connection_string(database).await?; - let (sorted_table_list, constraints_config) = configure_db( - &mut tables_config, - &mut datatypes_config, - &pool, - &parser, - verbose, - &ValveCommand::Config, - ) - .await?; - //////////////////////////////////////////////////////////////////////////////////////// + let parser = StartParser::new(); + let ( + specials_config, + tables_config, + datatypes_config, + rules_config, + constraints_config, + sorted_table_list, + ) = read_config_files(table_path, config_table, &parser, &pool); let mut global_config = SerdeMap::new(); global_config.insert( @@ -172,8 +161,10 @@ impl Valve { global_config: global_config, compiled_datatype_conditions: compiled_datatype_conditions, compiled_rule_conditions: compiled_rule_conditions, - pool: None, + pool: Some(pool), user: String::from("Valve"), + verbose: verbose, + initial_load: initial_load, }) } @@ -186,20 +177,10 @@ impl Valve { Ok(self) } - /// Given a database connection string, - /// create a database connection for VALVE to use. - /// Drop and replace any current database connection. - /// Return an error if the connection cannot be created. 
- pub async fn connect(&mut self, connection: &str) -> Result<&mut Self, sqlx::Error> { - // DatabaseError - self.pool = Some(get_pool_from_connection_string(connection).await?); - Ok(self) - } - /// Create all configured database tables and views /// if they do not already exist as configured. /// Return an error on database problems. - pub async fn create_missing_tables(&mut self, verbose: bool) -> Result<&mut Self, sqlx::Error> { + pub async fn create_missing_tables(&mut self) -> Result<&mut Self, sqlx::Error> { // DatabaseError // TODO: Revisit the implementation of this once te configure_db() function has been @@ -225,7 +206,7 @@ impl Valve { &mut datatypes_config, &pool, &parser, - verbose, + self.verbose, &ValveCommand::Create, ) .await?; @@ -274,17 +255,15 @@ impl Valve { pub async fn load_all_tables( &mut self, validate: bool, - verbose: bool, - initial_load: bool, ) -> Result<&mut Self, sqlx::Error> { // DatabaseError - self.create_missing_tables(verbose).await?; + self.create_missing_tables().await?; //self.truncate_all_tables(); if let Some(pool) = &self.pool { if pool.any_kind() == AnyKind::Sqlite { sqlx_query("PRAGMA foreign_keys = ON").execute(pool).await?; - if initial_load { + if self.initial_load { // These pragmas are unsafe but they are used during initial loading since data // integrity is not a priority in this case. sqlx_query("PRAGMA journal_mode = OFF") @@ -300,7 +279,7 @@ impl Valve { } } - if verbose { + if self.verbose { eprintln!( "{} - Processing {} tables.", Utc::now(), @@ -316,7 +295,7 @@ impl Valve { &pool, &self.compiled_datatype_conditions, &self.compiled_rule_conditions, - verbose, + self.verbose, ) .await?; } else { @@ -488,8 +467,7 @@ impl std::fmt::Debug for ColumnRule { } } -/// TODO: Add docstring here. Note that once we have refactored configure_db() (see above) it may -/// make more sense for this function to be an inner function of Valve. +/// TODO: Add docstring here. 
pub async fn get_pool_from_connection_string(database: &str) -> Result { let connection_options; if database.starts_with("postgresql://") { @@ -505,6 +483,7 @@ pub async fn get_pool_from_connection_string(database: &str) -> Result Result (SerdeMap, SerdeMap, SerdeMap, SerdeMap) { + parser: &StartParser, + pool: &AnyPool, +) -> ( + SerdeMap, + SerdeMap, + SerdeMap, + SerdeMap, + SerdeMap, + Vec, +) { let special_table_types = json!({ "table": {"required": true}, "column": {"required": true}, @@ -789,6 +778,124 @@ pub fn read_config_files( } } + // Initialize the constraints config: + let mut constraints_config = SerdeMap::new(); + constraints_config.insert(String::from("foreign"), SerdeValue::Object(SerdeMap::new())); + constraints_config.insert(String::from("unique"), SerdeValue::Object(SerdeMap::new())); + constraints_config.insert(String::from("primary"), SerdeValue::Object(SerdeMap::new())); + constraints_config.insert(String::from("tree"), SerdeValue::Object(SerdeMap::new())); + constraints_config.insert(String::from("under"), SerdeValue::Object(SerdeMap::new())); + + for table_name in tables_config.keys().cloned().collect::>() { + let optional_path = tables_config + .get(&table_name) + .and_then(|r| r.get("path")) + .and_then(|p| p.as_str()); + + let mut path = None; + match optional_path { + None => { + // If an entry of the tables_config has no path then it is an internal table which + // need not be configured explicitly. Currently the only examples are the message + // and history tables. 
+ if table_name != "message" && table_name != "history" { + panic!("No path defined for table {}", table_name); + } + continue; + } + Some(p) if !Path::new(p).is_file() => { + eprintln!("WARN: File does not exist {}", p); + } + Some(p) if Path::new(p).canonicalize().is_err() => { + eprintln!("WARN: File path could not be made canonical {}", p); + } + Some(p) => path = Some(p.to_string()), + }; + + let defined_columns: Vec = tables_config + .get(&table_name) + .and_then(|r| r.get("column")) + .and_then(|v| v.as_object()) + .and_then(|o| Some(o.keys())) + .and_then(|k| Some(k.cloned())) + .and_then(|k| Some(k.collect())) + .unwrap(); + + // We use column_order to explicitly indicate the order in which the columns should appear + // in the table, for later reference. The default is to preserve the order from the actual + // table file. If that does not exist, we use the ordering in defined_columns. + let mut column_order = vec![]; + if let Some(path) = path { + // Get the actual columns from the data itself. Note that we set has_headers to + // false(even though the files have header rows) in order to explicitly read the + // header row. 
+ let mut rdr = csv::ReaderBuilder::new() + .has_headers(false) + .delimiter(b'\t') + .from_reader(File::open(path.clone()).unwrap_or_else(|err| { + panic!("Unable to open '{}': {}", path.clone(), err); + })); + let mut iter = rdr.records(); + if let Some(result) = iter.next() { + let actual_columns = result + .unwrap() + .iter() + .map(|c| c.to_string()) + .collect::>(); + // Make sure that the actual columns found in the table file, and the columns + // defined in the column config, exactly match in terms of their content: + for column_name in &actual_columns { + column_order.push(json!(column_name)); + if !defined_columns.contains(&column_name.to_string()) { + panic!( + "Column '{}.{}' not in column config", + table_name, column_name + ); + } + } + for column_name in &defined_columns { + if !actual_columns.contains(&column_name.to_string()) { + panic!( + "Defined column '{}.{}' not found in table", + table_name, column_name + ); + } + } + } else { + panic!("'{}' is empty", path); + } + } + + if column_order.is_empty() { + column_order = defined_columns.iter().map(|c| json!(c)).collect::>(); + } + tables_config + .get_mut(&table_name) + .and_then(|t| t.as_object_mut()) + .and_then(|o| { + o.insert( + String::from("column_order"), + SerdeValue::Array(column_order), + ) + }); + + // Populate the constraints config: + let table_constraints = get_table_constraints( + &mut tables_config, + &mut datatypes_config, + parser, + &table_name, + &pool, + ); + for constraint_type in vec!["foreign", "unique", "primary", "tree", "under"] { + let table_constraints = table_constraints.get(constraint_type).unwrap().clone(); + constraints_config + .get_mut(constraint_type) + .and_then(|o| o.as_object_mut()) + .and_then(|o| o.insert(table_name.to_string(), table_constraints)); + } + } + // Manually add the messsage table config: tables_config.insert( "message".to_string(), @@ -927,12 +1034,25 @@ pub fn read_config_files( }), ); + // Sort the tables (aside from the message and 
history tables) according to their foreign key + // dependencies so that tables are always loaded after the tables they depend on. + let sorted_tables = verify_table_deps_and_sort( + &tables_config + .keys() + .cloned() + .filter(|m| m != "history" && m != "message") + .collect(), + &constraints_config, + ); + // Finally, return all the configs: ( specials_config, tables_config, datatypes_config, rules_config, + constraints_config, + sorted_tables, ) } @@ -1307,6 +1427,7 @@ fn get_sql_for_text_view( (drop_view_sql, create_view_sql) } +// TODO: Remove this function once it has been factored. /// Given config maps for tables and datatypes, a database connection pool, and a StartParser, /// read in the TSV files corresponding to the tables defined in the tables config, and use that /// information to fill in constraints information into a new config map that is then returned along @@ -1597,11 +1718,6 @@ pub async fn valve( initial_load: bool, config_table: &str, ) -> Result { - let parser = StartParser::new(); - - let (specials_config, mut tables_config, mut datatypes_config, rules_config) = - read_config_files(&table_table.to_string(), config_table); - // To connect to a postgresql database listening to a unix domain socket: // ---------------------------------------------------------------------- // let connection_options = @@ -1628,6 +1744,12 @@ pub async fn valve( .max_connections(5) .connect_with(connection_options) .await?; + + let parser = StartParser::new(); + + let (specials_config, mut tables_config, mut datatypes_config, rules_config, _, _) = + read_config_files(&table_table.to_string(), config_table, &parser, &pool); + if *command == ValveCommand::Load && pool.any_kind() == AnyKind::Sqlite { sqlx_query("PRAGMA foreign_keys = ON") .execute(&pool) @@ -3883,6 +4005,7 @@ fn verify_table_deps_and_sort(table_list: &Vec, constraints: &SerdeMap) }; } +// TODO: Remove this function once it has been refactored /// Given the config maps for tables and datatypes, and 
a table name, generate a SQL schema string, /// including each column C and its matching C_meta column, then return the schema string as well as /// a list of the table's constraints. @@ -4192,6 +4315,191 @@ fn create_table_statement( return (statements, table_constraints); } +/// TODO: Add doc string here. +fn get_table_constraints( + tables_config: &mut SerdeMap, + datatypes_config: &mut SerdeMap, + parser: &StartParser, + table_name: &str, + pool: &AnyPool, +) -> SerdeValue { + let column_names = tables_config + .get(table_name) + .and_then(|t| t.get("column_order")) + .and_then(|c| c.as_array()) + .unwrap() + .iter() + .map(|v| v.as_str().unwrap().to_string()) + .collect::>(); + + let columns = tables_config + .get(table_name) + .and_then(|c| c.as_object()) + .and_then(|o| o.get("column")) + .and_then(|c| c.as_object()) + .unwrap(); + + let mut table_constraints = json!({ + "foreign": [], + "unique": [], + "primary": [], + "tree": [], + "under": [], + }); + + let mut colvals: Vec = vec![]; + for column_name in &column_names { + let column = columns + .get(column_name) + .and_then(|c| c.as_object()) + .unwrap(); + colvals.push(column.clone()); + } + + for row in colvals { + let sql_type = get_sql_type( + datatypes_config, + &row.get("datatype") + .and_then(|d| d.as_str()) + .and_then(|s| Some(s.to_string())) + .unwrap(), + pool, + ) + .unwrap(); + let column_name = row.get("column").and_then(|s| s.as_str()).unwrap(); + let structure = row.get("structure").and_then(|s| s.as_str()); + if let Some(structure) = structure { + if structure != "" { + let parsed_structure = parser.parse(structure).unwrap(); + for expression in parsed_structure { + match *expression { + Expression::Label(value) if value == "primary" => { + let primary_keys = table_constraints + .get_mut("primary") + .and_then(|v| v.as_array_mut()) + .unwrap(); + primary_keys.push(SerdeValue::String(column_name.to_string())); + } + Expression::Label(value) if value == "unique" => { + let 
unique_constraints = table_constraints + .get_mut("unique") + .and_then(|v| v.as_array_mut()) + .unwrap(); + unique_constraints.push(SerdeValue::String(column_name.to_string())); + } + Expression::Function(name, args) if name == "from" => { + if args.len() != 1 { + panic!("Invalid foreign key: {} for: {}", structure, table_name); + } + match &*args[0] { + Expression::Field(ftable, fcolumn) => { + let foreign_keys = table_constraints + .get_mut("foreign") + .and_then(|v| v.as_array_mut()) + .unwrap(); + let foreign_key = json!({ + "column": column_name, + "ftable": ftable, + "fcolumn": fcolumn, + }); + foreign_keys.push(foreign_key); + } + _ => { + panic!("Invalid foreign key: {} for: {}", structure, table_name) + } + }; + } + Expression::Function(name, args) if name == "tree" => { + if args.len() != 1 { + panic!( + "Invalid 'tree' constraint: {} for: {}", + structure, table_name + ); + } + match &*args[0] { + Expression::Label(child) => { + let child_datatype = columns + .get(child) + .and_then(|c| c.get("datatype")) + .and_then(|d| d.as_str()); + if let None = child_datatype { + panic!( + "Could not determine datatype for {} of tree({})", + child, child + ); + } + let child_datatype = child_datatype.unwrap(); + let parent = column_name; + let child_sql_type = get_sql_type( + datatypes_config, + &child_datatype.to_string(), + pool, + ) + .unwrap(); + if sql_type != child_sql_type { + panic!( + "SQL type '{}' of '{}' in 'tree({})' for table \ + '{}' doe snot match SQL type: '{}' of parent: '{}'.", + child_sql_type, + child, + child, + table_name, + sql_type, + parent + ); + } + let tree_constraints = table_constraints + .get_mut("tree") + .and_then(|t| t.as_array_mut()) + .unwrap(); + let entry = json!({"parent": column_name, + "child": child}); + tree_constraints.push(entry); + } + _ => { + panic!( + "Invalid 'tree' constraint: {} for: {}", + structure, table_name + ); + } + }; + } + Expression::Function(name, args) if name == "under" => { + let generic_error = 
format!( + "Invalid 'under' constraint: {} for: {}", + structure, table_name + ); + if args.len() != 2 { + panic!("{}", generic_error); + } + match (&*args[0], &*args[1]) { + (Expression::Field(ttable, tcolumn), Expression::Label(value)) => { + let under_constraints = table_constraints + .get_mut("under") + .and_then(|u| u.as_array_mut()) + .unwrap(); + let entry = json!({"column": column_name, + "ttable": ttable, + "tcolumn": tcolumn, + "value": value}); + under_constraints.push(entry); + } + (_, _) => panic!("{}", generic_error), + }; + } + _ => panic!( + "Unrecognized structure: {} for {}.{}", + structure, table_name, column_name + ), + }; + } + } + } + } + + return table_constraints; +} + /// Given a list of messages and a HashMap, messages_stats, with which to collect counts of /// message types, count the various message types encountered in the list and increment the counts /// in messages_stats accordingly. From f0f5d29a1c29fb3dee121d4085dbd821e0d65288 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Tue, 5 Dec 2023 10:40:49 -0500 Subject: [PATCH 18/57] implement create_missing_tables() --- src/lib.rs | 407 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 384 insertions(+), 23 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f84c2364..8c88aee1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -183,33 +183,66 @@ impl Valve { pub async fn create_missing_tables(&mut self) -> Result<&mut Self, sqlx::Error> { // DatabaseError - // TODO: Revisit the implementation of this once te configure_db() function has been - // refactored. Currently it implicitly drops and recreates _all_ tables but eventually this - // function needs to do this only for _missing_ tables. 
let mut tables_config = self .global_config .get_mut("table") .and_then(|t| t.as_object_mut()) - .unwrap(); - let mut tables_config = tables_config.clone(); + .unwrap() + .clone(); let mut datatypes_config = self .global_config .get_mut("datatype") .and_then(|d| d.as_object_mut()) + .unwrap() + .clone(); + let mut constraints_config = self + .global_config + .get_mut("constraints") + .and_then(|t| t.as_object_mut()) + .unwrap() + .clone(); + let sorted_tables = self + .global_config + .get("sorted_table_list") + .and_then(|l| l.as_array()) + .and_then(|l| Some(l.iter().map(|i| i.as_str().unwrap().to_string()))) + .and_then(|l| Some(l.collect::>())) .unwrap(); - let mut datatypes_config = datatypes_config.clone(); + let pool = self.pool.as_ref().unwrap(); let parser = StartParser::new(); - let (_, _) = configure_db( + let setup_statements = get_setup_statements( &mut tables_config, &mut datatypes_config, + &constraints_config, &pool, &parser, self.verbose, &ValveCommand::Create, ) .await?; + + // Add the message and history tables to the beginning of the list of tables to create + // (the message table in particular needs to be at the beginning since the table views all + // reference it). + let mut tables_to_create = vec!["message".to_string(), "history".to_string()]; + tables_to_create.append(&mut sorted_tables.clone()); + + for table in &tables_to_create { + let table_statements = setup_statements.get(table).unwrap(); + for stmt in table_statements { + sqlx_query(stmt) + .execute(pool) + .await + .expect(format!("The SQL statement: {} returned an error", stmt).as_str()); + } + if self.verbose { + let output = String::from(table_statements.join("\n")); + println!("{}\n", output); + } + } + Ok(self) } @@ -252,10 +285,7 @@ impl Valve { /// If `validate` is false, just try to insert all rows. /// Return an error on database problem, /// including database conflicts that prevent rows being inserted. 
- pub async fn load_all_tables( - &mut self, - validate: bool, - ) -> Result<&mut Self, sqlx::Error> { + pub async fn load_all_tables(&mut self, validate: bool) -> Result<&mut Self, sqlx::Error> { // DatabaseError self.create_missing_tables().await?; @@ -1302,9 +1332,16 @@ fn get_sql_for_standard_view(table: &str, pool: &AnyPool) -> (String, String) { ); } + let create_or_replace_view = { + if pool.any_kind() == AnyKind::Postgres { + "CREATE OR REPLACE VIEW" + } else { + "CREATE VIEW IF NOT EXISTS" + } + }; let create_view_sql = format!( indoc! {r#" - CREATE VIEW "{t}_view" AS + {create_or_replace_view} "{t}_view" AS SELECT union_t.*, {message_t} AS "message", @@ -1315,6 +1352,7 @@ fn get_sql_for_standard_view(table: &str, pool: &AnyPool) -> (String, String) { SELECT * FROM "{t}_conflict" ) as union_t; "#}, + create_or_replace_view = create_or_replace_view, t = table, message_t = message_t, history_t = history_t, @@ -1412,13 +1450,21 @@ fn get_sql_for_text_view( v }; + let create_or_replace_view = { + if pool.any_kind() == AnyKind::Postgres { + "CREATE OR REPLACE VIEW" + } else { + "CREATE VIEW IF NOT EXISTS" + } + }; let create_view_sql = format!( - r#"CREATE VIEW "{table}_text_view" AS + r#"{create_or_replace_view} "{table}_text_view" AS SELECT {outer_columns} FROM ( SELECT {inner_columns} FROM "{table}_view" ) t"#, + create_or_replace_view = create_or_replace_view, outer_columns = outer_columns.join(", "), inner_columns = inner_columns.join(", "), table = table, @@ -1427,6 +1473,124 @@ fn get_sql_for_text_view( (drop_view_sql, create_view_sql) } +/// TODO: Add docstring here +pub async fn get_setup_statements( + tables_config: &mut SerdeMap, + datatypes_config: &mut SerdeMap, + constraints_config: &SerdeMap, + pool: &AnyPool, + parser: &StartParser, + verbose: bool, + command: &ValveCommand, +) -> Result>, sqlx::Error> { + // Begin by reading in the TSV files corresponding to the tables defined in tables_config, and + // use that information to create the 
associated database tables, while saving constraint + // information to constrains_config. + let mut setup_statements = HashMap::new(); + for table_name in tables_config.keys().cloned().collect::>() { + // Generate the statements for creating the table and its corresponding conflict table: + let mut table_statements = vec![]; + for table in vec![table_name.to_string(), format!("{}_conflict", table_name)] { + let mut statements = + get_table_ddl(tables_config, datatypes_config, parser, &table, &pool); + table_statements.append(&mut statements); + } + + let (drop_view_sql, create_view_sql) = get_sql_for_standard_view(&table_name, pool); + let (drop_text_view_sql, create_text_view_sql) = + get_sql_for_text_view(tables_config, &table_name, pool); + table_statements.push(drop_text_view_sql); + table_statements.push(drop_view_sql); + table_statements.push(create_view_sql); + table_statements.push(create_text_view_sql); + + setup_statements.insert(table_name.to_string(), table_statements); + } + + // Generate DDL for the history table: + let mut history_statements = vec![]; + history_statements.push({ + let mut sql = r#"DROP TABLE IF EXISTS "history""#.to_string(); + if pool.any_kind() == AnyKind::Postgres { + sql.push_str(" CASCADE"); + } + sql.push_str(";"); + sql + }); + history_statements.push(format!( + indoc! 
{r#" + CREATE TABLE IF NOT EXISTS "history" ( + {row_number} + "table" TEXT, + "row" BIGINT, + "from" TEXT, + "to" TEXT, + "summary" TEXT, + "user" TEXT, + "undone_by" TEXT, + {timestamp} + ); + "#}, + row_number = { + if pool.any_kind() == AnyKind::Sqlite { + "\"history_id\" INTEGER PRIMARY KEY," + } else { + "\"history_id\" SERIAL PRIMARY KEY," + } + }, + timestamp = { + if pool.any_kind() == AnyKind::Sqlite { + "\"timestamp\" TIMESTAMP DEFAULT(STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW'))" + } else { + "\"timestamp\" TIMESTAMP DEFAULT CURRENT_TIMESTAMP" + } + }, + )); + history_statements.push( + r#"CREATE INDEX IF NOT EXISTS "history_tr_idx" ON "history"("table", "row");"#.to_string(), + ); + setup_statements.insert("history".to_string(), history_statements); + + // Generate DDL for the message table: + let mut message_statements = vec![]; + message_statements.push({ + let mut sql = r#"DROP TABLE IF EXISTS "message""#.to_string(); + if pool.any_kind() == AnyKind::Postgres { + sql.push_str(" CASCADE"); + } + sql.push_str(";"); + sql + }); + message_statements.push(format!( + indoc! {r#" + CREATE TABLE IF NOT EXISTS "message" ( + {} + "table" TEXT, + "row" BIGINT, + "column" TEXT, + "value" TEXT, + "level" TEXT, + "rule" TEXT, + "message" TEXT + ); + "#}, + { + if pool.any_kind() == AnyKind::Sqlite { + "\"message_id\" INTEGER PRIMARY KEY," + } else { + "\"message_id\" SERIAL PRIMARY KEY," + } + }, + )); + message_statements.push( + r#"CREATE INDEX IF NOT EXISTS "message_trc_idx" ON "message"("table", "row", "column");"# + .to_string(), + ); + setup_statements.insert("message".to_string(), message_statements); + + return Ok(setup_statements); +} + // TODO: Remove this function once it has been factored. /// Given config maps for tables and datatypes, a database connection pool, and a StartParser, /// read in the TSV files corresponding to the tables defined in the tables config, and use that @@ -4317,12 +4481,20 @@ fn create_table_statement( /// TODO: Add doc string here. 
fn get_table_constraints( - tables_config: &mut SerdeMap, - datatypes_config: &mut SerdeMap, + tables_config: &SerdeMap, + datatypes_config: &SerdeMap, parser: &StartParser, table_name: &str, pool: &AnyPool, ) -> SerdeValue { + let mut table_constraints = json!({ + "foreign": [], + "unique": [], + "primary": [], + "tree": [], + "under": [], + }); + let column_names = tables_config .get(table_name) .and_then(|t| t.get("column_order")) @@ -4339,14 +4511,6 @@ fn get_table_constraints( .and_then(|c| c.as_object()) .unwrap(); - let mut table_constraints = json!({ - "foreign": [], - "unique": [], - "primary": [], - "tree": [], - "under": [], - }); - let mut colvals: Vec = vec![]; for column_name in &column_names { let column = columns @@ -4500,6 +4664,203 @@ fn get_table_constraints( return table_constraints; } +// TODO: Add docstring here +fn get_table_ddl( + tables_config: &mut SerdeMap, + datatypes_config: &mut SerdeMap, + parser: &StartParser, + table_name: &String, + pool: &AnyPool, +) -> Vec { + // TODO: Don't generate "drop" statements in this function. It will be done elsewhere. This + // function should only generate creation statements. 
+ let mut drop_table_sql = format!(r#"DROP TABLE IF EXISTS "{}""#, table_name); + if pool.any_kind() == AnyKind::Postgres { + drop_table_sql.push_str(" CASCADE"); + } + drop_table_sql.push_str(";"); + let mut statements = vec![drop_table_sql]; + let mut create_lines = vec![ + format!(r#"CREATE TABLE IF NOT EXISTS "{}" ("#, table_name), + String::from(r#" "row_number" BIGINT,"#), + ]; + + let colvals = { + let normal_table_name; + if let Some(s) = table_name.strip_suffix("_conflict") { + normal_table_name = String::from(s); + } else { + normal_table_name = table_name.to_string(); + } + let column_order = tables_config + .get(&normal_table_name) + .and_then(|t| t.get("column_order")) + .and_then(|c| c.as_array()) + .unwrap() + .iter() + .map(|v| v.as_str().unwrap().to_string()) + .collect::>(); + let columns = tables_config + .get(&normal_table_name) + .and_then(|c| c.as_object()) + .and_then(|o| o.get("column")) + .and_then(|c| c.as_object()) + .unwrap(); + + column_order + .iter() + .map(|column_name| { + columns + .get(column_name) + .and_then(|c| c.as_object()) + .unwrap() + }) + .collect::>() + }; + + let table_constraints = { + // Conflict tables have no database constraints: + if table_name.ends_with("_conflict") { + json!({"foreign": [], "unique": [], "primary": [], "tree": [], "under": [],}) + } else { + get_table_constraints(tables_config, datatypes_config, parser, &table_name, &pool) + } + }; + + let c = colvals.len(); + let mut r = 0; + for row in colvals { + r += 1; + let sql_type = get_sql_type( + datatypes_config, + &row.get("datatype") + .and_then(|d| d.as_str()) + .and_then(|s| Some(s.to_string())) + .unwrap(), + pool, + ) + .unwrap(); + + let short_sql_type = { + if sql_type.to_lowercase().as_str().starts_with("varchar(") { + "VARCHAR" + } else { + &sql_type + } + }; + + if pool.any_kind() == AnyKind::Postgres { + if !PG_SQL_TYPES.contains(&short_sql_type.to_lowercase().as_str()) { + panic!( + "Unrecognized PostgreSQL SQL type '{}' for datatype: 
'{}'. \ + Accepted SQL types for PostgreSQL are: {}", + sql_type, + row.get("datatype").and_then(|d| d.as_str()).unwrap(), + PG_SQL_TYPES.join(", ") + ); + } + } else { + if !SL_SQL_TYPES.contains(&short_sql_type.to_lowercase().as_str()) { + panic!( + "Unrecognized SQLite SQL type '{}' for datatype '{}'. \ + Accepted SQL datatypes for SQLite are: {}", + sql_type, + row.get("datatype").and_then(|d| d.as_str()).unwrap(), + SL_SQL_TYPES.join(", ") + ); + } + } + + let column_name = row.get("column").and_then(|s| s.as_str()).unwrap(); + let mut line = format!(r#" "{}" {}"#, column_name, sql_type); + + // Check if the column is a primary key and indicate this in the DDL if so: + let primary_constraints = table_constraints + .get("primary") + .and_then(|v| v.as_array()) + .unwrap(); + if primary_constraints.contains(&json!(column_name)) { + line.push_str(" PRIMARY KEY"); + } + + // Check if the column has a unique constraint and indicate this in the DDL if so: + let unique_constraints = table_constraints + .get("unique") + .and_then(|v| v.as_array()) + .unwrap(); + if unique_constraints.contains(&json!(column_name)) { + line.push_str(" UNIQUE"); + } + + // If there are foreign constraints add a column to the end of the statement which we will + // finish after this for loop is done: + if !(r >= c + && table_constraints + .get("foreign") + .and_then(|v| v.as_array()) + .and_then(|v| Some(v.is_empty())) + .unwrap()) + { + line.push_str(","); + } + create_lines.push(line); + } + + // Add the SQL to indicate any foreign constraints: + let foreign_keys = table_constraints + .get("foreign") + .and_then(|v| v.as_array()) + .unwrap(); + let num_fkeys = foreign_keys.len(); + for (i, fkey) in foreign_keys.iter().enumerate() { + create_lines.push(format!( + r#" FOREIGN KEY ("{}") REFERENCES "{}"("{}"){}"#, + fkey.get("column").and_then(|s| s.as_str()).unwrap(), + fkey.get("ftable").and_then(|s| s.as_str()).unwrap(), + fkey.get("fcolumn").and_then(|s| s.as_str()).unwrap(), + if i < 
(num_fkeys - 1) { "," } else { "" } + )); + } + create_lines.push(String::from(");")); + // We are done generating the lines for the 'create table' statement. Join them and add the + // result to the statements to return: + statements.push(String::from(create_lines.join("\n"))); + + // Loop through the tree constraints and if any of their associated child columns do not already + // have an associated unique or primary index, create one implicitly here: + let tree_constraints = table_constraints + .get("tree") + .and_then(|v| v.as_array()) + .unwrap(); + for tree in tree_constraints { + let unique_keys = table_constraints + .get("unique") + .and_then(|v| v.as_array()) + .unwrap(); + let primary_keys = table_constraints + .get("primary") + .and_then(|v| v.as_array()) + .unwrap(); + let tree_child = tree.get("child").and_then(|c| c.as_str()).unwrap(); + if !unique_keys.contains(&SerdeValue::String(tree_child.to_string())) + && !primary_keys.contains(&SerdeValue::String(tree_child.to_string())) + { + statements.push(format!( + r#"CREATE UNIQUE INDEX IF NOT EXISTS "{}_{}_idx" ON "{}"("{}");"#, + table_name, tree_child, table_name, tree_child + )); + } + } + + // Finally, create a further unique index on row_number: + statements.push(format!( + r#"CREATE UNIQUE INDEX IF NOT EXISTS "{}_row_number_idx" ON "{}"("row_number");"#, + table_name, table_name + )); + + return statements; +} + /// Given a list of messages and a HashMap, messages_stats, with which to collect counts of /// message types, count the various message types encountered in the list and increment the counts /// in messages_stats accordingly. 
From cdc99b03fae8f5e74e619c1e76c442896c27c5e7 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Tue, 5 Dec 2023 11:59:32 -0500 Subject: [PATCH 19/57] eliminate compiler warnings --- src/lib.rs | 62 +++++++++++++++++++++-------------------------------- src/main.rs | 6 +++--- 2 files changed, 28 insertions(+), 40 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8c88aee1..92b43156 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -195,12 +195,6 @@ impl Valve { .and_then(|d| d.as_object_mut()) .unwrap() .clone(); - let mut constraints_config = self - .global_config - .get_mut("constraints") - .and_then(|t| t.as_object_mut()) - .unwrap() - .clone(); let sorted_tables = self .global_config .get("sorted_table_list") @@ -212,16 +206,8 @@ impl Valve { let pool = self.pool.as_ref().unwrap(); let parser = StartParser::new(); - let setup_statements = get_setup_statements( - &mut tables_config, - &mut datatypes_config, - &constraints_config, - &pool, - &parser, - self.verbose, - &ValveCommand::Create, - ) - .await?; + let setup_statements = + get_setup_statements(&mut tables_config, &mut datatypes_config, &pool, &parser).await?; // Add the message and history tables to the beginning of the list of tables to create // (the message table in particular needs to be at the beginning since the table views all @@ -250,16 +236,17 @@ impl Valve { /// Return an error on database problem. pub fn drop_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError - todo!(); + + // TODO NEXT Ok(self) } /// Given a vector of table names, /// drop those tables, in the given order. /// Return an error on invalid table name or database problem. - pub fn drop_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { + pub fn drop_tables(&self, _tables: Vec<&str>) -> Result<&Self, sqlx::Error> { // DatabaseError - todo!(); + // TODO Ok(self) } @@ -267,17 +254,17 @@ impl Valve { /// Return an error on database problem. 
pub fn truncate_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError - todo!(); + // TODO Ok(self) } /// Given a vector of table names, /// truncate those tables, in the given order. /// Return an error on invalid table name or database problem. - pub fn truncate_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { + pub fn truncate_tables(&self, _tables: Vec<&str>) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError //self.create_missing_tables(); - todo!(); + // TODO Ok(self) } @@ -285,7 +272,7 @@ impl Valve { /// If `validate` is false, just try to insert all rows. /// Return an error on database problem, /// including database conflicts that prevent rows being inserted. - pub async fn load_all_tables(&mut self, validate: bool) -> Result<&mut Self, sqlx::Error> { + pub async fn load_all_tables(&mut self, _validate: bool) -> Result<&mut Self, sqlx::Error> { // DatabaseError self.create_missing_tables().await?; @@ -339,11 +326,11 @@ impl Valve { /// load those tables in the given order. /// If `validate` is false, just try to insert all rows. /// Return an error on invalid table name or database problem. - pub fn load_tables(&self, tables: Vec<&str>, validate: bool) -> Result<&Self, sqlx::Error> { + pub fn load_tables(&self, _tables: Vec<&str>, _validate: bool) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError //self.create_missing_tables(); //self.truncate_tables(tables); - todo!(); + // TODO Ok(self) } @@ -351,23 +338,27 @@ impl Valve { /// Return an error on writing or database problem. pub fn save_all_tables(&self) -> Result<&Self, sqlx::Error> { // WriteOrDatabaseError - todo!(); + // TODO Ok(self) } /// Given a vector of table names, /// Save thosee tables to their 'path's, in the given order. /// Return an error on writing or database problem. 
- pub fn save_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { + pub fn save_tables(&self, _tables: Vec<&str>) -> Result<&Self, sqlx::Error> { // WriteOrDatabaseError - todo!(); + // TODO Ok(self) } /// Given a table name and a row as JSON, /// return the validated row. /// Return an error on database problem. - pub fn validate_row(&self, table_name: &str, row: &ValveRow) -> Result { + pub fn validate_row( + &self, + _table_name: &str, + _row: &ValveRow, + ) -> Result { // DatabaseError todo!(); } @@ -376,7 +367,7 @@ impl Valve { /// add the row to the table in the database, /// and return the validated row, including its new row_number. /// Return an error invalid table name or database problem. - pub fn insert_row(&self, table_name: &str, row: &ValveRow) -> Result { + pub fn insert_row(&self, _table_name: &str, _row: &ValveRow) -> Result { // ConfigOrDatabaseError todo!(); } @@ -387,9 +378,9 @@ impl Valve { /// Return an error invalid table name or row number or database problem. pub fn update_row( &self, - table_name: &str, - row_number: usize, - row: &ValveRow, + _table_name: &str, + _row_number: usize, + _row: &ValveRow, ) -> Result { // ConfigOrDatabaseError todo!(); @@ -398,7 +389,7 @@ impl Valve { /// Given a table name and a row number, /// delete that row from the table. /// Return an error invalid table name or row number or database problem. 
- pub fn delete_row(&self, table_name: &str, row_number: usize) -> Result<(), sqlx::Error> { + pub fn delete_row(&self, _table_name: &str, _row_number: usize) -> Result<(), sqlx::Error> { // ConfigOrDatabaseError todo!(); } @@ -1477,11 +1468,8 @@ fn get_sql_for_text_view( pub async fn get_setup_statements( tables_config: &mut SerdeMap, datatypes_config: &mut SerdeMap, - constraints_config: &SerdeMap, pool: &AnyPool, parser: &StartParser, - verbose: bool, - command: &ValveCommand, ) -> Result>, sqlx::Error> { // Begin by reading in the TSV files corresponding to the tables defined in tables_config, and // use that information to create the associated database tables, while saving constraint diff --git a/src/main.rs b/src/main.rs index 486cb522..a48a88b2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -157,9 +157,9 @@ async fn main() -> Result<(), sqlx::Error> { ) .await?; } else { - let mut valve = Valve::build(&source, &config_table, &destination, verbose).await?; - valve.connect(&destination).await?; - valve.load_all_tables(true, verbose, initial_load).await?; + let mut valve = Valve::build(&source, &config_table, &destination, verbose, + initial_load).await?; + valve.load_all_tables(true).await?; // valve( // &source, // &destination, From 866e408f9b8c398bce3e750ddb17b71c846621bc Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Tue, 5 Dec 2023 14:02:08 -0500 Subject: [PATCH 20/57] implement drop_all_tables() and truncate_all_tables() --- Makefile | 6 +++ src/lib.rs | 124 ++++++++++++++++++++++++++++++++++++---------------- src/main.rs | 18 ++++++-- 3 files changed, 106 insertions(+), 42 deletions(-) diff --git a/Makefile b/Makefile index 5832e279..7008e909 100644 --- a/Makefile +++ b/Makefile @@ -82,6 +82,9 @@ sqlite_api_test: valve test/src/table.tsv build/valve.db test/insert_update.sh | diff --strip-trailing-cr -q test/expected/messages_after_api_test.tsv test/output/messages.tsv echo "select \"history_id\", \"table\", \"row\", \"from\", \"to\", \"summary\", 
\"user\", \"undone_by\" from history where history_id < 15 order by history_id" | sqlite3 -header -tabs build/valve.db > test/output/history.tsv diff --strip-trailing-cr -q test/expected/history.tsv test/output/history.tsv + # We drop all of the db tables because the schema for the next test (random test) is different + # from the schema used for this test. + ./$< --drop_all $(word 2,$^) $(word 3,$^) @echo "Test succeeded!" pg_api_test: valve test/src/table.tsv test/insert_update.sh | test/output @@ -93,6 +96,9 @@ pg_api_test: valve test/src/table.tsv test/insert_update.sh | test/output diff --strip-trailing-cr -q test/expected/messages_after_api_test.tsv test/output/messages.tsv psql postgresql:///valve_postgres -c "COPY (select \"history_id\", \"table\", \"row\", \"from\", \"to\", \"summary\", \"user\", \"undone_by\" from history where history_id < 15 order by history_id) TO STDOUT WITH NULL AS ''" > test/output/history.tsv tail -n +2 test/expected/history.tsv | diff --strip-trailing-cr -q test/output/history.tsv - + # We drop all of the db tables because the schema for the next test (random test) is different + # from the schema used for this test. + ./$< --drop_all $(word 2,$^) postgresql:///valve_postgres @echo "Test succeeded!" sqlite_random_db = build/valve_random.db diff --git a/src/lib.rs b/src/lib.rs index 92b43156..a4468288 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -177,6 +177,14 @@ impl Valve { Ok(self) } + pub async fn execute_sql(&self, sql: &str) -> Result<(), sqlx::Error> { + sqlx_query(&sql) + .execute(self.pool.as_ref().unwrap()) + .await + .expect(format!("The SQL statement: {} returned an error", sql).as_str()); + Ok(()) + } + /// Create all configured database tables and views /// if they do not already exist as configured. /// Return an error on database problems. 
@@ -218,10 +226,7 @@ impl Valve { for table in &tables_to_create { let table_statements = setup_statements.get(table).unwrap(); for stmt in table_statements { - sqlx_query(stmt) - .execute(pool) - .await - .expect(format!("The SQL statement: {} returned an error", stmt).as_str()); + self.execute_sql(stmt).await?; } if self.verbose { let output = String::from(table_statements.join("\n")); @@ -234,10 +239,38 @@ impl Valve { /// Drop all configured tables, in reverse dependency order. /// Return an error on database problem. - pub fn drop_all_tables(&self) -> Result<&Self, sqlx::Error> { + pub async fn drop_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError - // TODO NEXT + // Drop all of the database tables in the reverse of their sorted order: + let sorted_tables = { + let mut sorted_tables = vec!["message", "history"]; + sorted_tables.append( + &mut self + .global_config + .get("sorted_table_list") + .and_then(|l| l.as_array()) + .and_then(|l| Some(l.iter().map(|i| i.as_str().unwrap()))) + .and_then(|l| Some(l.collect::>())) + .unwrap(), + ); + sorted_tables.reverse(); + sorted_tables + }; + + for table in sorted_tables { + if table != "message" && table != "history" { + let sql = format!(r#"DROP VIEW IF EXISTS "{}_text_view""#, table); + self.execute_sql(&sql).await?; + let sql = format!(r#"DROP VIEW IF EXISTS "{}_view""#, table); + self.execute_sql(&sql).await?; + let sql = format!(r#"DROP TABLE IF EXISTS "{}_conflict""#, table); + self.execute_sql(&sql).await?; + } + let sql = format!(r#"DROP TABLE IF EXISTS "{}""#, table); + self.execute_sql(&sql).await?; + } + Ok(self) } @@ -252,9 +285,45 @@ impl Valve { /// Truncate all configured tables, in reverse dependency order. /// Return an error on database problem. 
- pub fn truncate_all_tables(&self) -> Result<&Self, sqlx::Error> { + pub async fn truncate_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError - // TODO + let sorted_tables = { + let mut sorted_tables = vec!["message", "history"]; + sorted_tables.append( + &mut self + .global_config + .get("sorted_table_list") + .and_then(|l| l.as_array()) + .and_then(|l| Some(l.iter().map(|i| i.as_str().unwrap()))) + .and_then(|l| Some(l.collect::>())) + .unwrap(), + ); + sorted_tables.reverse(); + sorted_tables + }; + + let is_postgres = self.pool.as_ref().unwrap().any_kind() == AnyKind::Postgres; + for table in sorted_tables { + let sql = format!(r#"DELETE FROM "{}""#, table); + self.execute_sql(&sql).await?; + if table != "message" && table != "history" { + let sql = format!(r#"DELETE FROM "{}_conflict""#, table); + self.execute_sql(&sql).await?; + } else if table == "message" && is_postgres { + let sql = format!( + r#"ALTER SEQUENCE "{}_message_id_seq" RESTART WITH 1"#, + table + ); + self.execute_sql(&sql).await?; + } else if table == "history" && is_postgres { + let sql = format!( + r#"ALTER SEQUENCE "{}_history_id_seq" RESTART WITH 1"#, + table + ); + self.execute_sql(&sql).await?; + } + } + Ok(self) } @@ -276,7 +345,8 @@ impl Valve { // DatabaseError self.create_missing_tables().await?; - //self.truncate_all_tables(); + self.truncate_all_tables().await?; + if let Some(pool) = &self.pool { if pool.any_kind() == AnyKind::Sqlite { sqlx_query("PRAGMA foreign_keys = ON").execute(pool).await?; @@ -1232,6 +1302,8 @@ pub fn get_parsed_structure_conditions( parsed_structure_conditions } +// TODO: Modify this function so that it no longer returns the DROP statement, once you have +// removed the old valve functions that require it. 
/// Given the name of a table and a database connection pool, generate SQL for creating a view /// based on the table that provides a unified representation of the normal and conflict versions /// of the table, plus columns summarising the information associated with the given table that is @@ -1352,6 +1424,8 @@ fn get_sql_for_standard_view(table: &str, pool: &AnyPool) -> (String, String) { (drop_view_sql, create_view_sql) } +// TODO: Modify this function so that it no longer returns the DROP statement, once you have +// removed the old valve functions that require it. /// Given the tables configuration map, the name of a table and a database connection pool, /// generate SQL for creating a more user-friendly version of the view than the one generated by /// [get_sql_for_standard_view()]. Unlike the standard view generated by that function, the view @@ -1484,11 +1558,8 @@ pub async fn get_setup_statements( table_statements.append(&mut statements); } - let (drop_view_sql, create_view_sql) = get_sql_for_standard_view(&table_name, pool); - let (drop_text_view_sql, create_text_view_sql) = - get_sql_for_text_view(tables_config, &table_name, pool); - table_statements.push(drop_text_view_sql); - table_statements.push(drop_view_sql); + let (_, create_view_sql) = get_sql_for_standard_view(&table_name, pool); + let (_, create_text_view_sql) = get_sql_for_text_view(tables_config, &table_name, pool); table_statements.push(create_view_sql); table_statements.push(create_text_view_sql); @@ -1497,14 +1568,6 @@ pub async fn get_setup_statements( // Generate DDL for the history table: let mut history_statements = vec![]; - history_statements.push({ - let mut sql = r#"DROP TABLE IF EXISTS "history""#.to_string(); - if pool.any_kind() == AnyKind::Postgres { - sql.push_str(" CASCADE"); - } - sql.push_str(";"); - sql - }); history_statements.push(format!( indoc! 
{r#" CREATE TABLE IF NOT EXISTS "history" ( @@ -1541,14 +1604,6 @@ pub async fn get_setup_statements( // Generate DDL for the message table: let mut message_statements = vec![]; - message_statements.push({ - let mut sql = r#"DROP TABLE IF EXISTS "message""#.to_string(); - if pool.any_kind() == AnyKind::Postgres { - sql.push_str(" CASCADE"); - } - sql.push_str(";"); - sql - }); message_statements.push(format!( indoc! {r#" CREATE TABLE IF NOT EXISTS "message" ( @@ -4660,14 +4715,7 @@ fn get_table_ddl( table_name: &String, pool: &AnyPool, ) -> Vec { - // TODO: Don't generate "drop" statements in this function. It will be done elsewhere. This - // function should only generate creation statements. - let mut drop_table_sql = format!(r#"DROP TABLE IF EXISTS "{}""#, table_name); - if pool.any_kind() == AnyKind::Postgres { - drop_table_sql.push_str(" CASCADE"); - } - drop_table_sql.push_str(";"); - let mut statements = vec![drop_table_sql]; + let mut statements = vec![]; let mut create_lines = vec![ format!(r#"CREATE TABLE IF NOT EXISTS "{}" ("#, table_name), String::from(r#" "row_number" BIGINT,"#), diff --git a/src/main.rs b/src/main.rs index a48a88b2..408e29bd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,8 +6,7 @@ use argparse::{ArgumentParser, Store, StoreTrue}; use ontodev_valve::{ get_compiled_datatype_conditions, get_compiled_rule_conditions, - get_parsed_structure_conditions, valve, valve_grammar::StartParser, ValveCommand, - Valve + get_parsed_structure_conditions, valve, valve_grammar::StartParser, Valve, ValveCommand, }; use serde_json::{from_str, Value as SerdeValue}; use std::{env, process}; @@ -20,6 +19,7 @@ fn cli_args_valid(source: &str, destination: &str, dump_config: bool) -> bool { async fn main() -> Result<(), sqlx::Error> { let mut api_test = false; let mut dump_config = false; + let mut drop_all = false; let mut create_only = false; let mut config_table = String::new(); let mut verbose = false; @@ -49,6 +49,11 @@ async fn main() -> Result<(), 
sqlx::Error> { r#"Read the configuration referred to by SOURCE and send it to stdout as a JSON-formatted string."#, ); + ap.refer(&mut drop_all).add_option( + &["--drop_all"], + StoreTrue, + r#"Drop all tables in the database."#, + ); ap.refer(&mut create_only).add_option( &["--create_only"], StoreTrue, @@ -146,6 +151,10 @@ async fn main() -> Result<(), sqlx::Error> { let config = serde_json::to_string(config).unwrap(); println!("{}", config); + } else if drop_all { + let valve = + Valve::build(&source, &config_table, &destination, verbose, initial_load).await?; + valve.drop_all_tables().await?; } else if create_only { valve( &source, @@ -157,9 +166,10 @@ async fn main() -> Result<(), sqlx::Error> { ) .await?; } else { - let mut valve = Valve::build(&source, &config_table, &destination, verbose, - initial_load).await?; + let mut valve = + Valve::build(&source, &config_table, &destination, verbose, initial_load).await?; valve.load_all_tables(true).await?; + // valve( // &source, // &destination, From 6f8e09a2e79e2fb2de8e544f7436eb4cb5d89f22 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Fri, 8 Dec 2023 11:27:57 -0500 Subject: [PATCH 21/57] reimplement truncate_all_tables(), add order_tables_for_deletion(), make execute_sql() private --- Makefile | 16 ++++++----- src/lib.rs | 79 +++++++++++++++++++++++------------------------------- 2 files changed, 43 insertions(+), 52 deletions(-) diff --git a/Makefile b/Makefile index 7008e909..3e6bd336 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ MAKEFLAGS += --warn-undefined-variables build: mkdir build -.PHONY: doc readme valve_debug valve_release test sqlite_test pg_test api_test sqlite_api_test \ +.PHONY: clean doc readme valve_debug valve_release test sqlite_test pg_test api_test sqlite_api_test \ pg_qpi_test random_test_data random_test sqlite_random_test pg_random_test guess_test_data \ perf_test_data sqlite_perf_test pg_perf_test perf_test @@ -36,13 +36,13 @@ valve_debug: cargo build ln -s 
target/debug/ontodev_valve valve -build/valve.db: test/src/table.tsv clean valve | build - ./valve $< $@ +build/valve.db: valve test/src/table.tsv | build + ./$^ $@ test/output: mkdir -p test/output -test: sqlite_test pg_test api_test random_test +test: clean_test_db sqlite_test pg_test api_test random_test tables_to_test = column datatype rule table table1 table2 table3 table4 table5 table6 table7 table8 \ table9 table10 table11 @@ -61,6 +61,7 @@ sqlite_test: build/valve.db test/src/table.tsv | test/output pg_test: valve test/src/table.tsv | test/output @echo "Testing valve on postgresql ..." + ./$^ --drop_all postgresql:///valve_postgres ./$^ postgresql:///valve_postgres test/round_trip.sh postgresql:///valve_postgres $(word 2,$^) scripts/export.py messages postgresql:///valve_postgres $| $(tables_to_test) @@ -112,13 +113,13 @@ $(random_test_dir)/ontology: random_test_data: test/generate_random_test_data.py valve valve test/random_test_data/table.tsv | $(random_test_dir)/ontology ./$< $$(date +"%s") 100 5 $(word 3,$^) $| -sqlite_random_test: valve clean random_test_data | build test/output +sqlite_random_test: valve random_test_data | build test/output @echo "Testing with random data on sqlite ..." ./$< $(random_test_dir)/table.tsv $(sqlite_random_db) test/round_trip.sh $(sqlite_random_db) $(random_test_dir)/table.tsv @echo "Test succeeded!" -pg_random_test: valve clean random_test_data | build test/output +pg_random_test: valve random_test_data | build test/output @echo "Testing with random data on postgresql ..." 
./$< $(random_test_dir)/table.tsv postgresql:///valve_postgres test/round_trip.sh postgresql:///valve_postgres $(random_test_dir)/table.tsv @@ -172,6 +173,9 @@ perf_test: sqlite_perf_test pg_perf_test clean: rm -Rf build/valve.db* build/valve_random.db* test/output $(random_test_dir)/ontology valve +clean_test_db: + rm -Rf build/valve.db + clean_guess_db: rm -Rf build/valve_guess.db diff --git a/src/lib.rs b/src/lib.rs index a4468288..1ce441a2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -177,7 +177,7 @@ impl Valve { Ok(self) } - pub async fn execute_sql(&self, sql: &str) -> Result<(), sqlx::Error> { + async fn execute_sql(&self, sql: &str) -> Result<(), sqlx::Error> { sqlx_query(&sql) .execute(self.pool.as_ref().unwrap()) .await @@ -237,28 +237,29 @@ impl Valve { Ok(self) } + pub fn order_tables_for_deletion(&self) -> Vec<&str> { + // Every other table depends on the message and history table so these will go last: + let mut sorted_tables = vec!["message", "history"]; + sorted_tables.append( + &mut self + .global_config + .get("sorted_table_list") + .and_then(|l| l.as_array()) + .and_then(|l| Some(l.iter().map(|i| i.as_str().unwrap()))) + .and_then(|l| Some(l.collect::>())) + .unwrap(), + ); + sorted_tables.reverse(); + sorted_tables + } + /// Drop all configured tables, in reverse dependency order. /// Return an error on database problem. 
pub async fn drop_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError // Drop all of the database tables in the reverse of their sorted order: - let sorted_tables = { - let mut sorted_tables = vec!["message", "history"]; - sorted_tables.append( - &mut self - .global_config - .get("sorted_table_list") - .and_then(|l| l.as_array()) - .and_then(|l| Some(l.iter().map(|i| i.as_str().unwrap()))) - .and_then(|l| Some(l.collect::>())) - .unwrap(), - ); - sorted_tables.reverse(); - sorted_tables - }; - - for table in sorted_tables { + for table in self.order_tables_for_deletion() { if table != "message" && table != "history" { let sql = format!(r#"DROP VIEW IF EXISTS "{}_text_view""#, table); self.execute_sql(&sql).await?; @@ -287,39 +288,25 @@ impl Valve { /// Return an error on database problem. pub async fn truncate_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError - let sorted_tables = { - let mut sorted_tables = vec!["message", "history"]; - sorted_tables.append( - &mut self - .global_config - .get("sorted_table_list") - .and_then(|l| l.as_array()) - .and_then(|l| Some(l.iter().map(|i| i.as_str().unwrap()))) - .and_then(|l| Some(l.collect::>())) - .unwrap(), - ); - sorted_tables.reverse(); - sorted_tables + + // We must use CASCADE in the case of PostgreSQL since we cannot truncate a table, T, that + // depends on another table, T', even in the case where we have previously truncated T'. + // SQLite does not need this. However SQLite does require that the tables be truncated in + // deletion order (which means that it must be checking that T' is empty). 
+ + let truncate_sql = |table: &str| -> String { + if self.pool.as_ref().unwrap().any_kind() == AnyKind::Postgres { + format!(r#"TRUNCATE TABLE "{}" RESTART IDENTITY CASCADE"#, table) + } else { + format!(r#"DELETE FROM "{}""#, table) + } }; - let is_postgres = self.pool.as_ref().unwrap().any_kind() == AnyKind::Postgres; - for table in sorted_tables { - let sql = format!(r#"DELETE FROM "{}""#, table); + for table in self.order_tables_for_deletion() { + let sql = truncate_sql(&table); self.execute_sql(&sql).await?; if table != "message" && table != "history" { - let sql = format!(r#"DELETE FROM "{}_conflict""#, table); - self.execute_sql(&sql).await?; - } else if table == "message" && is_postgres { - let sql = format!( - r#"ALTER SEQUENCE "{}_message_id_seq" RESTART WITH 1"#, - table - ); - self.execute_sql(&sql).await?; - } else if table == "history" && is_postgres { - let sql = format!( - r#"ALTER SEQUENCE "{}_history_id_seq" RESTART WITH 1"#, - table - ); + let sql = truncate_sql(&format!("{}_conflict", table)); self.execute_sql(&sql).await?; } } From aeee965e3a3b62aaa441c3553c6a789d592b8079 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Fri, 8 Dec 2023 12:33:54 -0500 Subject: [PATCH 22/57] implement set_user() --- src/lib.rs | 54 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 1ce441a2..902bbd93 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -94,11 +94,16 @@ pub struct Valve { pub initial_load: bool, } +#[derive(Debug)] +pub struct ConfigError { + pub message: String, +} + impl Valve { - /// Given a path to a table table and its name, read the table table, configure VALVE - /// partially ... TODO: finish rewriting this doc string. - /// , and return a new Valve struct. - /// Return an error if reading or configuration fails. 
+ /// Given a path to a table table, its name, a path to a database, a flag for verbose output, + /// and a flag indicating whether the Valve instance should be built for initial loading: + /// Set up a database connection, read the table table, configure VALVE, and return a new + /// Valve struct. pub async fn build( table_path: &str, config_table: &str, @@ -168,20 +173,41 @@ impl Valve { }) } - /// Set the user name for this instance. - /// The username must be a short string without newlines. - /// Return an error on invalid username. - pub fn set_user(&mut self, user: &str) -> Result<&mut Self, sqlx::Error> { - // ConfigError + /// Controls the maximum length of a username. + const USERNAME_MAX_LEN: usize = 20; + + /// Set the user name, which must be a short, trimmed, string without newlines, for this Valve + /// instance. + pub fn set_user(&mut self, user: &str) -> Result<&mut Self, ConfigError> { + if user.len() > Self::USERNAME_MAX_LEN { + return Err(ConfigError { + message: format!( + "Username '{}' is longer than {} characters.", + user, + Self::USERNAME_MAX_LEN + ), + }); + } else { + let user_regex = Regex::new(r#"^\S([^\n]*\S)*$"#).unwrap(); + if !user_regex.is_match(user) { + return Err(ConfigError { + message: format!( + "Username '{}' is not a short, trimmed, string without newlines.", + user, + ), + }); + } + } self.user = user.to_string(); Ok(self) } async fn execute_sql(&self, sql: &str) -> Result<(), sqlx::Error> { + // DatabaseError + sqlx_query(&sql) .execute(self.pool.as_ref().unwrap()) - .await - .expect(format!("The SQL statement: {} returned an error", sql).as_str()); + .await?; Ok(()) } @@ -237,7 +263,7 @@ impl Valve { Ok(self) } - pub fn order_tables_for_deletion(&self) -> Vec<&str> { + pub fn get_tables_ordered_for_deletion(&self) -> Vec<&str> { // Every other table depends on the message and history table so these will go last: let mut sorted_tables = vec!["message", "history"]; sorted_tables.append( @@ -259,7 +285,7 @@ impl Valve { // 
DatabaseError // Drop all of the database tables in the reverse of their sorted order: - for table in self.order_tables_for_deletion() { + for table in self.get_tables_ordered_for_deletion() { if table != "message" && table != "history" { let sql = format!(r#"DROP VIEW IF EXISTS "{}_text_view""#, table); self.execute_sql(&sql).await?; @@ -302,7 +328,7 @@ impl Valve { } }; - for table in self.order_tables_for_deletion() { + for table in self.get_tables_ordered_for_deletion() { let sql = truncate_sql(&table); self.execute_sql(&sql).await?; if table != "message" && table != "history" { From ff0c7549a868b2af3e0a7d44193f42c52fb1a894 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Fri, 8 Dec 2023 12:49:51 -0500 Subject: [PATCH 23/57] minor refactoring and commenting --- src/lib.rs | 61 ++++++++++++++++++++++++++---------------------------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 902bbd93..1020b075 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -202,6 +202,7 @@ impl Valve { Ok(self) } + /// Given a SQL string, execute it using the connection pool associated with the Valve instance. async fn execute_sql(&self, sql: &str) -> Result<(), sqlx::Error> { // DatabaseError @@ -211,9 +212,33 @@ impl Valve { Ok(()) } - /// Create all configured database tables and views - /// if they do not already exist as configured. - /// Return an error on database problems. + /// Returns a list of tables, including the message and history tables, in the right order for + /// table creation. 
+ fn get_tables_ordered_for_creation(&self) -> Vec<&str> { + // Every other table depends on the message and history table so these will go last: + let mut sorted_tables = vec!["message", "history"]; + sorted_tables.append( + &mut self + .global_config + .get("sorted_table_list") + .and_then(|l| l.as_array()) + .and_then(|l| Some(l.iter().map(|i| i.as_str().unwrap()))) + .and_then(|l| Some(l.collect::>())) + .unwrap(), + ); + sorted_tables + } + + /// Returns a list of tables, including the message and history tables, in the right order for + /// table deletion. + fn get_tables_ordered_for_deletion(&self) -> Vec<&str> { + // Every other table depends on the message and history table so these will go last: + let mut sorted_tables = self.get_tables_ordered_for_creation(); + sorted_tables.reverse(); + sorted_tables + } + + /// Create all configured database tables and views if they do not already exist as configured. pub async fn create_missing_tables(&mut self) -> Result<&mut Self, sqlx::Error> { // DatabaseError @@ -229,13 +254,6 @@ impl Valve { .and_then(|d| d.as_object_mut()) .unwrap() .clone(); - let sorted_tables = self - .global_config - .get("sorted_table_list") - .and_then(|l| l.as_array()) - .and_then(|l| Some(l.iter().map(|i| i.as_str().unwrap().to_string()))) - .and_then(|l| Some(l.collect::>())) - .unwrap(); let pool = self.pool.as_ref().unwrap(); let parser = StartParser::new(); @@ -246,10 +264,7 @@ impl Valve { // Add the message and history tables to the beginning of the list of tables to create // (the message table in particular needs to be at the beginning since the table views all // reference it). 
- let mut tables_to_create = vec!["message".to_string(), "history".to_string()]; - tables_to_create.append(&mut sorted_tables.clone()); - - for table in &tables_to_create { + for table in self.get_tables_ordered_for_creation() { let table_statements = setup_statements.get(table).unwrap(); for stmt in table_statements { self.execute_sql(stmt).await?; @@ -263,24 +278,7 @@ impl Valve { Ok(self) } - pub fn get_tables_ordered_for_deletion(&self) -> Vec<&str> { - // Every other table depends on the message and history table so these will go last: - let mut sorted_tables = vec!["message", "history"]; - sorted_tables.append( - &mut self - .global_config - .get("sorted_table_list") - .and_then(|l| l.as_array()) - .and_then(|l| Some(l.iter().map(|i| i.as_str().unwrap()))) - .and_then(|l| Some(l.collect::>())) - .unwrap(), - ); - sorted_tables.reverse(); - sorted_tables - } - /// Drop all configured tables, in reverse dependency order. - /// Return an error on database problem. pub async fn drop_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError @@ -311,7 +309,6 @@ impl Valve { } /// Truncate all configured tables, in reverse dependency order. - /// Return an error on database problem. 
pub async fn truncate_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError From 3b05d2817e3d9132f8f62ef8d6c880d92e7b6957 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Fri, 8 Dec 2023 13:37:36 -0500 Subject: [PATCH 24/57] partially implement table_has_changed() --- src/lib.rs | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 1020b075..d6753293 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -238,6 +238,40 @@ impl Valve { sorted_tables } + /// TODO: Add docstring here + pub async fn table_has_changed(&self, table: &str) -> Result { + let (_column_config, _column_order, _description, _path) = { + let table_config = self + .global_config + .get("table") + .and_then(|tc| tc.get(table)) + .and_then(|t| t.as_object()) + .unwrap(); + let column_config = table_config + .get("column") + .and_then(|c| c.as_object()) + .unwrap(); + let column_order = table_config + .get("column_order") + .and_then(|c| c.as_array()) + .unwrap(); + let description = table_config + .get("description") + .and_then(|c| c.as_str()) + .unwrap(); + let path = table_config.get("path").and_then(|c| c.as_str()).unwrap(); + + (column_config, column_order, description, path) + }; + + // TODO: Look in the database, in the table table and in the column table, and check to see + // if the path, description, column_order, and individial column configurations match what + // is present in the current configuration. If anything differs, the table should be flagged + // as having been changed. + + Ok(true) + } + /// Create all configured database tables and views if they do not already exist as configured. pub async fn create_missing_tables(&mut self) -> Result<&mut Self, sqlx::Error> { // DatabaseError @@ -265,6 +299,12 @@ impl Valve { // (the message table in particular needs to be at the beginning since the table views all // reference it). 
for table in self.get_tables_ordered_for_creation() { + // TODO: Use table_has_changed() to control whether a table is created or not, and once + // this is done, remove the "IF NOT EXISTS" qualifiers from all of the CREATE TABLE, + // CREATE VIEW, and CREATE INDEX statements. + if self.table_has_changed(table).await? { + eprintln!("{} has changed.", table); + } let table_statements = setup_statements.get(table).unwrap(); for stmt in table_statements { self.execute_sql(stmt).await?; From 83af84fa1c98c0e54b52e8802fbd159c2e0fef58 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Tue, 12 Dec 2023 14:11:30 -0500 Subject: [PATCH 25/57] check primary and unique constraints in table_has_changed() --- src/lib.rs | 443 +++++++++++++++++++++++++++++++++++++++++------- src/validate.rs | 5 +- 2 files changed, 388 insertions(+), 60 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d6753293..f61de584 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -83,11 +83,18 @@ lazy_static! { pub type SerdeMap = serde_json::Map; pub type ValveRow = serde_json::Map; +#[macro_export] +macro_rules! 
valve_log { + () => (eprintln!()); + ($($arg:tt)*) => (eprintln!("{} - {}", Utc::now(), format_args!($($arg)*))); +} + #[derive(Debug)] pub struct Valve { pub global_config: SerdeMap, pub compiled_datatype_conditions: HashMap, pub compiled_rule_conditions: HashMap>>, + pub parsed_structure_conditions: HashMap, pub pool: Option, pub user: String, pub verbose: bool, @@ -96,6 +103,7 @@ pub struct Valve { #[derive(Debug)] pub struct ConfigError { + // TODO: Read https://www.lpalmieri.com/posts/error-handling-rust/ pub message: String, } @@ -161,11 +169,13 @@ impl Valve { compiled_datatype_conditions.clone(), &parser, ); + let parsed_structure_conditions = get_parsed_structure_conditions(&global_config, &parser); Ok(Self { global_config: global_config, compiled_datatype_conditions: compiled_datatype_conditions, compiled_rule_conditions: compiled_rule_conditions, + parsed_structure_conditions: parsed_structure_conditions, pool: Some(pool), user: String::from("Valve"), verbose: verbose, @@ -238,44 +248,363 @@ impl Valve { sorted_tables } + /// TODO: Add docstring here + async fn structure_has_changed( + &self, + pstruct: &Expression, + table: &str, + column: &str, + sqlite_pk: &u32, + sqlite_ctype: &str, + ) -> Result { + let pool = self.pool.as_ref().unwrap(); + let column_has_constraint_type = |constraint_type: &str| -> Result { + if pool.any_kind() == AnyKind::Postgres { + let sql = format!( + r#"SELECT 1 + FROM information_schema.table_constraints tco + JOIN information_schema.key_column_usage kcu + ON kcu.constraint_name = tco.constraint_name + AND kcu.constraint_schema = tco.constraint_schema + AND kcu.table_name = '{}' + WHERE tco.constraint_type = '{}' + AND kcu.column_name = '{}'"#, + table, constraint_type, column + ); + let rows = block_on(sqlx_query(&sql).fetch_all(pool))?; + if rows.len() > 1 { + unreachable!(); + } + Ok(rows.len() == 1) + } else { + if constraint_type == "PRIMARY KEY" { + return Ok(*sqlite_pk == 1); + } else if constraint_type == "UNIQUE" { 
+ let sql = format!(r#"PRAGMA INDEX_LIST("{}")"#, table); + for row in block_on(sqlx_query(&sql).fetch_all(pool))? { + let idx_name = row.get::("name"); + let unique = row.get::("unique") as u8; + if unique == 1 { + let sql = format!(r#"PRAGMA INDEX_INFO("{}")"#, idx_name); + let rows = block_on(sqlx_query(&sql).fetch_all(pool))?; + if rows.len() == 1 { + let cname = rows[0].get::("name"); + if cname == column { + return Ok(true); + } + } + } + } + Ok(false) + } else { + todo!(); + } + } + }; + + // Check if there is a change to whether this column is a primary key: + let is_primary = match pstruct { + Expression::Label(label) if label == "primary" => true, + _ => false, + }; + if is_primary != column_has_constraint_type("PRIMARY KEY")? { + return Ok(true); + } + + // Check if there is a change to whether this column is a unique constraint: + let is_unique = match pstruct { + Expression::Label(label) if label == "unique" => true, + _ => false, + }; + if is_unique != column_has_constraint_type("UNIQUE")? 
{ + return Ok(true); + } + + // TODO NEXT: + match pstruct { + Expression::Function(name, _args) if name == "from" => (), + Expression::Function(name, _args) if name == "tree" => (), + _ => (), + }; + + Ok(false) + } + /// TODO: Add docstring here pub async fn table_has_changed(&self, table: &str) -> Result { - let (_column_config, _column_order, _description, _path) = { + let pool = self.pool.as_ref().unwrap(); + let (columns_config, configured_column_order, description, table_type, path) = { let table_config = self .global_config .get("table") .and_then(|tc| tc.get(table)) .and_then(|t| t.as_object()) .unwrap(); - let column_config = table_config + let columns_config = table_config .get("column") .and_then(|c| c.as_object()) .unwrap(); - let column_order = table_config - .get("column_order") - .and_then(|c| c.as_array()) - .unwrap(); + let configured_column_order = { + let mut configured_column_order = { + if table == "message" { + vec!["message_id".to_string()] + } else if table == "history" { + vec!["history_id".to_string()] + } else { + vec!["row_number".to_string()] + } + }; + configured_column_order.append( + &mut table_config + .get("column_order") + .and_then(|c| c.as_array()) + .and_then(|a| Some(a.iter())) + .and_then(|a| Some(a.map(|c| c.as_str().unwrap().to_string()))) + .and_then(|a| Some(a.collect::>())) + .unwrap(), + ); + configured_column_order + }; let description = table_config .get("description") .and_then(|c| c.as_str()) .unwrap(); - let path = table_config.get("path").and_then(|c| c.as_str()).unwrap(); + let table_type = { + if table != "message" && table != "history" { + table_config.get("type").and_then(|c| c.as_str()) + } else { + None + } + }; + let path = { + if table != "message" && table != "history" { + table_config.get("path").and_then(|c| c.as_str()) + } else { + None + } + }; - (column_config, column_order, description, path) + ( + columns_config, + configured_column_order, + description, + table_type, + path, + ) + }; + + let 
db_columns_in_order = { + if pool.any_kind() == AnyKind::Sqlite { + let sql = format!( + r#"SELECT 1 FROM sqlite_master WHERE "type" = 'table' AND "name" = '{}'"#, + table + ); + let rows = sqlx_query(&sql).fetch_all(pool).await?; + if rows.len() == 0 { + if self.verbose { + valve_log!( + "The table '{}' will be recreated as it does not exist in the database", + table + ); + } + return Ok(true); + } else if rows.len() == 1 { + // Otherwise send another query to the db to get the column info: + let sql = format!(r#"PRAGMA TABLE_INFO("{}")"#, table); + let rows = block_on(sqlx_query(&sql).fetch_all(pool))?; + rows.iter() + .map(|r| { + ( + r.get::("name"), + r.get::("type"), + r.get::("pk") as u32, + ) + }) + .collect::>() + } else { + unreachable!(); + } + } else { + let sql = format!( + r#"SELECT "column_name", "data_type" + FROM "information_schema"."columns" + WHERE "table_name" = '{}' + ORDER BY "ordinal_position""#, + table, + ); + let rows = sqlx_query(&sql).fetch_all(pool).await?; + if rows.len() == 0 { + if self.verbose { + valve_log!( + "The table '{}' will be recreated as it does not exist in the database", + table + ); + } + return Ok(true); + } + // Otherwise we get the column name: + rows.iter() + .map(|r| { + ( + r.get::("column_name"), + r.get::("data_type"), + // The third entry is just a dummy so that the datatypes in the two + // wings of this if/else block match. + 0, + ) + }) + .collect::>() + } }; - // TODO: Look in the database, in the table table and in the column table, and check to see - // if the path, description, column_order, and individial column configurations match what - // is present in the current configuration. If anything differs, the table should be flagged - // as having been changed. 
+ // Check if the order of the configured columns matches the order of the columns in the + // database: + let db_column_order = db_columns_in_order + .iter() + .map(|c| c.0.clone()) + .collect::>(); + if db_column_order != configured_column_order { + if self.verbose { + valve_log!( + "The table '{}' will be recreated since the database columns: {:?} \ + and/or their order does not match the configured columns: {:?}", + table, + db_column_order, + configured_column_order + ); + } + return Ok(true); + } + + // Check, for tables other than "message" and "history", whether the corresponding entries + // for 'description', 'type', and 'path' in the configuration match the contents of the + // table table: + if table != "message" && table != "history" { + for table_param in vec![ + ("description", Some(description)), + ("type", table_type), + ("path", path), + ] { + let column = table_param.0; + let is_clause = if self.pool.as_ref().unwrap().any_kind() == AnyKind::Sqlite { + "IS" + } else { + "IS NOT DISTINCT FROM" + }; + let eq_value = match table_param.1 { + Some(value) => format!("= '{}'", value), + None => format!("{} NULL", is_clause), + }; + let sql = format!( + r#"SELECT 1 from "table" WHERE "table" = '{}' AND "{}" {}"#, + table, column, eq_value, + ); + let rows = sqlx_query(&sql) + .fetch_all(self.pool.as_ref().unwrap()) + .await?; + if rows.len() == 0 { + if self.verbose { + valve_log!( + "The table '{table}' will be recreated because the entries in the \ + table table for '{table}' have changed.", + table = table + ); + } + return Ok(true); + } else if rows.len() > 0 { + if self.verbose { + valve_log!( + "WARN more than one row was returned from the query '{}'", + sql + ); + } + } + } + } + + // Check, for all tables, whether their column configuration matches the contents of the + // database: + for (cname, ctype, pk) in &db_columns_in_order { + // Do not consider special row/message/history identifier columns: + if (table == "message" && cname == 
"message_id") + || (table == "history" && cname == "history_id") + || cname == "row_number" + { + continue; + } + let column_config = columns_config + .get(cname) + .and_then(|c| c.as_object()) + .unwrap(); + let sql_type = get_sql_type_from_global_config( + &self.global_config, + table, + &cname, + self.pool.as_ref().unwrap(), + ) + .unwrap(); + + // Check the column's SQL type: + if sql_type.to_lowercase() != ctype.to_lowercase() { + let s = sql_type.to_lowercase(); + let c = ctype.to_lowercase(); + // CHARACTER VARYING and VARCHAR are synonyms so we ignore this difference. + if !(s.starts_with("varchar") || s.starts_with("character varying")) + || !(c.starts_with("varchar") || c.starts_with("character varying")) + { + if self.verbose { + valve_log!( + "The table '{}' will be recreated because the SQL type of column '{}', \ + {}, does not match the configured value: {}", + table, + cname, + ctype, + sql_type + ); + } + return Ok(true); + } + } - Ok(true) + // Check the column's structure: + let structure = column_config.get("structure").and_then(|d| d.as_str()); + match structure { + Some(structure) if structure != "" => { + let parsed_structure = self + .parsed_structure_conditions + .get(structure) + .and_then(|p| Some(p.parsed.clone())) + .unwrap(); + if self + .structure_has_changed(&parsed_structure, table, &cname, &pk, &ctype) + .await? + { + if self.verbose { + valve_log!( + "The table '{}' will be recreated because the database \ + constraints for column '{}' do not match the configured \ + structure, '{}'", + table, + cname, + structure + ); + } + return Ok(true); + } + } + _ => (), + }; + } + + Ok(false) } /// Create all configured database tables and views if they do not already exist as configured. pub async fn create_missing_tables(&mut self) -> Result<&mut Self, sqlx::Error> { // DatabaseError + // TODO: Add logging statements here. 
+ let mut tables_config = self .global_config .get_mut("table") @@ -303,16 +632,18 @@ impl Valve { // this is done, remove the "IF NOT EXISTS" qualifiers from all of the CREATE TABLE, // CREATE VIEW, and CREATE INDEX statements. if self.table_has_changed(table).await? { - eprintln!("{} has changed.", table); + // ... + } else { + // ... } let table_statements = setup_statements.get(table).unwrap(); for stmt in table_statements { self.execute_sql(stmt).await?; } - if self.verbose { - let output = String::from(table_statements.join("\n")); - println!("{}\n", output); - } + //if self.verbose { + // let output = String::from(table_statements.join("\n")); + // println!("{}\n", output); + //} } Ok(self) @@ -417,9 +748,8 @@ impl Valve { } if self.verbose { - eprintln!( - "{} - Processing {} tables.", - Utc::now(), + valve_log!( + "Processing {} tables.", self.global_config .get("sorted_table_list") .and_then(|l| l.as_array()) @@ -436,7 +766,7 @@ impl Valve { ) .await?; } else { - eprintln!("WARN: Attempt to load tables but Valve is not connected to a database."); + valve_log!("WARN: Attempt to load tables but Valve is not connected to a database."); } Ok(self) @@ -458,7 +788,7 @@ impl Valve { /// Return an error on writing or database problem. pub fn save_all_tables(&self) -> Result<&Self, sqlx::Error> { // WriteOrDatabaseError - // TODO + // TODO. See https://github.com/ontodev/nanobot.rs/pull/65 for hints. Ok(self) } @@ -945,10 +1275,10 @@ pub fn read_config_files( continue; } Some(p) if !Path::new(p).is_file() => { - eprintln!("WARN: File does not exist {}", p); + valve_log!("WARN: File does not exist {}", p); } Some(p) if Path::new(p).canonicalize().is_err() => { - eprintln!("WARN: File path could not be made canonical {}", p); + valve_log!("WARN: File path could not be made canonical {}", p); } Some(p) => path = Some(p.to_string()), }; @@ -1107,6 +1437,7 @@ pub fn read_config_files( }), ); + // TODO: Are there some missing columns here? 
// Manually add the history table config: tables_config.insert( "history".to_string(), @@ -1731,10 +2062,10 @@ pub async fn configure_db( continue; } Some(p) if !Path::new(p).is_file() => { - eprintln!("WARN: File does not exist {}", p); + valve_log!("WARN: File does not exist {}", p); } Some(p) if Path::new(p).canonicalize().is_err() => { - eprintln!("WARN: File path could not be made canonical {}", p); + valve_log!("WARN: File path could not be made canonical {}", p); } Some(p) => path = Some(p.to_string()), }; @@ -2073,11 +2404,7 @@ pub async fn valve( if *command == ValveCommand::Load { if verbose { - eprintln!( - "{} - Processing {} tables.", - Utc::now(), - sorted_table_list.len() - ); + valve_log!("Processing {} tables.", sorted_table_list.len()); } load_db( &config, @@ -2440,9 +2767,10 @@ pub async fn get_rows_to_update( let updates_before = match query_as_if.kind { QueryAsIfKind::Add => { if let None = query_as_if.row { - eprintln!( + valve_log!( "WARN: No row in query_as_if: {:?} for {:?}", - query_as_if, query_as_if.kind + query_as_if, + query_as_if.kind ); } IndexMap::new() @@ -2475,9 +2803,10 @@ pub async fn get_rows_to_update( let updates_after = match &query_as_if.row { None => { if query_as_if.kind != QueryAsIfKind::Remove { - eprintln!( + valve_log!( "WARN: No row in query_as_if: {:?} for {:?}", - query_as_if, query_as_if.kind + query_as_if, + query_as_if.kind ); } IndexMap::new() @@ -2550,9 +2879,10 @@ pub async fn get_rows_to_update( let updates = match query_as_if.kind { QueryAsIfKind::Add => { if let None = query_as_if.row { - eprintln!( + valve_log!( "WARN: No row in query_as_if: {:?} for {:?}", - query_as_if, query_as_if.kind + query_as_if, + query_as_if.kind ); } IndexMap::new() @@ -2754,12 +3084,12 @@ fn get_json_from_row(row: &AnyRow, column: &str) -> Option { let value: &str = row.get(column); match serde_json::from_str::(value) { Err(e) => { - eprintln!("WARN: {}", e); + valve_log!("WARN: {}", e); None } Ok(SerdeValue::Object(value)) => 
Some(value), _ => { - eprintln!("WARN: {} is not an object.", value); + valve_log!("WARN: {} is not an object.", value); None } } @@ -2859,7 +3189,7 @@ pub async fn undo( ) -> Result<(), sqlx::Error> { let last_change = match get_record_to_undo(pool).await? { None => { - eprintln!("WARN: Nothing to undo."); + valve_log!("WARN: Nothing to undo."); return Ok(()); } Some(r) => r, @@ -2955,13 +3285,13 @@ pub async fn redo( ) -> Result<(), sqlx::Error> { let last_undo = match get_record_to_redo(pool).await? { None => { - eprintln!("WARN: Nothing to redo."); + valve_log!("WARN: Nothing to redo."); return Ok(()); } Some(last_undo) => { let undone_by = last_undo.try_get_raw("undone_by")?; if undone_by.is_null() { - eprintln!("WARN: Nothing to redo."); + valve_log!("WARN: Nothing to redo."); return Ok(()); } last_undo @@ -3742,9 +4072,13 @@ fn read_tsv_into_vector(path: &str) -> Vec { let val = val.as_str().unwrap(); let trimmed_val = val.trim(); if trimmed_val != val { - eprintln!( + valve_log!( "Error: Value '{}' of column '{}' in row {} of table '{}' {}", - val, col, i, path, "has leading and/or trailing whitespace." + val, + col, + i, + path, + "has leading and/or trailing whitespace." 
); process::exit(1); } @@ -4964,7 +5298,7 @@ fn add_message_counts(messages: &Vec, messages_stats: &mut HashMap { - eprintln!("WARN: Unable to open '{}': {}", path.clone(), e); + valve_log!("WARN: Unable to open '{}': {}", path.clone(), e); continue; } Ok(table_file) => csv::ReaderBuilder::new() @@ -5546,13 +5880,7 @@ async fn load_db( } }; if verbose { - eprintln!( - "{} - Loading table {}/{}: {}", - Utc::now(), - table_num, - num_tables, - table_name - ); + valve_log!("Loading table {}/{}: {}", table_num, num_tables, table_name); } table_num += 1; @@ -5651,7 +5979,7 @@ async fn load_db( "{} errors, {} warnings, and {} information messages generated for {}", errors, warnings, infos, table_name ); - eprintln!("{} - {}", Utc::now(), status_message); + valve_log!("{}", status_message); total_errors += errors; total_warnings += warnings; total_infos += infos; @@ -5659,9 +5987,8 @@ async fn load_db( } if verbose { - eprintln!( - "{} - Loading complete with {} errors, {} warnings, and {} information messages", - Utc::now(), + valve_log!( + "Loading complete with {} errors, {} warnings, and {} information messages", total_errors, total_warnings, total_infos diff --git a/src/validate.rs b/src/validate.rs index e4b89dc1..6b3595a0 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -1,3 +1,4 @@ +use chrono::Utc; use enquote::unquote; use indexmap::IndexMap; use serde_json::{json, Value as SerdeValue}; @@ -9,7 +10,7 @@ use std::collections::HashMap; use crate::{ ast::Expression, cast_column_sql_to_text, cast_sql_param_from_text, get_column_value, - get_sql_type_from_global_config, is_sql_type_error, local_sql_syntax, ColumnRule, + get_sql_type_from_global_config, is_sql_type_error, local_sql_syntax, valve_log, ColumnRule, CompiledCondition, ParsedStructure, SerdeMap, ValveRow, SQL_PARAM, }; @@ -873,7 +874,7 @@ pub fn validate_rows_intra( let mut result_rows = vec![]; for row in rows { match row { - Err(err) => eprintln!("Error while processing row for '{}': {}", 
table_name, err), + Err(err) => valve_log!("ERROR while processing row for '{}': {}", table_name, err), Ok(row) => { let mut result_row = ResultRow { row_number: None, From ef7a8074205a3719049952cd6bcd91e61c520065 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Wed, 13 Dec 2023 10:44:54 -0500 Subject: [PATCH 26/57] check foreign keys in table_has_changed() --- Makefile | 1 - src/lib.rs | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 68 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 3e6bd336..d5488499 100644 --- a/Makefile +++ b/Makefile @@ -61,7 +61,6 @@ sqlite_test: build/valve.db test/src/table.tsv | test/output pg_test: valve test/src/table.tsv | test/output @echo "Testing valve on postgresql ..." - ./$^ --drop_all postgresql:///valve_postgres ./$^ postgresql:///valve_postgres test/round_trip.sh postgresql:///valve_postgres $(word 2,$^) scripts/export.py messages postgresql:///valve_postgres $| $(tables_to_test) diff --git a/src/lib.rs b/src/lib.rs index f61de584..892c86ad 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -114,6 +114,7 @@ impl Valve { /// Valve struct. pub async fn build( table_path: &str, + // TODO: Remove the config_table parameter. config_table: &str, database: &str, verbose: bool, @@ -258,6 +259,8 @@ impl Valve { sqlite_ctype: &str, ) -> Result { let pool = self.pool.as_ref().unwrap(); + // A clojure to determine whether the given column has the given constraint type, which + // can be one of 'UNIQUE', 'PRIMARY KEY', 'FOREIGN KEY': let column_has_constraint_type = |constraint_type: &str| -> Result { if pool.any_kind() == AnyKind::Postgres { let sql = format!( @@ -296,8 +299,19 @@ impl Valve { } } Ok(false) + } else if constraint_type == "FOREIGN KEY" { + let sql = format!(r#"PRAGMA FOREIGN_KEY_LIST("{}")"#, table); + for row in block_on(sqlx_query(&sql).fetch_all(pool))? 
{ + let cname = row.get::("from"); + if cname == column { + return Ok(true); + } + } + Ok(false) } else { - todo!(); + return Err(SqlxCErr( + format!("Unrecognized constraint type: '{}'", constraint_type).into(), + )); } } }; @@ -320,10 +334,60 @@ impl Valve { return Ok(true); } - // TODO NEXT: match pstruct { - Expression::Function(name, _args) if name == "from" => (), - Expression::Function(name, _args) if name == "tree" => (), + Expression::Function(name, args) if name == "from" => { + match &*args[0] { + Expression::Field(cfg_ftable, cfg_fcolumn) => { + if pool.any_kind() == AnyKind::Sqlite { + let sql = format!(r#"PRAGMA FOREIGN_KEY_LIST("{}")"#, table); + for row in sqlx_query(&sql).fetch_all(pool).await? { + let from = row.get::("from"); + if from == column { + let db_ftable = row.get::("table"); + let db_fcolumn = row.get::("to"); + if *cfg_ftable != db_ftable || *cfg_fcolumn != db_fcolumn { + return Ok(true); + } + } + } + } else { + let sql = format!( + r#"SELECT + ccu.table_name AS foreign_table_name, + ccu.column_name AS foreign_column_name + FROM information_schema.table_constraints AS tc + JOIN information_schema.key_column_usage AS kcu + ON tc.constraint_name = kcu.constraint_name + AND tc.table_schema = kcu.table_schema + JOIN information_schema.constraint_column_usage AS ccu + ON ccu.constraint_name = tc.constraint_name + WHERE tc.constraint_type = 'FOREIGN KEY' + AND tc.table_name = '{}' + AND kcu.column_name = '{}'"#, + table, column + ); + let rows = sqlx_query(&sql).fetch_all(pool).await?; + if rows.len() == 0 { + return Ok(true); + } else if rows.len() > 1 { + unreachable!(); + } else { + let row = &rows[0]; + let db_ftable = row.get::("foreign_table_name"); + let db_fcolumn = row.get::("foreign_column_name"); + if *cfg_ftable != db_ftable || *cfg_fcolumn != db_fcolumn { + return Ok(true); + } + } + } + } + _ => { + return Err(SqlxCErr( + format!("Unrecognized structure: {:?}", pstruct).into(), + )); + } + }; + } _ => (), }; From 
63a425d25ada22d02851cfd9da9ab0eaaecbcf44 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Thu, 14 Dec 2023 14:05:38 -0500 Subject: [PATCH 27/57] finish implementing create_missing_tables() plus bug fixes and refactoring --- src/lib.rs | 395 +++++++++++++++++++++++------------------- src/main.rs | 2 +- test/src/datatype.tsv | 2 +- 3 files changed, 218 insertions(+), 181 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 892c86ad..4081db99 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -256,7 +256,6 @@ impl Valve { table: &str, column: &str, sqlite_pk: &u32, - sqlite_ctype: &str, ) -> Result { let pool = self.pool.as_ref().unwrap(); // A clojure to determine whether the given column has the given constraint type, which @@ -316,22 +315,47 @@ impl Valve { } }; - // Check if there is a change to whether this column is a primary key: + // Check if there is a change to whether this column is a primary/unique key: let is_primary = match pstruct { Expression::Label(label) if label == "primary" => true, _ => false, }; if is_primary != column_has_constraint_type("PRIMARY KEY")? { return Ok(true); - } - - // Check if there is a change to whether this column is a unique constraint: - let is_unique = match pstruct { - Expression::Label(label) if label == "unique" => true, - _ => false, - }; - if is_unique != column_has_constraint_type("UNIQUE")? { - return Ok(true); + } else if !is_primary { + let is_unique = match pstruct { + Expression::Label(label) if label == "unique" => true, + _ => false, + }; + let unique_in_db = column_has_constraint_type("UNIQUE")?; + if is_unique != unique_in_db { + // A child of a tree constraint implies a unique db constraint, so if there is a + // unique constraint in the db that is not configured, that is the explanation, + // and in that case we do not count this as a change to the column. 
+ if !unique_in_db { + return Ok(true); + } else { + let trees = self + .global_config + .get("constraints") + .and_then(|c| c.as_object()) + .and_then(|o| o.get("tree")) + .and_then(|t| t.as_object()) + .and_then(|o| o.get(table)) + .and_then(|t| t.as_array()) + .and_then(|a| { + Some( + a.iter() + .map(|o| o.as_object().and_then(|o| o.get("child")).unwrap()), + ) + }) + .unwrap() + .collect::>(); + if !trees.contains(&&SerdeValue::String(column.to_string())) { + return Ok(true); + } + } + } } match pstruct { @@ -368,8 +392,10 @@ impl Valve { ); let rows = sqlx_query(&sql).fetch_all(pool).await?; if rows.len() == 0 { + // If the table doesn't even exist return true. return Ok(true); } else if rows.len() > 1 { + // This seems impossible given how PostgreSQL works: unreachable!(); } else { let row = &rows[0]; @@ -394,7 +420,15 @@ impl Valve { Ok(false) } - /// TODO: Add docstring here + /// Given the name of a table, determine whether its current instantiation in the database + /// differs from the way it has been configured. The answer to this question is yes whenever + /// (1) the number of columns or any of their names differs from their configured values, or + /// the order of database columns differs from the configured order; (2) The values in the + /// table table differ from their configured values; (3) The SQL type of one or more columns + /// does not match the configured SQL type for that column; (3) All columns with a 'unique', + /// 'primary', or 'from(table, column)' in their column configuration are associated, in the + /// database, with a unique constraint, primary key, and foreign key, respectively, and vice + /// versa. 
pub async fn table_has_changed(&self, table: &str) -> Result { let pool = self.pool.as_ref().unwrap(); let (columns_config, configured_column_order, description, table_type, path) = { @@ -575,7 +609,7 @@ impl Valve { ); } return Ok(true); - } else if rows.len() > 0 { + } else if rows.len() > 1 { if self.verbose { valve_log!( "WARN more than one row was returned from the query '{}'", @@ -589,9 +623,12 @@ impl Valve { // Check, for all tables, whether their column configuration matches the contents of the // database: for (cname, ctype, pk) in &db_columns_in_order { - // Do not consider special row/message/history identifier columns: + // Do not consider these special columns: if (table == "message" && cname == "message_id") + || (table == "message" && cname == "row") || (table == "history" && cname == "history_id") + || (table == "history" && cname == "timestamp") + || (table == "history" && cname == "row") || cname == "row_number" { continue; @@ -613,8 +650,8 @@ impl Valve { let s = sql_type.to_lowercase(); let c = ctype.to_lowercase(); // CHARACTER VARYING and VARCHAR are synonyms so we ignore this difference. - if !(s.starts_with("varchar") || s.starts_with("character varying")) - || !(c.starts_with("varchar") || c.starts_with("character varying")) + if !((s.starts_with("varchar") || s.starts_with("character varying")) + && (c.starts_with("varchar") || c.starts_with("character varying"))) { if self.verbose { valve_log!( @@ -640,7 +677,7 @@ impl Valve { .and_then(|p| Some(p.parsed.clone())) .unwrap(); if self - .structure_has_changed(&parsed_structure, table, &cname, &pk, &ctype) + .structure_has_changed(&parsed_structure, table, &cname, &pk) .await? { if self.verbose { @@ -663,51 +700,150 @@ impl Valve { Ok(false) } - /// Create all configured database tables and views if they do not already exist as configured. - pub async fn create_missing_tables(&mut self) -> Result<&mut Self, sqlx::Error> { - // DatabaseError - - // TODO: Add logging statements here. 
- - let mut tables_config = self + /// TODO: Add docstring here + pub async fn get_setup_statements(&self) -> Result>, sqlx::Error> { + let tables_config = self .global_config - .get_mut("table") - .and_then(|t| t.as_object_mut()) + .get("table") + .and_then(|t| t.as_object()) .unwrap() .clone(); - let mut datatypes_config = self + let datatypes_config = self .global_config - .get_mut("datatype") - .and_then(|d| d.as_object_mut()) + .get("datatype") + .and_then(|d| d.as_object()) .unwrap() .clone(); let pool = self.pool.as_ref().unwrap(); let parser = StartParser::new(); - let setup_statements = - get_setup_statements(&mut tables_config, &mut datatypes_config, &pool, &parser).await?; + // Begin by reading in the TSV files corresponding to the tables defined in tables_config, and + // use that information to create the associated database tables, while saving constraint + // information to constrains_config. + let mut setup_statements = HashMap::new(); + for table_name in tables_config.keys().cloned().collect::>() { + // Generate the statements for creating the table and its corresponding conflict table: + let mut table_statements = vec![]; + for table in vec![table_name.to_string(), format!("{}_conflict", table_name)] { + let mut statements = + get_table_ddl(&tables_config, &datatypes_config, &parser, &table, &pool); + table_statements.append(&mut statements); + } - // Add the message and history tables to the beginning of the list of tables to create - // (the message table in particular needs to be at the beginning since the table views all - // reference it). 
+ let (_, create_view_sql) = get_sql_for_standard_view(&table_name, pool); + let (_, create_text_view_sql) = + get_sql_for_text_view(&tables_config, &table_name, pool); + table_statements.push(create_view_sql); + table_statements.push(create_text_view_sql); + + setup_statements.insert(table_name.to_string(), table_statements); + } + + let text_type = get_sql_type(&datatypes_config, &"text".to_string(), pool).unwrap(); + + // Generate DDL for the history table: + let mut history_statements = vec![]; + history_statements.push(format!( + indoc! {r#" + CREATE TABLE "history" ( + {history_id} + "table" {text_type}, + "row" BIGINT, + "from" {text_type}, + "to" {text_type}, + "summary" {text_type}, + "user" {text_type}, + "undone_by" {text_type}, + {timestamp} + ); + "#}, + history_id = { + if pool.any_kind() == AnyKind::Sqlite { + "\"history_id\" INTEGER PRIMARY KEY," + } else { + "\"history_id\" SERIAL PRIMARY KEY," + } + }, + text_type = text_type, + timestamp = { + if pool.any_kind() == AnyKind::Sqlite { + "\"timestamp\" TIMESTAMP DEFAULT(STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW'))" + } else { + "\"timestamp\" TIMESTAMP DEFAULT CURRENT_TIMESTAMP" + } + }, + )); + history_statements + .push(r#"CREATE INDEX "history_tr_idx" ON "history"("table", "row");"#.to_string()); + setup_statements.insert("history".to_string(), history_statements); + + // Generate DDL for the message table: + let mut message_statements = vec![]; + message_statements.push(format!( + indoc! 
{r#" + CREATE TABLE "message" ( + {message_id} + "table" {text_type}, + "row" BIGINT, + "column" {text_type}, + "value" {text_type}, + "level" {text_type}, + "rule" {text_type}, + "message" {text_type} + ); + "#}, + message_id = { + if pool.any_kind() == AnyKind::Sqlite { + "\"message_id\" INTEGER PRIMARY KEY," + } else { + "\"message_id\" SERIAL PRIMARY KEY," + } + }, + text_type = text_type, + )); + message_statements.push( + r#"CREATE INDEX "message_trc_idx" ON "message"("table", "row", "column");"#.to_string(), + ); + setup_statements.insert("message".to_string(), message_statements); + + return Ok(setup_statements); + } + + /// TODO: Add docstring + pub async fn dump_schema(&self) -> Result<(), sqlx::Error> { + let setup_statements = self.get_setup_statements().await?; for table in self.get_tables_ordered_for_creation() { - // TODO: Use table_has_changed() to control whether a table is created or not, and once - // this is done, remove the "IF NOT EXISTS" qualifiers from all of the CREATE TABLE, - // CREATE VIEW, and CREATE INDEX statements. - if self.table_has_changed(table).await? { - // ... - } else { - // ... - } let table_statements = setup_statements.get(table).unwrap(); - for stmt in table_statements { - self.execute_sql(stmt).await?; + let output = String::from(table_statements.join("\n")); + println!("{}\n", output); + } + Ok(()) + } + + /// Create all configured database tables and views if they do not already exist as configured. + pub async fn create_missing_tables(&self) -> Result<&Self, sqlx::Error> { + // DatabaseError + + // TODO: Add logging statements here. + + let setup_statements = self.get_setup_statements().await?; + let sorted_table_list = self.get_tables_ordered_for_creation(); + for (i, table) in sorted_table_list.iter().enumerate() { + if self.table_has_changed(*table).await? 
{ + let mut tables_to_drop = vec![""; sorted_table_list.len() - i]; + tables_to_drop.clone_from_slice(&sorted_table_list[i..]); + tables_to_drop.reverse(); + for table in tables_to_drop { + valve_log!("Dropping table {}", table); + self.drop_tables(vec![table]).await?; + } + + let table_statements = setup_statements.get(*table).unwrap(); + for stmt in table_statements { + self.execute_sql(stmt).await?; + } } - //if self.verbose { - // let output = String::from(table_statements.join("\n")); - // println!("{}\n", output); - //} } Ok(self) @@ -718,7 +854,18 @@ impl Valve { // DatabaseError // Drop all of the database tables in the reverse of their sorted order: - for table in self.get_tables_ordered_for_deletion() { + self.drop_tables(self.get_tables_ordered_for_deletion()) + .await?; + Ok(self) + } + + /// Given a vector of table names, + /// drop those tables, in the given order. + /// Return an error on invalid table name or database problem. + pub async fn drop_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { + // DatabaseError + + for table in tables { if table != "message" && table != "history" { let sql = format!(r#"DROP VIEW IF EXISTS "{}_text_view""#, table); self.execute_sql(&sql).await?; @@ -734,15 +881,6 @@ impl Valve { Ok(self) } - /// Given a vector of table names, - /// drop those tables, in the given order. - /// Return an error on invalid table name or database problem. - pub fn drop_tables(&self, _tables: Vec<&str>) -> Result<&Self, sqlx::Error> { - // DatabaseError - // TODO - Ok(self) - } - /// Truncate all configured tables, in reverse dependency order. pub async fn truncate_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError @@ -786,7 +924,7 @@ impl Valve { /// If `validate` is false, just try to insert all rows. /// Return an error on database problem, /// including database conflicts that prevent rows being inserted. 
- pub async fn load_all_tables(&mut self, _validate: bool) -> Result<&mut Self, sqlx::Error> { + pub async fn load_all_tables(&self, _validate: bool) -> Result<&Self, sqlx::Error> { // DatabaseError self.create_missing_tables().await?; @@ -1514,7 +1652,10 @@ pub fn read_config_files( "row", "from", "to", + "summary", "user", + "undone_by", + "timestamp", ], "column": { "table": { @@ -1566,6 +1707,14 @@ pub fn read_config_files( "datatype": "line", "structure": "", }, + "timestamp": { + "table": "history", + "column": "timestamp", + "description": "The time of the change, or of the undo.", + "datatype": "line", + "structure": "", + }, + } }), ); @@ -1840,16 +1989,9 @@ fn get_sql_for_standard_view(table: &str, pool: &AnyPool) -> (String, String) { ); } - let create_or_replace_view = { - if pool.any_kind() == AnyKind::Postgres { - "CREATE OR REPLACE VIEW" - } else { - "CREATE VIEW IF NOT EXISTS" - } - }; let create_view_sql = format!( indoc! {r#" - {create_or_replace_view} "{t}_view" AS + CREATE VIEW "{t}_view" AS SELECT union_t.*, {message_t} AS "message", @@ -1860,7 +2002,6 @@ fn get_sql_for_standard_view(table: &str, pool: &AnyPool) -> (String, String) { SELECT * FROM "{t}_conflict" ) as union_t; "#}, - create_or_replace_view = create_or_replace_view, t = table, message_t = message_t, history_t = history_t, @@ -1880,7 +2021,7 @@ fn get_sql_for_standard_view(table: &str, pool: &AnyPool) -> (String, String) { /// returned in the form of a tuple of Strings, with the first string being a SQL statement /// for dropping the view, and the second string being a SQL statement for creating it. 
fn get_sql_for_text_view( - tables_config: &mut SerdeMap, + tables_config: &SerdeMap, table: &str, pool: &AnyPool, ) -> (String, String) { @@ -1960,21 +2101,13 @@ fn get_sql_for_text_view( v }; - let create_or_replace_view = { - if pool.any_kind() == AnyKind::Postgres { - "CREATE OR REPLACE VIEW" - } else { - "CREATE VIEW IF NOT EXISTS" - } - }; let create_view_sql = format!( - r#"{create_or_replace_view} "{table}_text_view" AS + r#"CREATE VIEW "{table}_text_view" AS SELECT {outer_columns} FROM ( SELECT {inner_columns} FROM "{table}_view" ) t"#, - create_or_replace_view = create_or_replace_view, outer_columns = outer_columns.join(", "), inner_columns = inner_columns.join(", "), table = table, @@ -1983,102 +2116,6 @@ fn get_sql_for_text_view( (drop_view_sql, create_view_sql) } -/// TODO: Add docstring here -pub async fn get_setup_statements( - tables_config: &mut SerdeMap, - datatypes_config: &mut SerdeMap, - pool: &AnyPool, - parser: &StartParser, -) -> Result>, sqlx::Error> { - // Begin by reading in the TSV files corresponding to the tables defined in tables_config, and - // use that information to create the associated database tables, while saving constraint - // information to constrains_config. 
- let mut setup_statements = HashMap::new(); - for table_name in tables_config.keys().cloned().collect::>() { - // Generate the statements for creating the table and its corresponding conflict table: - let mut table_statements = vec![]; - for table in vec![table_name.to_string(), format!("{}_conflict", table_name)] { - let mut statements = - get_table_ddl(tables_config, datatypes_config, parser, &table, &pool); - table_statements.append(&mut statements); - } - - let (_, create_view_sql) = get_sql_for_standard_view(&table_name, pool); - let (_, create_text_view_sql) = get_sql_for_text_view(tables_config, &table_name, pool); - table_statements.push(create_view_sql); - table_statements.push(create_text_view_sql); - - setup_statements.insert(table_name.to_string(), table_statements); - } - - // Generate DDL for the history table: - let mut history_statements = vec![]; - history_statements.push(format!( - indoc! {r#" - CREATE TABLE IF NOT EXISTS "history" ( - {row_number} - "table" TEXT, - "row" BIGINT, - "from" TEXT, - "to" TEXT, - "summary" TEXT, - "user" TEXT, - "undone_by" TEXT, - {timestamp} - ); - "#}, - row_number = { - if pool.any_kind() == AnyKind::Sqlite { - "\"history_id\" INTEGER PRIMARY KEY," - } else { - "\"history_id\" SERIAL PRIMARY KEY," - } - }, - timestamp = { - if pool.any_kind() == AnyKind::Sqlite { - "\"timestamp\" TIMESTAMP DEFAULT(STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW'))" - } else { - "\"timestamp\" TIMESTAMP DEFAULT CURRENT_TIMESTAMP" - } - }, - )); - history_statements.push( - r#"CREATE INDEX IF NOT EXISTS "history_tr_idx" ON "history"("table", "row");"#.to_string(), - ); - setup_statements.insert("history".to_string(), history_statements); - - // Generate DDL for the message table: - let mut message_statements = vec![]; - message_statements.push(format!( - indoc! 
{r#" - CREATE TABLE IF NOT EXISTS "message" ( - {} - "table" TEXT, - "row" BIGINT, - "column" TEXT, - "value" TEXT, - "level" TEXT, - "rule" TEXT, - "message" TEXT - ); - "#}, - { - if pool.any_kind() == AnyKind::Sqlite { - "\"message_id\" INTEGER PRIMARY KEY," - } else { - "\"message_id\" SERIAL PRIMARY KEY," - } - }, - )); - message_statements.push( - r#"CREATE INDEX IF NOT EXISTS "message_trc_idx" ON "message"("table", "row", "column");"# - .to_string(), - ); - setup_statements.insert("message".to_string(), message_statements); - - return Ok(setup_statements); -} - // TODO: Remove this function once it has been factored. /// Given config maps for tables and datatypes, a database connection pool, and a StartParser, /// read in the TSV files corresponding to the tables defined in the tables config, and use that @@ -5157,15 +5194,15 @@ fn get_table_constraints( // TODO: Add docstring here fn get_table_ddl( - tables_config: &mut SerdeMap, - datatypes_config: &mut SerdeMap, + tables_config: &SerdeMap, + datatypes_config: &SerdeMap, parser: &StartParser, table_name: &String, pool: &AnyPool, ) -> Vec { let mut statements = vec![]; let mut create_lines = vec![ - format!(r#"CREATE TABLE IF NOT EXISTS "{}" ("#, table_name), + format!(r#"CREATE TABLE "{}" ("#, table_name), String::from(r#" "row_number" BIGINT,"#), ]; @@ -5330,7 +5367,7 @@ fn get_table_ddl( && !primary_keys.contains(&SerdeValue::String(tree_child.to_string())) { statements.push(format!( - r#"CREATE UNIQUE INDEX IF NOT EXISTS "{}_{}_idx" ON "{}"("{}");"#, + r#"CREATE UNIQUE INDEX "{}_{}_idx" ON "{}"("{}");"#, table_name, tree_child, table_name, tree_child )); } @@ -5338,7 +5375,7 @@ fn get_table_ddl( // Finally, create a further unique index on row_number: statements.push(format!( - r#"CREATE UNIQUE INDEX IF NOT EXISTS "{}_row_number_idx" ON "{}"("row_number");"#, + r#"CREATE UNIQUE INDEX "{}_row_number_idx" ON "{}"("row_number");"#, table_name, table_name )); diff --git a/src/main.rs b/src/main.rs index 
408e29bd..7c367c36 100644 --- a/src/main.rs +++ b/src/main.rs @@ -166,7 +166,7 @@ async fn main() -> Result<(), sqlx::Error> { ) .await?; } else { - let mut valve = + let valve = Valve::build(&source, &config_table, &destination, verbose, initial_load).await?; valve.load_all_tables(true).await?; diff --git a/test/src/datatype.tsv b/test/src/datatype.tsv index c162ca56..b2079e3b 100644 --- a/test/src/datatype.tsv +++ b/test/src/datatype.tsv @@ -18,7 +18,7 @@ real nonspace match(/-?\d+(\.\d+)?/) a positive or negative real number REAL R suffix word exclude(/\W/) a suffix for a CURIE table_name word exclude(/\W/) a table name table_type word lowercase in('table', 'column', 'datatype') a table type -text any text TEXT VARCHAR(100) xsd:string textarea +text any text TEXT TEXT xsd:string textarea trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace word nonspace exclude(/\W/) a single word: letters, numbers, underscore From bde8501d501a72e76cf90a2d89d3da79a23175f9 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Thu, 14 Dec 2023 14:08:18 -0500 Subject: [PATCH 28/57] small optimization --- src/lib.rs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 4081db99..abd87a45 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -829,14 +829,17 @@ impl Valve { let setup_statements = self.get_setup_statements().await?; let sorted_table_list = self.get_tables_ordered_for_creation(); + let mut once_dropped = false; for (i, table) in sorted_table_list.iter().enumerate() { if self.table_has_changed(*table).await? 
{ - let mut tables_to_drop = vec![""; sorted_table_list.len() - i]; - tables_to_drop.clone_from_slice(&sorted_table_list[i..]); - tables_to_drop.reverse(); - for table in tables_to_drop { - valve_log!("Dropping table {}", table); - self.drop_tables(vec![table]).await?; + if !once_dropped { + let mut tables_to_drop = vec![""; sorted_table_list.len() - i]; + tables_to_drop.clone_from_slice(&sorted_table_list[i..]); + tables_to_drop.reverse(); + for table in tables_to_drop { + self.drop_tables(vec![table]).await?; + } + once_dropped = true; } let table_statements = setup_statements.get(*table).unwrap(); From 201e3fa97a096d4cb658367b160af3fa68802ef3 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Fri, 15 Dec 2023 15:58:05 -0500 Subject: [PATCH 29/57] remove unneeded functions; temporarily make most functions private --- src/api_test.rs | 711 +++------------ src/lib.rs | 2263 ++++++++++++++++++----------------------------- src/main.rs | 58 +- src/validate.rs | 209 +---- 4 files changed, 984 insertions(+), 2257 deletions(-) diff --git a/src/api_test.rs b/src/api_test.rs index 40904bcb..6efa3a73 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -1,40 +1,14 @@ -use ontodev_valve::{ - delete_row, get_compiled_datatype_conditions, get_compiled_rule_conditions, - get_parsed_structure_conditions, insert_new_row, redo, undo, update_row, - validate::{get_matching_values, validate_row}, - valve, - valve_grammar::StartParser, - ColumnRule, CompiledCondition, ParsedStructure, SerdeMap, ValveCommand, -}; +use ontodev_valve::{SerdeMap, Valve}; use rand::distributions::{Alphanumeric, DistString, Distribution, Uniform}; use rand::{random, thread_rng}; -use serde_json::{json, Value as SerdeValue}; -use sqlx::{ - any::{AnyConnectOptions, AnyKind, AnyPool, AnyPoolOptions}, - query as sqlx_query, - Error::Configuration as SqlxCErr, - Row, ValueRef, -}; -use std::{collections::HashMap, str::FromStr}; - -async fn test_matching( - config: &SerdeMap, - compiled_datatype_conditions: 
&HashMap, - parsed_structure_conditions: &HashMap, - pool: &AnyPool, -) -> Result<(), sqlx::Error> { +use serde_json::json; +use sqlx::{any::AnyPool, query as sqlx_query, Error::Configuration as SqlxCErr, Row, ValueRef}; + +async fn test_matching(valve: &Valve) -> Result<(), sqlx::Error> { eprint!("Running test_matching() ... "); + // Test the get_matching_values() function: - let matching_values = get_matching_values( - &config, - &compiled_datatype_conditions, - &parsed_structure_conditions, - &pool, - "table2", - "child", - None, - ) - .await?; + let matching_values = valve.get_matching_values("table2", "child", None).await?; assert_eq!( matching_values, json!([ @@ -51,16 +25,9 @@ async fn test_matching( ]) ); - let matching_values = get_matching_values( - &config, - &compiled_datatype_conditions, - &parsed_structure_conditions, - &pool, - "table6", - "child", - Some("7"), - ) - .await?; + let matching_values = valve + .get_matching_values("table6", "child", Some("7")) + .await?; assert_eq!( matching_values, json!([ @@ -72,13 +39,9 @@ async fn test_matching( Ok(()) } -async fn test_idempotent_validate_and_update( - config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, -) -> Result<(), sqlx::Error> { +async fn test_idempotent_validate_and_update(valve: &Valve) -> Result<(), sqlx::Error> { eprint!("Running test_idempotent_validate_and_update() ... 
"); + // We test that validate_row() is idempotent by running it multiple times on the same row: let row = json!({ "child": {"messages": [], "valid": true, "value": "b"}, @@ -94,71 +57,28 @@ async fn test_idempotent_validate_and_update( }, }); - let result_row_1 = validate_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - None, - "table2", - row.as_object().unwrap(), - Some(1), - None, - ) - .await?; - - let result_row_2 = validate_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - None, - "table2", - &result_row_1, - Some(1), - None, - ) - .await?; + let result_row_1 = valve + .validate_row("table2", row.as_object().unwrap()) + .await?; + + let result_row_2 = valve.validate_row("table2", &result_row_1).await?; assert_eq!(result_row_1, result_row_2); - let result_row = validate_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - None, - "table2", - &result_row_2, - Some(1), - None, - ) - .await?; + let result_row = valve.validate_row("table2", &result_row_2).await?; assert_eq!(result_row, result_row_2); // Update the row we constructed and validated above in the database: - update_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table2", - &row.as_object().unwrap(), - &1, - "VALVE", - ) - .await?; + valve + .update_row("table2", &1, &row.as_object().unwrap()) + .await?; eprintln!("done."); Ok(()) } -async fn test_validate_and_insert_1( - config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, -) -> Result<(), sqlx::Error> { +async fn test_validate_and_insert_1(valve: &Valve) -> Result<(), sqlx::Error> { eprint!("Running test_validate_and_insert_1() ... 
"); + // Validate and insert a new row: let row = json!({ "id": {"messages": [], "valid": true, "value": "BFO:0000027"}, @@ -174,42 +94,19 @@ async fn test_validate_and_insert_1( "type": {"messages": [], "valid": true, "value": "owl:Class"}, }); - let result_row = validate_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - None, - "table3", - row.as_object().unwrap(), - None, - None, - ) - .await?; - - let _new_row_num = insert_new_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table3", - &result_row, - None, - "VALVE", - ) - .await?; + let result_row = valve + .validate_row("table3", row.as_object().unwrap()) + .await?; + + let (_new_row_num, _new_row) = valve.insert_row("table3", &result_row).await?; eprintln!("done."); Ok(()) } -async fn test_validate_and_update( - config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, -) -> Result<(), sqlx::Error> { +async fn test_validate_and_update(valve: &Valve) -> Result<(), sqlx::Error> { eprint!("Running test_validate_and_update() ... 
"); + // Validate and update an existing row: let row = json!({ "child": {"messages": [], "valid": true, "value": 2}, @@ -225,42 +122,19 @@ async fn test_validate_and_update( }, }); - let result_row = validate_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - None, - "table6", - row.as_object().unwrap(), - Some(1), - None, - ) - .await?; - - update_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table6", - &result_row, - &1, - "VALVE", - ) - .await?; + let result_row = valve + .validate_row("table6", row.as_object().unwrap()) + .await?; + + valve.update_row("table6", &1, &result_row).await?; eprintln!("done."); Ok(()) } -async fn test_validate_and_insert_2( - config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, -) -> Result<(), sqlx::Error> { +async fn test_validate_and_insert_2(valve: &Valve) -> Result<(), sqlx::Error> { eprint!("Running test_validate_and_insert_2() ... 
"); + // Validate and insert a new row: let row = json!({ "child": {"messages": [], "valid": true, "value": 2}, @@ -276,42 +150,19 @@ async fn test_validate_and_insert_2( }, }); - let result_row = validate_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - None, - "table6", - row.as_object().unwrap(), - None, - None, - ) - .await?; - - let _new_row_num = insert_new_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table6", - &result_row, - None, - "VALVE", - ) - .await?; + let result_row = valve + .validate_row("table6", row.as_object().unwrap()) + .await?; + + let (_new_row_num, _new_row) = valve.insert_row("table6", &result_row).await?; eprintln!("done."); Ok(()) } -async fn test_dependencies( - config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, -) -> Result<(), sqlx::Error> { +async fn test_dependencies(valve: &Valve) -> Result<(), sqlx::Error> { eprint!("Running test_dependencies() ... "); + // Test cases for updates/inserts/deletes with dependencies. 
let row = json!({ "foreign_column": {"messages": [], "valid": true, "value": "w"}, @@ -319,17 +170,9 @@ async fn test_dependencies( "numeric_foreign_column": {"messages": [], "valid": true, "value": ""}, }); - update_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table10", - &row.as_object().unwrap(), - &1, - "VALVE", - ) - .await?; + valve + .update_row("table10", &1, &row.as_object().unwrap()) + .await?; let row = json!({ "child": {"messages": [], "valid": true, "value": "b"}, @@ -339,28 +182,11 @@ async fn test_dependencies( "bar": {"messages": [], "valid": true, "value": "f"}, }); - update_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table11", - &row.as_object().unwrap(), - &2, - "VALVE", - ) - .await?; - - delete_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table11", - &4, - "VALVE", - ) - .await?; + valve + .update_row("table11", &2, &row.as_object().unwrap()) + .await?; + + valve.delete_row("table11", &4).await?; let row = json!({ "foreign_column": {"messages": [], "valid": true, "value": "i"}, @@ -368,24 +194,16 @@ async fn test_dependencies( "numeric_foreign_column": {"messages": [], "valid": true, "value": "9"}, }); - let _new_row_num = insert_new_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table10", - &row.as_object().unwrap(), - None, - "VALVE", - ) - .await?; + let (_new_row_num, _new_row) = valve + .insert_row("table10", &row.as_object().unwrap()) + .await?; eprintln!("done."); Ok(()) } #[derive(Clone, Debug, PartialEq, Eq)] -pub enum DbOperation { +enum DbOperation { Insert, Delete, Update, @@ -490,15 +308,11 @@ async fn generate_operation_sequence(pool: &AnyPool) -> Result, Ok(operations) } -async fn test_randomized_api_test_with_undo_redo( - config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, -) -> 
Result<(), sqlx::Error> { +async fn test_randomized_api_test_with_undo_redo(valve: &Valve) -> Result<(), sqlx::Error> { // Randomly generate a number of insert/update/delete operations, possibly followed by undos // and/or redos. eprint!("Running test_randomized_api_test_with_undo_redo() ... "); + fn generate_value() -> String { let mut value = Alphanumeric.sample_string(&mut rand::thread_rng(), 10); while random::() && random::() { @@ -532,33 +346,24 @@ async fn test_randomized_api_test_with_undo_redo( row } - let operations_list = generate_operation_sequence(pool).await?; + let operations_list = generate_operation_sequence(&valve.pool).await?; for operation in operations_list { match operation { DbOperation::Delete => { let query = sqlx_query("SELECT MAX(row_number) AS row_number FROM table1_view"); - let sql_row = query.fetch_one(pool).await?; + let sql_row = query.fetch_one(&valve.pool).await?; let raw_row_number = sql_row.try_get_raw("row_number")?; if raw_row_number.is_null() { return Err(SqlxCErr("No rows in table1_view".into())); } else { let row_number: i64 = sql_row.get("row_number"); let row_number = row_number as u32; - delete_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table1", - &row_number, - "VALVE", - ) - .await?; + valve.delete_row("table1", &row_number).await?; } } DbOperation::Update => { let query = sqlx_query("SELECT MAX(row_number) AS row_number FROM table1_view"); - let sql_row = query.fetch_one(pool).await?; + let sql_row = query.fetch_one(&valve.pool).await?; let raw_row_number = sql_row.try_get_raw("row_number")?; if raw_row_number.is_null() { return Err(SqlxCErr("No rows in table1_view".into())); @@ -566,52 +371,18 @@ async fn test_randomized_api_test_with_undo_redo( let row_number: i64 = sql_row.get("row_number"); let row_number = row_number as u32; let row = generate_row(); - update_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table1", - &row, - 
&row_number, - "VALVE", - ) - .await?; + valve.update_row("table1", &row_number, &row).await?; } } DbOperation::Insert => { let row = generate_row(); - let _rn = insert_new_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table1", - &row, - None, - "VALVE", - ) - .await?; + let (_rn, _r) = valve.insert_row("table1", &row).await?; } DbOperation::Undo => { - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; + valve.undo().await?; } DbOperation::Redo => { - redo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; + valve.redo().await?; } }; } @@ -620,13 +391,9 @@ async fn test_randomized_api_test_with_undo_redo( Ok(()) } -async fn test_undo_redo( - config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, -) -> Result<(), sqlx::Error> { +async fn test_undo_redo(valve: &Valve) -> Result<(), sqlx::Error> { eprint!("Running test_undo_redo() ... 
"); + // Undo/redo tests let row_1 = json!({ "foreign_column": {"messages": [], "valid": true, "value": "j"}, @@ -640,324 +407,78 @@ async fn test_undo_redo( }); // Undo/redo test 1: - let _rn = insert_new_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table10", - &row_1.as_object().unwrap(), - None, - "VALVE", - ) - .await?; - - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; - - redo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; - - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; + let (_rn, _r) = valve + .insert_row("table10", &row_1.as_object().unwrap()) + .await?; + + valve.undo().await?; + + valve.redo().await?; + + valve.undo().await?; // Undo/redo test 2: - update_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table10", - &row_2.as_object().unwrap(), - &8, - "VALVE", - ) - .await?; - - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; - - redo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; - - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; + valve + .update_row("table10", &8, &row_2.as_object().unwrap()) + .await?; + + valve.undo().await?; + + valve.redo().await?; + + valve.undo().await?; // Undo/redo test 3: - delete_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table10", - &8, - "VALVE", - ) - .await?; - - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; - - redo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; - - undo( - 
&config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; + valve.delete_row("table10", &8).await?; + + valve.undo().await?; + + valve.redo().await?; + + valve.undo().await?; // Undo/redo test 4: - let rn = insert_new_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table10", - &row_1.as_object().unwrap(), - None, - "VALVE", - ) - .await?; - - update_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table10", - &row_2.as_object().unwrap(), - &rn, - "VALVE", - ) - .await?; + let (rn, _row) = valve + .insert_row("table10", &row_1.as_object().unwrap()) + .await?; + + valve + .update_row("table10", &rn, &row_2.as_object().unwrap()) + .await?; // Undo update: - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; + valve.undo().await?; // Redo update: - redo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; - - delete_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table10", - &rn, - "VALVE", - ) - .await?; + valve.redo().await?; + + valve.delete_row("table10", &rn).await?; // Undo delete: - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; + valve.undo().await?; // Undo update: - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; + valve.undo().await?; // Undo insert: - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; + valve.undo().await?; eprintln!("done."); Ok(()) } pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Error> { - let config = valve( - table, - database, - &ValveCommand::Config, - false, - false, - "table", - ) - .await?; - let config: SerdeValue = 
serde_json::from_str(config.as_str()).unwrap(); - let config = config.as_object().unwrap(); - - // To connect to a postgresql database listening to a unix domain socket: - // ---------------------------------------------------------------------- - // let connection_options = - // AnyConnectOptions::from_str("postgres:///testdb?host=/var/run/postgresql")?; - // - // To query the connection type at runtime via the pool: - // ----------------------------------------------------- - // let db_type = pool.any_kind(); - - let connection_options; - if database.starts_with("postgresql://") { - connection_options = AnyConnectOptions::from_str(database)?; - } else { - let connection_string; - if !database.starts_with("sqlite://") { - connection_string = format!("sqlite://{}?mode=rwc", database); - } else { - connection_string = database.to_string(); - } - connection_options = AnyConnectOptions::from_str(connection_string.as_str()).unwrap(); - } - - let pool = AnyPoolOptions::new() - .max_connections(5) - .connect_with(connection_options) - .await?; - if pool.any_kind() == AnyKind::Sqlite { - sqlx_query("PRAGMA foreign_keys = ON") - .execute(&pool) - .await?; - } - - let parser = StartParser::new(); - let compiled_datatype_conditions = get_compiled_datatype_conditions(&config, &parser); - let parsed_structure_conditions = get_parsed_structure_conditions(&config, &parser); - let compiled_rule_conditions = - get_compiled_rule_conditions(&config, compiled_datatype_conditions.clone(), &parser); - + let valve = Valve::build(table, "table", database, false, false).await?; // NOTE that you must use an external script to fetch the data from the database and run a diff // against a known good sample to verify that these tests yield the expected results: - test_matching( - &config, - &compiled_datatype_conditions, - &parsed_structure_conditions, - &pool, - ) - .await?; - test_idempotent_validate_and_update( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - 
&pool, - ) - .await?; - test_validate_and_insert_1( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - ) - .await?; - test_validate_and_update( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - ) - .await?; - test_validate_and_insert_2( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - ) - .await?; - test_dependencies( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - ) - .await?; - test_undo_redo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - ) - .await?; - - test_randomized_api_test_with_undo_redo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - ) - .await?; + test_matching(&valve).await?; + test_idempotent_validate_and_update(&valve).await?; + test_validate_and_insert_1(&valve).await?; + test_validate_and_update(&valve).await?; + test_validate_and_insert_2(&valve).await?; + test_dependencies(&valve).await?; + test_undo_redo(&valve).await?; + test_randomized_api_test_with_undo_redo(&valve).await?; Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index abd87a45..e69f636b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,13 +24,14 @@ pub mod validate; lalrpop_mod!(pub valve_grammar); use crate::validate::{ - validate_row, validate_rows_constraints, validate_rows_intra, validate_rows_trees, - validate_tree_foreign_keys, validate_under, QueryAsIf, QueryAsIfKind, ResultRow, + validate_row_tx, validate_rows_constraints, validate_rows_intra, validate_rows_trees, + validate_tree_foreign_keys, validate_under, with_tree_sql, QueryAsIf, QueryAsIfKind, ResultRow, }; use crate::{ast::Expression, valve_grammar::StartParser}; use async_recursion::async_recursion; use chrono::Utc; use crossbeam; +use enquote::unquote; use futures::executor::block_on; use indexmap::IndexMap; use indoc::indoc; @@ -89,13 +90,76 @@ macro_rules! 
valve_log { ($($arg:tt)*) => (eprintln!("{} - {}", Utc::now(), format_args!($($arg)*))); } +/// Represents a structure such as those found in the `structure` column of the `column` table in +/// both its parsed format (i.e., as an [Expression](ast/enum.Expression.html)) as well as in its +/// original format (i.e., as a plain String). +#[derive(Clone)] +pub struct ParsedStructure { + pub original: String, + pub parsed: Expression, +} + +// We use Debug here instead of Display because we have only implemented Debug for Expressions. +// See the comment about this in ast.rs. +impl std::fmt::Debug for ParsedStructure { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "{{\"parsed_structure\": {{\"original\": \"{}\", \"parsed\": {:?}}}}}", + &self.original, &self.parsed + ) + } +} + +/// Represents a condition in three different ways: (i) in String format, (ii) as a parsed +/// [Expression](ast/enum.Expression.html), and (iii) as a pre-compiled regular expression. +#[derive(Clone)] +pub struct CompiledCondition { + pub original: String, + pub parsed: Expression, + pub compiled: Arc bool + Sync + Send>, +} + +// We use Debug here instead of Display because we have only implemented Debug for Expressions. +// See the comment about this in ast.rs. +impl std::fmt::Debug for CompiledCondition { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "{{\"compiled_condition\": {{\"original\": \"{}\", \"parsed\": {:?}}}}}", + &self.original, &self.parsed + ) + } +} + +/// Represents a 'when-then' condition, as found in the `rule` table, as two +/// [CompiledCondition](struct.CompiledCondition.html) structs corresponding to the when and then +/// parts of the given rule. +#[derive(Clone)] +pub struct ColumnRule { + pub when: CompiledCondition, + pub then: CompiledCondition, +} + +// We use Debug here instead of Display because we have only implemented Debug for Expressions. +// See the comment about this in ast.rs. 
+impl std::fmt::Debug for ColumnRule { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "{{\"column_rule\": {{\"when\": {:?}, \"then\": {:?}}}}}", + &self.when, &self.then + ) + } +} + #[derive(Debug)] pub struct Valve { pub global_config: SerdeMap, pub compiled_datatype_conditions: HashMap, pub compiled_rule_conditions: HashMap>>, pub parsed_structure_conditions: HashMap, - pub pool: Option, + pub pool: AnyPool, pub user: String, pub verbose: bool, pub initial_load: bool, @@ -177,8 +241,8 @@ impl Valve { compiled_datatype_conditions: compiled_datatype_conditions, compiled_rule_conditions: compiled_rule_conditions, parsed_structure_conditions: parsed_structure_conditions, - pool: Some(pool), - user: String::from("Valve"), + pool: pool, + user: String::from("VALVE"), verbose: verbose, initial_load: initial_load, }) @@ -217,9 +281,7 @@ impl Valve { async fn execute_sql(&self, sql: &str) -> Result<(), sqlx::Error> { // DatabaseError - sqlx_query(&sql) - .execute(self.pool.as_ref().unwrap()) - .await?; + sqlx_query(&sql).execute(&self.pool).await?; Ok(()) } @@ -249,7 +311,12 @@ impl Valve { sorted_tables } - /// TODO: Add docstring here + /// Given a parsed structure condition, a table and column name, and an unsigned integer + /// representing whether the given column, in the case of a SQLite database, is a primary key + /// (in the case of PostgreSQL, the sqlite_pk parameter is ignored): determine whether the + /// structure of the column is properly reflected in the db. E.g., a `from(table.column)` + /// struct should be associated with a foreign key, `primary` with a primary key, `unique` + /// with a unique constraint. 
async fn structure_has_changed( &self, pstruct: &Expression, @@ -257,11 +324,10 @@ impl Valve { column: &str, sqlite_pk: &u32, ) -> Result { - let pool = self.pool.as_ref().unwrap(); // A clojure to determine whether the given column has the given constraint type, which // can be one of 'UNIQUE', 'PRIMARY KEY', 'FOREIGN KEY': let column_has_constraint_type = |constraint_type: &str| -> Result { - if pool.any_kind() == AnyKind::Postgres { + if self.pool.any_kind() == AnyKind::Postgres { let sql = format!( r#"SELECT 1 FROM information_schema.table_constraints tco @@ -273,7 +339,7 @@ impl Valve { AND kcu.column_name = '{}'"#, table, constraint_type, column ); - let rows = block_on(sqlx_query(&sql).fetch_all(pool))?; + let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; if rows.len() > 1 { unreachable!(); } @@ -283,12 +349,12 @@ impl Valve { return Ok(*sqlite_pk == 1); } else if constraint_type == "UNIQUE" { let sql = format!(r#"PRAGMA INDEX_LIST("{}")"#, table); - for row in block_on(sqlx_query(&sql).fetch_all(pool))? { + for row in block_on(sqlx_query(&sql).fetch_all(&self.pool))? { let idx_name = row.get::("name"); let unique = row.get::("unique") as u8; if unique == 1 { let sql = format!(r#"PRAGMA INDEX_INFO("{}")"#, idx_name); - let rows = block_on(sqlx_query(&sql).fetch_all(pool))?; + let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; if rows.len() == 1 { let cname = rows[0].get::("name"); if cname == column { @@ -300,7 +366,7 @@ impl Valve { Ok(false) } else if constraint_type == "FOREIGN KEY" { let sql = format!(r#"PRAGMA FOREIGN_KEY_LIST("{}")"#, table); - for row in block_on(sqlx_query(&sql).fetch_all(pool))? { + for row in block_on(sqlx_query(&sql).fetch_all(&self.pool))? 
{ let cname = row.get::("from"); if cname == column { return Ok(true); @@ -362,9 +428,9 @@ impl Valve { Expression::Function(name, args) if name == "from" => { match &*args[0] { Expression::Field(cfg_ftable, cfg_fcolumn) => { - if pool.any_kind() == AnyKind::Sqlite { + if self.pool.any_kind() == AnyKind::Sqlite { let sql = format!(r#"PRAGMA FOREIGN_KEY_LIST("{}")"#, table); - for row in sqlx_query(&sql).fetch_all(pool).await? { + for row in sqlx_query(&sql).fetch_all(&self.pool).await? { let from = row.get::("from"); if from == column { let db_ftable = row.get::("table"); @@ -390,7 +456,7 @@ impl Valve { AND kcu.column_name = '{}'"#, table, column ); - let rows = sqlx_query(&sql).fetch_all(pool).await?; + let rows = sqlx_query(&sql).fetch_all(&self.pool).await?; if rows.len() == 0 { // If the table doesn't even exist return true. return Ok(true); @@ -429,8 +495,7 @@ impl Valve { /// 'primary', or 'from(table, column)' in their column configuration are associated, in the /// database, with a unique constraint, primary key, and foreign key, respectively, and vice /// versa. 
- pub async fn table_has_changed(&self, table: &str) -> Result { - let pool = self.pool.as_ref().unwrap(); + async fn table_has_changed(&self, table: &str) -> Result { let (columns_config, configured_column_order, description, table_type, path) = { let table_config = self .global_config @@ -492,12 +557,12 @@ impl Valve { }; let db_columns_in_order = { - if pool.any_kind() == AnyKind::Sqlite { + if self.pool.any_kind() == AnyKind::Sqlite { let sql = format!( r#"SELECT 1 FROM sqlite_master WHERE "type" = 'table' AND "name" = '{}'"#, table ); - let rows = sqlx_query(&sql).fetch_all(pool).await?; + let rows = sqlx_query(&sql).fetch_all(&self.pool).await?; if rows.len() == 0 { if self.verbose { valve_log!( @@ -509,7 +574,7 @@ impl Valve { } else if rows.len() == 1 { // Otherwise send another query to the db to get the column info: let sql = format!(r#"PRAGMA TABLE_INFO("{}")"#, table); - let rows = block_on(sqlx_query(&sql).fetch_all(pool))?; + let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; rows.iter() .map(|r| { ( @@ -530,7 +595,7 @@ impl Valve { ORDER BY "ordinal_position""#, table, ); - let rows = sqlx_query(&sql).fetch_all(pool).await?; + let rows = sqlx_query(&sql).fetch_all(&self.pool).await?; if rows.len() == 0 { if self.verbose { valve_log!( @@ -584,7 +649,7 @@ impl Valve { ("path", path), ] { let column = table_param.0; - let is_clause = if self.pool.as_ref().unwrap().any_kind() == AnyKind::Sqlite { + let is_clause = if self.pool.any_kind() == AnyKind::Sqlite { "IS" } else { "IS NOT DISTINCT FROM" @@ -597,9 +662,7 @@ impl Valve { r#"SELECT 1 from "table" WHERE "table" = '{}' AND "{}" {}"#, table, column, eq_value, ); - let rows = sqlx_query(&sql) - .fetch_all(self.pool.as_ref().unwrap()) - .await?; + let rows = sqlx_query(&sql).fetch_all(&self.pool).await?; if rows.len() == 0 { if self.verbose { valve_log!( @@ -637,13 +700,9 @@ impl Valve { .get(cname) .and_then(|c| c.as_object()) .unwrap(); - let sql_type = get_sql_type_from_global_config( - 
&self.global_config, - table, - &cname, - self.pool.as_ref().unwrap(), - ) - .unwrap(); + let sql_type = + get_sql_type_from_global_config(&self.global_config, table, &cname, &self.pool) + .unwrap(); // Check the column's SQL type: if sql_type.to_lowercase() != ctype.to_lowercase() { @@ -701,7 +760,7 @@ impl Valve { } /// TODO: Add docstring here - pub async fn get_setup_statements(&self) -> Result>, sqlx::Error> { + async fn get_setup_statements(&self) -> Result>, sqlx::Error> { let tables_config = self .global_config .get("table") @@ -715,32 +774,36 @@ impl Valve { .unwrap() .clone(); - let pool = self.pool.as_ref().unwrap(); let parser = StartParser::new(); - // Begin by reading in the TSV files corresponding to the tables defined in tables_config, and - // use that information to create the associated database tables, while saving constraint - // information to constrains_config. + // Begin by reading in the TSV files corresponding to the tables defined in tables_config, + // and use that information to create the associated database tables, while saving + // constraint information to constrains_config. 
let mut setup_statements = HashMap::new(); for table_name in tables_config.keys().cloned().collect::>() { // Generate the statements for creating the table and its corresponding conflict table: let mut table_statements = vec![]; for table in vec![table_name.to_string(), format!("{}_conflict", table_name)] { - let mut statements = - get_table_ddl(&tables_config, &datatypes_config, &parser, &table, &pool); + let mut statements = get_table_ddl( + &tables_config, + &datatypes_config, + &parser, + &table, + &self.pool, + ); table_statements.append(&mut statements); } - let (_, create_view_sql) = get_sql_for_standard_view(&table_name, pool); - let (_, create_text_view_sql) = - get_sql_for_text_view(&tables_config, &table_name, pool); + let create_view_sql = get_sql_for_standard_view(&table_name, &self.pool); + let create_text_view_sql = + get_sql_for_text_view(&tables_config, &table_name, &self.pool); table_statements.push(create_view_sql); table_statements.push(create_text_view_sql); setup_statements.insert(table_name.to_string(), table_statements); } - let text_type = get_sql_type(&datatypes_config, &"text".to_string(), pool).unwrap(); + let text_type = get_sql_type(&datatypes_config, &"text".to_string(), &self.pool).unwrap(); // Generate DDL for the history table: let mut history_statements = vec![]; @@ -759,7 +822,7 @@ impl Valve { ); "#}, history_id = { - if pool.any_kind() == AnyKind::Sqlite { + if self.pool.any_kind() == AnyKind::Sqlite { "\"history_id\" INTEGER PRIMARY KEY," } else { "\"history_id\" SERIAL PRIMARY KEY," @@ -767,7 +830,7 @@ impl Valve { }, text_type = text_type, timestamp = { - if pool.any_kind() == AnyKind::Sqlite { + if self.pool.any_kind() == AnyKind::Sqlite { "\"timestamp\" TIMESTAMP DEFAULT(STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW'))" } else { "\"timestamp\" TIMESTAMP DEFAULT CURRENT_TIMESTAMP" @@ -794,7 +857,7 @@ impl Valve { ); "#}, message_id = { - if pool.any_kind() == AnyKind::Sqlite { + if self.pool.any_kind() == AnyKind::Sqlite { 
"\"message_id\" INTEGER PRIMARY KEY," } else { "\"message_id\" SERIAL PRIMARY KEY," @@ -888,20 +951,32 @@ impl Valve { pub async fn truncate_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError + self.truncate_tables(self.get_tables_ordered_for_deletion()) + .await?; + Ok(self) + } + + /// Given a vector of table names, + /// truncate those tables, in the given order. + /// Return an error on invalid table name or database problem. + pub async fn truncate_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { + // ConfigOrDatabaseError + + self.create_missing_tables().await?; + // We must use CASCADE in the case of PostgreSQL since we cannot truncate a table, T, that // depends on another table, T', even in the case where we have previously truncated T'. // SQLite does not need this. However SQLite does require that the tables be truncated in // deletion order (which means that it must be checking that T' is empty). - let truncate_sql = |table: &str| -> String { - if self.pool.as_ref().unwrap().any_kind() == AnyKind::Postgres { + if self.pool.any_kind() == AnyKind::Postgres { format!(r#"TRUNCATE TABLE "{}" RESTART IDENTITY CASCADE"#, table) } else { format!(r#"DELETE FROM "{}""#, table) } }; - for table in self.get_tables_ordered_for_deletion() { + for table in tables { let sql = truncate_sql(&table); self.execute_sql(&sql).await?; if table != "message" && table != "history" { @@ -913,16 +988,6 @@ impl Valve { Ok(self) } - /// Given a vector of table names, - /// truncate those tables, in the given order. - /// Return an error on invalid table name or database problem. - pub fn truncate_tables(&self, _tables: Vec<&str>) -> Result<&Self, sqlx::Error> { - // ConfigOrDatabaseError - //self.create_missing_tables(); - // TODO - Ok(self) - } - /// Load all configured tables in dependency order. /// If `validate` is false, just try to insert all rows. 
/// Return an error on database problem, @@ -933,46 +998,46 @@ impl Valve { self.create_missing_tables().await?; self.truncate_all_tables().await?; - if let Some(pool) = &self.pool { - if pool.any_kind() == AnyKind::Sqlite { - sqlx_query("PRAGMA foreign_keys = ON").execute(pool).await?; - if self.initial_load { - // These pragmas are unsafe but they are used during initial loading since data - // integrity is not a priority in this case. - sqlx_query("PRAGMA journal_mode = OFF") - .execute(pool) - .await?; - sqlx_query("PRAGMA synchronous = 0").execute(pool).await?; - sqlx_query("PRAGMA cache_size = 1000000") - .execute(pool) - .await?; - sqlx_query("PRAGMA temp_store = MEMORY") - .execute(pool) - .await?; - } + if self.pool.any_kind() == AnyKind::Sqlite { + sqlx_query("PRAGMA foreign_keys = ON") + .execute(&self.pool) + .await?; + if self.initial_load { + // These pragmas are unsafe but they are used during initial loading since data + // integrity is not a priority in this case. + sqlx_query("PRAGMA journal_mode = OFF") + .execute(&self.pool) + .await?; + sqlx_query("PRAGMA synchronous = 0") + .execute(&self.pool) + .await?; + sqlx_query("PRAGMA cache_size = 1000000") + .execute(&self.pool) + .await?; + sqlx_query("PRAGMA temp_store = MEMORY") + .execute(&self.pool) + .await?; } + } - if self.verbose { - valve_log!( - "Processing {} tables.", - self.global_config - .get("sorted_table_list") - .and_then(|l| l.as_array()) - .unwrap() - .len() - ); - } - load_db( - &self.global_config, - &pool, - &self.compiled_datatype_conditions, - &self.compiled_rule_conditions, - self.verbose, - ) - .await?; - } else { - valve_log!("WARN: Attempt to load tables but Valve is not connected to a database."); + if self.verbose { + valve_log!( + "Processing {} tables.", + self.global_config + .get("sorted_table_list") + .and_then(|l| l.as_array()) + .unwrap() + .len() + ); } + load_db( + &self.global_config, + &self.pool, + &self.compiled_datatype_conditions, + 
&self.compiled_rule_conditions, + self.verbose, + ) + .await?; Ok(self) } @@ -981,11 +1046,17 @@ impl Valve { /// load those tables in the given order. /// If `validate` is false, just try to insert all rows. /// Return an error on invalid table name or database problem. - pub fn load_tables(&self, _tables: Vec<&str>, _validate: bool) -> Result<&Self, sqlx::Error> { + pub async fn load_tables( + &self, + tables: Vec<&str>, + _validate: bool, + ) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError - //self.create_missing_tables(); - //self.truncate_tables(tables); - // TODO + self.create_missing_tables().await?; + self.truncate_tables(tables).await?; + if 1 == 1 { + todo!(); + } Ok(self) } @@ -1009,142 +1080,614 @@ impl Valve { /// Given a table name and a row as JSON, /// return the validated row. /// Return an error on database problem. - pub fn validate_row( + pub async fn validate_row( &self, - _table_name: &str, - _row: &ValveRow, + table_name: &str, + row: &ValveRow, ) -> Result { // DatabaseError - todo!(); + + validate_row_tx( + &self.global_config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + None, + table_name, + row, + None, + None, + ) + .await } /// Given a table name and a row as JSON, /// add the row to the table in the database, /// and return the validated row, including its new row_number. /// Return an error invalid table name or database problem. - pub fn insert_row(&self, _table_name: &str, _row: &ValveRow) -> Result { + /// A wrapper around [insert_new_row_tx()] in which the following steps are also performed: + /// - A database transaction is created and then committed once the given new row has been + /// inserted. + /// - The row is validated before insertion and the update to the database is recorded to the + /// history table indicating that the given user is responsible for the change. 
+ pub async fn insert_row( + &self, + table_name: &str, + row: &ValveRow, + ) -> Result<(u32, ValveRow), sqlx::Error> { // ConfigOrDatabaseError - todo!(); + + let mut tx = self.pool.begin().await?; + + let row = validate_row_tx( + &self.global_config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + Some(&mut tx), + table_name, + row, + None, + None, + ) + .await?; + + let rn = insert_new_row_tx( + &self.global_config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + &mut tx, + table_name, + &row, + None, + true, + ) + .await?; + + record_row_change(&mut tx, table_name, &rn, None, Some(&row), &self.user).await?; + tx.commit().await?; + Ok((rn, row)) } /// Given a table name, a row number, and a row as JSON, /// update the row in the database, /// and return the validated row. /// Return an error invalid table name or row number or database problem. - pub fn update_row( + pub async fn update_row( &self, - _table_name: &str, - _row_number: usize, - _row: &ValveRow, + table_name: &str, + row_number: &u32, + row: &ValveRow, ) -> Result { // ConfigOrDatabaseError - todo!(); + + let mut tx = self.pool.begin().await?; + + // Get the old version of the row from the database so that we can later record it to the + // history table: + let old_row = get_row_from_db( + &self.global_config, + &self.pool, + &mut tx, + table_name, + &row_number, + ) + .await?; + + let row = validate_row_tx( + &self.global_config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + Some(&mut tx), + table_name, + row, + Some(*row_number), + None, + ) + .await?; + + update_row_tx( + &self.global_config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + &mut tx, + table_name, + &row, + row_number, + true, + false, + ) + .await?; + + // Record the row update in the history table: + record_row_change( + &mut tx, + table_name, + row_number, + 
Some(&old_row), + Some(&row), + &self.user, + ) + .await?; + + tx.commit().await?; + Ok(row) } /// Given a table name and a row number, /// delete that row from the table. /// Return an error invalid table name or row number or database problem. - pub fn delete_row(&self, _table_name: &str, _row_number: usize) -> Result<(), sqlx::Error> { + pub async fn delete_row(&self, table_name: &str, row_number: &u32) -> Result<(), sqlx::Error> { // ConfigOrDatabaseError - todo!(); + let mut tx = self.pool.begin().await?; + + let row = get_row_from_db( + &self.global_config, + &self.pool, + &mut tx, + &table_name, + row_number, + ) + .await?; + + record_row_change( + &mut tx, + &table_name, + row_number, + Some(&row), + None, + &self.user, + ) + .await?; + + delete_row_tx( + &self.global_config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + &mut tx, + table_name, + row_number, + ) + .await?; + + tx.commit().await?; + Ok(()) } /// Return the next change to undo, or None. /// Return an error on database problem. - pub fn get_record_to_undo(&self) -> Result, sqlx::Error> { + pub async fn get_record_to_undo(&self) -> Result, sqlx::Error> { // DatabaseError - todo!(); + // Look in the history table, get the row with the greatest ID, get the row number, + // from, and to, and determine whether the last operation was a delete, insert, or update. + let is_clause = if self.pool.any_kind() == AnyKind::Sqlite { + "IS" + } else { + "IS NOT DISTINCT FROM" + }; + let sql = format!( + r#"SELECT * FROM "history" + WHERE "undone_by" {} NULL + ORDER BY "history_id" DESC LIMIT 1"#, + is_clause + ); + let query = sqlx_query(&sql); + let result_row = query.fetch_optional(&self.pool).await?; + Ok(result_row) } /// Return the next change to redo, or None. /// Return an error on database problem. 
- pub fn get_record_to_redo(&self) -> Result, sqlx::Error> { + pub async fn get_record_to_redo(&self) -> Result, sqlx::Error> { // DatabaseError - todo!(); + // Look in the history table, get the row with the greatest ID, get the row number, + // from, and to, and determine whether the last operation was a delete, insert, or update. + let is_not_clause = if self.pool.any_kind() == AnyKind::Sqlite { + "IS NOT" + } else { + "IS DISTINCT FROM" + }; + let sql = format!( + r#"SELECT * FROM "history" + WHERE "undone_by" {} NULL + ORDER BY "timestamp" DESC LIMIT 1"#, + is_not_clause + ); + let query = sqlx_query(&sql); + let result_row = query.fetch_optional(&self.pool).await?; + Ok(result_row) } /// Undo one change and return the change record /// or None if there was no change to undo. /// Return an error on database problem. - pub fn undo(&self) -> Result, sqlx::Error> { + pub async fn undo(&self) -> Result, sqlx::Error> { // DatabaseError - todo!(); + let last_change = match self.get_record_to_undo().await? 
{ + None => { + valve_log!("WARN: Nothing to undo."); + return Ok(None); + } + Some(r) => r, + }; + let history_id: i32 = last_change.get("history_id"); + let history_id = history_id as u16; + let table: &str = last_change.get("table"); + let row_number: i64 = last_change.get("row"); + let row_number = row_number as u32; + let from = get_json_from_row(&last_change, "from"); + let to = get_json_from_row(&last_change, "to"); + + match (from, to) { + (None, None) => { + return Err(SqlxCErr( + "Cannot redo unknown operation from None to None".into(), + )) + } + (None, Some(_)) => { + // Undo an insert: + let mut tx = self.pool.begin().await?; + + delete_row_tx( + &self.global_config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + &mut tx, + table, + &row_number, + ) + .await?; + + switch_undone_state(&self.user, history_id, true, &mut tx, &self.pool).await?; + tx.commit().await?; + Ok(None) + } + (Some(from), None) => { + // Undo a delete: + let mut tx = self.pool.begin().await?; + + insert_new_row_tx( + &self.global_config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + &mut tx, + table, + &from, + Some(row_number), + false, + ) + .await?; + + switch_undone_state(&self.user, history_id, true, &mut tx, &self.pool).await?; + tx.commit().await?; + Ok(Some(from)) + } + (Some(from), Some(_)) => { + // Undo an an update: + let mut tx = self.pool.begin().await?; + + update_row_tx( + &self.global_config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + &mut tx, + table, + &from, + &row_number, + false, + false, + ) + .await?; + + switch_undone_state(&self.user, history_id, true, &mut tx, &self.pool).await?; + tx.commit().await?; + Ok(Some(from)) + } + } } /// Redo one change and return the change record /// or None if there was no change to redo. /// Return an error on database problem. 
- pub fn redo(&self) -> Result, sqlx::Error> { + pub async fn redo(&self) -> Result, sqlx::Error> { // DatabaseError - todo!(); - } -} + let last_undo = match self.get_record_to_redo().await? { + None => { + valve_log!("WARN: Nothing to redo."); + return Ok(None); + } + Some(last_undo) => { + let undone_by = last_undo.try_get_raw("undone_by")?; + if undone_by.is_null() { + valve_log!("WARN: Nothing to redo."); + return Ok(None); + } + last_undo + } + }; + let history_id: i32 = last_undo.get("history_id"); + let history_id = history_id as u16; + let table: &str = last_undo.get("table"); + let row_number: i64 = last_undo.get("row"); + let row_number = row_number as u32; + let from = get_json_from_row(&last_undo, "from"); + let to = get_json_from_row(&last_undo, "to"); -/// Represents a structure such as those found in the `structure` column of the `column` table in -/// both its parsed format (i.e., as an [Expression](ast/enum.Expression.html)) as well as in its -/// original format (i.e., as a plain String). -#[derive(Clone)] -pub struct ParsedStructure { - pub original: String, - pub parsed: Expression, -} + match (from, to) { + (None, None) => { + return Err(SqlxCErr( + "Cannot redo unknown operation from None to None".into(), + )) + } + (None, Some(to)) => { + // Redo an insert: + let mut tx = self.pool.begin().await?; + + insert_new_row_tx( + &self.global_config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + &mut tx, + table, + &to, + Some(row_number), + false, + ) + .await?; -// We use Debug here instead of Display because we have only implemented Debug for Expressions. -// See the comment about this in ast.rs. 
-impl std::fmt::Debug for ParsedStructure { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!( - f, - "{{\"parsed_structure\": {{\"original\": \"{}\", \"parsed\": {:?}}}}}", - &self.original, &self.parsed - ) - } -} + switch_undone_state(&self.user, history_id, false, &mut tx, &self.pool).await?; + tx.commit().await?; + Ok(Some(to)) + } + (Some(_), None) => { + // Redo a delete: + let mut tx = self.pool.begin().await?; + + delete_row_tx( + &self.global_config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + &mut tx, + table, + &row_number, + ) + .await?; -/// Represents a condition in three different ways: (i) in String format, (ii) as a parsed -/// [Expression](ast/enum.Expression.html), and (iii) as a pre-compiled regular expression. -#[derive(Clone)] -pub struct CompiledCondition { - pub original: String, - pub parsed: Expression, - pub compiled: Arc bool + Sync + Send>, -} + switch_undone_state(&self.user, history_id, false, &mut tx, &self.pool).await?; + tx.commit().await?; + Ok(None) + } + (Some(_), Some(to)) => { + // Redo an an update: + let mut tx = self.pool.begin().await?; + + update_row_tx( + &self.global_config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + &mut tx, + table, + &to, + &row_number, + false, + false, + ) + .await?; -// We use Debug here instead of Display because we have only implemented Debug for Expressions. -// See the comment about this in ast.rs. 
-impl std::fmt::Debug for CompiledCondition { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!( - f, - "{{\"compiled_condition\": {{\"original\": \"{}\", \"parsed\": {:?}}}}}", - &self.original, &self.parsed - ) + switch_undone_state(&self.user, history_id, false, &mut tx, &self.pool).await?; + tx.commit().await?; + Ok(Some(to)) + } + } } -} -/// Represents a 'when-then' condition, as found in the `rule` table, as two -/// [CompiledCondition](struct.CompiledCondition.html) structs corresponding to the when and then -/// parts of the given rule. -#[derive(Clone)] -pub struct ColumnRule { - pub when: CompiledCondition, - pub then: CompiledCondition, -} + /// Given a config map, a map of compiled datatype conditions, a database connection pool, a + /// table name, a column name, and (optionally) a string to match, return a JSON array of + /// possible valid values for the given column which contain the matching string as a substring + /// (or all of them if no matching string is given). The JSON array returned is formatted for + /// Typeahead, i.e., it takes the form: `[{"id": id, "label": label, "order": order}, ...]`. + pub async fn get_matching_values( + &self, + table_name: &str, + column_name: &str, + matching_string: Option<&str>, + ) -> Result { + let config = &self.global_config; + let compiled_datatype_conditions = &self.compiled_datatype_conditions; + let parsed_structure_conditions = &self.parsed_structure_conditions; + let pool = &self.pool; + let dt_name = config + .get("table") + .and_then(|t| t.as_object()) + .and_then(|t| t.get(table_name)) + .and_then(|t| t.as_object()) + .and_then(|t| t.get("column")) + .and_then(|c| c.as_object()) + .and_then(|c| c.get(column_name)) + .and_then(|c| c.as_object()) + .and_then(|c| c.get("datatype")) + .and_then(|d| d.as_str()) + .unwrap(); -// We use Debug here instead of Display because we have only implemented Debug for Expressions. -// See the comment about this in ast.rs. 
-impl std::fmt::Debug for ColumnRule { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!( - f, - "{{\"column_rule\": {{\"when\": {:?}, \"then\": {:?}}}}}", - &self.when, &self.then - ) + let dt_condition = compiled_datatype_conditions + .get(dt_name) + .and_then(|d| Some(d.parsed.clone())); + + let mut values = vec![]; + match dt_condition { + Some(Expression::Function(name, args)) if name == "in" => { + for arg in args { + if let Expression::Label(arg) = *arg { + // Remove the enclosing quotes from the values being returned: + let label = unquote(&arg).unwrap_or_else(|_| arg); + if let Some(s) = matching_string { + if label.contains(s) { + values.push(label); + } + } + } + } + } + _ => { + // If the datatype for the column does not correspond to an `in(...)` function, then + // we check the column's structure constraints. If they include a + // `from(foreign_table.foreign_column)` condition, then the values are taken from + // the foreign column. Otherwise if the structure includes an + // `under(tree_table.tree_column, value)` condition, then get the values from the + // tree column that are under `value`. 
+ let structure = parsed_structure_conditions.get( + config + .get("table") + .and_then(|t| t.as_object()) + .and_then(|t| t.get(table_name)) + .and_then(|t| t.as_object()) + .and_then(|t| t.get("column")) + .and_then(|c| c.as_object()) + .and_then(|c| c.get(column_name)) + .and_then(|c| c.as_object()) + .and_then(|c| c.get("structure")) + .and_then(|d| d.as_str()) + .unwrap_or_else(|| ""), + ); + + let sql_type = + get_sql_type_from_global_config(&config, table_name, &column_name, &pool) + .unwrap(); + + match structure { + Some(ParsedStructure { original, parsed }) => { + let matching_string = { + match matching_string { + None => "%".to_string(), + Some(s) => format!("%{}%", s), + } + }; + + match parsed { + Expression::Function(name, args) if name == "from" => { + let foreign_key = &args[0]; + if let Expression::Field(ftable, fcolumn) = &**foreign_key { + let fcolumn_text = cast_column_sql_to_text(&fcolumn, &sql_type); + let sql = local_sql_syntax( + &pool, + &format!( + r#"SELECT "{}" FROM "{}" WHERE {} LIKE {}"#, + fcolumn, ftable, fcolumn_text, SQL_PARAM + ), + ); + let rows = sqlx_query(&sql) + .bind(&matching_string) + .fetch_all(pool) + .await?; + for row in rows.iter() { + values.push(get_column_value(&row, &fcolumn, &sql_type)); + } + } + } + Expression::Function(name, args) + if name == "under" || name == "tree" => + { + let mut tree_col = "not set"; + let mut under_val = Some("not set".to_string()); + if name == "under" { + if let Expression::Field(_, column) = &**&args[0] { + tree_col = column; + } + if let Expression::Label(label) = &**&args[1] { + under_val = Some(label.to_string()); + } + } else { + let tree_key = &args[0]; + if let Expression::Label(label) = &**tree_key { + tree_col = label; + under_val = None; + } + } + + let tree = config + .get("constraints") + .and_then(|c| c.as_object()) + .and_then(|c| c.get("tree")) + .and_then(|t| t.as_object()) + .and_then(|t| t.get(table_name)) + .and_then(|t| t.as_array()) + .and_then(|t| { + 
t.iter().find(|o| o.get("child").unwrap() == tree_col) + }) + .expect( + format!("No tree: '{}.{}' found", table_name, tree_col) + .as_str(), + ) + .as_object() + .unwrap(); + let child_column = + tree.get("child").and_then(|c| c.as_str()).unwrap(); + + let (tree_sql, mut params) = with_tree_sql( + &config, + tree, + &table_name.to_string(), + &table_name.to_string(), + under_val.as_ref(), + None, + &pool, + ); + let child_column_text = + cast_column_sql_to_text(&child_column, &sql_type); + let sql = local_sql_syntax( + &pool, + &format!( + r#"{} SELECT "{}" FROM "tree" WHERE {} LIKE {}"#, + tree_sql, child_column, child_column_text, SQL_PARAM + ), + ); + params.push(matching_string); + + let mut query = sqlx_query(&sql); + for param in ¶ms { + query = query.bind(param); + } + + let rows = query.fetch_all(pool).await?; + for row in rows.iter() { + values.push(get_column_value(&row, &child_column, &sql_type)); + } + } + _ => panic!("Unrecognised structure: {}", original), + }; + } + None => (), + }; + } + }; + + let mut typeahead_values = vec![]; + for (i, v) in values.iter().enumerate() { + // enumerate() begins at 0 but we need to begin at 1: + let i = i + 1; + typeahead_values.push(json!({ + "id": v, + "label": v, + "order": i, + })); + } + + Ok(json!(typeahead_values)) } } /// TODO: Add docstring here. -pub async fn get_pool_from_connection_string(database: &str) -> Result { +async fn get_pool_from_connection_string(database: &str) -> Result { let connection_options; if database.starts_with("postgresql://") { connection_options = AnyConnectOptions::from_str(database)?; @@ -1170,7 +1713,7 @@ pub async fn get_pool_from_connection_string(database: &str) -> Result HashMap { @@ -1779,7 +2321,7 @@ pub fn get_compiled_datatype_conditions( /// ... 
/// } /// ``` -pub fn get_compiled_rule_conditions( +fn get_compiled_rule_conditions( config: &SerdeMap, compiled_datatype_conditions: HashMap, parser: &StartParser, @@ -1857,7 +2399,7 @@ pub fn get_compiled_rule_conditions( /// Given the global config map and a parser, parse all of the structure conditions, add them to /// a hash map whose keys are given by the text versions of the conditions and whose values are /// given by the parsed versions, and finally return the hashmap. -pub fn get_parsed_structure_conditions( +fn get_parsed_structure_conditions( config: &SerdeMap, parser: &StartParser, ) -> HashMap { @@ -1899,7 +2441,6 @@ pub fn get_parsed_structure_conditions( parsed_structure_conditions } -// TODO: Modify this function so that it no longer returns the DROP statement, once you have // removed the old valve functions that require it. /// Given the name of a table and a database connection pool, generate SQL for creating a view /// based on the table that provides a unified representation of the normal and conflict versions @@ -1907,11 +2448,9 @@ pub fn get_parsed_structure_conditions( /// contained in the message and history tables. The SQL generated is in the form of a tuple of /// Strings, with the first string being a SQL statement for dropping the view, and the second /// string being a SQL statement for creating it. -fn get_sql_for_standard_view(table: &str, pool: &AnyPool) -> (String, String) { - let mut drop_view_sql = format!(r#"DROP VIEW IF EXISTS "{}_view""#, table); +fn get_sql_for_standard_view(table: &str, pool: &AnyPool) -> String { let message_t; if pool.any_kind() == AnyKind::Postgres { - drop_view_sql.push_str(" CASCADE"); message_t = format!( indoc! 
{r#" ( @@ -1951,7 +2490,6 @@ fn get_sql_for_standard_view(table: &str, pool: &AnyPool) -> (String, String) { t = table, ); } - drop_view_sql.push_str(";"); let history_t; if pool.any_kind() == AnyKind::Postgres { @@ -2010,11 +2548,9 @@ fn get_sql_for_standard_view(table: &str, pool: &AnyPool) -> (String, String) { history_t = history_t, ); - (drop_view_sql, create_view_sql) + create_view_sql } -// TODO: Modify this function so that it no longer returns the DROP statement, once you have -// removed the old valve functions that require it. /// Given the tables configuration map, the name of a table and a database connection pool, /// generate SQL for creating a more user-friendly version of the view than the one generated by /// [get_sql_for_standard_view()]. Unlike the standard view generated by that function, the view @@ -2023,11 +2559,7 @@ fn get_sql_for_standard_view(table: &str, pool: &AnyPool) -> (String, String) { /// errors. Like the function for generating a standard view, the SQL generated by this function is /// returned in the form of a tuple of Strings, with the first string being a SQL statement /// for dropping the view, and the second string being a SQL statement for creating it. -fn get_sql_for_text_view( - tables_config: &SerdeMap, - table: &str, - pool: &AnyPool, -) -> (String, String) { +fn get_sql_for_text_view(tables_config: &SerdeMap, table: &str, pool: &AnyPool) -> String { let is_clause = if pool.any_kind() == AnyKind::Sqlite { "IS" } else { @@ -2047,11 +2579,6 @@ fn get_sql_for_text_view( // Add a second "text view" such that the datatypes of all values are TEXT and appear // directly in their corresponsing columns (rather than as NULLs) even when they have // SQL datatype errors. 
- let mut drop_view_sql = format!(r#"DROP VIEW IF EXISTS "{}_text_view""#, table); - if pool.any_kind() == AnyKind::Postgres { - drop_view_sql.push_str(" CASCADE"); - } - let mut inner_columns = real_columns .iter() .map(|c| { @@ -2116,485 +2643,80 @@ fn get_sql_for_text_view( table = table, ); - (drop_view_sql, create_view_sql) + create_view_sql } -// TODO: Remove this function once it has been factored. -/// Given config maps for tables and datatypes, a database connection pool, and a StartParser, -/// read in the TSV files corresponding to the tables defined in the tables config, and use that -/// information to fill in constraints information into a new config map that is then returned along -/// with a list of the tables in the database sorted according to their mutual dependencies. If -/// the flag `verbose` is set to true, emit SQL to create the database schema to STDOUT. -/// If `command` is set to [ValveCommand::Create], execute the SQL statements to create the -/// database using the given connection pool. If it is set to [ValveCommand::Load], execute the SQL -/// to load it as well. 
-pub async fn configure_db( - tables_config: &mut SerdeMap, - datatypes_config: &mut SerdeMap, - pool: &AnyPool, - parser: &StartParser, - verbose: bool, - command: &ValveCommand, -) -> Result<(Vec, SerdeMap), sqlx::Error> { - // This is the SerdeMap that we will be returning: - let mut constraints_config = SerdeMap::new(); - constraints_config.insert(String::from("foreign"), SerdeValue::Object(SerdeMap::new())); - constraints_config.insert(String::from("unique"), SerdeValue::Object(SerdeMap::new())); - constraints_config.insert(String::from("primary"), SerdeValue::Object(SerdeMap::new())); - constraints_config.insert(String::from("tree"), SerdeValue::Object(SerdeMap::new())); - constraints_config.insert(String::from("under"), SerdeValue::Object(SerdeMap::new())); - - // Begin by reading in the TSV files corresponding to the tables defined in tables_config, and - // use that information to create the associated database tables, while saving constraint - // information to constrains_config. - let mut setup_statements = HashMap::new(); - for table_name in tables_config.keys().cloned().collect::>() { - let optional_path = tables_config - .get(&table_name) - .and_then(|r| r.get("path")) - .and_then(|p| p.as_str()); - - let mut path = None; - match optional_path { - None => { - // If an entry of the tables_config has no path then it is an internal table which - // need not be configured explicitly. Currently the only examples are the message - // and history tables. 
- if table_name != "message" && table_name != "history" { - panic!("No path defined for table {}", table_name); - } - continue; - } - Some(p) if !Path::new(p).is_file() => { - valve_log!("WARN: File does not exist {}", p); - } - Some(p) if Path::new(p).canonicalize().is_err() => { - valve_log!("WARN: File path could not be made canonical {}", p); - } - Some(p) => path = Some(p.to_string()), - }; +/// Given a table name, a column name, and a database pool, construct an SQL string to extract the +/// value of the column, such that when the value of a given column is null, the query attempts to +/// extract it from the message table. Returns a String representing the SQL to retrieve the value +/// of the column. +fn query_column_with_message_value(table: &str, column: &str, pool: &AnyPool) -> String { + let is_clause = if pool.any_kind() == AnyKind::Sqlite { + "IS" + } else { + "IS NOT DISTINCT FROM" + }; - let defined_columns: Vec = tables_config - .get(&table_name) - .and_then(|r| r.get("column")) - .and_then(|v| v.as_object()) - .and_then(|o| Some(o.keys())) - .and_then(|k| Some(k.cloned())) - .and_then(|k| Some(k.collect())) - .unwrap(); + format!( + r#"CASE + WHEN "{column}" {is_clause} NULL THEN ( + SELECT value + FROM "message" + WHERE "row" = "row_number" + AND "column" = '{column}' + AND "table" = '{table}' + ORDER BY "message_id" DESC + LIMIT 1 + ) + ELSE {casted_column} + END AS "{column}""#, + casted_column = if pool.any_kind() == AnyKind::Sqlite { + cast_column_sql_to_text(column, "non-text") + } else { + format!("\"{}\"::TEXT", column) + }, + column = column, + table = table, + ) +} - // We use column_order to explicitly indicate the order in which the columns should appear - // in the table, for later reference. The default is to preserve the order from the actual - // table file. If that does not exist, we use the ordering in defined_columns. - let mut column_order = vec![]; - if let Some(path) = path { - // Get the actual columns from the data itself. 
Note that we set has_headers to - // false(even though the files have header rows) in order to explicitly read the - // header row. - let mut rdr = csv::ReaderBuilder::new() - .has_headers(false) - .delimiter(b'\t') - .from_reader(File::open(path.clone()).unwrap_or_else(|err| { - panic!("Unable to open '{}': {}", path.clone(), err); - })); - let mut iter = rdr.records(); - if let Some(result) = iter.next() { - let actual_columns = result - .unwrap() - .iter() - .map(|c| c.to_string()) - .collect::>(); - // Make sure that the actual columns found in the table file, and the columns - // defined in the column config, exactly match in terms of their content: - for column_name in &actual_columns { - column_order.push(json!(column_name)); - if !defined_columns.contains(&column_name.to_string()) { - panic!( - "Column '{}.{}' not in column config", - table_name, column_name - ); - } - } - for column_name in &defined_columns { - if !actual_columns.contains(&column_name.to_string()) { - panic!( - "Defined column '{}.{}' not found in table", - table_name, column_name - ); - } - } - } else { - panic!("'{}' is empty", path); - } - } +/// Given a table name, a global configuration map, and a database connection pool, construct an +/// SQL query that one can use to get the logical contents of the table, such that when the value +/// of a given column is null, the query attempts to extract it from the message table. Returns a +/// String representing the query. 
+fn query_with_message_values(table: &str, global_config: &SerdeMap, pool: &AnyPool) -> String { + let real_columns = global_config + .get("table") + .and_then(|t| t.get(table)) + .and_then(|t| t.as_object()) + .and_then(|t| t.get("column")) + .and_then(|t| t.as_object()) + .and_then(|t| Some(t.keys())) + .and_then(|k| Some(k.map(|k| k.to_string()))) + .and_then(|t| Some(t.collect::>())) + .unwrap(); - if column_order.is_empty() { - column_order = defined_columns.iter().map(|c| json!(c)).collect::>(); - } - tables_config - .get_mut(&table_name) - .and_then(|t| t.as_object_mut()) - .and_then(|o| { - o.insert( - String::from("column_order"), - SerdeValue::Array(column_order), - ) - }); + let mut inner_columns = real_columns + .iter() + .map(|column| query_column_with_message_value(table, column, pool)) + .collect::>(); - // Create the table and its corresponding conflict table: - let mut table_statements = vec![]; - for table in vec![table_name.to_string(), format!("{}_conflict", table_name)] { - let (mut statements, table_constraints) = - create_table_statement(tables_config, datatypes_config, parser, &table, &pool); - table_statements.append(&mut statements); - if !table.ends_with("_conflict") { - for constraint_type in vec!["foreign", "unique", "primary", "tree", "under"] { - let table_constraints = table_constraints.get(constraint_type).unwrap().clone(); - constraints_config - .get_mut(constraint_type) - .and_then(|o| o.as_object_mut()) - .and_then(|o| o.insert(table_name.to_string(), table_constraints)); - } - } - } + let mut outer_columns = real_columns + .iter() + .map(|c| format!("t.\"{}\"", c)) + .collect::>(); - let (drop_view_sql, create_view_sql) = get_sql_for_standard_view(&table_name, pool); - let (drop_text_view_sql, create_text_view_sql) = - get_sql_for_text_view(tables_config, &table_name, pool); - table_statements.push(drop_text_view_sql); - table_statements.push(drop_view_sql); - table_statements.push(create_view_sql); - 
table_statements.push(create_text_view_sql); + let inner_columns = { + let mut v = vec!["row_number".to_string(), "message".to_string()]; + v.append(&mut inner_columns); + v + }; - setup_statements.insert(table_name.to_string(), table_statements); - } - - // Sort the tables according to their foreign key dependencies so that tables are always loaded - // after the tables they depend on. Ignore the internal message and history tables: - let sorted_tables = verify_table_deps_and_sort( - &setup_statements.keys().cloned().collect(), - &constraints_config, - ); - - if *command != ValveCommand::Config || verbose { - // Generate DDL for the history table: - let mut history_statements = vec![]; - history_statements.push({ - let mut sql = r#"DROP TABLE IF EXISTS "history""#.to_string(); - if pool.any_kind() == AnyKind::Postgres { - sql.push_str(" CASCADE"); - } - sql.push_str(";"); - sql - }); - history_statements.push(format!( - indoc! {r#" - CREATE TABLE "history" ( - {row_number} - "table" TEXT, - "row" BIGINT, - "from" TEXT, - "to" TEXT, - "summary" TEXT, - "user" TEXT, - "undone_by" TEXT, - {timestamp} - ); - "#}, - row_number = { - if pool.any_kind() == AnyKind::Sqlite { - "\"history_id\" INTEGER PRIMARY KEY," - } else { - "\"history_id\" SERIAL PRIMARY KEY," - } - }, - timestamp = { - if pool.any_kind() == AnyKind::Sqlite { - "\"timestamp\" TIMESTAMP DEFAULT(STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW'))" - } else { - "\"timestamp\" TIMESTAMP DEFAULT CURRENT_TIMESTAMP" - } - }, - )); - history_statements - .push(r#"CREATE INDEX "history_tr_idx" ON "history"("table", "row");"#.to_string()); - setup_statements.insert("history".to_string(), history_statements); - - // Generate DDL for the message table: - let mut message_statements = vec![]; - message_statements.push({ - let mut sql = r#"DROP TABLE IF EXISTS "message""#.to_string(); - if pool.any_kind() == AnyKind::Postgres { - sql.push_str(" CASCADE"); - } - sql.push_str(";"); - sql - }); - message_statements.push(format!( - 
indoc! {r#" - CREATE TABLE "message" ( - {} - "table" TEXT, - "row" BIGINT, - "column" TEXT, - "value" TEXT, - "level" TEXT, - "rule" TEXT, - "message" TEXT - ); - "#}, - { - if pool.any_kind() == AnyKind::Sqlite { - "\"message_id\" INTEGER PRIMARY KEY," - } else { - "\"message_id\" SERIAL PRIMARY KEY," - } - }, - )); - message_statements.push( - r#"CREATE INDEX "message_trc_idx" ON "message"("table", "row", "column");"#.to_string(), - ); - setup_statements.insert("message".to_string(), message_statements); - - // Add the message and history tables to the beginning of the list of tables to create - // (the message table in particular needs to be at the beginning since the table views all - // reference it). - let mut tables_to_create = vec!["message".to_string(), "history".to_string()]; - tables_to_create.append(&mut sorted_tables.clone()); - for table in &tables_to_create { - let table_statements = setup_statements.get(table).unwrap(); - if *command != ValveCommand::Config { - for stmt in table_statements { - sqlx_query(stmt) - .execute(pool) - .await - .expect(format!("The SQL statement: {} returned an error", stmt).as_str()); - } - } - if verbose { - let output = String::from(table_statements.join("\n")); - println!("{}\n", output); - } - } - } - - return Ok((sorted_tables, constraints_config)); -} - -/// Various VALVE commands, used with [valve()](valve). -#[derive(Debug, PartialEq, Eq)] -pub enum ValveCommand { - /// Configure but do not create or load. - Config, - /// Configure and create but do not load. - Create, - /// Configure, create, and load. - Load, -} - -/// Given a path to a configuration table (either a table.tsv file or a database containing a -/// table named "table"), and a directory in which to find/create a database: configure the -/// database using the configuration which can be looked up using the table table, and -/// optionally create and/or load it according to the value of `command` (see [ValveCommand]). 
-/// If the `verbose` flag is set to true, output status messages while loading. If `config_table` -/// is given and `table_table` indicates a database, query the table called `config_table` for the -/// table table information. Returns the configuration map as a String. If `initial_load` is set to -/// true, then (SQLite only) the database settings will be tuned for initial loading. Note that -/// these settings are unsafe and should be used for initial loading only, as data integrity will -/// not be guaranteed in the case of an interrupted transaction. -pub async fn valve( - table_table: &str, - database: &str, - command: &ValveCommand, - verbose: bool, - initial_load: bool, - config_table: &str, -) -> Result { - // To connect to a postgresql database listening to a unix domain socket: - // ---------------------------------------------------------------------- - // let connection_options = - // AnyConnectOptions::from_str("postgres:///testdb?host=/var/run/postgresql")?; - // - // To query the connection type at runtime via the pool: - // ----------------------------------------------------- - // let db_type = pool.any_kind(); - - let connection_options; - if database.starts_with("postgresql://") { - connection_options = AnyConnectOptions::from_str(database)?; - } else { - let connection_string; - if !database.starts_with("sqlite://") { - connection_string = format!("sqlite://{}?mode=rwc", database); - } else { - connection_string = database.to_string(); - } - connection_options = AnyConnectOptions::from_str(connection_string.as_str()).unwrap(); - } - - let pool = AnyPoolOptions::new() - .max_connections(5) - .connect_with(connection_options) - .await?; - - let parser = StartParser::new(); - - let (specials_config, mut tables_config, mut datatypes_config, rules_config, _, _) = - read_config_files(&table_table.to_string(), config_table, &parser, &pool); - - if *command == ValveCommand::Load && pool.any_kind() == AnyKind::Sqlite { - sqlx_query("PRAGMA foreign_keys 
= ON") - .execute(&pool) - .await?; - if initial_load { - // These pragmas are unsafe but they are used during initial loading since data - // integrity is not a priority in this case. - sqlx_query("PRAGMA journal_mode = OFF") - .execute(&pool) - .await?; - sqlx_query("PRAGMA synchronous = 0").execute(&pool).await?; - sqlx_query("PRAGMA cache_size = 1000000") - .execute(&pool) - .await?; - sqlx_query("PRAGMA temp_store = MEMORY") - .execute(&pool) - .await?; - } - } - - let (sorted_table_list, constraints_config) = configure_db( - &mut tables_config, - &mut datatypes_config, - &pool, - &parser, - verbose, - command, - ) - .await?; - - let mut config = SerdeMap::new(); - config.insert( - String::from("special"), - SerdeValue::Object(specials_config.clone()), - ); - config.insert( - String::from("table"), - SerdeValue::Object(tables_config.clone()), - ); - config.insert( - String::from("datatype"), - SerdeValue::Object(datatypes_config.clone()), - ); - config.insert( - String::from("rule"), - SerdeValue::Object(rules_config.clone()), - ); - config.insert( - String::from("constraints"), - SerdeValue::Object(constraints_config.clone()), - ); - let mut sorted_table_serdevalue_list: Vec = vec![]; - for table in &sorted_table_list { - sorted_table_serdevalue_list.push(SerdeValue::String(table.to_string())); - } - config.insert( - String::from("sorted_table_list"), - SerdeValue::Array(sorted_table_serdevalue_list), - ); - - let compiled_datatype_conditions = get_compiled_datatype_conditions(&config, &parser); - let compiled_rule_conditions = - get_compiled_rule_conditions(&config, compiled_datatype_conditions.clone(), &parser); - - if *command == ValveCommand::Load { - if verbose { - valve_log!("Processing {} tables.", sorted_table_list.len()); - } - load_db( - &config, - &pool, - &compiled_datatype_conditions, - &compiled_rule_conditions, - verbose, - ) - .await?; - } - - let config = SerdeValue::Object(config); - Ok(config.to_string()) -} - -/// Given a table name, a 
column name, and a database pool, construct an SQL string to extract the -/// value of the column, such that when the value of a given column is null, the query attempts to -/// extract it from the message table. Returns a String representing the SQL to retrieve the value -/// of the column. -pub fn query_column_with_message_value(table: &str, column: &str, pool: &AnyPool) -> String { - let is_clause = if pool.any_kind() == AnyKind::Sqlite { - "IS" - } else { - "IS NOT DISTINCT FROM" - }; - - format!( - r#"CASE - WHEN "{column}" {is_clause} NULL THEN ( - SELECT value - FROM "message" - WHERE "row" = "row_number" - AND "column" = '{column}' - AND "table" = '{table}' - ORDER BY "message_id" DESC - LIMIT 1 - ) - ELSE {casted_column} - END AS "{column}""#, - casted_column = if pool.any_kind() == AnyKind::Sqlite { - cast_column_sql_to_text(column, "non-text") - } else { - format!("\"{}\"::TEXT", column) - }, - column = column, - table = table, - ) -} - -/// Given a table name, a global configuration map, and a database connection pool, construct an -/// SQL query that one can use to get the logical contents of the table, such that when the value -/// of a given column is null, the query attempts to extract it from the message table. Returns a -/// String representing the query. 
-pub fn query_with_message_values(table: &str, global_config: &SerdeMap, pool: &AnyPool) -> String { - let real_columns = global_config - .get("table") - .and_then(|t| t.get(table)) - .and_then(|t| t.as_object()) - .and_then(|t| t.get("column")) - .and_then(|t| t.as_object()) - .and_then(|t| Some(t.keys())) - .and_then(|k| Some(k.map(|k| k.to_string()))) - .and_then(|t| Some(t.collect::>())) - .unwrap(); - - let mut inner_columns = real_columns - .iter() - .map(|column| query_column_with_message_value(table, column, pool)) - .collect::>(); - - let mut outer_columns = real_columns - .iter() - .map(|c| format!("t.\"{}\"", c)) - .collect::>(); - - let inner_columns = { - let mut v = vec!["row_number".to_string(), "message".to_string()]; - v.append(&mut inner_columns); - v - }; - - let outer_columns = { - let mut v = vec!["t.row_number".to_string(), "t.message".to_string()]; - v.append(&mut outer_columns); - v - }; + let outer_columns = { + let mut v = vec!["t.row_number".to_string(), "t.message".to_string()]; + v.append(&mut outer_columns); + v + }; format!( r#"SELECT {outer_columns} @@ -2612,7 +2734,7 @@ pub fn query_with_message_values(table: &str, global_config: &SerdeMap, pool: &A /// column name, and a value for that column: get the rows, other than the one indicated by /// `except`, that would need to be revalidated if the given value were to replace the actual /// value of the column in that row. -pub async fn get_affected_rows( +async fn get_affected_rows( table: &str, column: &str, value: &str, @@ -2678,7 +2800,7 @@ pub async fn get_affected_rows( /// Given a global configuration map, a database connection pool, a database transaction, a table /// name and a row number, get the logical contents of that row (whether or not it is valid), /// including any messages, from the database. 
-pub async fn get_row_from_db( +async fn get_row_from_db( global_config: &SerdeMap, pool: &AnyPool, tx: &mut Transaction<'_, sqlx::Any>, @@ -2752,7 +2874,7 @@ pub async fn get_row_from_db( /// Given a database connection pool, a database transaction, a table name, a column name, and a row /// number, get the current value of the given column in the database. -pub async fn get_db_value( +async fn get_db_value( table: &str, column: &str, row_number: &u32, @@ -2811,7 +2933,7 @@ pub async fn get_db_value( /// and a [QueryAsIf] struct representing a custom modification to the query of the table, get /// the rows that will potentially be affected by the database change to the row indicated in /// query_as_if. -pub async fn get_rows_to_update( +async fn get_rows_to_update( global_config: &SerdeMap, pool: &AnyPool, tx: &mut Transaction<'_, sqlx::Any>, @@ -3026,7 +3148,7 @@ pub async fn get_rows_to_update( /// a database transaction, a number of updates to process, a [QueryAsIf] struct indicating how /// we should modify 'in thought' the current state of the database, and a flag indicating whether /// we should allow recursive updates, validate and then update each row indicated in `updates`. -pub async fn process_updates( +async fn process_updates( global_config: &SerdeMap, compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, @@ -3039,7 +3161,7 @@ pub async fn process_updates( for (update_table, rows_to_update) in updates { for (row_number, row) in rows_to_update { // Validate each row 'counterfactually': - let vrow = validate_row( + let vrow = validate_row_tx( global_config, compiled_datatype_conditions, compiled_rule_conditions, @@ -3075,7 +3197,7 @@ pub async fn process_updates( /// are going to change it from, optionally: the version of the row we are going to change it to, /// and the name of the user making the change, record the change to the history table in the /// database. Note that `from` and `to` cannot both be None. 
-pub async fn record_row_change( +async fn record_row_change( tx: &mut Transaction<'_, sqlx::Any>, table: &str, row_number: &u32, @@ -3239,281 +3361,42 @@ async fn switch_undone_state( Ok(()) } -/// Given a database pool fetch the last row inserted to the history table that has not been undone. -pub async fn get_record_to_undo(pool: &AnyPool) -> Result, sqlx::Error> { - // Look in the history table, get the row with the greatest ID, get the row number, - // from, and to, and determine whether the last operation was a delete, insert, or update. - let is_clause = if pool.any_kind() == AnyKind::Sqlite { - "IS" - } else { - "IS NOT DISTINCT FROM" - }; - let sql = format!( - r#"SELECT * FROM "history" - WHERE "undone_by" {} NULL - ORDER BY "history_id" DESC LIMIT 1"#, - is_clause - ); - let query = sqlx_query(&sql); - let result_row = query.fetch_optional(pool).await?; - Ok(result_row) -} +/// Given a global config map and a table name, return a list of the columns from the table +/// that may potentially result in database conflicts. +fn get_conflict_columns(global_config: &SerdeMap, table_name: &str) -> Vec { + let mut conflict_columns = vec![]; + let primaries = global_config + .get("constraints") + .and_then(|c| c.as_object()) + .and_then(|c| c.get("primary")) + .and_then(|t| t.as_object()) + .and_then(|t| t.get(table_name)) + .and_then(|t| t.as_array()) + .unwrap(); -/// Given a database pool fetch the row in the history table that has been most recently marked as -/// undone. -pub async fn get_record_to_redo(pool: &AnyPool) -> Result, sqlx::Error> { - // Look in the history table, get the row with the greatest ID, get the row number, - // from, and to, and determine whether the last operation was a delete, insert, or update. 
- let is_not_clause = if pool.any_kind() == AnyKind::Sqlite { - "IS NOT" - } else { - "IS DISTINCT FROM" - }; - let sql = format!( - r#"SELECT * FROM "history" - WHERE "undone_by" {} NULL - ORDER BY "timestamp" DESC LIMIT 1"#, - is_not_clause - ); - let query = sqlx_query(&sql); - let result_row = query.fetch_optional(pool).await?; - Ok(result_row) -} + let uniques = global_config + .get("constraints") + .and_then(|c| c.as_object()) + .and_then(|c| c.get("unique")) + .and_then(|t| t.as_object()) + .and_then(|t| t.get(table_name)) + .and_then(|t| t.as_array()) + .unwrap(); -/// Given a global configuration map, maps of compiled datatype and ruled conditions, a database -/// connection pool, and the user who initiated the undo, find the last recorded change to the -/// database and undo it, indicating in the history table that undo_user is responsible. -#[async_recursion] -pub async fn undo( - global_config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, - undo_user: &str, -) -> Result<(), sqlx::Error> { - let last_change = match get_record_to_undo(pool).await? 
{ - None => { - valve_log!("WARN: Nothing to undo."); - return Ok(()); - } - Some(r) => r, - }; - let history_id: i32 = last_change.get("history_id"); - let history_id = history_id as u16; - let table: &str = last_change.get("table"); - let row_number: i64 = last_change.get("row"); - let row_number = row_number as u32; - let from = get_json_from_row(&last_change, "from"); - let to = get_json_from_row(&last_change, "to"); - - match (from, to) { - (None, None) => { - return Err(SqlxCErr( - "Cannot redo unknown operation from None to None".into(), - )) - } - (None, Some(_)) => { - // Undo an insert: - let mut tx = pool.begin().await?; - - delete_row_tx( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &mut tx, - table, - &row_number, - ) - .await?; - - switch_undone_state(undo_user, history_id, true, &mut tx, pool).await?; - tx.commit().await?; - } - (Some(from), None) => { - // Undo a delete: - let mut tx = pool.begin().await?; - - insert_new_row_tx( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &mut tx, - table, - &from, - Some(row_number), - false, - ) - .await?; - - switch_undone_state(undo_user, history_id, true, &mut tx, pool).await?; - tx.commit().await?; - } - (Some(from), Some(_)) => { - // Undo an an update: - let mut tx = pool.begin().await?; - - update_row_tx( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &mut tx, - table, - &from, - &row_number, - false, - false, - ) - .await?; - - switch_undone_state(undo_user, history_id, true, &mut tx, pool).await?; - tx.commit().await?; - } - } - Ok(()) -} - -/// Given a global configuration map, maps of compiled datatype and ruled conditions, a database -/// connection pool, and the user who initiated the redo, find the last recorded change to the -/// database that was undone and redo it, indicating in the history table that redo_user is -/// responsible for the redo. 
-#[async_recursion] -pub async fn redo( - global_config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, - redo_user: &str, -) -> Result<(), sqlx::Error> { - let last_undo = match get_record_to_redo(pool).await? { - None => { - valve_log!("WARN: Nothing to redo."); - return Ok(()); - } - Some(last_undo) => { - let undone_by = last_undo.try_get_raw("undone_by")?; - if undone_by.is_null() { - valve_log!("WARN: Nothing to redo."); - return Ok(()); - } - last_undo - } - }; - let history_id: i32 = last_undo.get("history_id"); - let history_id = history_id as u16; - let table: &str = last_undo.get("table"); - let row_number: i64 = last_undo.get("row"); - let row_number = row_number as u32; - let from = get_json_from_row(&last_undo, "from"); - let to = get_json_from_row(&last_undo, "to"); - - match (from, to) { - (None, None) => { - return Err(SqlxCErr( - "Cannot redo unknown operation from None to None".into(), - )) - } - (None, Some(to)) => { - // Redo an insert: - let mut tx = pool.begin().await?; - - insert_new_row_tx( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &mut tx, - table, - &to, - Some(row_number), - false, - ) - .await?; - - switch_undone_state(redo_user, history_id, false, &mut tx, pool).await?; - tx.commit().await?; - } - (Some(_), None) => { - // Redo a delete: - let mut tx = pool.begin().await?; - - delete_row_tx( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &mut tx, - table, - &row_number, - ) - .await?; - - switch_undone_state(redo_user, history_id, false, &mut tx, pool).await?; - tx.commit().await?; - } - (Some(_), Some(to)) => { - // Redo an an update: - let mut tx = pool.begin().await?; - - update_row_tx( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &mut tx, - table, - &to, - &row_number, - false, - false, - ) - .await?; - - switch_undone_state(redo_user, 
history_id, false, &mut tx, pool).await?; - tx.commit().await?; - } - } - Ok(()) -} - -/// Given a global config map and a table name, return a list of the columns from the table -/// that may potentially result in database conflicts. -fn get_conflict_columns(global_config: &SerdeMap, table_name: &str) -> Vec { - let mut conflict_columns = vec![]; - let primaries = global_config - .get("constraints") - .and_then(|c| c.as_object()) - .and_then(|c| c.get("primary")) - .and_then(|t| t.as_object()) - .and_then(|t| t.get(table_name)) - .and_then(|t| t.as_array()) - .unwrap(); - - let uniques = global_config - .get("constraints") - .and_then(|c| c.as_object()) - .and_then(|c| c.get("unique")) - .and_then(|t| t.as_object()) - .and_then(|t| t.get(table_name)) - .and_then(|t| t.as_array()) - .unwrap(); - - // We take tree-children because these imply a unique database constraint on the corresponding - // column. - let tree_children = global_config - .get("constraints") - .and_then(|c| c.as_object()) - .and_then(|o| o.get("tree")) - .and_then(|t| t.as_object()) - .and_then(|o| o.get(table_name)) - .and_then(|t| t.as_array()) - .unwrap() - .iter() - .map(|v| v.as_object().unwrap()) - .map(|v| v.get("child").unwrap().clone()) - .collect::>(); + // We take tree-children because these imply a unique database constraint on the corresponding + // column. 
+ let tree_children = global_config + .get("constraints") + .and_then(|c| c.as_object()) + .and_then(|o| o.get("tree")) + .and_then(|t| t.as_object()) + .and_then(|o| o.get(table_name)) + .and_then(|t| t.as_array()) + .unwrap() + .iter() + .map(|v| v.as_object().unwrap()) + .map(|v| v.get("child").unwrap().clone()) + .collect::>(); let foreign_sources = global_config .get("constraints") @@ -3588,60 +3471,12 @@ fn is_sql_type_error(sql_type: &str, value: &str) -> bool { } } -/// A wrapper around [insert_new_row_tx()] in which the following steps are also performed: -/// - A database transaction is created and then committed once the given new row has been inserted. -/// - The row is validated before insertion and the update to the database is recorded to the -/// history table indicating that the given user is responsible for the change. -#[async_recursion] -pub async fn insert_new_row( - global_config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, - table: &str, - row: &ValveRow, - new_row_number: Option, - user: &str, -) -> Result { - let mut tx = pool.begin().await?; - - let row = validate_row( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - Some(&mut tx), - table, - row, - new_row_number, - None, - ) - .await?; - - let rn = insert_new_row_tx( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &mut tx, - table, - &row, - new_row_number, - true, - ) - .await?; - - record_row_change(&mut tx, table, &rn, None, Some(&row), user).await?; - tx.commit().await?; - Ok(rn) -} - /// Given a global config map, compiled datatype and rule conditions, a database connection pool, a /// database transaction, a table name, and a row, assign the given new row number to the row and /// insert it to the database using the given transaction, then return the new row number. 
-/// If skip_validation is set to true, omit the implicit call to [validate_row()]. +/// If skip_validation is set to true, omit the implicit call to [validate_row_tx()]. #[async_recursion] -pub async fn insert_new_row_tx( +async fn insert_new_row_tx( global_config: &SerdeMap, compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, @@ -3655,7 +3490,7 @@ pub async fn insert_new_row_tx( // Send the row through the row validator to determine if any fields are problematic and // to mark them with appropriate messages: let row = if !skip_validation { - validate_row( + validate_row_tx( global_config, compiled_datatype_conditions, compiled_rule_conditions, @@ -3843,43 +3678,10 @@ pub async fn insert_new_row_tx( Ok(new_row_number) } -/// A wrapper around [delete_row_tx()] in which the database transaction is implicitly created -/// and then committed once the given row has been deleted, and the change to the database is -/// recorded in the history table indicating that the given user is responsible for the change. -#[async_recursion] -pub async fn delete_row( - global_config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, - table: &str, - row_number: &u32, - user: &str, -) -> Result<(), sqlx::Error> { - let mut tx = pool.begin().await?; - - let row = get_row_from_db(global_config, pool, &mut tx, &table, row_number).await?; - record_row_change(&mut tx, &table, row_number, Some(&row), None, user).await?; - - delete_row_tx( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &mut tx, - table, - row_number, - ) - .await?; - - tx.commit().await?; - Ok(()) -} - /// Given a global config map, maps of datatype and rule conditions, a database connection pool, a /// database transaction, a table name, and a row number, delete the given row from the database. 
#[async_recursion] -pub async fn delete_row_tx( +async fn delete_row_tx( global_config: &SerdeMap, compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, @@ -3957,76 +3759,13 @@ pub async fn delete_row_tx( Ok(()) } -/// A wrapper around [update_row_tx()] in which the database transaction is implicitly created -/// and then committed once the given row has been updated, the given row is validated before -/// the update, and the update is recorded to the history table indicating that the given user -/// is responsible for the change. -#[async_recursion] -pub async fn update_row( - global_config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, - table_name: &str, - row: &ValveRow, - row_number: &u32, - user: &str, -) -> Result<(), sqlx::Error> { - let mut tx = pool.begin().await?; - - // Get the old version of the row from the database so that we can later record it to the - // history table: - let old_row = get_row_from_db(global_config, pool, &mut tx, table_name, row_number).await?; - - let row = validate_row( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - Some(&mut tx), - table_name, - row, - Some(*row_number), - None, - ) - .await?; - - update_row_tx( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &mut tx, - table_name, - &row, - row_number, - true, - false, - ) - .await?; - - // Record the row update in the history table: - record_row_change( - &mut tx, - table_name, - row_number, - Some(&old_row), - Some(&row), - user, - ) - .await?; - - tx.commit().await?; - Ok(()) -} - /// Given global config map, maps of compiled datatype and rule conditions, a database connection /// pool, a database transaction, a table name, a row, and the row number to update, update the /// corresponding row in the database. If skip_validation is set, skip the implicit call to -/// [validate_row()]. 
If do_not_recurse, is set, do not look for rows which could be affected by +/// [validate_row_tx()]. If do_not_recurse, is set, do not look for rows which could be affected by /// this update. #[async_recursion] -pub async fn update_row_tx( +async fn update_row_tx( global_config: &SerdeMap, compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, @@ -4075,7 +3814,7 @@ pub async fn update_row_tx( // Send the row through the row validator to determine if any fields are problematic and // to mark them with appropriate messages: let row = if !skip_validation { - validate_row( + validate_row_tx( global_config, compiled_datatype_conditions, compiled_rule_conditions, @@ -4412,7 +4151,7 @@ fn get_sql_type(dt_config: &SerdeMap, datatype: &String, pool: &AnyPool) -> Opti /// Given the global config map, a table name, a column name, and a database connection pool /// used to determine the database type return the column's SQL type. -pub fn get_sql_type_from_global_config( +fn get_sql_type_from_global_config( global_config: &SerdeMap, table: &str, column: &str, @@ -4700,316 +4439,6 @@ fn verify_table_deps_and_sort(table_list: &Vec, constraints: &SerdeMap) }; } -// TODO: Remove this function once it has been refactored -/// Given the config maps for tables and datatypes, and a table name, generate a SQL schema string, -/// including each column C and its matching C_meta column, then return the schema string as well as -/// a list of the table's constraints. 
-fn create_table_statement( - tables_config: &mut SerdeMap, - datatypes_config: &mut SerdeMap, - parser: &StartParser, - table_name: &String, - pool: &AnyPool, -) -> (Vec, SerdeValue) { - let mut drop_table_sql = format!(r#"DROP TABLE IF EXISTS "{}""#, table_name); - if pool.any_kind() == AnyKind::Postgres { - drop_table_sql.push_str(" CASCADE"); - } - drop_table_sql.push_str(";"); - let mut statements = vec![drop_table_sql]; - let mut create_lines = vec![ - format!(r#"CREATE TABLE "{}" ("#, table_name), - String::from(r#" "row_number" BIGINT,"#), - ]; - - let normal_table_name; - if let Some(s) = table_name.strip_suffix("_conflict") { - normal_table_name = String::from(s); - } else { - normal_table_name = table_name.to_string(); - } - - let column_names = tables_config - .get(&normal_table_name) - .and_then(|t| t.get("column_order")) - .and_then(|c| c.as_array()) - .unwrap() - .iter() - .map(|v| v.as_str().unwrap().to_string()) - .collect::>(); - - let columns = tables_config - .get(normal_table_name.as_str()) - .and_then(|c| c.as_object()) - .and_then(|o| o.get("column")) - .and_then(|c| c.as_object()) - .unwrap(); - - let mut table_constraints = json!({ - "foreign": [], - "unique": [], - "primary": [], - "tree": [], - "under": [], - }); - - let mut colvals: Vec = vec![]; - for column_name in &column_names { - let column = columns - .get(column_name) - .and_then(|c| c.as_object()) - .unwrap(); - colvals.push(column.clone()); - } - - let c = colvals.len(); - let mut r = 0; - for row in colvals { - r += 1; - let sql_type = get_sql_type( - datatypes_config, - &row.get("datatype") - .and_then(|d| d.as_str()) - .and_then(|s| Some(s.to_string())) - .unwrap(), - pool, - ); - - if let None = sql_type { - panic!("Missing SQL type for {}", row.get("datatype").unwrap()); - } - let sql_type = sql_type.unwrap(); - - let short_sql_type = { - if sql_type.to_lowercase().as_str().starts_with("varchar(") { - "VARCHAR" - } else { - &sql_type - } - }; - - if pool.any_kind() == 
AnyKind::Postgres { - if !PG_SQL_TYPES.contains(&short_sql_type.to_lowercase().as_str()) { - panic!( - "Unrecognized PostgreSQL SQL type '{}' for datatype: '{}'. \ - Accepted SQL types for PostgreSQL are: {}", - sql_type, - row.get("datatype").and_then(|d| d.as_str()).unwrap(), - PG_SQL_TYPES.join(", ") - ); - } - } else { - if !SL_SQL_TYPES.contains(&short_sql_type.to_lowercase().as_str()) { - panic!( - "Unrecognized SQLite SQL type '{}' for datatype '{}'. \ - Accepted SQL datatypes for SQLite are: {}", - sql_type, - row.get("datatype").and_then(|d| d.as_str()).unwrap(), - SL_SQL_TYPES.join(", ") - ); - } - } - - let column_name = row.get("column").and_then(|s| s.as_str()).unwrap(); - let mut line = format!(r#" "{}" {}"#, column_name, sql_type); - let structure = row.get("structure").and_then(|s| s.as_str()); - if let Some(structure) = structure { - if structure != "" && !table_name.ends_with("_conflict") { - let parsed_structure = parser.parse(structure).unwrap(); - for expression in parsed_structure { - match *expression { - Expression::Label(value) if value == "primary" => { - line.push_str(" PRIMARY KEY"); - let primary_keys = table_constraints - .get_mut("primary") - .and_then(|v| v.as_array_mut()) - .unwrap(); - primary_keys.push(SerdeValue::String(column_name.to_string())); - } - Expression::Label(value) if value == "unique" => { - line.push_str(" UNIQUE"); - let unique_constraints = table_constraints - .get_mut("unique") - .and_then(|v| v.as_array_mut()) - .unwrap(); - unique_constraints.push(SerdeValue::String(column_name.to_string())); - } - Expression::Function(name, args) if name == "from" => { - if args.len() != 1 { - panic!("Invalid foreign key: {} for: {}", structure, table_name); - } - match &*args[0] { - Expression::Field(ftable, fcolumn) => { - let foreign_keys = table_constraints - .get_mut("foreign") - .and_then(|v| v.as_array_mut()) - .unwrap(); - let foreign_key = json!({ - "column": column_name, - "ftable": ftable, - "fcolumn": fcolumn, - 
}); - foreign_keys.push(foreign_key); - } - _ => { - panic!("Invalid foreign key: {} for: {}", structure, table_name) - } - }; - } - Expression::Function(name, args) if name == "tree" => { - if args.len() != 1 { - panic!( - "Invalid 'tree' constraint: {} for: {}", - structure, table_name - ); - } - match &*args[0] { - Expression::Label(child) => { - let child_datatype = columns - .get(child) - .and_then(|c| c.get("datatype")) - .and_then(|d| d.as_str()); - if let None = child_datatype { - panic!( - "Could not determine SQL datatype for {} of tree({})", - child, child - ); - } - let child_datatype = child_datatype.unwrap(); - let parent = column_name; - let child_sql_type = get_sql_type( - datatypes_config, - &child_datatype.to_string(), - pool, - ) - .unwrap(); - if sql_type != child_sql_type { - panic!( - "SQL type '{}' of '{}' in 'tree({})' for table \ - '{}' doe snot match SQL type: '{}' of parent: '{}'.", - child_sql_type, - child, - child, - table_name, - sql_type, - parent - ); - } - let tree_constraints = table_constraints - .get_mut("tree") - .and_then(|t| t.as_array_mut()) - .unwrap(); - let entry = json!({"parent": column_name, - "child": child}); - tree_constraints.push(entry); - } - _ => { - panic!( - "Invalid 'tree' constraint: {} for: {}", - structure, table_name - ); - } - }; - } - Expression::Function(name, args) if name == "under" => { - let generic_error = format!( - "Invalid 'under' constraint: {} for: {}", - structure, table_name - ); - if args.len() != 2 { - panic!("{}", generic_error); - } - match (&*args[0], &*args[1]) { - (Expression::Field(ttable, tcolumn), Expression::Label(value)) => { - let under_constraints = table_constraints - .get_mut("under") - .and_then(|u| u.as_array_mut()) - .unwrap(); - let entry = json!({"column": column_name, - "ttable": ttable, - "tcolumn": tcolumn, - "value": value}); - under_constraints.push(entry); - } - (_, _) => panic!("{}", generic_error), - }; - } - _ => panic!( - "Unrecognized structure: {} for 
{}.{}", - structure, table_name, column_name - ), - }; - } - } - } - if r >= c - && table_constraints - .get("foreign") - .and_then(|v| v.as_array()) - .and_then(|v| Some(v.is_empty())) - .unwrap() - { - line.push_str(""); - } else { - line.push_str(","); - } - create_lines.push(line); - } - - let foreign_keys = table_constraints - .get("foreign") - .and_then(|v| v.as_array()) - .unwrap(); - let num_fkeys = foreign_keys.len(); - for (i, fkey) in foreign_keys.iter().enumerate() { - create_lines.push(format!( - r#" FOREIGN KEY ("{}") REFERENCES "{}"("{}"){}"#, - fkey.get("column").and_then(|s| s.as_str()).unwrap(), - fkey.get("ftable").and_then(|s| s.as_str()).unwrap(), - fkey.get("fcolumn").and_then(|s| s.as_str()).unwrap(), - if i < (num_fkeys - 1) { "," } else { "" } - )); - } - create_lines.push(String::from(");")); - // We are done generating the lines for the 'create table' statement. Join them and add the - // result to the statements to return: - statements.push(String::from(create_lines.join("\n"))); - - // Loop through the tree constraints and if any of their associated child columns do not already - // have an associated unique or primary index, create one implicitly here: - let tree_constraints = table_constraints - .get("tree") - .and_then(|v| v.as_array()) - .unwrap(); - for tree in tree_constraints { - let unique_keys = table_constraints - .get("unique") - .and_then(|v| v.as_array()) - .unwrap(); - let primary_keys = table_constraints - .get("primary") - .and_then(|v| v.as_array()) - .unwrap(); - let tree_child = tree.get("child").and_then(|c| c.as_str()).unwrap(); - if !unique_keys.contains(&SerdeValue::String(tree_child.to_string())) - && !primary_keys.contains(&SerdeValue::String(tree_child.to_string())) - { - statements.push(format!( - r#"CREATE UNIQUE INDEX "{}_{}_idx" ON "{}"("{}");"#, - table_name, tree_child, table_name, tree_child - )); - } - } - - // Finally, create a further unique index on row_number: - statements.push(format!( - r#"CREATE 
UNIQUE INDEX "{}_row_number_idx" ON "{}"("row_number");"#, - table_name, table_name - )); - - return (statements, table_constraints); -} - /// TODO: Add doc string here. fn get_table_constraints( tables_config: &SerdeMap, diff --git a/src/main.rs b/src/main.rs index 7c367c36..cdd05000 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,10 +4,7 @@ use crate::api_test::run_api_tests; use argparse::{ArgumentParser, Store, StoreTrue}; -use ontodev_valve::{ - get_compiled_datatype_conditions, get_compiled_rule_conditions, - get_parsed_structure_conditions, valve, valve_grammar::StartParser, Valve, ValveCommand, -}; +use ontodev_valve::Valve; use serde_json::{from_str, Value as SerdeValue}; use std::{env, process}; @@ -119,66 +116,37 @@ async fn main() -> Result<(), sqlx::Error> { if api_test { run_api_tests(&source, &destination).await?; } else if dump_config { - let config = valve( - &source, - &String::from(":memory:"), - &ValveCommand::Config, - false, - false, - &config_table, - ) - .await?; - let mut config: SerdeValue = serde_json::from_str(config.as_str()).unwrap(); - let config = config.as_object_mut().unwrap(); - let parser = StartParser::new(); - - let datatype_conditions = get_compiled_datatype_conditions(&config, &parser); - let structure_conditions = get_parsed_structure_conditions(&config, &parser); - let rule_conditions = - get_compiled_rule_conditions(&config, datatype_conditions.clone(), &parser); - - let datatype_conditions = format!("{:?}", datatype_conditions).replace(r"\", r"\\"); + let valve = + Valve::build(&source, &config_table, &destination, verbose, initial_load).await?; + let mut config = valve.global_config.clone(); + let datatype_conditions = + format!("{:?}", valve.compiled_datatype_conditions).replace(r"\", r"\\"); let datatype_conditions: SerdeValue = from_str(&datatype_conditions).unwrap(); config.insert(String::from("datatype_conditions"), datatype_conditions); - let structure_conditions = format!("{:?}", 
structure_conditions).replace(r"\", r"\\"); + let structure_conditions = + format!("{:?}", valve.parsed_structure_conditions).replace(r"\", r"\\"); let structure_conditions: SerdeValue = from_str(&structure_conditions).unwrap(); config.insert(String::from("structure_conditions"), structure_conditions); - let rule_conditions = format!("{:?}", rule_conditions).replace(r"\", r"\\"); + let rule_conditions = format!("{:?}", valve.compiled_rule_conditions).replace(r"\", r"\\"); let rule_conditions: SerdeValue = from_str(&rule_conditions).unwrap(); config.insert(String::from("rule_conditions"), rule_conditions); - let config = serde_json::to_string(config).unwrap(); + let config = serde_json::to_string(&config).unwrap(); println!("{}", config); } else if drop_all { let valve = Valve::build(&source, &config_table, &destination, verbose, initial_load).await?; valve.drop_all_tables().await?; } else if create_only { - valve( - &source, - &destination, - &ValveCommand::Create, - verbose, - false, - &config_table, - ) - .await?; + let valve = + Valve::build(&source, &config_table, &destination, verbose, initial_load).await?; + valve.create_missing_tables().await?; } else { let valve = Valve::build(&source, &config_table, &destination, verbose, initial_load).await?; valve.load_all_tables(true).await?; - - // valve( - // &source, - // &destination, - // &ValveCommand::Load, - // verbose, - // initial_load, - // &config_table, - // ) - // .await?; } Ok(()) diff --git a/src/validate.rs b/src/validate.rs index 6b3595a0..90486adf 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -1,5 +1,4 @@ use chrono::Utc; -use enquote::unquote; use indexmap::IndexMap; use serde_json::{json, Value as SerdeValue}; use sqlx::{ @@ -9,9 +8,8 @@ use sqlx::{ use std::collections::HashMap; use crate::{ - ast::Expression, cast_column_sql_to_text, cast_sql_param_from_text, get_column_value, - get_sql_type_from_global_config, is_sql_type_error, local_sql_syntax, valve_log, ColumnRule, - 
CompiledCondition, ParsedStructure, SerdeMap, ValveRow, SQL_PARAM, + cast_sql_param_from_text, get_column_value, get_sql_type_from_global_config, is_sql_type_error, + local_sql_syntax, valve_log, ColumnRule, CompiledCondition, SerdeMap, ValveRow, }; /// Represents a particular cell in a particular row of data with vaildation results. @@ -51,12 +49,12 @@ pub struct QueryAsIf { } /// Given a config map, maps of compiled datatype and rule conditions, a database connection -/// pool, a table name, a row to validate and a row number in the case where the row already exists, -/// perform both intra- and inter-row validation and return the validated row. Optionally, if a -/// transaction is given, use that instead of the pool for database access. Optionally, if -/// query_as_if is given, validate the row counterfactually according to that parameter. -/// Note that this function is idempotent. -pub async fn validate_row( +/// pool, a table name, a row to validate and a row number in the case where the row already +/// exists, perform both intra- and inter-row validation and return the validated row. +/// Optionally, if a transaction is given, use that instead of the pool for database access. +/// Optionally, if query_as_if is given, validate the row counterfactually according to that +/// parameter. Note that this function is idempotent. +pub async fn validate_row_tx( config: &SerdeMap, compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, @@ -245,195 +243,6 @@ pub async fn validate_row( Ok(result_row) } -/// Given a config map, a map of compiled datatype conditions, a database connection pool, a table -/// name, a column name, and (optionally) a string to match, return a JSON array of possible valid -/// values for the given column which contain the matching string as a substring (or all of them if -/// no matching string is given). 
The JSON array returned is formatted for Typeahead, i.e., it takes -/// the form: `[{"id": id, "label": label, "order": order}, ...]`. -pub async fn get_matching_values( - config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - parsed_structure_conditions: &HashMap, - pool: &AnyPool, - table_name: &str, - column_name: &str, - matching_string: Option<&str>, -) -> Result { - let dt_name = config - .get("table") - .and_then(|t| t.as_object()) - .and_then(|t| t.get(table_name)) - .and_then(|t| t.as_object()) - .and_then(|t| t.get("column")) - .and_then(|c| c.as_object()) - .and_then(|c| c.get(column_name)) - .and_then(|c| c.as_object()) - .and_then(|c| c.get("datatype")) - .and_then(|d| d.as_str()) - .unwrap(); - - let dt_condition = compiled_datatype_conditions - .get(dt_name) - .and_then(|d| Some(d.parsed.clone())); - - let mut values = vec![]; - match dt_condition { - Some(Expression::Function(name, args)) if name == "in" => { - for arg in args { - if let Expression::Label(arg) = *arg { - // Remove the enclosing quotes from the values being returned: - let label = unquote(&arg).unwrap_or_else(|_| arg); - if let Some(s) = matching_string { - if label.contains(s) { - values.push(label); - } - } - } - } - } - _ => { - // If the datatype for the column does not correspond to an `in(...)` function, then we - // check the column's structure constraints. If they include a - // `from(foreign_table.foreign_column)` condition, then the values are taken from the - // foreign column. Otherwise if the structure includes an - // `under(tree_table.tree_column, value)` condition, then get the values from the tree - // column that are under `value`. 
- let structure = parsed_structure_conditions.get( - config - .get("table") - .and_then(|t| t.as_object()) - .and_then(|t| t.get(table_name)) - .and_then(|t| t.as_object()) - .and_then(|t| t.get("column")) - .and_then(|c| c.as_object()) - .and_then(|c| c.get(column_name)) - .and_then(|c| c.as_object()) - .and_then(|c| c.get("structure")) - .and_then(|d| d.as_str()) - .unwrap_or_else(|| ""), - ); - - let sql_type = - get_sql_type_from_global_config(&config, table_name, &column_name, pool).unwrap(); - - match structure { - Some(ParsedStructure { original, parsed }) => { - let matching_string = { - match matching_string { - None => "%".to_string(), - Some(s) => format!("%{}%", s), - } - }; - - match parsed { - Expression::Function(name, args) if name == "from" => { - let foreign_key = &args[0]; - if let Expression::Field(ftable, fcolumn) = &**foreign_key { - let fcolumn_text = cast_column_sql_to_text(&fcolumn, &sql_type); - let sql = local_sql_syntax( - &pool, - &format!( - r#"SELECT "{}" FROM "{}" WHERE {} LIKE {}"#, - fcolumn, ftable, fcolumn_text, SQL_PARAM - ), - ); - let rows = sqlx_query(&sql) - .bind(&matching_string) - .fetch_all(pool) - .await?; - for row in rows.iter() { - values.push(get_column_value(&row, &fcolumn, &sql_type)); - } - } - } - Expression::Function(name, args) if name == "under" || name == "tree" => { - let mut tree_col = "not set"; - let mut under_val = Some("not set".to_string()); - if name == "under" { - if let Expression::Field(_, column) = &**&args[0] { - tree_col = column; - } - if let Expression::Label(label) = &**&args[1] { - under_val = Some(label.to_string()); - } - } else { - let tree_key = &args[0]; - if let Expression::Label(label) = &**tree_key { - tree_col = label; - under_val = None; - } - } - - let tree = config - .get("constraints") - .and_then(|c| c.as_object()) - .and_then(|c| c.get("tree")) - .and_then(|t| t.as_object()) - .and_then(|t| t.get(table_name)) - .and_then(|t| t.as_array()) - .and_then(|t| { - t.iter().find(|o| 
o.get("child").unwrap() == tree_col) - }) - .expect( - format!("No tree: '{}.{}' found", table_name, tree_col) - .as_str(), - ) - .as_object() - .unwrap(); - let child_column = tree.get("child").and_then(|c| c.as_str()).unwrap(); - - let (tree_sql, mut params) = with_tree_sql( - &config, - tree, - &table_name.to_string(), - &table_name.to_string(), - under_val.as_ref(), - None, - pool, - ); - let child_column_text = - cast_column_sql_to_text(&child_column, &sql_type); - let sql = local_sql_syntax( - &pool, - &format!( - r#"{} SELECT "{}" FROM "tree" WHERE {} LIKE {}"#, - tree_sql, child_column, child_column_text, SQL_PARAM - ), - ); - params.push(matching_string); - - let mut query = sqlx_query(&sql); - for param in ¶ms { - query = query.bind(param); - } - - let rows = query.fetch_all(pool).await?; - for row in rows.iter() { - values.push(get_column_value(&row, &child_column, &sql_type)); - } - } - _ => panic!("Unrecognised structure: {}", original), - }; - } - None => (), - }; - } - }; - - let mut typeahead_values = vec![]; - for (i, v) in values.iter().enumerate() { - // enumerate() begins at 0 but we need to begin at 1: - let i = i + 1; - typeahead_values.push(json!({ - "id": v, - "label": v, - "order": i, - })); - } - - Ok(json!(typeahead_values)) -} - /// Given a config map, a db connection pool, a table name, and an optional extra row, validate /// any associated under constraints for the current column. Optionally, if a transaction is /// given, use that instead of the pool for database access. @@ -1058,7 +867,7 @@ fn select_with_extra_row( /// Given a map representing a tree constraint, a table name, a root from which to generate a /// sub-tree of the tree, and an extra SQL clause, generate the SQL for a WITH clause representing /// the sub-tree. 
-fn with_tree_sql( +pub fn with_tree_sql( config: &SerdeMap, tree: &SerdeMap, table_name: &str, From fd4e716cbdfde04cd8286ad00d6ee05986381a5a Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Fri, 15 Dec 2023 18:52:06 -0500 Subject: [PATCH 30/57] move load_db() to load_tables() in the new Valve API --- src/lib.rs | 420 ++++++++++++++++++++++++++--------------------------- 1 file changed, 206 insertions(+), 214 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index e69f636b..9530458d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -187,6 +187,26 @@ impl Valve { // TODO: Error type should be ConfigError let pool = get_pool_from_connection_string(database).await?; + if pool.any_kind() == AnyKind::Sqlite { + sqlx_query("PRAGMA foreign_keys = ON") + .execute(&pool) + .await?; + if initial_load { + // These pragmas are unsafe but they are used during initial loading since data + // integrity is not a priority in this case. + sqlx_query("PRAGMA journal_mode = OFF") + .execute(&pool) + .await?; + sqlx_query("PRAGMA synchronous = 0").execute(&pool).await?; + sqlx_query("PRAGMA cache_size = 1000000") + .execute(&pool) + .await?; + sqlx_query("PRAGMA temp_store = MEMORY") + .execute(&pool) + .await?; + } + } + let parser = StartParser::new(); let ( specials_config, @@ -931,6 +951,12 @@ impl Valve { pub async fn drop_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { // DatabaseError + // TODO: This will work fine when the table list is all of the tables in the db, + // but in the case of a partial list, then there is a risk that some of them have + // dependencies on tables not in the list. What we need to do is grab the complete + // list of tables from self.global_config.sorted_table_list and use it as a reference + // for which tables need to be dropped. 
+ for table in tables { if table != "message" && table != "history" { let sql = format!(r#"DROP VIEW IF EXISTS "{}_text_view""#, table); @@ -962,6 +988,12 @@ impl Valve { pub async fn truncate_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError + // TODO: This will work fine when the table list is all of the tables in the db, + // but in the case of a partial list, then there is a risk that some of them have + // dependencies on tables not in the list. What we need to do is grab the complete + // list of tables from self.global_config.sorted_table_list and use it as a reference + // for which tables need to be dropped. + self.create_missing_tables().await?; // We must use CASCADE in the case of PostgreSQL since we cannot truncate a table, T, that @@ -995,67 +1027,194 @@ impl Valve { pub async fn load_all_tables(&self, _validate: bool) -> Result<&Self, sqlx::Error> { // DatabaseError - self.create_missing_tables().await?; - self.truncate_all_tables().await?; - - if self.pool.any_kind() == AnyKind::Sqlite { - sqlx_query("PRAGMA foreign_keys = ON") - .execute(&self.pool) - .await?; - if self.initial_load { - // These pragmas are unsafe but they are used during initial loading since data - // integrity is not a priority in this case. 
- sqlx_query("PRAGMA journal_mode = OFF") - .execute(&self.pool) - .await?; - sqlx_query("PRAGMA synchronous = 0") - .execute(&self.pool) - .await?; - sqlx_query("PRAGMA cache_size = 1000000") - .execute(&self.pool) - .await?; - sqlx_query("PRAGMA temp_store = MEMORY") - .execute(&self.pool) - .await?; - } - } - + let table_list = self.get_tables_ordered_for_creation(); if self.verbose { - valve_log!( - "Processing {} tables.", - self.global_config - .get("sorted_table_list") - .and_then(|l| l.as_array()) - .unwrap() - .len() - ); + valve_log!("Processing {} tables.", table_list.len()); } - load_db( - &self.global_config, - &self.pool, - &self.compiled_datatype_conditions, - &self.compiled_rule_conditions, - self.verbose, - ) - .await?; - - Ok(self) + self.load_tables(table_list, true).await } /// Given a vector of table names, /// load those tables in the given order. /// If `validate` is false, just try to insert all rows. /// Return an error on invalid table name or database problem. + /// Given a configuration map, a database connection pool, a parser, HashMaps representing + /// compiled datatype and rule conditions, and a HashMap representing parsed structure + /// conditions, read in the data TSV files corresponding to each configured table, then validate + /// and load all of the corresponding data rows. If the verbose flag is set to true, output + /// progress messages to stderr during load. 
pub async fn load_tables( &self, - tables: Vec<&str>, + table_list: Vec<&str>, _validate: bool, ) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError + self.create_missing_tables().await?; - self.truncate_tables(tables).await?; - if 1 == 1 { - todo!(); + let mut list_for_deletion = table_list.clone(); + list_for_deletion.reverse(); + self.truncate_tables(list_for_deletion).await?; + + let num_tables = table_list.len(); + let mut total_errors = 0; + let mut total_warnings = 0; + let mut total_infos = 0; + let mut table_num = 1; + for table_name in table_list { + if table_name == "message" || table_name == "history" { + continue; + } + let table_name = table_name.to_string(); + let path = String::from( + self.global_config + .get("table") + .and_then(|t| t.as_object()) + .and_then(|o| o.get(&table_name)) + .and_then(|n| n.get("path")) + .and_then(|p| p.as_str()) + .unwrap(), + ); + let mut rdr = { + match File::open(path.clone()) { + Err(e) => { + valve_log!("WARN: Unable to open '{}': {}", path.clone(), e); + continue; + } + Ok(table_file) => csv::ReaderBuilder::new() + .has_headers(false) + .delimiter(b'\t') + .from_reader(table_file), + } + }; + if self.verbose { + valve_log!("Loading table {}/{}: {}", table_num, num_tables, table_name); + } + table_num += 1; + + // Extract the headers, which we will need later: + let mut records = rdr.records(); + let headers; + if let Some(result) = records.next() { + headers = result.unwrap(); + } else { + panic!("'{}' is empty", path); + } + + for header in headers.iter() { + if header.trim().is_empty() { + panic!( + "One or more of the header fields is empty for table '{}'", + table_name + ); + } + } + + // HashMap used to report info about the number of error/warning/info messages for this + // table when the verbose flag is set to true: + let mut messages_stats = HashMap::new(); + messages_stats.insert("error".to_string(), 0); + messages_stats.insert("warning".to_string(), 0); + 
messages_stats.insert("info".to_string(), 0); + + // Split the data into chunks of size CHUNK_SIZE before passing them to the validation + // logic: + let chunks = records.chunks(CHUNK_SIZE); + validate_and_insert_chunks( + &self.global_config, + &self.pool, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &table_name, + &chunks, + &headers, + &mut messages_stats, + self.verbose, + ) + .await?; + + // We need to wait until all of the rows for a table have been loaded before validating the + // "foreign" constraints on a table's trees, since this checks if the values of one column + // (the tree's parent) are all contained in another column (the tree's child): + // We also need to wait before validating a table's "under" constraints. Although the tree + // associated with such a constraint need not be defined on the same table, it can be. + let mut recs_to_update = validate_tree_foreign_keys( + &self.global_config, + &self.pool, + None, + &table_name, + None, + ) + .await?; + recs_to_update.append( + &mut validate_under(&self.global_config, &self.pool, None, &table_name, None) + .await?, + ); + + for record in recs_to_update { + let row_number = record.get("row_number").unwrap(); + let column_name = record.get("column").and_then(|s| s.as_str()).unwrap(); + let value = record.get("value").and_then(|s| s.as_str()).unwrap(); + let level = record.get("level").and_then(|s| s.as_str()).unwrap(); + let rule = record.get("rule").and_then(|s| s.as_str()).unwrap(); + let message = record.get("message").and_then(|s| s.as_str()).unwrap(); + + let sql = local_sql_syntax( + &self.pool, + &format!( + r#"INSERT INTO "message" + ("table", "row", "column", "value", "level", "rule", "message") + VALUES ({}, {}, {}, {}, {}, {}, {})"#, + SQL_PARAM, + row_number, + SQL_PARAM, + SQL_PARAM, + SQL_PARAM, + SQL_PARAM, + SQL_PARAM + ), + ); + let mut query = sqlx_query(&sql); + query = query.bind(&table_name); + query = query.bind(&column_name); + query = 
query.bind(&value); + query = query.bind(&level); + query = query.bind(&rule); + query = query.bind(&message); + query.execute(&self.pool).await?; + + if self.verbose { + // Add the generated message to messages_stats: + let messages = vec![json!({ + "message": message, + "level": level, + })]; + add_message_counts(&messages, &mut messages_stats); + } + } + + if self.verbose { + // Output a report on the messages generated to stderr: + let errors = messages_stats.get("error").unwrap(); + let warnings = messages_stats.get("warning").unwrap(); + let infos = messages_stats.get("info").unwrap(); + let status_message = format!( + "{} errors, {} warnings, and {} information messages generated for {}", + errors, warnings, infos, table_name + ); + valve_log!("{}", status_message); + total_errors += errors; + total_warnings += warnings; + total_infos += infos; + } + } + + if self.verbose { + valve_log!( + "Loading complete with {} errors, {} warnings, and {} information messages", + total_errors, + total_warnings, + total_infos + ); } Ok(self) } @@ -5363,170 +5522,3 @@ async fn validate_and_insert_chunks( Ok(()) } } - -/// Given a configuration map, a database connection pool, a parser, HashMaps representing -/// compiled datatype and rule conditions, and a HashMap representing parsed structure conditions, -/// read in the data TSV files corresponding to each configured table, then validate and load all of -/// the corresponding data rows. If the verbose flag is set to true, output progress messages to -/// stderr during load. 
-async fn load_db( - config: &SerdeMap, - pool: &AnyPool, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - verbose: bool, -) -> Result<(), sqlx::Error> { - let mut table_list = vec![]; - for table in config - .get("sorted_table_list") - .and_then(|l| l.as_array()) - .unwrap() - { - table_list.push(table.as_str().and_then(|s| Some(s.to_string())).unwrap()); - } - let table_list = table_list; // Change the table_list to read only after populating it. - let num_tables = table_list.len(); - let mut total_errors = 0; - let mut total_warnings = 0; - let mut total_infos = 0; - let mut table_num = 1; - for table_name in table_list { - let path = String::from( - config - .get("table") - .and_then(|t| t.as_object()) - .and_then(|o| o.get(&table_name)) - .and_then(|n| n.get("path")) - .and_then(|p| p.as_str()) - .unwrap(), - ); - let mut rdr = { - match File::open(path.clone()) { - Err(e) => { - valve_log!("WARN: Unable to open '{}': {}", path.clone(), e); - continue; - } - Ok(table_file) => csv::ReaderBuilder::new() - .has_headers(false) - .delimiter(b'\t') - .from_reader(table_file), - } - }; - if verbose { - valve_log!("Loading table {}/{}: {}", table_num, num_tables, table_name); - } - table_num += 1; - - // Extract the headers, which we will need later: - let mut records = rdr.records(); - let headers; - if let Some(result) = records.next() { - headers = result.unwrap(); - } else { - panic!("'{}' is empty", path); - } - - for header in headers.iter() { - if header.trim().is_empty() { - panic!( - "One or more of the header fields is empty for table '{}'", - table_name - ); - } - } - - // HashMap used to report info about the number of error/warning/info messages for this - // table when the verbose flag is set to true: - let mut messages_stats = HashMap::new(); - messages_stats.insert("error".to_string(), 0); - messages_stats.insert("warning".to_string(), 0); - messages_stats.insert("info".to_string(), 0); - - // Split the data into 
chunks of size CHUNK_SIZE before passing them to the validation - // logic: - let chunks = records.chunks(CHUNK_SIZE); - validate_and_insert_chunks( - config, - pool, - compiled_datatype_conditions, - compiled_rule_conditions, - &table_name, - &chunks, - &headers, - &mut messages_stats, - verbose, - ) - .await?; - - // We need to wait until all of the rows for a table have been loaded before validating the - // "foreign" constraints on a table's trees, since this checks if the values of one column - // (the tree's parent) are all contained in another column (the tree's child): - // We also need to wait before validating a table's "under" constraints. Although the tree - // associated with such a constraint need not be defined on the same table, it can be. - let mut recs_to_update = - validate_tree_foreign_keys(config, pool, None, &table_name, None).await?; - recs_to_update.append(&mut validate_under(config, pool, None, &table_name, None).await?); - - for record in recs_to_update { - let row_number = record.get("row_number").unwrap(); - let column_name = record.get("column").and_then(|s| s.as_str()).unwrap(); - let value = record.get("value").and_then(|s| s.as_str()).unwrap(); - let level = record.get("level").and_then(|s| s.as_str()).unwrap(); - let rule = record.get("rule").and_then(|s| s.as_str()).unwrap(); - let message = record.get("message").and_then(|s| s.as_str()).unwrap(); - - let sql = local_sql_syntax( - &pool, - &format!( - r#"INSERT INTO "message" - ("table", "row", "column", "value", "level", "rule", "message") - VALUES ({}, {}, {}, {}, {}, {}, {})"#, - SQL_PARAM, row_number, SQL_PARAM, SQL_PARAM, SQL_PARAM, SQL_PARAM, SQL_PARAM - ), - ); - let mut query = sqlx_query(&sql); - query = query.bind(&table_name); - query = query.bind(&column_name); - query = query.bind(&value); - query = query.bind(&level); - query = query.bind(&rule); - query = query.bind(&message); - query.execute(pool).await?; - - if verbose { - // Add the generated message to 
messages_stats: - let messages = vec![json!({ - "message": message, - "level": level, - })]; - add_message_counts(&messages, &mut messages_stats); - } - } - - if verbose { - // Output a report on the messages generated to stderr: - let errors = messages_stats.get("error").unwrap(); - let warnings = messages_stats.get("warning").unwrap(); - let infos = messages_stats.get("info").unwrap(); - let status_message = format!( - "{} errors, {} warnings, and {} information messages generated for {}", - errors, warnings, infos, table_name - ); - valve_log!("{}", status_message); - total_errors += errors; - total_warnings += warnings; - total_infos += infos; - } - } - - if verbose { - valve_log!( - "Loading complete with {} errors, {} warnings, and {} information messages", - total_errors, - total_warnings, - total_infos - ); - } - - Ok(()) -} From fd61d7bb6d094a8be1301b03e123ab050327018a Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sat, 16 Dec 2023 09:24:54 -0500 Subject: [PATCH 31/57] move comment --- src/lib.rs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9530458d..68105ef0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -951,12 +951,6 @@ impl Valve { pub async fn drop_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { // DatabaseError - // TODO: This will work fine when the table list is all of the tables in the db, - // but in the case of a partial list, then there is a risk that some of them have - // dependencies on tables not in the list. What we need to do is grab the complete - // list of tables from self.global_config.sorted_table_list and use it as a reference - // for which tables need to be dropped. 
- for table in tables { if table != "message" && table != "history" { let sql = format!(r#"DROP VIEW IF EXISTS "{}_text_view""#, table); @@ -988,12 +982,6 @@ impl Valve { pub async fn truncate_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError - // TODO: This will work fine when the table list is all of the tables in the db, - // but in the case of a partial list, then there is a risk that some of them have - // dependencies on tables not in the list. What we need to do is grab the complete - // list of tables from self.global_config.sorted_table_list and use it as a reference - // for which tables need to be dropped. - self.create_missing_tables().await?; // We must use CASCADE in the case of PostgreSQL since we cannot truncate a table, T, that @@ -1050,6 +1038,12 @@ impl Valve { ) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError + // TODO: This will work fine when the table list is all of the tables in the db, + // but in the case of a partial list, then there is a risk that some of them have + // dependencies on tables not in the list. What we need to do is grab the complete + // list of tables from self.global_config.sorted_table_list and use it as a reference + // for which tables need to be dropped. 
+ self.create_missing_tables().await?; let mut list_for_deletion = table_list.clone(); list_for_deletion.reverse(); From 06ea389ddef100daee9f7653af265244b2d1ba9b Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 17 Dec 2023 14:11:00 -0500 Subject: [PATCH 32/57] refactoring --- src/lib.rs | 297 +++++++++++++++++++++++++--------------------------- src/main.rs | 8 +- 2 files changed, 152 insertions(+), 153 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 68105ef0..897465eb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -331,25 +331,33 @@ impl Valve { sorted_tables } - /// Given a parsed structure condition, a table and column name, and an unsigned integer - /// representing whether the given column, in the case of a SQLite database, is a primary key - /// (in the case of PostgreSQL, the sqlite_pk parameter is ignored): determine whether the - /// structure of the column is properly reflected in the db. E.g., a `from(table.column)` - /// struct should be associated with a foreign key, `primary` with a primary key, `unique` - /// with a unique constraint. - async fn structure_has_changed( - &self, - pstruct: &Expression, - table: &str, - column: &str, - sqlite_pk: &u32, - ) -> Result { - // A clojure to determine whether the given column has the given constraint type, which - // can be one of 'UNIQUE', 'PRIMARY KEY', 'FOREIGN KEY': - let column_has_constraint_type = |constraint_type: &str| -> Result { - if self.pool.any_kind() == AnyKind::Postgres { - let sql = format!( - r#"SELECT 1 + /// Given the name of a table, determine whether its current instantiation in the database + /// differs from the way it has been configured. 
The answer to this question is yes whenever + /// (1) the number of columns or any of their names differs from their configured values, or + /// the order of database columns differs from the configured order; (2) The values in the + /// table table differ from their configured values; (3) The SQL type of one or more columns + /// does not match the configured SQL type for that column; (3) All columns with a 'unique', + /// 'primary', or 'from(table, column)' in their column configuration are associated, in the + /// database, with a unique constraint, primary key, and foreign key, respectively, and vice + /// versa. + async fn table_has_changed(&self, table: &str) -> Result { + // A clojure that, given a parsed structure condition, a table and column name, and an + // unsigned integer representing whether the given column, in the case of a SQLite database, + // is a primary key (in the case of PostgreSQL, the sqlite_pk parameter is ignored): + // determine whether the structure of the column is properly reflected in the db. E.g., a + // `from(table.column)` struct should be associated with a foreign key, `primary` with a + // primary key, `unique` with a unique constraint. 
+ let structure_has_changed = |pstruct: &Expression, + table: &str, + column: &str, + sqlite_pk: &u32| + -> Result { + // A clojure to determine whether the given column has the given constraint type, which + // can be one of 'UNIQUE', 'PRIMARY KEY', 'FOREIGN KEY': + let column_has_constraint_type = |constraint_type: &str| -> Result { + if self.pool.any_kind() == AnyKind::Postgres { + let sql = format!( + r#"SELECT 1 FROM information_schema.table_constraints tco JOIN information_schema.key_column_usage kcu ON kcu.constraint_name = tco.constraint_name @@ -357,112 +365,111 @@ impl Valve { AND kcu.table_name = '{}' WHERE tco.constraint_type = '{}' AND kcu.column_name = '{}'"#, - table, constraint_type, column - ); - let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; - if rows.len() > 1 { - unreachable!(); - } - Ok(rows.len() == 1) - } else { - if constraint_type == "PRIMARY KEY" { - return Ok(*sqlite_pk == 1); - } else if constraint_type == "UNIQUE" { - let sql = format!(r#"PRAGMA INDEX_LIST("{}")"#, table); - for row in block_on(sqlx_query(&sql).fetch_all(&self.pool))? { - let idx_name = row.get::("name"); - let unique = row.get::("unique") as u8; - if unique == 1 { - let sql = format!(r#"PRAGMA INDEX_INFO("{}")"#, idx_name); - let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; - if rows.len() == 1 { - let cname = rows[0].get::("name"); - if cname == column { - return Ok(true); + table, constraint_type, column + ); + let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; + if rows.len() > 1 { + unreachable!(); + } + Ok(rows.len() == 1) + } else { + if constraint_type == "PRIMARY KEY" { + return Ok(*sqlite_pk == 1); + } else if constraint_type == "UNIQUE" { + let sql = format!(r#"PRAGMA INDEX_LIST("{}")"#, table); + for row in block_on(sqlx_query(&sql).fetch_all(&self.pool))? 
{ + let idx_name = row.get::("name"); + let unique = row.get::("unique") as u8; + if unique == 1 { + let sql = format!(r#"PRAGMA INDEX_INFO("{}")"#, idx_name); + let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; + if rows.len() == 1 { + let cname = rows[0].get::("name"); + if cname == column { + return Ok(true); + } } } } - } - Ok(false) - } else if constraint_type == "FOREIGN KEY" { - let sql = format!(r#"PRAGMA FOREIGN_KEY_LIST("{}")"#, table); - for row in block_on(sqlx_query(&sql).fetch_all(&self.pool))? { - let cname = row.get::("from"); - if cname == column { - return Ok(true); + Ok(false) + } else if constraint_type == "FOREIGN KEY" { + let sql = format!(r#"PRAGMA FOREIGN_KEY_LIST("{}")"#, table); + for row in block_on(sqlx_query(&sql).fetch_all(&self.pool))? { + let cname = row.get::("from"); + if cname == column { + return Ok(true); + } } + Ok(false) + } else { + return Err(SqlxCErr( + format!("Unrecognized constraint type: '{}'", constraint_type).into(), + )); } - Ok(false) - } else { - return Err(SqlxCErr( - format!("Unrecognized constraint type: '{}'", constraint_type).into(), - )); } - } - }; + }; - // Check if there is a change to whether this column is a primary/unique key: - let is_primary = match pstruct { - Expression::Label(label) if label == "primary" => true, - _ => false, - }; - if is_primary != column_has_constraint_type("PRIMARY KEY")? 
{ - return Ok(true); - } else if !is_primary { - let is_unique = match pstruct { - Expression::Label(label) if label == "unique" => true, + // Check if there is a change to whether this column is a primary/unique key: + let is_primary = match pstruct { + Expression::Label(label) if label == "primary" => true, _ => false, }; - let unique_in_db = column_has_constraint_type("UNIQUE")?; - if is_unique != unique_in_db { - // A child of a tree constraint implies a unique db constraint, so if there is a - // unique constraint in the db that is not configured, that is the explanation, - // and in that case we do not count this as a change to the column. - if !unique_in_db { - return Ok(true); - } else { - let trees = self - .global_config - .get("constraints") - .and_then(|c| c.as_object()) - .and_then(|o| o.get("tree")) - .and_then(|t| t.as_object()) - .and_then(|o| o.get(table)) - .and_then(|t| t.as_array()) - .and_then(|a| { - Some( - a.iter() - .map(|o| o.as_object().and_then(|o| o.get("child")).unwrap()), - ) - }) - .unwrap() - .collect::>(); - if !trees.contains(&&SerdeValue::String(column.to_string())) { + if is_primary != column_has_constraint_type("PRIMARY KEY")? { + return Ok(true); + } else if !is_primary { + let is_unique = match pstruct { + Expression::Label(label) if label == "unique" => true, + _ => false, + }; + let unique_in_db = column_has_constraint_type("UNIQUE")?; + if is_unique != unique_in_db { + // A child of a tree constraint implies a unique db constraint, so if there is a + // unique constraint in the db that is not configured, that is the explanation, + // and in that case we do not count this as a change to the column. 
+ if !unique_in_db { return Ok(true); + } else { + let trees = + self.global_config + .get("constraints") + .and_then(|c| c.as_object()) + .and_then(|o| o.get("tree")) + .and_then(|t| t.as_object()) + .and_then(|o| o.get(table)) + .and_then(|t| t.as_array()) + .and_then(|a| { + Some(a.iter().map(|o| { + o.as_object().and_then(|o| o.get("child")).unwrap() + })) + }) + .unwrap() + .collect::>(); + if !trees.contains(&&SerdeValue::String(column.to_string())) { + return Ok(true); + } } } } - } - match pstruct { - Expression::Function(name, args) if name == "from" => { - match &*args[0] { - Expression::Field(cfg_ftable, cfg_fcolumn) => { - if self.pool.any_kind() == AnyKind::Sqlite { - let sql = format!(r#"PRAGMA FOREIGN_KEY_LIST("{}")"#, table); - for row in sqlx_query(&sql).fetch_all(&self.pool).await? { - let from = row.get::("from"); - if from == column { - let db_ftable = row.get::("table"); - let db_fcolumn = row.get::("to"); - if *cfg_ftable != db_ftable || *cfg_fcolumn != db_fcolumn { - return Ok(true); + match pstruct { + Expression::Function(name, args) if name == "from" => { + match &*args[0] { + Expression::Field(cfg_ftable, cfg_fcolumn) => { + if self.pool.any_kind() == AnyKind::Sqlite { + let sql = format!(r#"PRAGMA FOREIGN_KEY_LIST("{}")"#, table); + for row in block_on(sqlx_query(&sql).fetch_all(&self.pool))? 
{ + let from = row.get::("from"); + if from == column { + let db_ftable = row.get::("table"); + let db_fcolumn = row.get::("to"); + if *cfg_ftable != db_ftable || *cfg_fcolumn != db_fcolumn { + return Ok(true); + } } } - } - } else { - let sql = format!( - r#"SELECT + } else { + let sql = format!( + r#"SELECT ccu.table_name AS foreign_table_name, ccu.column_name AS foreign_column_name FROM information_schema.table_constraints AS tc @@ -474,48 +481,38 @@ impl Valve { WHERE tc.constraint_type = 'FOREIGN KEY' AND tc.table_name = '{}' AND kcu.column_name = '{}'"#, - table, column - ); - let rows = sqlx_query(&sql).fetch_all(&self.pool).await?; - if rows.len() == 0 { - // If the table doesn't even exist return true. - return Ok(true); - } else if rows.len() > 1 { - // This seems impossible given how PostgreSQL works: - unreachable!(); - } else { - let row = &rows[0]; - let db_ftable = row.get::("foreign_table_name"); - let db_fcolumn = row.get::("foreign_column_name"); - if *cfg_ftable != db_ftable || *cfg_fcolumn != db_fcolumn { + table, column + ); + let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; + if rows.len() == 0 { + // If the table doesn't even exist return true. return Ok(true); + } else if rows.len() > 1 { + // This seems impossible given how PostgreSQL works: + unreachable!(); + } else { + let row = &rows[0]; + let db_ftable = row.get::("foreign_table_name"); + let db_fcolumn = row.get::("foreign_column_name"); + if *cfg_ftable != db_ftable || *cfg_fcolumn != db_fcolumn { + return Ok(true); + } } } } - } - _ => { - return Err(SqlxCErr( - format!("Unrecognized structure: {:?}", pstruct).into(), - )); - } - }; - } - _ => (), - }; + _ => { + return Err(SqlxCErr( + format!("Unrecognized structure: {:?}", pstruct).into(), + )); + } + }; + } + _ => (), + }; - Ok(false) - } + Ok(false) + }; - /// Given the name of a table, determine whether its current instantiation in the database - /// differs from the way it has been configured. 
The answer to this question is yes whenever - /// (1) the number of columns or any of their names differs from their configured values, or - /// the order of database columns differs from the configured order; (2) The values in the - /// table table differ from their configured values; (3) The SQL type of one or more columns - /// does not match the configured SQL type for that column; (3) All columns with a 'unique', - /// 'primary', or 'from(table, column)' in their column configuration are associated, in the - /// database, with a unique constraint, primary key, and foreign key, respectively, and vice - /// versa. - async fn table_has_changed(&self, table: &str) -> Result { let (columns_config, configured_column_order, description, table_type, path) = { let table_config = self .global_config @@ -755,10 +752,7 @@ impl Valve { .get(structure) .and_then(|p| Some(p.parsed.clone())) .unwrap(); - if self - .structure_has_changed(&parsed_structure, table, &cname, &pk) - .await? - { + if structure_has_changed(&parsed_structure, table, &cname, &pk)? { if self.verbose { valve_log!( "The table '{}' will be recreated because the database \ @@ -905,7 +899,7 @@ impl Valve { } /// Create all configured database tables and views if they do not already exist as configured. - pub async fn create_missing_tables(&self) -> Result<&Self, sqlx::Error> { + pub async fn create_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError // TODO: Add logging statements here. @@ -982,7 +976,7 @@ impl Valve { pub async fn truncate_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError - self.create_missing_tables().await?; + self.create_all_tables().await?; // We must use CASCADE in the case of PostgreSQL since we cannot truncate a table, T, that // depends on another table, T', even in the case where we have previously truncated T'. 
@@ -1044,7 +1038,6 @@ impl Valve { // list of tables from self.global_config.sorted_table_list and use it as a reference // for which tables need to be dropped. - self.create_missing_tables().await?; let mut list_for_deletion = table_list.clone(); list_for_deletion.reverse(); self.truncate_tables(list_for_deletion).await?; diff --git a/src/main.rs b/src/main.rs index cdd05000..733097ea 100644 --- a/src/main.rs +++ b/src/main.rs @@ -25,6 +25,12 @@ async fn main() -> Result<(), sqlx::Error> { let mut destination = String::new(); { + // TODO: Add a --yes flag, and set a flag called Valve.interactive. And then in the + // create_missing_tables() function, prompt the user to confirm when a table needs + // to be recreated automatically due to a change, unless the --yes flag is set. Note + // that Valve.interactive is only applicable to the command line. It should never be set + // when using valve as a library. + // this block limits scope of borrows by ap.refer() method let mut ap = ArgumentParser::new(); ap.set_description( @@ -142,7 +148,7 @@ async fn main() -> Result<(), sqlx::Error> { } else if create_only { let valve = Valve::build(&source, &config_table, &destination, verbose, initial_load).await?; - valve.create_missing_tables().await?; + valve.create_all_tables().await?; } else { let valve = Valve::build(&source, &config_table, &destination, verbose, initial_load).await?; From f4a8ddccd5f3a71c9d26414d38a294dbf7a27d3e Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 17 Dec 2023 14:54:21 -0500 Subject: [PATCH 33/57] handle dependencies during truncation and dropping --- src/lib.rs | 52 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 897465eb..53285225 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -909,6 +909,8 @@ impl Valve { let mut once_dropped = false; for (i, table) in sorted_table_list.iter().enumerate() { if self.table_has_changed(*table).await? 
{ + // TODO: Prompt the user to confirm whether she wants to automatically drop any + // flagged tables. if !once_dropped { let mut tables_to_drop = vec![""; sorted_table_list.len() - i]; tables_to_drop.clone_from_slice(&sorted_table_list[i..]); @@ -945,8 +947,26 @@ impl Valve { pub async fn drop_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { // DatabaseError - for table in tables { - if table != "message" && table != "history" { + let drop_list = { + let mut drop_list = vec![]; + let drop_order = self.get_tables_ordered_for_deletion(); + for table in &tables { + let idx = drop_order.iter().position(|s| s == table).unwrap(); + for i in 0..idx + 1 { + let dep_table = drop_order[i]; + if !drop_list.contains(&dep_table) { + drop_list.push(drop_order[i]); + } + } + } + drop_list + }; + + // TODO: If the drop_list does not match tables, prompt the user to confirm whether + // she wants to automatically truncate those dependent tables. + + for table in &drop_list { + if *table != "message" && *table != "history" { let sql = format!(r#"DROP VIEW IF EXISTS "{}_text_view""#, table); self.execute_sql(&sql).await?; let sql = format!(r#"DROP VIEW IF EXISTS "{}_view""#, table); @@ -978,6 +998,24 @@ impl Valve { self.create_all_tables().await?; + let truncate_list = { + let mut truncate_list = vec![]; + let truncate_order = self.get_tables_ordered_for_deletion(); + for table in &tables { + let idx = truncate_order.iter().position(|s| s == table).unwrap(); + for i in 0..idx + 1 { + let dep_table = truncate_order[i]; + if !truncate_list.contains(&dep_table) { + truncate_list.push(truncate_order[i]); + } + } + } + truncate_list + }; + + // TODO: If the truncate_list does not match tables, prompt the user to confirm whether + // she wants to automatically truncate those dependent tables. 
+ // We must use CASCADE in the case of PostgreSQL since we cannot truncate a table, T, that // depends on another table, T', even in the case where we have previously truncated T'. // SQLite does not need this. However SQLite does require that the tables be truncated in @@ -990,10 +1028,10 @@ impl Valve { } }; - for table in tables { + for table in &truncate_list { let sql = truncate_sql(&table); self.execute_sql(&sql).await?; - if table != "message" && table != "history" { + if *table != "message" && *table != "history" { let sql = truncate_sql(&format!("{}_conflict", table)); self.execute_sql(&sql).await?; } @@ -1032,12 +1070,6 @@ impl Valve { ) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError - // TODO: This will work fine when the table list is all of the tables in the db, - // but in the case of a partial list, then there is a risk that some of them have - // dependencies on tables not in the list. What we need to do is grab the complete - // list of tables from self.global_config.sorted_table_list and use it as a reference - // for which tables need to be dropped. 
- let mut list_for_deletion = table_list.clone(); list_for_deletion.reverse(); self.truncate_tables(list_for_deletion).await?; From 289213f701618de5a0090c0bb389d266c7cb355e Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 17 Dec 2023 15:51:50 -0500 Subject: [PATCH 34/57] add interactive flag --- src/api_test.rs | 2 +- src/lib.rs | 68 ++++++++++++++++++++++++++++++++++++++++++----- src/main.rs | 70 ++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 118 insertions(+), 22 deletions(-) diff --git a/src/api_test.rs b/src/api_test.rs index 6efa3a73..64ad20b9 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -468,7 +468,7 @@ async fn test_undo_redo(valve: &Valve) -> Result<(), sqlx::Error> { } pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Error> { - let valve = Valve::build(table, "table", database, false, false).await?; + let valve = Valve::build(table, "table", database, false, false, false).await?; // NOTE that you must use an external script to fetch the data from the database and run a diff // against a known good sample to verify that these tests yield the expected results: test_matching(&valve).await?; diff --git a/src/lib.rs b/src/lib.rs index 53285225..18c3e89c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -155,13 +155,23 @@ impl std::fmt::Debug for ColumnRule { #[derive(Debug)] pub struct Valve { + /// TODO: Add docstring here. pub global_config: SerdeMap, + /// TODO: Add docstring here. pub compiled_datatype_conditions: HashMap, + /// TODO: Add docstring here. pub compiled_rule_conditions: HashMap>>, + /// TODO: Add docstring here. pub parsed_structure_conditions: HashMap, + /// TODO: Add docstring here. pub pool: AnyPool, + /// TODO: Add docstring here. pub user: String, + /// TODO: Add docstring here. pub verbose: bool, + /// TODO: Add docstring here. Note that this field is CLI only. + pub interactive: bool, + /// TODO: Add docstring here. 
pub initial_load: bool, } @@ -182,6 +192,7 @@ impl Valve { config_table: &str, database: &str, verbose: bool, + interactive: bool, initial_load: bool, ) -> Result { // TODO: Error type should be ConfigError @@ -264,6 +275,7 @@ impl Valve { pool: pool, user: String::from("VALVE"), verbose: verbose, + interactive: interactive, initial_load: initial_load, }) } @@ -909,8 +921,10 @@ impl Valve { let mut once_dropped = false; for (i, table) in sorted_table_list.iter().enumerate() { if self.table_has_changed(*table).await? { - // TODO: Prompt the user to confirm whether she wants to automatically drop any - // flagged tables. + if self.verbose { + // TODO: Prompt the user to confirm whether she wants to automatically drop any + // flagged tables. + } if !once_dropped { let mut tables_to_drop = vec![""; sorted_table_list.len() - i]; tables_to_drop.clone_from_slice(&sorted_table_list[i..]); @@ -931,6 +945,32 @@ impl Valve { Ok(self) } + /// TODO: Add docstring here. + pub async fn table_exists(&self, table: &str) -> Result { + let sql = { + if self.pool.any_kind() == AnyKind::Sqlite { + format!( + r#"SELECT 1 + FROM "sqlite_master" + WHERE "type" = 'table' AND name = '{}' + LIMIT 1"#, + table + ) + } else { + format!( + r#"SELECT 1 + FROM "information_schema"."tables" + WHERE "table_schema" = 'public' + AND "table_name" = '{}'"#, + table + ) + } + }; + let query = sqlx_query(&sql); + let rows = query.fetch_all(&self.pool).await?; + return Ok(rows.len() > 0); + } + /// Drop all configured tables, in reverse dependency order. pub async fn drop_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError @@ -962,8 +1002,16 @@ impl Valve { drop_list }; - // TODO: If the drop_list does not match tables, prompt the user to confirm whether - // she wants to automatically truncate those dependent tables. 
+ if self.verbose { + let auto_drops = drop_list + .iter() + .filter(|t| !tables.contains(t) && !block_on(self.table_exists(t)).unwrap()) + .collect::>(); + if auto_drops.len() > 0 { + // TODO: prompt the user to confirm whether she wants to automatically drop + // the dependent tables. + } + } for table in &drop_list { if *table != "message" && *table != "history" { @@ -1013,8 +1061,16 @@ impl Valve { truncate_list }; - // TODO: If the truncate_list does not match tables, prompt the user to confirm whether - // she wants to automatically truncate those dependent tables. + if self.verbose { + let auto_truncates = truncate_list + .iter() + .filter(|t| !tables.contains(t) && !block_on(self.table_exists(t)).unwrap()) + .collect::>(); + if auto_truncates.len() > 0 { + // TODO: prompt the user to confirm whether she wants to automatically truncate + // the dependent tables. + } + } // We must use CASCADE in the case of PostgreSQL since we cannot truncate a table, T, that // depends on another table, T', even in the case where we have previously truncated T'. 
diff --git a/src/main.rs b/src/main.rs index 733097ea..8235b9f0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,12 +14,14 @@ fn cli_args_valid(source: &str, destination: &str, dump_config: bool) -> bool { #[async_std::main] async fn main() -> Result<(), sqlx::Error> { + let mut verbose = false; + let mut yes = false; let mut api_test = false; let mut dump_config = false; + let mut dump_schema = false; let mut drop_all = false; let mut create_only = false; let mut config_table = String::new(); - let mut verbose = false; let mut initial_load = false; let mut source = String::new(); let mut destination = String::new(); @@ -39,6 +41,16 @@ async fn main() -> Result<(), sqlx::Error> { to by SOURCE will be read and a new database will be created and loaded with the indicated data."#, ); + ap.refer(&mut verbose).add_option( + &["--verbose"], + StoreTrue, + r#"While loading the database, write progress messages to stderr."#, + ); + ap.refer(&mut yes).add_option( + &["--yes"], + StoreTrue, + r#"Do not prompt the user to confirm dropping/truncating tables."#, + ); ap.refer(&mut api_test).add_option( &["--api_test"], StoreTrue, @@ -52,6 +64,11 @@ async fn main() -> Result<(), sqlx::Error> { r#"Read the configuration referred to by SOURCE and send it to stdout as a JSON-formatted string."#, ); + ap.refer(&mut dump_schema).add_option( + &["--dump_schema"], + StoreTrue, + r#"Write the SQL used to create the database to stdout."#, + ); ap.refer(&mut drop_all).add_option( &["--drop_all"], StoreTrue, @@ -63,18 +80,13 @@ async fn main() -> Result<(), sqlx::Error> { r#"Read the configuration referred to by SOURCE, and create a corresponding database in DESTINATION but do not load it."#, ); + // TODO: Remove this option: ap.refer(&mut config_table).add_option( &["--config_table"], Store, r#"When reading configuration from a database, the name to use to refer to the main configuration table (defaults to "table")"#, ); - ap.refer(&mut verbose).add_option( - &["--verbose"], - StoreTrue, 
- r#"Write the SQL used to create the database to stdout after configuring it, and then - while loading the database, write progress messages to stderr."#, - ); ap.refer(&mut initial_load).add_option( &["--initial_load"], StoreTrue, @@ -122,8 +134,15 @@ async fn main() -> Result<(), sqlx::Error> { if api_test { run_api_tests(&source, &destination).await?; } else if dump_config { - let valve = - Valve::build(&source, &config_table, &destination, verbose, initial_load).await?; + let valve = Valve::build( + &source, + &config_table, + &destination, + verbose, + initial_load, + yes, + ) + .await?; let mut config = valve.global_config.clone(); let datatype_conditions = format!("{:?}", valve.compiled_datatype_conditions).replace(r"\", r"\\"); @@ -142,16 +161,37 @@ async fn main() -> Result<(), sqlx::Error> { let config = serde_json::to_string(&config).unwrap(); println!("{}", config); } else if drop_all { - let valve = - Valve::build(&source, &config_table, &destination, verbose, initial_load).await?; + let valve = Valve::build( + &source, + &config_table, + &destination, + verbose, + initial_load, + yes, + ) + .await?; valve.drop_all_tables().await?; } else if create_only { - let valve = - Valve::build(&source, &config_table, &destination, verbose, initial_load).await?; + let valve = Valve::build( + &source, + &config_table, + &destination, + verbose, + initial_load, + yes, + ) + .await?; valve.create_all_tables().await?; } else { - let valve = - Valve::build(&source, &config_table, &destination, verbose, initial_load).await?; + let valve = Valve::build( + &source, + &config_table, + &destination, + verbose, + initial_load, + yes, + ) + .await?; valve.load_all_tables(true).await?; } From 80b495f54e1b8154df8f81ffd8f49628ff448ef9 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 17 Dec 2023 15:56:11 -0500 Subject: [PATCH 35/57] fix typo --- src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 
18c3e89c..b94a68de 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -921,7 +921,7 @@ impl Valve { let mut once_dropped = false; for (i, table) in sorted_table_list.iter().enumerate() { if self.table_has_changed(*table).await? { - if self.verbose { + if self.interactive { // TODO: Prompt the user to confirm whether she wants to automatically drop any // flagged tables. } @@ -1002,7 +1002,7 @@ impl Valve { drop_list }; - if self.verbose { + if self.interactive { let auto_drops = drop_list .iter() .filter(|t| !tables.contains(t) && !block_on(self.table_exists(t)).unwrap()) @@ -1061,7 +1061,7 @@ impl Valve { truncate_list }; - if self.verbose { + if self.interactive { let auto_truncates = truncate_list .iter() .filter(|t| !tables.contains(t) && !block_on(self.table_exists(t)).unwrap()) From c052d9701b05a6069e65e40422f2581eb0e17e23 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 17 Dec 2023 16:18:30 -0500 Subject: [PATCH 36/57] fix typo --- src/main.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/main.rs b/src/main.rs index 8235b9f0..30096317 100644 --- a/src/main.rs +++ b/src/main.rs @@ -27,12 +27,6 @@ async fn main() -> Result<(), sqlx::Error> { let mut destination = String::new(); { - // TODO: Add a --yes flag, and set a flag called Valve.interactive. And then in the - // create_missing_tables() function, prompt the user to confirm when a table needs - // to be recreated automatically due to a change, unless the --yes flag is set. Note - // that Valve.interactive is only applicable to the command line. It should never be set - // when using valve as a library. 
- // this block limits scope of borrows by ap.refer() method let mut ap = ArgumentParser::new(); ap.set_description( @@ -131,6 +125,7 @@ async fn main() -> Result<(), sqlx::Error> { config_table = "table".to_string(); } + let interactive = !yes; if api_test { run_api_tests(&source, &destination).await?; } else if dump_config { @@ -140,7 +135,7 @@ async fn main() -> Result<(), sqlx::Error> { &destination, verbose, initial_load, - yes, + interactive, ) .await?; let mut config = valve.global_config.clone(); @@ -167,7 +162,7 @@ async fn main() -> Result<(), sqlx::Error> { &destination, verbose, initial_load, - yes, + interactive, ) .await?; valve.drop_all_tables().await?; @@ -178,7 +173,7 @@ async fn main() -> Result<(), sqlx::Error> { &destination, verbose, initial_load, - yes, + interactive, ) .await?; valve.create_all_tables().await?; @@ -189,7 +184,7 @@ async fn main() -> Result<(), sqlx::Error> { &destination, verbose, initial_load, - yes, + interactive, ) .await?; valve.load_all_tables(true).await?; From 373ea445c9e6568d88ca70425bf2eec8ad171715 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 18 Dec 2023 08:36:41 -0500 Subject: [PATCH 37/57] remove config_table parameter --- src/api_test.rs | 2 +- src/lib.rs | 9 +++----- src/main.rs | 55 +++++++------------------------------------------ 3 files changed, 11 insertions(+), 55 deletions(-) diff --git a/src/api_test.rs b/src/api_test.rs index 64ad20b9..2030d61c 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -468,7 +468,7 @@ async fn test_undo_redo(valve: &Valve) -> Result<(), sqlx::Error> { } pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Error> { - let valve = Valve::build(table, "table", database, false, false, false).await?; + let valve = Valve::build(table, database, false, false, false).await?; // NOTE that you must use an external script to fetch the data from the database and run a diff // against a known good sample to verify that these tests yield the expected 
results: test_matching(&valve).await?; diff --git a/src/lib.rs b/src/lib.rs index b94a68de..0c7af547 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -188,8 +188,6 @@ impl Valve { /// Valve struct. pub async fn build( table_path: &str, - // TODO: Remove the config_table parameter. - config_table: &str, database: &str, verbose: bool, interactive: bool, @@ -226,7 +224,7 @@ impl Valve { rules_config, constraints_config, sorted_table_list, - ) = read_config_files(table_path, config_table, &parser, &pool); + ) = read_config_files(table_path, &parser, &pool); let mut global_config = SerdeMap::new(); global_config.insert( @@ -1949,7 +1947,6 @@ async fn get_pool_from_connection_string(database: &str) -> Result ( @@ -1982,7 +1979,7 @@ fn read_config_files( if path.to_lowercase().ends_with(".tsv") { read_tsv_into_vector(path) } else { - read_db_table_into_vector(path, config_table) + read_db_table_into_vector(path, "table") } }; @@ -4165,7 +4162,7 @@ fn read_tsv_into_vector(path: &str) -> Vec { rows } -/// Given a database at the specified location, query the "table" table and return a vector of rows +/// Given a database at the specified location, query the given table and return a vector of rows /// represented as ValveRows. 
fn read_db_table_into_vector(database: &str, config_table: &str) -> Vec { let connection_options; diff --git a/src/main.rs b/src/main.rs index 30096317..91233ca6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -21,7 +21,6 @@ async fn main() -> Result<(), sqlx::Error> { let mut dump_schema = false; let mut drop_all = false; let mut create_only = false; - let mut config_table = String::new(); let mut initial_load = false; let mut source = String::new(); let mut destination = String::new(); @@ -74,13 +73,6 @@ async fn main() -> Result<(), sqlx::Error> { r#"Read the configuration referred to by SOURCE, and create a corresponding database in DESTINATION but do not load it."#, ); - // TODO: Remove this option: - ap.refer(&mut config_table).add_option( - &["--config_table"], - Store, - r#"When reading configuration from a database, the name to use to refer to the main - configuration table (defaults to "table")"#, - ); ap.refer(&mut initial_load).add_option( &["--initial_load"], StoreTrue, @@ -121,23 +113,11 @@ async fn main() -> Result<(), sqlx::Error> { process::exit(1); } - if config_table.trim() == "" { - config_table = "table".to_string(); - } - let interactive = !yes; if api_test { run_api_tests(&source, &destination).await?; } else if dump_config { - let valve = Valve::build( - &source, - &config_table, - &destination, - verbose, - initial_load, - interactive, - ) - .await?; + let valve = Valve::build(&source, &destination, verbose, initial_load, interactive).await?; let mut config = valve.global_config.clone(); let datatype_conditions = format!("{:?}", valve.compiled_datatype_conditions).replace(r"\", r"\\"); @@ -155,38 +135,17 @@ async fn main() -> Result<(), sqlx::Error> { let config = serde_json::to_string(&config).unwrap(); println!("{}", config); + } else if dump_schema { + let valve = Valve::build(&source, &destination, verbose, initial_load, interactive).await?; + valve.dump_schema().await?; } else if drop_all { - let valve = Valve::build( - &source, - 
&config_table, - &destination, - verbose, - initial_load, - interactive, - ) - .await?; + let valve = Valve::build(&source, &destination, verbose, initial_load, interactive).await?; valve.drop_all_tables().await?; } else if create_only { - let valve = Valve::build( - &source, - &config_table, - &destination, - verbose, - initial_load, - interactive, - ) - .await?; + let valve = Valve::build(&source, &destination, verbose, initial_load, interactive).await?; valve.create_all_tables().await?; } else { - let valve = Valve::build( - &source, - &config_table, - &destination, - verbose, - initial_load, - interactive, - ) - .await?; + let valve = Valve::build(&source, &destination, verbose, initial_load, interactive).await?; valve.load_all_tables(true).await?; } From 61cf3916659d13124e4c8b6f2acd0b91110596ca Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Wed, 20 Dec 2023 13:18:52 -0500 Subject: [PATCH 38/57] collect table dependencies --- src/lib.rs | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0c7af547..d67e5291 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -164,6 +164,8 @@ pub struct Valve { /// TODO: Add docstring here. pub parsed_structure_conditions: HashMap, /// TODO: Add docstring here. + pub table_dependencies: HashMap>, + /// TODO: Add docstring here. pub pool: AnyPool, /// TODO: Add docstring here. pub user: String, @@ -224,6 +226,7 @@ impl Valve { rules_config, constraints_config, sorted_table_list, + table_dependencies, ) = read_config_files(table_path, &parser, &pool); let mut global_config = SerdeMap::new(); @@ -270,6 +273,7 @@ impl Valve { compiled_datatype_conditions: compiled_datatype_conditions, compiled_rule_conditions: compiled_rule_conditions, parsed_structure_conditions: parsed_structure_conditions, + table_dependencies: table_dependencies, pool: pool, user: String::from("VALVE"), verbose: verbose, @@ -924,6 +928,7 @@ impl Valve { // flagged tables. 
} if !once_dropped { + // TODO: Rethink this. let mut tables_to_drop = vec![""; sorted_table_list.len() - i]; tables_to_drop.clone_from_slice(&sorted_table_list[i..]); tables_to_drop.reverse(); @@ -985,6 +990,7 @@ impl Valve { pub async fn drop_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { // DatabaseError + // TODO: Re-think how this is done. let drop_list = { let mut drop_list = vec![]; let drop_order = self.get_tables_ordered_for_deletion(); @@ -1044,6 +1050,7 @@ impl Valve { self.create_all_tables().await?; + // TODO: Re-think how this is done. let truncate_list = { let mut truncate_list = vec![]; let truncate_order = self.get_tables_ordered_for_deletion(); @@ -1956,6 +1963,7 @@ fn read_config_files( SerdeMap, SerdeMap, Vec, + HashMap>, ) { let special_table_types = json!({ "table": {"required": true}, @@ -2497,7 +2505,7 @@ fn read_config_files( // Sort the tables (aside from the message and history tables) according to their foreign key // dependencies so that tables are always loaded after the tables they depend on. - let sorted_tables = verify_table_deps_and_sort( + let (sorted_tables, table_dependencies) = verify_table_deps_and_sort( &tables_config .keys() .cloned() @@ -2514,6 +2522,7 @@ fn read_config_files( rules_config, constraints_config, sorted_tables, + table_dependencies, ) } @@ -2672,7 +2681,6 @@ fn get_parsed_structure_conditions( parsed_structure_conditions } -// removed the old valve functions that require it. 
/// Given the name of a table and a database connection pool, generate SQL for creating a view /// based on the table that provides a unified representation of the normal and conflict versions /// of the table, plus columns summarising the information associated with the given table that is @@ -4487,7 +4495,10 @@ fn local_sql_syntax(pool: &AnyPool, sql: &String) -> String { /// under dependencies, returns the list of tables sorted according to their foreign key /// dependencies, such that if table_a depends on table_b, then table_b comes before table_a in the /// list that is returned. -fn verify_table_deps_and_sort(table_list: &Vec, constraints: &SerdeMap) -> Vec { +fn verify_table_deps_and_sort( + table_list: &Vec, + constraints: &SerdeMap, +) -> (Vec, HashMap>) { fn get_cycles(g: &DiGraphMap<&str, ()>) -> Result, Vec>> { let mut cycles = vec![]; match toposort(&g, None) { @@ -4520,6 +4531,7 @@ fn verify_table_deps_and_sort(table_list: &Vec, constraints: &SerdeMap) } } + // Check for intra-table cycles: let trees = constraints.get("tree").and_then(|t| t.as_object()).unwrap(); for table_name in table_list { let mut dependency_graph = DiGraphMap::<&str, ()>::new(); @@ -4564,6 +4576,7 @@ fn verify_table_deps_and_sort(table_list: &Vec, constraints: &SerdeMap) }; } + // Check for inter-table cycles: let foreign_keys = constraints .get("foreign") .and_then(|f| f.as_object()) @@ -4614,7 +4627,15 @@ fn verify_table_deps_and_sort(table_list: &Vec, constraints: &SerdeMap) match get_cycles(&dependency_graph) { Ok(sorted_table_list) => { - return sorted_table_list; + let mut dependencies = HashMap::new(); + for node in dependency_graph.nodes() { + let neighbors = dependency_graph + .neighbors_directed(node, petgraph::Direction::Outgoing) + .map(|n| n.to_string()) + .collect::>(); + dependencies.insert(node.to_string(), neighbors); + } + return (sorted_table_list, dependencies); } Err(cycles) => { let mut message = String::new(); From efabdd47af7b44086d94af6a70cd0f49dd7ee3b1 
Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Thu, 21 Dec 2023 12:57:05 -0500 Subject: [PATCH 39/57] use table dependencies to determine deletion order --- src/lib.rs | 215 +++++++++++++++++++++++++----------------------- src/validate.rs | 9 +- 2 files changed, 119 insertions(+), 105 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d67e5291..6abfb11c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -84,10 +84,32 @@ lazy_static! { pub type SerdeMap = serde_json::Map; pub type ValveRow = serde_json::Map; +/// Write a debugging message to STDERR. #[macro_export] -macro_rules! valve_log { +macro_rules! debug { () => (eprintln!()); - ($($arg:tt)*) => (eprintln!("{} - {}", Utc::now(), format_args!($($arg)*))); + ($($arg:tt)*) => (eprintln!("{} - DEBUG {}", Utc::now(), format_args!($($arg)*))); +} + +/// Write an information message to STDERR. +#[macro_export] +macro_rules! info { + () => (eprintln!()); + ($($arg:tt)*) => (eprintln!("{} - INFO {}", Utc::now(), format_args!($($arg)*))); +} + +/// Write a warning message to STDERR. +#[macro_export] +macro_rules! warn { + () => (eprintln!()); + ($($arg:tt)*) => (eprintln!("{} - WARN {}", Utc::now(), format_args!($($arg)*))); +} + +/// Write an error message to STDERR. +#[macro_export] +macro_rules! 
error { + () => (eprintln!()); + ($($arg:tt)*) => (eprintln!("{} - ERROR {}", Utc::now(), format_args!($($arg)*))); } /// Represents a structure such as those found in the `structure` column of the `column` table in @@ -350,10 +372,10 @@ impl Valve { /// (1) the number of columns or any of their names differs from their configured values, or /// the order of database columns differs from the configured order; (2) The values in the /// table table differ from their configured values; (3) The SQL type of one or more columns - /// does not match the configured SQL type for that column; (3) All columns with a 'unique', - /// 'primary', or 'from(table, column)' in their column configuration are associated, in the - /// database, with a unique constraint, primary key, and foreign key, respectively, and vice - /// versa. + /// does not match the configured SQL type for that column; (3) Some column with a 'unique', + /// 'primary', or 'from(table, column)' in its column configuration fails to be associated, in + /// the database, with a unique constraint, primary key, or foreign key, respectively; or vice + /// versa; (4) The table does not exist in the database. 
async fn table_has_changed(&self, table: &str) -> Result { // A clojure that, given a parsed structure condition, a table and column name, and an // unsigned integer representing whether the given column, in the case of a SQLite database, @@ -596,7 +618,7 @@ impl Valve { let rows = sqlx_query(&sql).fetch_all(&self.pool).await?; if rows.len() == 0 { if self.verbose { - valve_log!( + info!( "The table '{}' will be recreated as it does not exist in the database", table ); @@ -629,7 +651,7 @@ impl Valve { let rows = sqlx_query(&sql).fetch_all(&self.pool).await?; if rows.len() == 0 { if self.verbose { - valve_log!( + info!( "The table '{}' will be recreated as it does not exist in the database", table ); @@ -659,12 +681,10 @@ impl Valve { .collect::>(); if db_column_order != configured_column_order { if self.verbose { - valve_log!( + info!( "The table '{}' will be recreated since the database columns: {:?} \ and/or their order does not match the configured columns: {:?}", - table, - db_column_order, - configured_column_order + table, db_column_order, configured_column_order ); } return Ok(true); @@ -696,7 +716,7 @@ impl Valve { let rows = sqlx_query(&sql).fetch_all(&self.pool).await?; if rows.len() == 0 { if self.verbose { - valve_log!( + info!( "The table '{table}' will be recreated because the entries in the \ table table for '{table}' have changed.", table = table @@ -705,10 +725,7 @@ impl Valve { return Ok(true); } else if rows.len() > 1 { if self.verbose { - valve_log!( - "WARN more than one row was returned from the query '{}'", - sql - ); + warn!("More than one row was returned from the query '{}'", sql); } } } @@ -744,7 +761,7 @@ impl Valve { && (c.starts_with("varchar") || c.starts_with("character varying"))) { if self.verbose { - valve_log!( + info!( "The table '{}' will be recreated because the SQL type of column '{}', \ {}, does not match the configured value: {}", table, @@ -768,13 +785,11 @@ impl Valve { .unwrap(); if 
structure_has_changed(&parsed_structure, table, &cname, &pk)? { if self.verbose { - valve_log!( + info!( "The table '{}' will be recreated because the database \ constraints for column '{}' do not match the configured \ structure, '{}'", - table, - cname, - structure + table, cname, structure ); } return Ok(true); @@ -912,6 +927,18 @@ impl Valve { Ok(()) } + /// TODO: Add docstring here. + fn get_dependent_tables(&self, table: &str) -> Vec { + let mut dependent_tables = vec![]; + let direct_deps = self.table_dependencies.get(table).unwrap().to_vec(); + for direct_dep in direct_deps { + let mut indirect_deps = self.get_dependent_tables(&direct_dep); + dependent_tables.append(&mut indirect_deps); + dependent_tables.push(direct_dep); + } + dependent_tables + } + /// Create all configured database tables and views if they do not already exist as configured. pub async fn create_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError @@ -920,24 +947,9 @@ impl Valve { let setup_statements = self.get_setup_statements().await?; let sorted_table_list = self.get_tables_ordered_for_creation(); - let mut once_dropped = false; - for (i, table) in sorted_table_list.iter().enumerate() { + for table in &sorted_table_list { if self.table_has_changed(*table).await? { - if self.interactive { - // TODO: Prompt the user to confirm whether she wants to automatically drop any - // flagged tables. - } - if !once_dropped { - // TODO: Rethink this. 
- let mut tables_to_drop = vec![""; sorted_table_list.len() - i]; - tables_to_drop.clone_from_slice(&sorted_table_list[i..]); - tables_to_drop.reverse(); - for table in tables_to_drop { - self.drop_tables(vec![table]).await?; - } - once_dropped = true; - } - + self.drop_tables(&vec![table]).await?; let table_statements = setup_statements.get(*table).unwrap(); for stmt in table_statements { self.execute_sql(stmt).await?; @@ -979,7 +991,7 @@ impl Valve { // DatabaseError // Drop all of the database tables in the reverse of their sorted order: - self.drop_tables(self.get_tables_ordered_for_deletion()) + self.drop_tables(&self.get_tables_ordered_for_deletion()) .await?; Ok(self) } @@ -987,21 +999,19 @@ impl Valve { /// Given a vector of table names, /// drop those tables, in the given order. /// Return an error on invalid table name or database problem. - pub async fn drop_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { + pub async fn drop_tables(&self, tables: &Vec<&str>) -> Result<&Self, sqlx::Error> { // DatabaseError - // TODO: Re-think how this is done. 
let drop_list = { let mut drop_list = vec![]; - let drop_order = self.get_tables_ordered_for_deletion(); - for table in &tables { - let idx = drop_order.iter().position(|s| s == table).unwrap(); - for i in 0..idx + 1 { - let dep_table = drop_order[i]; + for table in tables { + let dependent_tables = self.get_dependent_tables(table); + for dep_table in dependent_tables { if !drop_list.contains(&dep_table) { - drop_list.push(drop_order[i]); + drop_list.push(dep_table.to_string()); } } + drop_list.push(table.to_string()); } drop_list }; @@ -1009,7 +1019,9 @@ impl Valve { if self.interactive { let auto_drops = drop_list .iter() - .filter(|t| !tables.contains(t) && !block_on(self.table_exists(t)).unwrap()) + .filter(|t| { + !tables.contains(&t.as_str()) && !block_on(self.table_exists(t)).unwrap() + }) .collect::>(); if auto_drops.len() > 0 { // TODO: prompt the user to confirm whether she wants to automatically drop @@ -1037,7 +1049,7 @@ impl Valve { pub async fn truncate_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError - self.truncate_tables(self.get_tables_ordered_for_deletion()) + self.truncate_tables(&self.get_tables_ordered_for_deletion()) .await?; Ok(self) } @@ -1045,23 +1057,21 @@ impl Valve { /// Given a vector of table names, /// truncate those tables, in the given order. /// Return an error on invalid table name or database problem. - pub async fn truncate_tables(&self, tables: Vec<&str>) -> Result<&Self, sqlx::Error> { + pub async fn truncate_tables(&self, tables: &Vec<&str>) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError self.create_all_tables().await?; - // TODO: Re-think how this is done. 
let truncate_list = { let mut truncate_list = vec![]; - let truncate_order = self.get_tables_ordered_for_deletion(); - for table in &tables { - let idx = truncate_order.iter().position(|s| s == table).unwrap(); - for i in 0..idx + 1 { - let dep_table = truncate_order[i]; + for table in tables { + let dependent_tables = self.get_dependent_tables(table); + for dep_table in dependent_tables { if !truncate_list.contains(&dep_table) { - truncate_list.push(truncate_order[i]); + truncate_list.push(dep_table.to_string()); } } + truncate_list.push(table.to_string()); } truncate_list }; @@ -1069,7 +1079,9 @@ impl Valve { if self.interactive { let auto_truncates = truncate_list .iter() - .filter(|t| !tables.contains(t) && !block_on(self.table_exists(t)).unwrap()) + .filter(|t| { + !tables.contains(&t.as_str()) && !block_on(self.table_exists(t)).unwrap() + }) .collect::>(); if auto_truncates.len() > 0 { // TODO: prompt the user to confirm whether she wants to automatically truncate @@ -1110,9 +1122,9 @@ impl Valve { let table_list = self.get_tables_ordered_for_creation(); if self.verbose { - valve_log!("Processing {} tables.", table_list.len()); + info!("Processing {} tables.", table_list.len()); } - self.load_tables(table_list, true).await + self.load_tables(&table_list, true).await } /// Given a vector of table names, @@ -1126,14 +1138,14 @@ impl Valve { /// progress messages to stderr during load. 
pub async fn load_tables( &self, - table_list: Vec<&str>, + table_list: &Vec<&str>, _validate: bool, ) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError let mut list_for_deletion = table_list.clone(); list_for_deletion.reverse(); - self.truncate_tables(list_for_deletion).await?; + self.truncate_tables(&list_for_deletion).await?; let num_tables = table_list.len(); let mut total_errors = 0; @@ -1141,7 +1153,7 @@ impl Valve { let mut total_infos = 0; let mut table_num = 1; for table_name in table_list { - if table_name == "message" || table_name == "history" { + if *table_name == "message" || *table_name == "history" { continue; } let table_name = table_name.to_string(); @@ -1157,7 +1169,7 @@ impl Valve { let mut rdr = { match File::open(path.clone()) { Err(e) => { - valve_log!("WARN: Unable to open '{}': {}", path.clone(), e); + warn!("Unable to open '{}': {}", path.clone(), e); continue; } Ok(table_file) => csv::ReaderBuilder::new() @@ -1167,7 +1179,7 @@ impl Valve { } }; if self.verbose { - valve_log!("Loading table {}/{}: {}", table_num, num_tables, table_name); + info!("Loading table {}/{}: {}", table_num, num_tables, table_name); } table_num += 1; @@ -1281,7 +1293,7 @@ impl Valve { "{} errors, {} warnings, and {} information messages generated for {}", errors, warnings, infos, table_name ); - valve_log!("{}", status_message); + info!("{}", status_message); total_errors += errors; total_warnings += warnings; total_infos += infos; @@ -1289,11 +1301,9 @@ impl Valve { } if self.verbose { - valve_log!( + info!( "Loading complete with {} errors, {} warnings, and {} information messages", - total_errors, - total_warnings, - total_infos + total_errors, total_warnings, total_infos ); } Ok(self) @@ -1548,7 +1558,7 @@ impl Valve { // DatabaseError let last_change = match self.get_record_to_undo().await? 
{ None => { - valve_log!("WARN: Nothing to undo."); + warn!("Nothing to undo."); return Ok(None); } Some(r) => r, @@ -1639,13 +1649,13 @@ impl Valve { // DatabaseError let last_undo = match self.get_record_to_redo().await? { None => { - valve_log!("WARN: Nothing to redo."); + warn!("Nothing to redo."); return Ok(None); } Some(last_undo) => { let undone_by = last_undo.try_get_raw("undone_by")?; if undone_by.is_null() { - valve_log!("WARN: Nothing to redo."); + warn!("Nothing to redo."); return Ok(None); } last_undo @@ -2262,10 +2272,10 @@ fn read_config_files( continue; } Some(p) if !Path::new(p).is_file() => { - valve_log!("WARN: File does not exist {}", p); + warn!("File does not exist {}", p); } Some(p) if Path::new(p).canonicalize().is_err() => { - valve_log!("WARN: File path could not be made canonical {}", p); + warn!("File path could not be made canonical {}", p); } Some(p) => path = Some(p.to_string()), }; @@ -2509,6 +2519,10 @@ fn read_config_files( &tables_config .keys() .cloned() + // TODO: Should we not remove message and history to the sorted table list here? If so, + // then we need to check if there is anywhere in the code where we assume that they are not + // in the list, and change it. 
One place where this is definitely assumed is in + // get_tables_ordered_for_creation() .filter(|m| m != "history" && m != "message") .collect(), &constraints_config, @@ -3232,10 +3246,9 @@ async fn get_rows_to_update( let updates_before = match query_as_if.kind { QueryAsIfKind::Add => { if let None = query_as_if.row { - valve_log!( - "WARN: No row in query_as_if: {:?} for {:?}", - query_as_if, - query_as_if.kind + warn!( + "No row in query_as_if: {:?} for {:?}", + query_as_if, query_as_if.kind ); } IndexMap::new() @@ -3268,10 +3281,9 @@ async fn get_rows_to_update( let updates_after = match &query_as_if.row { None => { if query_as_if.kind != QueryAsIfKind::Remove { - valve_log!( - "WARN: No row in query_as_if: {:?} for {:?}", - query_as_if, - query_as_if.kind + warn!( + "No row in query_as_if: {:?} for {:?}", + query_as_if, query_as_if.kind ); } IndexMap::new() @@ -3344,10 +3356,9 @@ async fn get_rows_to_update( let updates = match query_as_if.kind { QueryAsIfKind::Add => { if let None = query_as_if.row { - valve_log!( - "WARN: No row in query_as_if: {:?} for {:?}", - query_as_if, - query_as_if.kind + warn!( + "No row in query_as_if: {:?} for {:?}", + query_as_if, query_as_if.kind ); } IndexMap::new() @@ -3549,12 +3560,12 @@ fn get_json_from_row(row: &AnyRow, column: &str) -> Option { let value: &str = row.get(column); match serde_json::from_str::(value) { Err(e) => { - valve_log!("WARN: {}", e); + warn!("{}", e); None } Ok(SerdeValue::Object(value)) => Some(value), _ => { - valve_log!("WARN: {} is not an object.", value); + warn!("{} is not an object.", value); None } } @@ -4154,13 +4165,9 @@ fn read_tsv_into_vector(path: &str) -> Vec { let val = val.as_str().unwrap(); let trimmed_val = val.trim(); if trimmed_val != val { - valve_log!( - "Error: Value '{}' of column '{}' in row {} of table '{}' {}", - val, - col, - i, - path, - "has leading and/or trailing whitespace." 
+ error!( + "Value '{}' of column '{}' in row {} of table '{}' {}", + val, col, i, path, "has leading and/or trailing whitespace." ); process::exit(1); } @@ -4627,15 +4634,19 @@ fn verify_table_deps_and_sort( match get_cycles(&dependency_graph) { Ok(sorted_table_list) => { - let mut dependencies = HashMap::new(); + let mut table_dependencies = HashMap::new(); for node in dependency_graph.nodes() { let neighbors = dependency_graph - .neighbors_directed(node, petgraph::Direction::Outgoing) + .neighbors_directed(node, petgraph::Direction::Incoming) .map(|n| n.to_string()) .collect::>(); - dependencies.insert(node.to_string(), neighbors); + table_dependencies.insert(node.to_string(), neighbors); } - return (sorted_table_list, dependencies); + // Add entries for the message and history tables: + table_dependencies.insert("message".to_string(), sorted_table_list.clone()); + table_dependencies.insert("history".to_string(), sorted_table_list.clone()); + + return (sorted_table_list, table_dependencies); } Err(cycles) => { let mut message = String::new(); @@ -5083,7 +5094,7 @@ fn add_message_counts(messages: &Vec, messages_stats: &mut HashMap valve_log!("ERROR while processing row for '{}': {}", table_name, err), + Err(err) => error!( + "While processing row for '{}', got error '{}'", + table_name, err + ), Ok(row) => { let mut result_row = ResultRow { row_number: None, From 5751468bbf6ae5df1c1255a2813f9d71ed03561e Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Fri, 22 Dec 2023 12:21:20 -0500 Subject: [PATCH 40/57] add --table_order option --- src/lib.rs | 108 ++++++++++++++++++++++------------------------------ src/main.rs | 10 +++++ 2 files changed, 55 insertions(+), 63 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 6abfb11c..92008c3c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -341,10 +341,9 @@ impl Valve { Ok(()) } - /// Returns a list of tables, including the message and history tables, in the right order for - /// table creation. 
- fn get_tables_ordered_for_creation(&self) -> Vec<&str> { - // Every other table depends on the message and history table so these will go last: + /// TODO: Add docstring. + pub fn get_sorted_table_list(&self, reverse: bool) -> Vec<&str> { + // Every other table depends on the message and history table so these will go first/last: let mut sorted_tables = vec!["message", "history"]; sorted_tables.append( &mut self @@ -355,15 +354,9 @@ impl Valve { .and_then(|l| Some(l.collect::>())) .unwrap(), ); - sorted_tables - } - - /// Returns a list of tables, including the message and history tables, in the right order for - /// table deletion. - fn get_tables_ordered_for_deletion(&self) -> Vec<&str> { - // Every other table depends on the message and history table so these will go last: - let mut sorted_tables = self.get_tables_ordered_for_creation(); - sorted_tables.reverse(); + if reverse { + sorted_tables.reverse(); + } sorted_tables } @@ -919,7 +912,7 @@ impl Valve { /// TODO: Add docstring pub async fn dump_schema(&self) -> Result<(), sqlx::Error> { let setup_statements = self.get_setup_statements().await?; - for table in self.get_tables_ordered_for_creation() { + for table in self.get_sorted_table_list(false) { let table_statements = setup_statements.get(table).unwrap(); let output = String::from(table_statements.join("\n")); println!("{}\n", output); @@ -930,11 +923,13 @@ impl Valve { /// TODO: Add docstring here. 
fn get_dependent_tables(&self, table: &str) -> Vec { let mut dependent_tables = vec![]; - let direct_deps = self.table_dependencies.get(table).unwrap().to_vec(); - for direct_dep in direct_deps { - let mut indirect_deps = self.get_dependent_tables(&direct_dep); - dependent_tables.append(&mut indirect_deps); - dependent_tables.push(direct_dep); + if table != "message" && table != "history" { + let direct_deps = self.table_dependencies.get(table).unwrap().to_vec(); + for direct_dep in direct_deps { + let mut indirect_deps = self.get_dependent_tables(&direct_dep); + dependent_tables.append(&mut indirect_deps); + dependent_tables.push(direct_dep); + } } dependent_tables } @@ -946,7 +941,7 @@ impl Valve { // TODO: Add logging statements here. let setup_statements = self.get_setup_statements().await?; - let sorted_table_list = self.get_tables_ordered_for_creation(); + let sorted_table_list = self.get_sorted_table_list(false); for table in &sorted_table_list { if self.table_has_changed(*table).await? { self.drop_tables(&vec![table]).await?; @@ -986,13 +981,31 @@ impl Valve { return Ok(rows.len() > 0); } + pub fn order_tables(&self, tables: &Vec<&str>, reverse: bool) -> Vec { + let mut ordering = vec![]; + for table in tables { + let dependent_tables = self.get_dependent_tables(table); + for dep_table in dependent_tables { + // TODO: Somehow we are still getting some repetition. It is harmless (I think) + // but look into why. + if !ordering.contains(&dep_table) { + ordering.push(dep_table.to_string()); + } + } + ordering.push(table.to_string()); + } + if reverse { + ordering.reverse(); + } + ordering + } + /// Drop all configured tables, in reverse dependency order. 
pub async fn drop_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError // Drop all of the database tables in the reverse of their sorted order: - self.drop_tables(&self.get_tables_ordered_for_deletion()) - .await?; + self.drop_tables(&self.get_sorted_table_list(true)).await?; Ok(self) } @@ -1002,20 +1015,7 @@ impl Valve { pub async fn drop_tables(&self, tables: &Vec<&str>) -> Result<&Self, sqlx::Error> { // DatabaseError - let drop_list = { - let mut drop_list = vec![]; - for table in tables { - let dependent_tables = self.get_dependent_tables(table); - for dep_table in dependent_tables { - if !drop_list.contains(&dep_table) { - drop_list.push(dep_table.to_string()); - } - } - drop_list.push(table.to_string()); - } - drop_list - }; - + let drop_list = self.order_tables(tables, false); if self.interactive { let auto_drops = drop_list .iter() @@ -1049,7 +1049,7 @@ impl Valve { pub async fn truncate_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError - self.truncate_tables(&self.get_tables_ordered_for_deletion()) + self.truncate_tables(&self.get_sorted_table_list(true)) .await?; Ok(self) } @@ -1062,20 +1062,7 @@ impl Valve { self.create_all_tables().await?; - let truncate_list = { - let mut truncate_list = vec![]; - for table in tables { - let dependent_tables = self.get_dependent_tables(table); - for dep_table in dependent_tables { - if !truncate_list.contains(&dep_table) { - truncate_list.push(dep_table.to_string()); - } - } - truncate_list.push(table.to_string()); - } - truncate_list - }; - + let truncate_list = self.order_tables(tables, false); if self.interactive { let auto_truncates = truncate_list .iter() @@ -1120,7 +1107,7 @@ impl Valve { pub async fn load_all_tables(&self, _validate: bool) -> Result<&Self, sqlx::Error> { // DatabaseError - let table_list = self.get_tables_ordered_for_creation(); + let table_list = self.get_sorted_table_list(false); if self.verbose { info!("Processing {} tables.", table_list.len()); } @@ -1143,9 
+1130,9 @@ impl Valve { ) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError - let mut list_for_deletion = table_list.clone(); - list_for_deletion.reverse(); - self.truncate_tables(&list_for_deletion).await?; + let mut list_for_truncation = table_list.clone(); + list_for_truncation.reverse(); + self.truncate_tables(&list_for_truncation).await?; let num_tables = table_list.len(); let mut total_errors = 0; @@ -2519,10 +2506,9 @@ fn read_config_files( &tables_config .keys() .cloned() - // TODO: Should we not remove message and history to the sorted table list here? If so, - // then we need to check if there is anywhere in the code where we assume that they are not - // in the list, and change it. One place where this is definitely assumed is in - // get_tables_ordered_for_creation() + // We are filtering out history and message here because the fact that all of the table + // views depend on them is not reflected in the constraints configuration. Other + // functions, like, for instance, get_sorted_table_list() need to account for this. 
.filter(|m| m != "history" && m != "message") .collect(), &constraints_config, @@ -4642,10 +4628,6 @@ fn verify_table_deps_and_sort( .collect::>(); table_dependencies.insert(node.to_string(), neighbors); } - // Add entries for the message and history tables: - table_dependencies.insert("message".to_string(), sorted_table_list.clone()); - table_dependencies.insert("history".to_string(), sorted_table_list.clone()); - return (sorted_table_list, table_dependencies); } Err(cycles) => { diff --git a/src/main.rs b/src/main.rs index 91233ca6..1e0991f9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -19,6 +19,7 @@ async fn main() -> Result<(), sqlx::Error> { let mut api_test = false; let mut dump_config = false; let mut dump_schema = false; + let mut table_order = false; let mut drop_all = false; let mut create_only = false; let mut initial_load = false; @@ -62,6 +63,11 @@ async fn main() -> Result<(), sqlx::Error> { StoreTrue, r#"Write the SQL used to create the database to stdout."#, ); + ap.refer(&mut table_order).add_option( + &["--table_order"], + StoreTrue, + r#"Display the order in which tables must be created or dropped."#, + ); ap.refer(&mut drop_all).add_option( &["--drop_all"], StoreTrue, @@ -138,6 +144,10 @@ async fn main() -> Result<(), sqlx::Error> { } else if dump_schema { let valve = Valve::build(&source, &destination, verbose, initial_load, interactive).await?; valve.dump_schema().await?; + } else if table_order { + let valve = Valve::build(&source, &destination, verbose, initial_load, interactive).await?; + let dependency_order = valve.get_sorted_table_list(false); + println!("{}", dependency_order.join(", ")); } else if drop_all { let valve = Valve::build(&source, &destination, verbose, initial_load, interactive).await?; valve.drop_all_tables().await?; From 7a1b17c58dd3c5ca1fb4749d3b57ffa1551d2391 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Fri, 22 Dec 2023 21:47:20 -0500 Subject: [PATCH 41/57] add message and history to sorted_table_list --- 
src/lib.rs | 59 +++++++++++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 92008c3c..2819f9fa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -343,17 +343,13 @@ impl Valve { /// TODO: Add docstring. pub fn get_sorted_table_list(&self, reverse: bool) -> Vec<&str> { - // Every other table depends on the message and history table so these will go first/last: - let mut sorted_tables = vec!["message", "history"]; - sorted_tables.append( - &mut self - .global_config - .get("sorted_table_list") - .and_then(|l| l.as_array()) - .and_then(|l| Some(l.iter().map(|i| i.as_str().unwrap()))) - .and_then(|l| Some(l.collect::>())) - .unwrap(), - ); + let mut sorted_tables = self + .global_config + .get("sorted_table_list") + .and_then(|l| l.as_array()) + .and_then(|l| Some(l.iter().map(|i| i.as_str().unwrap()))) + .and_then(|l| Some(l.collect::>())) + .unwrap(); if reverse { sorted_tables.reverse(); } @@ -981,23 +977,28 @@ impl Valve { return Ok(rows.len() > 0); } - pub fn order_tables(&self, tables: &Vec<&str>, reverse: bool) -> Vec { - let mut ordering = vec![]; + pub fn order_tables(&self, tables: &Vec<&str>, deletion_order: bool) -> Vec { + let mut with_dups = vec![]; for table in tables { let dependent_tables = self.get_dependent_tables(table); for dep_table in dependent_tables { - // TODO: Somehow we are still getting some repetition. It is harmless (I think) - // but look into why. - if !ordering.contains(&dep_table) { - ordering.push(dep_table.to_string()); - } + with_dups.push(dep_table.to_string()); } - ordering.push(table.to_string()); + with_dups.push(table.to_string()); } - if reverse { - ordering.reverse(); + // The algorithm above gives the tables in the order needed for deletion. But we want + // this function to return the creation order by default so we reverse it unless + // the deletion_order flag is set to true. 
+ if !deletion_order { + with_dups.reverse(); + } + + // Remove the duplicates from the returned table list: + let mut tables_in_order = vec![]; + for table in with_dups.iter().unique() { + tables_in_order.push(table.to_string()); } - ordering + tables_in_order } /// Drop all configured tables, in reverse dependency order. @@ -1015,7 +1016,7 @@ impl Valve { pub async fn drop_tables(&self, tables: &Vec<&str>) -> Result<&Self, sqlx::Error> { // DatabaseError - let drop_list = self.order_tables(tables, false); + let drop_list = self.order_tables(tables, true); if self.interactive { let auto_drops = drop_list .iter() @@ -1062,7 +1063,7 @@ impl Valve { self.create_all_tables().await?; - let truncate_list = self.order_tables(tables, false); + let truncate_list = self.order_tables(tables, true); if self.interactive { let auto_truncates = truncate_list .iter() @@ -2507,8 +2508,9 @@ fn read_config_files( .keys() .cloned() // We are filtering out history and message here because the fact that all of the table - // views depend on them is not reflected in the constraints configuration. Other - // functions, like, for instance, get_sorted_table_list() need to account for this. + // views depend on them is not reflected in the constraints configuration. They will be + // taken account of within verify_table_deps_and_sort() and manually added to the sorted + // table list that is returned. 
.filter(|m| m != "history" && m != "message") .collect(), &constraints_config, @@ -4628,7 +4630,10 @@ fn verify_table_deps_and_sort( .collect::>(); table_dependencies.insert(node.to_string(), neighbors); } - return (sorted_table_list, table_dependencies); + let mut sorted_table_list = sorted_table_list.clone(); + let mut with_specials = vec!["message".to_string(), "history".to_string()]; + with_specials.append(&mut sorted_table_list); + return (with_specials, table_dependencies); } Err(cycles) => { let mut message = String::new(); From 10638b97b2387c1e270014d45a7018140c248612 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Wed, 3 Jan 2024 16:08:26 -0500 Subject: [PATCH 42/57] add --show_deps_in and --show_deps_out options --- src/lib.rs | 182 +++++++++++++++++++++++++--------------------------- src/main.rs | 37 ++++++++++- 2 files changed, 123 insertions(+), 96 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2819f9fa..b5901272 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -186,7 +186,9 @@ pub struct Valve { /// TODO: Add docstring here. pub parsed_structure_conditions: HashMap, /// TODO: Add docstring here. - pub table_dependencies: HashMap>, + pub table_dependencies_in: HashMap>, + /// TODO: Add docstring here. + pub table_dependencies_out: HashMap>, /// TODO: Add docstring here. pub pool: AnyPool, /// TODO: Add docstring here. 
@@ -248,7 +250,8 @@ impl Valve { rules_config, constraints_config, sorted_table_list, - table_dependencies, + table_dependencies_in, + table_dependencies_out, ) = read_config_files(table_path, &parser, &pool); let mut global_config = SerdeMap::new(); @@ -295,7 +298,8 @@ impl Valve { compiled_datatype_conditions: compiled_datatype_conditions, compiled_rule_conditions: compiled_rule_conditions, parsed_structure_conditions: parsed_structure_conditions, - table_dependencies: table_dependencies, + table_dependencies_in: table_dependencies_in, + table_dependencies_out: table_dependencies_out, pool: pool, user: String::from("VALVE"), verbose: verbose, @@ -538,7 +542,7 @@ impl Valve { Ok(false) }; - let (columns_config, configured_column_order, description, table_type, path) = { + let (columns_config, configured_column_order) = { let table_config = self .global_config .get("table") @@ -570,32 +574,8 @@ impl Valve { ); configured_column_order }; - let description = table_config - .get("description") - .and_then(|c| c.as_str()) - .unwrap(); - let table_type = { - if table != "message" && table != "history" { - table_config.get("type").and_then(|c| c.as_str()) - } else { - None - } - }; - let path = { - if table != "message" && table != "history" { - table_config.get("path").and_then(|c| c.as_str()) - } else { - None - } - }; - ( - columns_config, - configured_column_order, - description, - table_type, - path, - ) + (columns_config, configured_column_order) }; let db_columns_in_order = { @@ -679,47 +659,6 @@ impl Valve { return Ok(true); } - // Check, for tables other than "message" and "history", whether the corresponding entries - // for 'description', 'type', and 'path' in the configuration match the contents of the - // table table: - if table != "message" && table != "history" { - for table_param in vec![ - ("description", Some(description)), - ("type", table_type), - ("path", path), - ] { - let column = table_param.0; - let is_clause = if self.pool.any_kind() == 
AnyKind::Sqlite { - "IS" - } else { - "IS NOT DISTINCT FROM" - }; - let eq_value = match table_param.1 { - Some(value) => format!("= '{}'", value), - None => format!("{} NULL", is_clause), - }; - let sql = format!( - r#"SELECT 1 from "table" WHERE "table" = '{}' AND "{}" {}"#, - table, column, eq_value, - ); - let rows = sqlx_query(&sql).fetch_all(&self.pool).await?; - if rows.len() == 0 { - if self.verbose { - info!( - "The table '{table}' will be recreated because the entries in the \ - table table for '{table}' have changed.", - table = table - ); - } - return Ok(true); - } else if rows.len() > 1 { - if self.verbose { - warn!("More than one row was returned from the query '{}'", sql); - } - } - } - } - // Check, for all tables, whether their column configuration matches the contents of the // database: for (cname, ctype, pk) in &db_columns_in_order { @@ -916,20 +855,6 @@ impl Valve { Ok(()) } - /// TODO: Add docstring here. - fn get_dependent_tables(&self, table: &str) -> Vec { - let mut dependent_tables = vec![]; - if table != "message" && table != "history" { - let direct_deps = self.table_dependencies.get(table).unwrap().to_vec(); - for direct_dep in direct_deps { - let mut indirect_deps = self.get_dependent_tables(&direct_dep); - dependent_tables.append(&mut indirect_deps); - dependent_tables.push(direct_dep); - } - } - dependent_tables - } - /// Create all configured database tables and views if they do not already exist as configured. pub async fn create_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError @@ -977,10 +902,31 @@ impl Valve { return Ok(rows.len() > 0); } - pub fn order_tables(&self, tables: &Vec<&str>, deletion_order: bool) -> Vec { + /// TODO: Add docstring here. 
+ fn get_dependencies(&self, table: &str, incoming: bool) -> Vec { + let mut dependent_tables = vec![]; + if table != "message" && table != "history" { + let direct_deps = { + if incoming { + self.table_dependencies_in.get(table).unwrap().to_vec() + } else { + self.table_dependencies_out.get(table).unwrap().to_vec() + } + }; + for direct_dep in direct_deps { + let mut indirect_deps = self.get_dependencies(&direct_dep, incoming); + dependent_tables.append(&mut indirect_deps); + dependent_tables.push(direct_dep); + } + } + dependent_tables + } + + /// TODO: Add docstring here. + fn add_dependencies(&self, tables: &Vec<&str>, deletion_order: bool) -> Vec { let mut with_dups = vec![]; for table in tables { - let dependent_tables = self.get_dependent_tables(table); + let dependent_tables = self.get_dependencies(table, true); for dep_table in dependent_tables { with_dups.push(dep_table.to_string()); } @@ -1001,6 +947,40 @@ impl Valve { tables_in_order } + /// TODO: Add docstring here. + fn _order_tables(&self, tables: &Vec<&str>, reverse: bool) -> Vec { + let constraints_config = self + .global_config + .get("constraints") + .and_then(|c| c.as_object()) + .unwrap(); + + // Filter out message and history since they are not represented in the constraints config. + // They will be added implicitly to the list returned by verify_table_deps_and_sort. + let tables = tables + .iter() + .filter(|m| **m != "history" && **m != "message") + .map(|s| s.to_string()) + .collect::>(); + + let (mut sorted_table_list, _, _) = + verify_table_deps_and_sort(&tables, &constraints_config); + if reverse { + sorted_table_list.reverse(); + } + sorted_table_list + } + + /// TODO: Add docstring here. 
+ pub fn collect_dependencies(&self, incoming: bool) -> IndexMap> { + let tables = self.get_sorted_table_list(false); + let mut dependencies = IndexMap::new(); + for table in tables { + dependencies.insert(table.to_string(), self.get_dependencies(table, incoming)); + } + dependencies + } + /// Drop all configured tables, in reverse dependency order. pub async fn drop_all_tables(&self) -> Result<&Self, sqlx::Error> { // DatabaseError @@ -1016,7 +996,7 @@ impl Valve { pub async fn drop_tables(&self, tables: &Vec<&str>) -> Result<&Self, sqlx::Error> { // DatabaseError - let drop_list = self.order_tables(tables, true); + let drop_list = self.add_dependencies(tables, true); if self.interactive { let auto_drops = drop_list .iter() @@ -1063,7 +1043,7 @@ impl Valve { self.create_all_tables().await?; - let truncate_list = self.order_tables(tables, true); + let truncate_list = self.add_dependencies(tables, true); if self.interactive { let auto_truncates = truncate_list .iter() @@ -1962,6 +1942,7 @@ fn read_config_files( SerdeMap, Vec, HashMap>, + HashMap>, ) { let special_table_types = json!({ "table": {"required": true}, @@ -2503,7 +2484,7 @@ fn read_config_files( // Sort the tables (aside from the message and history tables) according to their foreign key // dependencies so that tables are always loaded after the tables they depend on. 
- let (sorted_tables, table_dependencies) = verify_table_deps_and_sort( + let (sorted_tables, table_dependencies_in, table_dependencies_out) = verify_table_deps_and_sort( &tables_config .keys() .cloned() @@ -2524,7 +2505,8 @@ fn read_config_files( rules_config, constraints_config, sorted_tables, - table_dependencies, + table_dependencies_in, + table_dependencies_out, ) } @@ -4493,7 +4475,11 @@ fn local_sql_syntax(pool: &AnyPool, sql: &String) -> String { fn verify_table_deps_and_sort( table_list: &Vec, constraints: &SerdeMap, -) -> (Vec, HashMap>) { +) -> ( + Vec, + HashMap>, + HashMap>, +) { fn get_cycles(g: &DiGraphMap<&str, ()>) -> Result, Vec>> { let mut cycles = vec![]; match toposort(&g, None) { @@ -4622,18 +4608,26 @@ fn verify_table_deps_and_sort( match get_cycles(&dependency_graph) { Ok(sorted_table_list) => { - let mut table_dependencies = HashMap::new(); + let mut table_dependencies_in = HashMap::new(); for node in dependency_graph.nodes() { let neighbors = dependency_graph .neighbors_directed(node, petgraph::Direction::Incoming) .map(|n| n.to_string()) .collect::>(); - table_dependencies.insert(node.to_string(), neighbors); + table_dependencies_in.insert(node.to_string(), neighbors); + } + let mut table_dependencies_out = HashMap::new(); + for node in dependency_graph.nodes() { + let neighbors = dependency_graph + .neighbors_directed(node, petgraph::Direction::Outgoing) + .map(|n| n.to_string()) + .collect::>(); + table_dependencies_out.insert(node.to_string(), neighbors); } let mut sorted_table_list = sorted_table_list.clone(); let mut with_specials = vec!["message".to_string(), "history".to_string()]; with_specials.append(&mut sorted_table_list); - return (with_specials, table_dependencies); + return (with_specials, table_dependencies_in, table_dependencies_out); } Err(cycles) => { let mut message = String::new(); diff --git a/src/main.rs b/src/main.rs index 1e0991f9..06b4c4df 100644 --- a/src/main.rs +++ b/src/main.rs @@ -20,6 +20,8 @@ async fn 
main() -> Result<(), sqlx::Error> { let mut dump_config = false; let mut dump_schema = false; let mut table_order = false; + let mut show_deps_in = false; + let mut show_deps_out = false; let mut drop_all = false; let mut create_only = false; let mut initial_load = false; @@ -68,6 +70,16 @@ async fn main() -> Result<(), sqlx::Error> { StoreTrue, r#"Display the order in which tables must be created or dropped."#, ); + ap.refer(&mut show_deps_in).add_option( + &["--show_deps_in"], + StoreTrue, + r#"Display the incoming dependencies for each configured table."#, + ); + ap.refer(&mut show_deps_out).add_option( + &["--show_deps_out"], + StoreTrue, + r#"Display the outgoing dependencies for each configured table."#, + ); ap.refer(&mut drop_all).add_option( &["--drop_all"], StoreTrue, @@ -146,8 +158,29 @@ async fn main() -> Result<(), sqlx::Error> { valve.dump_schema().await?; } else if table_order { let valve = Valve::build(&source, &destination, verbose, initial_load, interactive).await?; - let dependency_order = valve.get_sorted_table_list(false); - println!("{}", dependency_order.join(", ")); + let sorted_table_list = valve.get_sorted_table_list(false); + println!("{}", sorted_table_list.join(", ")); + } else if show_deps_in || show_deps_out { + let valve = Valve::build(&source, &destination, verbose, initial_load, interactive).await?; + let dependencies = valve.collect_dependencies(show_deps_in); + for (table, deps) in dependencies.iter() { + let deps = { + let deps = deps.iter().map(|s| format!("'{}'", s)).collect::>(); + if deps.is_empty() { + "None".to_string() + } else { + deps.join(", ") + } + }; + let preamble = { + if show_deps_in { + format!("Tables that depend on '{}'", table) + } else { + format!("Table '{}' depends on", table) + } + }; + println!("{}: {}", preamble, deps); + } } else if drop_all { let valve = Valve::build(&source, &destination, verbose, initial_load, interactive).await?; valve.drop_all_tables().await?; From 
bfbeadff2d8e48369026b2dbbde060979d1dc56f Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Thu, 4 Jan 2024 09:42:18 -0500 Subject: [PATCH 43/57] implement sort_tables() to sort a given subset of the configured tables --- src/lib.rs | 54 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b5901272..d8777139 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -948,27 +948,48 @@ impl Valve { } /// TODO: Add docstring here. - fn _order_tables(&self, tables: &Vec<&str>, reverse: bool) -> Vec { + fn sort_tables(&self, table_subset: &Vec<&str>, reverse: bool) -> Result, String> { + let full_table_list = self.get_sorted_table_list(false); + if !table_subset + .iter() + .all(|item| full_table_list.contains(item)) + { + return Err(format!( + "[{}] contains tables that are not in the configured table list: [{}]", + table_subset.join(", "), + full_table_list.join(", ") + )); + } + let constraints_config = self .global_config .get("constraints") .and_then(|c| c.as_object()) - .unwrap(); + .ok_or("Unable to retrieve configured constraints.")?; // Filter out message and history since they are not represented in the constraints config. // They will be added implicitly to the list returned by verify_table_deps_and_sort. 
- let tables = tables + let filtered_subset = table_subset .iter() .filter(|m| **m != "history" && **m != "message") .map(|s| s.to_string()) .collect::>(); - let (mut sorted_table_list, _, _) = - verify_table_deps_and_sort(&tables, &constraints_config); + let (sorted_subset, _, _) = + verify_table_deps_and_sort(&filtered_subset, &constraints_config); + + // Since the result of verify_table_deps_and_sort() will include dependencies of the tables + // in its input list, we filter those out here: + let mut sorted_subset = sorted_subset + .iter() + .filter(|m| table_subset.contains(&m.as_str())) + .map(|s| s.to_string()) + .collect::>(); + if reverse { - sorted_table_list.reverse(); + sorted_subset.reverse(); } - sorted_table_list + Ok(sorted_subset) } /// TODO: Add docstring here. @@ -1085,14 +1106,14 @@ impl Valve { /// If `validate` is false, just try to insert all rows. /// Return an error on database problem, /// including database conflicts that prevent rows being inserted. - pub async fn load_all_tables(&self, _validate: bool) -> Result<&Self, sqlx::Error> { + pub async fn load_all_tables(&self, validate: bool) -> Result<&Self, sqlx::Error> { // DatabaseError let table_list = self.get_sorted_table_list(false); if self.verbose { info!("Processing {} tables.", table_list.len()); } - self.load_tables(&table_list, true).await + self.load_tables(&table_list, validate).await } /// Given a vector of table names, @@ -1107,13 +1128,20 @@ impl Valve { pub async fn load_tables( &self, table_list: &Vec<&str>, - _validate: bool, + validate: bool, ) -> Result<&Self, sqlx::Error> { // ConfigOrDatabaseError - let mut list_for_truncation = table_list.clone(); - list_for_truncation.reverse(); - self.truncate_tables(&list_for_truncation).await?; + let list_for_truncation = self + .sort_tables(table_list, true) + .map_err(|e| SqlxCErr(e.into()))?; + self.truncate_tables( + &list_for_truncation + .iter() + .map(|i| i.as_str()) + .collect::>(), + ) + .await?; let num_tables = 
table_list.len(); let mut total_errors = 0; From 8fd68ee6cb55a98912a27a3de9df2dc8de3b6169 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Thu, 4 Jan 2024 11:50:04 -0500 Subject: [PATCH 44/57] rename Valve.global_config to simply config --- src/lib.rs | 109 +++++++++++++++++++++------------------------------- src/main.rs | 2 +- 2 files changed, 44 insertions(+), 67 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d8777139..977829b0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -178,7 +178,7 @@ impl std::fmt::Debug for ColumnRule { #[derive(Debug)] pub struct Valve { /// TODO: Add docstring here. - pub global_config: SerdeMap, + pub config: SerdeMap, /// TODO: Add docstring here. pub compiled_datatype_conditions: HashMap, /// TODO: Add docstring here. @@ -254,24 +254,24 @@ impl Valve { table_dependencies_out, ) = read_config_files(table_path, &parser, &pool); - let mut global_config = SerdeMap::new(); - global_config.insert( + let mut config = SerdeMap::new(); + config.insert( String::from("special"), SerdeValue::Object(specials_config.clone()), ); - global_config.insert( + config.insert( String::from("table"), SerdeValue::Object(tables_config.clone()), ); - global_config.insert( + config.insert( String::from("datatype"), SerdeValue::Object(datatypes_config.clone()), ); - global_config.insert( + config.insert( String::from("rule"), SerdeValue::Object(rules_config.clone()), ); - global_config.insert( + config.insert( String::from("constraints"), SerdeValue::Object(constraints_config.clone()), ); @@ -279,22 +279,18 @@ impl Valve { for table in &sorted_table_list { sorted_table_serdevalue_list.push(SerdeValue::String(table.to_string())); } - global_config.insert( + config.insert( String::from("sorted_table_list"), SerdeValue::Array(sorted_table_serdevalue_list), ); - let compiled_datatype_conditions = - get_compiled_datatype_conditions(&global_config, &parser); - let compiled_rule_conditions = get_compiled_rule_conditions( - &global_config, - 
compiled_datatype_conditions.clone(), - &parser, - ); - let parsed_structure_conditions = get_parsed_structure_conditions(&global_config, &parser); + let compiled_datatype_conditions = get_compiled_datatype_conditions(&config, &parser); + let compiled_rule_conditions = + get_compiled_rule_conditions(&config, compiled_datatype_conditions.clone(), &parser); + let parsed_structure_conditions = get_parsed_structure_conditions(&config, &parser); Ok(Self { - global_config: global_config, + config: config, compiled_datatype_conditions: compiled_datatype_conditions, compiled_rule_conditions: compiled_rule_conditions, parsed_structure_conditions: parsed_structure_conditions, @@ -348,7 +344,7 @@ impl Valve { /// TODO: Add docstring. pub fn get_sorted_table_list(&self, reverse: bool) -> Vec<&str> { let mut sorted_tables = self - .global_config + .config .get("sorted_table_list") .and_then(|l| l.as_array()) .and_then(|l| Some(l.iter().map(|i| i.as_str().unwrap()))) @@ -459,7 +455,7 @@ impl Valve { return Ok(true); } else { let trees = - self.global_config + self.config .get("constraints") .and_then(|c| c.as_object()) .and_then(|o| o.get("tree")) @@ -544,7 +540,7 @@ impl Valve { let (columns_config, configured_column_order) = { let table_config = self - .global_config + .config .get("table") .and_then(|tc| tc.get(table)) .and_then(|t| t.as_object()) @@ -677,8 +673,7 @@ impl Valve { .and_then(|c| c.as_object()) .unwrap(); let sql_type = - get_sql_type_from_global_config(&self.global_config, table, &cname, &self.pool) - .unwrap(); + get_sql_type_from_global_config(&self.config, table, &cname, &self.pool).unwrap(); // Check the column's SQL type: if sql_type.to_lowercase() != ctype.to_lowercase() { @@ -733,13 +728,13 @@ impl Valve { /// TODO: Add docstring here async fn get_setup_statements(&self) -> Result>, sqlx::Error> { let tables_config = self - .global_config + .config .get("table") .and_then(|t| t.as_object()) .unwrap() .clone(); let datatypes_config = self - .global_config 
+ .config .get("datatype") .and_then(|d| d.as_object()) .unwrap() @@ -962,7 +957,7 @@ impl Valve { } let constraints_config = self - .global_config + .config .get("constraints") .and_then(|c| c.as_object()) .ok_or("Unable to retrieve configured constraints.")?; @@ -1154,7 +1149,7 @@ impl Valve { } let table_name = table_name.to_string(); let path = String::from( - self.global_config + self.config .get("table") .and_then(|t| t.as_object()) .and_then(|o| o.get(&table_name)) @@ -1208,7 +1203,7 @@ impl Valve { // logic: let chunks = records.chunks(CHUNK_SIZE); validate_and_insert_chunks( - &self.global_config, + &self.config, &self.pool, &self.compiled_datatype_conditions, &self.compiled_rule_conditions, @@ -1225,17 +1220,11 @@ impl Valve { // (the tree's parent) are all contained in another column (the tree's child): // We also need to wait before validating a table's "under" constraints. Although the tree // associated with such a constraint need not be defined on the same table, it can be. 
- let mut recs_to_update = validate_tree_foreign_keys( - &self.global_config, - &self.pool, - None, - &table_name, - None, - ) - .await?; + let mut recs_to_update = + validate_tree_foreign_keys(&self.config, &self.pool, None, &table_name, None) + .await?; recs_to_update.append( - &mut validate_under(&self.global_config, &self.pool, None, &table_name, None) - .await?, + &mut validate_under(&self.config, &self.pool, None, &table_name, None).await?, ); for record in recs_to_update { @@ -1333,7 +1322,7 @@ impl Valve { // DatabaseError validate_row_tx( - &self.global_config, + &self.config, &self.compiled_datatype_conditions, &self.compiled_rule_conditions, &self.pool, @@ -1365,7 +1354,7 @@ impl Valve { let mut tx = self.pool.begin().await?; let row = validate_row_tx( - &self.global_config, + &self.config, &self.compiled_datatype_conditions, &self.compiled_rule_conditions, &self.pool, @@ -1378,7 +1367,7 @@ impl Valve { .await?; let rn = insert_new_row_tx( - &self.global_config, + &self.config, &self.compiled_datatype_conditions, &self.compiled_rule_conditions, &self.pool, @@ -1411,17 +1400,11 @@ impl Valve { // Get the old version of the row from the database so that we can later record it to the // history table: - let old_row = get_row_from_db( - &self.global_config, - &self.pool, - &mut tx, - table_name, - &row_number, - ) - .await?; + let old_row = + get_row_from_db(&self.config, &self.pool, &mut tx, table_name, &row_number).await?; let row = validate_row_tx( - &self.global_config, + &self.config, &self.compiled_datatype_conditions, &self.compiled_rule_conditions, &self.pool, @@ -1434,7 +1417,7 @@ impl Valve { .await?; update_row_tx( - &self.global_config, + &self.config, &self.compiled_datatype_conditions, &self.compiled_rule_conditions, &self.pool, @@ -1469,14 +1452,8 @@ impl Valve { // ConfigOrDatabaseError let mut tx = self.pool.begin().await?; - let row = get_row_from_db( - &self.global_config, - &self.pool, - &mut tx, - &table_name, - row_number, - ) - 
.await?; + let row = + get_row_from_db(&self.config, &self.pool, &mut tx, &table_name, row_number).await?; record_row_change( &mut tx, @@ -1489,7 +1466,7 @@ impl Valve { .await?; delete_row_tx( - &self.global_config, + &self.config, &self.compiled_datatype_conditions, &self.compiled_rule_conditions, &self.pool, @@ -1578,7 +1555,7 @@ impl Valve { let mut tx = self.pool.begin().await?; delete_row_tx( - &self.global_config, + &self.config, &self.compiled_datatype_conditions, &self.compiled_rule_conditions, &self.pool, @@ -1597,7 +1574,7 @@ impl Valve { let mut tx = self.pool.begin().await?; insert_new_row_tx( - &self.global_config, + &self.config, &self.compiled_datatype_conditions, &self.compiled_rule_conditions, &self.pool, @@ -1618,7 +1595,7 @@ impl Valve { let mut tx = self.pool.begin().await?; update_row_tx( - &self.global_config, + &self.config, &self.compiled_datatype_conditions, &self.compiled_rule_conditions, &self.pool, @@ -1676,7 +1653,7 @@ impl Valve { let mut tx = self.pool.begin().await?; insert_new_row_tx( - &self.global_config, + &self.config, &self.compiled_datatype_conditions, &self.compiled_rule_conditions, &self.pool, @@ -1697,7 +1674,7 @@ impl Valve { let mut tx = self.pool.begin().await?; delete_row_tx( - &self.global_config, + &self.config, &self.compiled_datatype_conditions, &self.compiled_rule_conditions, &self.pool, @@ -1716,7 +1693,7 @@ impl Valve { let mut tx = self.pool.begin().await?; update_row_tx( - &self.global_config, + &self.config, &self.compiled_datatype_conditions, &self.compiled_rule_conditions, &self.pool, @@ -1747,7 +1724,7 @@ impl Valve { column_name: &str, matching_string: Option<&str>, ) -> Result { - let config = &self.global_config; + let config = &self.config; let compiled_datatype_conditions = &self.compiled_datatype_conditions; let parsed_structure_conditions = &self.parsed_structure_conditions; let pool = &self.pool; diff --git a/src/main.rs b/src/main.rs index 06b4c4df..3205f4b7 100644 --- a/src/main.rs +++ 
b/src/main.rs @@ -136,7 +136,7 @@ async fn main() -> Result<(), sqlx::Error> { run_api_tests(&source, &destination).await?; } else if dump_config { let valve = Valve::build(&source, &destination, verbose, initial_load, interactive).await?; - let mut config = valve.global_config.clone(); + let mut config = valve.config.clone(); let datatype_conditions = format!("{:?}", valve.compiled_datatype_conditions).replace(r"\", r"\\"); let datatype_conditions: SerdeValue = from_str(&datatype_conditions).unwrap(); From 7e6bac73134beedcd31bd50078fe55aab4754b0d Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Thu, 4 Jan 2024 14:06:37 -0500 Subject: [PATCH 45/57] optionally load tables without validating --- src/lib.rs | 203 +++++++++++++++++++++++++++--------------------- src/validate.rs | 37 +++++---- 2 files changed, 135 insertions(+), 105 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 977829b0..4c0cf847 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,6 +31,7 @@ use crate::{ast::Expression, valve_grammar::StartParser}; use async_recursion::async_recursion; use chrono::Utc; use crossbeam; +use csv::{ReaderBuilder, StringRecord, StringRecordsIter}; use enquote::unquote; use futures::executor::block_on; use indexmap::IndexMap; @@ -1163,7 +1164,7 @@ impl Valve { warn!("Unable to open '{}': {}", path.clone(), e); continue; } - Ok(table_file) => csv::ReaderBuilder::new() + Ok(table_file) => ReaderBuilder::new() .has_headers(false) .delimiter(b'\t') .from_reader(table_file), @@ -1202,7 +1203,7 @@ impl Valve { // Split the data into chunks of size CHUNK_SIZE before passing them to the validation // logic: let chunks = records.chunks(CHUNK_SIZE); - validate_and_insert_chunks( + insert_chunks( &self.config, &self.pool, &self.compiled_datatype_conditions, @@ -1212,60 +1213,64 @@ impl Valve { &headers, &mut messages_stats, self.verbose, + validate, ) .await?; - // We need to wait until all of the rows for a table have been loaded before validating the - // "foreign" 
constraints on a table's trees, since this checks if the values of one column - // (the tree's parent) are all contained in another column (the tree's child): - // We also need to wait before validating a table's "under" constraints. Although the tree - // associated with such a constraint need not be defined on the same table, it can be. - let mut recs_to_update = - validate_tree_foreign_keys(&self.config, &self.pool, None, &table_name, None) - .await?; - recs_to_update.append( - &mut validate_under(&self.config, &self.pool, None, &table_name, None).await?, - ); - - for record in recs_to_update { - let row_number = record.get("row_number").unwrap(); - let column_name = record.get("column").and_then(|s| s.as_str()).unwrap(); - let value = record.get("value").and_then(|s| s.as_str()).unwrap(); - let level = record.get("level").and_then(|s| s.as_str()).unwrap(); - let rule = record.get("rule").and_then(|s| s.as_str()).unwrap(); - let message = record.get("message").and_then(|s| s.as_str()).unwrap(); + if validate { + // We need to wait until all of the rows for a table have been loaded before + // validating the "foreign" constraints on a table's trees, since this checks if the + // values of one column (the tree's parent) are all contained in another column (the + // tree's child). We also need to wait before validating a table's "under" + // constraints. Although the tree associated with such a constraint need not be + // defined on the same table, it can be. 
+ let mut recs_to_update = + validate_tree_foreign_keys(&self.config, &self.pool, None, &table_name, None) + .await?; + recs_to_update.append( + &mut validate_under(&self.config, &self.pool, None, &table_name, None).await?, + ); - let sql = local_sql_syntax( - &self.pool, - &format!( - r#"INSERT INTO "message" + for record in recs_to_update { + let row_number = record.get("row_number").unwrap(); + let column_name = record.get("column").and_then(|s| s.as_str()).unwrap(); + let value = record.get("value").and_then(|s| s.as_str()).unwrap(); + let level = record.get("level").and_then(|s| s.as_str()).unwrap(); + let rule = record.get("rule").and_then(|s| s.as_str()).unwrap(); + let message = record.get("message").and_then(|s| s.as_str()).unwrap(); + + let sql = local_sql_syntax( + &self.pool, + &format!( + r#"INSERT INTO "message" ("table", "row", "column", "value", "level", "rule", "message") VALUES ({}, {}, {}, {}, {}, {}, {})"#, - SQL_PARAM, - row_number, - SQL_PARAM, - SQL_PARAM, - SQL_PARAM, - SQL_PARAM, - SQL_PARAM - ), - ); - let mut query = sqlx_query(&sql); - query = query.bind(&table_name); - query = query.bind(&column_name); - query = query.bind(&value); - query = query.bind(&level); - query = query.bind(&rule); - query = query.bind(&message); - query.execute(&self.pool).await?; - - if self.verbose { - // Add the generated message to messages_stats: - let messages = vec![json!({ - "message": message, - "level": level, - })]; - add_message_counts(&messages, &mut messages_stats); + SQL_PARAM, + row_number, + SQL_PARAM, + SQL_PARAM, + SQL_PARAM, + SQL_PARAM, + SQL_PARAM + ), + ); + let mut query = sqlx_query(&sql); + query = query.bind(&table_name); + query = query.bind(&column_name); + query = query.bind(&value); + query = query.bind(&level); + query = query.bind(&rule); + query = query.bind(&message); + query.execute(&self.pool).await?; + + if self.verbose { + // Add the generated message to messages_stats: + let messages = vec![json!({ + "message": message, + 
"level": level, + })]; + add_message_counts(&messages, &mut messages_stats); + } } } @@ -2271,7 +2276,7 @@ fn read_config_files( // Get the actual columns from the data itself. Note that we set has_headers to // false(even though the files have header rows) in order to explicitly read the // header row. - let mut rdr = csv::ReaderBuilder::new() + let mut rdr = ReaderBuilder::new() .has_headers(false) .delimiter(b'\t') .from_reader(File::open(path.clone()).unwrap_or_else(|err| { @@ -4115,7 +4120,7 @@ async fn update_row_tx( /// configuration tables. fn read_tsv_into_vector(path: &str) -> Vec { let mut rdr = - csv::ReaderBuilder::new() + ReaderBuilder::new() .delimiter(b'\t') .from_reader(File::open(path).unwrap_or_else(|err| { panic!("Unable to open '{}': {}", path, err); @@ -5400,7 +5405,7 @@ async fn make_inserts( /// and the chunk number corresponding to the rows, do inter-row validation on the rows and insert /// them to the table. If the verbose flag is set to true, error/warning/info stats will be /// collected in messages_stats and later written to stderr. -async fn validate_rows_inter_and_insert( +async fn insert_chunk( config: &SerdeMap, pool: &AnyPool, table_name: &String, @@ -5408,9 +5413,12 @@ async fn validate_rows_inter_and_insert( chunk_number: usize, messages_stats: &mut HashMap, verbose: bool, + validate: bool, ) -> Result<(), sqlx::Error> { // First, do the tree validation: - validate_rows_trees(config, pool, table_name, rows).await?; + if validate { + validate_rows_trees(config, pool, table_name, rows).await?; + } // Try to insert the rows to the db first without validating unique and foreign constraints. 
// If there are constraint violations this will cause a database error, in which case we then @@ -5477,10 +5485,17 @@ async fn validate_rows_inter_and_insert( ); } } - Err(_) => { - validate_rows_constraints(config, pool, table_name, rows).await?; - let (main_sql, main_params, conflict_sql, conflict_params, message_sql, message_params) = - make_inserts( + Err(e) => { + if validate { + validate_rows_constraints(config, pool, table_name, rows).await?; + let ( + main_sql, + main_params, + conflict_sql, + conflict_params, + message_sql, + message_params, + ) = make_inserts( config, table_name, rows, @@ -5491,26 +5506,29 @@ async fn validate_rows_inter_and_insert( ) .await?; - let main_sql = local_sql_syntax(&pool, &main_sql); - let mut main_query = sqlx_query(&main_sql); - for param in &main_params { - main_query = main_query.bind(param); - } - main_query.execute(pool).await?; + let main_sql = local_sql_syntax(&pool, &main_sql); + let mut main_query = sqlx_query(&main_sql); + for param in &main_params { + main_query = main_query.bind(param); + } + main_query.execute(pool).await?; - let conflict_sql = local_sql_syntax(&pool, &conflict_sql); - let mut conflict_query = sqlx_query(&conflict_sql); - for param in &conflict_params { - conflict_query = conflict_query.bind(param); - } - conflict_query.execute(pool).await?; + let conflict_sql = local_sql_syntax(&pool, &conflict_sql); + let mut conflict_query = sqlx_query(&conflict_sql); + for param in &conflict_params { + conflict_query = conflict_query.bind(param); + } + conflict_query.execute(pool).await?; - let message_sql = local_sql_syntax(&pool, &message_sql); - let mut message_query = sqlx_query(&message_sql); - for param in &message_params { - message_query = message_query.bind(param); + let message_sql = local_sql_syntax(&pool, &message_sql); + let mut message_query = sqlx_query(&message_sql); + for param in &message_params { + message_query = message_query.bind(param); + } + message_query.execute(pool).await?; + } else 
{ + return Err(e); } - message_query.execute(pool).await?; } }; @@ -5522,29 +5540,34 @@ async fn validate_rows_inter_and_insert( /// and the headers of the rows to be inserted, validate each chunk and insert the validated rows /// to the table. If the verbose flag is set to true, error/warning/info stats will be collected in /// messages_stats and later written to stderr. -async fn validate_and_insert_chunks( +async fn insert_chunks( config: &SerdeMap, pool: &AnyPool, compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, table_name: &String, - chunks: &IntoChunks>, - headers: &csv::StringRecord, + chunks: &IntoChunks>, + headers: &StringRecord, messages_stats: &mut HashMap, verbose: bool, + validate: bool, ) -> Result<(), sqlx::Error> { if !MULTI_THREADED { for (chunk_number, chunk) in chunks.into_iter().enumerate() { let mut rows: Vec<_> = chunk.collect(); - let mut intra_validated_rows = validate_rows_intra( - config, - compiled_datatype_conditions, - compiled_rule_conditions, - table_name, - headers, - &mut rows, - ); - validate_rows_inter_and_insert( + let mut intra_validated_rows = { + let only_nulltype = !validate; + validate_rows_intra( + config, + compiled_datatype_conditions, + compiled_rule_conditions, + table_name, + headers, + &mut rows, + only_nulltype, + ) + }; + insert_chunk( config, pool, table_name, @@ -5552,6 +5575,7 @@ async fn validate_and_insert_chunks( chunk_number, messages_stats, verbose, + validate, ) .await?; } @@ -5576,6 +5600,7 @@ async fn validate_and_insert_chunks( for chunk in batch.into_iter() { let mut rows: Vec<_> = chunk.collect(); workers.push(scope.spawn(move |_| { + let only_nulltype = !validate; validate_rows_intra( config, compiled_datatype_conditions, @@ -5583,6 +5608,7 @@ async fn validate_and_insert_chunks( table_name, headers, &mut rows, + only_nulltype, ) })); } @@ -5596,7 +5622,7 @@ async fn validate_and_insert_chunks( .expect("A child thread panicked"); for (chunk_number, mut 
intra_validated_rows) in results { - validate_rows_inter_and_insert( + insert_chunk( config, pool, table_name, @@ -5604,6 +5630,7 @@ async fn validate_and_insert_chunks( chunk_number, messages_stats, verbose, + validate, ) .await?; } diff --git a/src/validate.rs b/src/validate.rs index 0e794a6b..ae0552a8 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -670,8 +670,8 @@ pub async fn validate_rows_constraints( } /// Given a config map, compiled datatype and rule conditions, a table name, the headers for the -/// table, and a number of rows to validate, validate all of the rows and return the validated -/// versions. +/// table, and a number of rows to validate, run intra-row validatation on all of the rows and +/// return the validated versions. pub fn validate_rows_intra( config: &SerdeMap, compiled_datatype_conditions: &HashMap, @@ -679,6 +679,7 @@ pub fn validate_rows_intra( table_name: &String, headers: &csv::StringRecord, rows: &Vec>, + only_nulltype: bool, ) -> Vec { let mut result_rows = vec![]; for row in rows { @@ -726,26 +727,28 @@ pub fn validate_rows_intra( ); } - for column_name in &column_names { - let context = result_row.clone(); - let cell = result_row.contents.get_mut(column_name).unwrap(); - validate_cell_rules( - config, - compiled_rule_conditions, - table_name, - &column_name, - &context, - cell, - ); - - if cell.nulltype == None { - validate_cell_datatype( + if !only_nulltype { + for column_name in &column_names { + let context = result_row.clone(); + let cell = result_row.contents.get_mut(column_name).unwrap(); + validate_cell_rules( config, - compiled_datatype_conditions, + compiled_rule_conditions, table_name, &column_name, + &context, cell, ); + + if cell.nulltype == None { + validate_cell_datatype( + config, + compiled_datatype_conditions, + table_name, + &column_name, + cell, + ); + } } } result_rows.push(result_row); From c8df0c9ce0a5e545b2f10e2a0fb6c6a9f0603c5a Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Fri, 5 Jan 2024 
09:28:49 -0500 Subject: [PATCH 46/57] remove the distinction between 'SQLite type' and 'PostgreSQL type' --- scripts/guess.py | 12 +++--- src/lib.rs | 46 ++++++----------------- test/guess_test_data/column.tsv | 3 +- test/guess_test_data/column_expected.tsv | 3 +- test/guess_test_data/datatype.tsv | 44 +++++++++++----------- test/perf_test_data/column.tsv | 3 +- test/perf_test_data/datatype.tsv | 44 +++++++++++----------- test/random_test_data/column.tsv | 3 +- test/random_test_data/datatype.tsv | 44 +++++++++++----------- test/src/column.tsv | 3 +- test/src/datatype.tsv | 48 ++++++++++++------------ 11 files changed, 112 insertions(+), 141 deletions(-) diff --git a/scripts/guess.py b/scripts/guess.py index 0f9ab864..b5eda161 100755 --- a/scripts/guess.py +++ b/scripts/guess.py @@ -155,14 +155,14 @@ def get_higher_datatypes(datatype_hierarchies, universals, depth): def get_sql_type(config, datatype): """Given the config map and the name of a datatype, climb the datatype tree (as required), - and return the first 'SQLite type' found.""" + and return the first 'SQL type' found.""" if "datatype" not in config: print("Missing datatypes in config") sys.exit(1) if datatype not in config["datatype"]: return None - if config["datatype"][datatype].get("SQLite type"): - return config["datatype"][datatype]["SQLite type"] + if config["datatype"][datatype].get("SQL type"): + return config["datatype"][datatype]["SQL type"] return get_sql_type(config, config["datatype"][datatype].get("parent")) @@ -260,9 +260,9 @@ def is_match(datatype): # If the datatype has no associated condition then it matches anything: if not datatype.get("condition"): return True - # If the SQLite type is NULL this datatype is ruled out: - sqlite_type = datatype.get("SQLite type") - if sqlite_type and sqlite_type.casefold() == "null": + # If the SQL type is NULL this datatype is ruled out: + sql_type = datatype.get("SQL type") + if sql_type and sql_type.casefold() == "null": return False condition = 
get_compiled_condition(datatype["condition"], config["parser"]) diff --git a/src/lib.rs b/src/lib.rs index 4c0cf847..f14f8997 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -74,9 +74,7 @@ static MULTI_THREADED: bool = true; static SQL_PARAM: &str = "VALVEPARAM"; lazy_static! { - static ref PG_SQL_TYPES: Vec<&'static str> = - vec!["text", "varchar", "numeric", "integer", "real"]; - static ref SL_SQL_TYPES: Vec<&'static str> = vec!["text", "numeric", "integer", "real"]; + static ref SQL_TYPES: Vec<&'static str> = vec!["text", "varchar", "numeric", "integer", "real"]; } /// Aliases for [serde_json::Map](..//serde_json/struct.Map.html). @@ -2098,8 +2096,7 @@ fn read_config_files( "datatype", "parent", "condition", - "SQLite type", - "PostgreSQL type", + "SQL type", ] { if !row.contains_key(column) || row.get(column) == None { panic!("Missing required column '{}' reading '{}'", column, path); @@ -2112,7 +2109,7 @@ fn read_config_files( } } - for column in vec!["parent", "condition", "SQLite type", "PostgreSQL type"] { + for column in vec!["parent", "condition", "SQL type"] { if row.get(column).and_then(|c| c.as_str()).unwrap() == "" { row.remove(&column.to_string()); } @@ -4354,15 +4351,7 @@ fn get_sql_type(dt_config: &SerdeMap, datatype: &String, pool: &AnyPool) -> Opti return None; } - let sql_type_column = { - if pool.any_kind() == AnyKind::Sqlite { - "SQLite type" - } else { - "PostgreSQL type" - } - }; - - if let Some(sql_type) = dt_config.get(datatype).and_then(|d| d.get(sql_type_column)) { + if let Some(sql_type) = dt_config.get(datatype).and_then(|d| d.get("SQL type")) { return Some(sql_type.as_str().and_then(|s| Some(s.to_string())).unwrap()); } @@ -4956,26 +4945,13 @@ fn get_table_ddl( } }; - if pool.any_kind() == AnyKind::Postgres { - if !PG_SQL_TYPES.contains(&short_sql_type.to_lowercase().as_str()) { - panic!( - "Unrecognized PostgreSQL SQL type '{}' for datatype: '{}'. 
\ - Accepted SQL types for PostgreSQL are: {}", - sql_type, - row.get("datatype").and_then(|d| d.as_str()).unwrap(), - PG_SQL_TYPES.join(", ") - ); - } - } else { - if !SL_SQL_TYPES.contains(&short_sql_type.to_lowercase().as_str()) { - panic!( - "Unrecognized SQLite SQL type '{}' for datatype '{}'. \ - Accepted SQL datatypes for SQLite are: {}", - sql_type, - row.get("datatype").and_then(|d| d.as_str()).unwrap(), - SL_SQL_TYPES.join(", ") - ); - } + if !SQL_TYPES.contains(&short_sql_type.to_lowercase().as_str()) { + panic!( + "Unrecognized SQL type '{}' for datatype: '{}'. Accepted SQL types are: {}", + sql_type, + row.get("datatype").and_then(|d| d.as_str()).unwrap(), + SQL_TYPES.join(", ") + ); } let column_name = row.get("column").and_then(|s| s.as_str()).unwrap(); diff --git a/test/guess_test_data/column.tsv b/test/guess_test_data/column.tsv index 2659b524..69e5dbf8 100644 --- a/test/guess_test_data/column.tsv +++ b/test/guess_test_data/column.tsv @@ -16,8 +16,7 @@ datatype transform empty word datatype condition empty datatype_condition datatype structure empty trimmed_line datatype description empty trimmed_text -datatype SQLite type empty datatype_name -datatype PostgreSQL type empty datatype_name +datatype SQL type empty datatype_name datatype RDF type empty datatype_name datatype HTML type empty datatype_name rule table table_name diff --git a/test/guess_test_data/column_expected.tsv b/test/guess_test_data/column_expected.tsv index f7e6a20e..d0bf3745 100644 --- a/test/guess_test_data/column_expected.tsv +++ b/test/guess_test_data/column_expected.tsv @@ -16,8 +16,7 @@ datatype transform empty word datatype condition empty datatype_condition datatype structure empty trimmed_line datatype description empty trimmed_text -datatype SQLite type empty datatype_name -datatype PostgreSQL type empty datatype_name +datatype SQL type empty datatype_name datatype RDF type empty datatype_name datatype HTML type empty datatype_name rule table table_name diff --git 
a/test/guess_test_data/datatype.tsv b/test/guess_test_data/datatype.tsv index c118588d..156b46d9 100644 --- a/test/guess_test_data/datatype.tsv +++ b/test/guess_test_data/datatype.tsv @@ -1,22 +1,22 @@ -datatype parent transform condition structure description SQLite type PostgreSQL type RDF type HTML type -CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE -IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI -column_name trimmed_line match(/\S([^\n]*\S)*/) a column name -datatype_condition line exclude(/\n/) a datatype condition specification -datatype_name word exclude(/\W/) a datatype name -description trimmed_text match(/\S(.*\S)*/) a brief description -empty text equals('') the empty string NULL NULL null -integer nonspace match(/-?\d+/) a positive or negative integer INTEGER INTEGER -label trimmed_line match(/\S([^\n]*\S)*/) -line text exclude(/\n/) a line of text input -natural_number integer match(/\d+/) a natural number, including zero INTEGER INTEGER -nonspace trimmed_line exclude(/\s/) text without whitespace -path line exclude(/\n/) a path to a file -prefix word exclude(/\W/) a prefix for a CURIE -suffix word exclude(/\W/) a suffix for a CURIE -table_name word exclude(/\W/) a table name -table_type word lowercase in('table', 'column', 'datatype') a table type -text any text TEXT TEXT xsd:string textarea -trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace -trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace -word nonspace exclude(/\W/) a single word: letters, numbers, underscore +datatype parent transform condition structure description SQL type RDF type HTML type +CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE +IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI +column_name trimmed_line match(/\S([^\n]*\S)*/) a column name +datatype_condition line exclude(/\n/) a 
datatype condition specification +datatype_name word exclude(/\W/) a datatype name +description trimmed_text match(/\S(.*\S)*/) a brief description +empty text equals('') the empty string NULL null +integer nonspace match(/-?\d+/) a positive or negative integer INTEGER +label trimmed_line match(/\S([^\n]*\S)*/) +line text exclude(/\n/) a line of text input +natural_number integer match(/\d+/) a natural number, including zero INTEGER +nonspace trimmed_line exclude(/\s/) text without whitespace +path line exclude(/\n/) a path to a file +prefix word exclude(/\W/) a prefix for a CURIE +suffix word exclude(/\W/) a suffix for a CURIE +table_name word exclude(/\W/) a table name +table_type word lowercase in('table', 'column', 'datatype') a table type +text any text TEXT xsd:string textarea +trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace +trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace +word nonspace exclude(/\W/) a single word: letters, numbers, underscore diff --git a/test/perf_test_data/column.tsv b/test/perf_test_data/column.tsv index f7e6a20e..d0bf3745 100644 --- a/test/perf_test_data/column.tsv +++ b/test/perf_test_data/column.tsv @@ -16,8 +16,7 @@ datatype transform empty word datatype condition empty datatype_condition datatype structure empty trimmed_line datatype description empty trimmed_text -datatype SQLite type empty datatype_name -datatype PostgreSQL type empty datatype_name +datatype SQL type empty datatype_name datatype RDF type empty datatype_name datatype HTML type empty datatype_name rule table table_name diff --git a/test/perf_test_data/datatype.tsv b/test/perf_test_data/datatype.tsv index c118588d..156b46d9 100644 --- a/test/perf_test_data/datatype.tsv +++ b/test/perf_test_data/datatype.tsv @@ -1,22 +1,22 @@ -datatype parent transform condition structure description SQLite type PostgreSQL type RDF type HTML type -CURIE nonspace match(/\S+:\S+/) concat(prefix, 
":", suffix) a Compact URI CURIE -IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI -column_name trimmed_line match(/\S([^\n]*\S)*/) a column name -datatype_condition line exclude(/\n/) a datatype condition specification -datatype_name word exclude(/\W/) a datatype name -description trimmed_text match(/\S(.*\S)*/) a brief description -empty text equals('') the empty string NULL NULL null -integer nonspace match(/-?\d+/) a positive or negative integer INTEGER INTEGER -label trimmed_line match(/\S([^\n]*\S)*/) -line text exclude(/\n/) a line of text input -natural_number integer match(/\d+/) a natural number, including zero INTEGER INTEGER -nonspace trimmed_line exclude(/\s/) text without whitespace -path line exclude(/\n/) a path to a file -prefix word exclude(/\W/) a prefix for a CURIE -suffix word exclude(/\W/) a suffix for a CURIE -table_name word exclude(/\W/) a table name -table_type word lowercase in('table', 'column', 'datatype') a table type -text any text TEXT TEXT xsd:string textarea -trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace -trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace -word nonspace exclude(/\W/) a single word: letters, numbers, underscore +datatype parent transform condition structure description SQL type RDF type HTML type +CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE +IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI +column_name trimmed_line match(/\S([^\n]*\S)*/) a column name +datatype_condition line exclude(/\n/) a datatype condition specification +datatype_name word exclude(/\W/) a datatype name +description trimmed_text match(/\S(.*\S)*/) a brief description +empty text equals('') the empty string NULL null +integer nonspace match(/-?\d+/) a positive or negative integer INTEGER +label trimmed_line match(/\S([^\n]*\S)*/) +line text exclude(/\n/) a line of text input 
+natural_number integer match(/\d+/) a natural number, including zero INTEGER +nonspace trimmed_line exclude(/\s/) text without whitespace +path line exclude(/\n/) a path to a file +prefix word exclude(/\W/) a prefix for a CURIE +suffix word exclude(/\W/) a suffix for a CURIE +table_name word exclude(/\W/) a table name +table_type word lowercase in('table', 'column', 'datatype') a table type +text any text TEXT xsd:string textarea +trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace +trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace +word nonspace exclude(/\W/) a single word: letters, numbers, underscore diff --git a/test/random_test_data/column.tsv b/test/random_test_data/column.tsv index f7e6a20e..d0bf3745 100644 --- a/test/random_test_data/column.tsv +++ b/test/random_test_data/column.tsv @@ -16,8 +16,7 @@ datatype transform empty word datatype condition empty datatype_condition datatype structure empty trimmed_line datatype description empty trimmed_text -datatype SQLite type empty datatype_name -datatype PostgreSQL type empty datatype_name +datatype SQL type empty datatype_name datatype RDF type empty datatype_name datatype HTML type empty datatype_name rule table table_name diff --git a/test/random_test_data/datatype.tsv b/test/random_test_data/datatype.tsv index c118588d..156b46d9 100644 --- a/test/random_test_data/datatype.tsv +++ b/test/random_test_data/datatype.tsv @@ -1,22 +1,22 @@ -datatype parent transform condition structure description SQLite type PostgreSQL type RDF type HTML type -CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE -IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI -column_name trimmed_line match(/\S([^\n]*\S)*/) a column name -datatype_condition line exclude(/\n/) a datatype condition specification -datatype_name word exclude(/\W/) a datatype name -description trimmed_text match(/\S(.*\S)*/) a brief 
description -empty text equals('') the empty string NULL NULL null -integer nonspace match(/-?\d+/) a positive or negative integer INTEGER INTEGER -label trimmed_line match(/\S([^\n]*\S)*/) -line text exclude(/\n/) a line of text input -natural_number integer match(/\d+/) a natural number, including zero INTEGER INTEGER -nonspace trimmed_line exclude(/\s/) text without whitespace -path line exclude(/\n/) a path to a file -prefix word exclude(/\W/) a prefix for a CURIE -suffix word exclude(/\W/) a suffix for a CURIE -table_name word exclude(/\W/) a table name -table_type word lowercase in('table', 'column', 'datatype') a table type -text any text TEXT TEXT xsd:string textarea -trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace -trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace -word nonspace exclude(/\W/) a single word: letters, numbers, underscore +datatype parent transform condition structure description SQL type RDF type HTML type +CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE +IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI +column_name trimmed_line match(/\S([^\n]*\S)*/) a column name +datatype_condition line exclude(/\n/) a datatype condition specification +datatype_name word exclude(/\W/) a datatype name +description trimmed_text match(/\S(.*\S)*/) a brief description +empty text equals('') the empty string NULL null +integer nonspace match(/-?\d+/) a positive or negative integer INTEGER +label trimmed_line match(/\S([^\n]*\S)*/) +line text exclude(/\n/) a line of text input +natural_number integer match(/\d+/) a natural number, including zero INTEGER +nonspace trimmed_line exclude(/\s/) text without whitespace +path line exclude(/\n/) a path to a file +prefix word exclude(/\W/) a prefix for a CURIE +suffix word exclude(/\W/) a suffix for a CURIE +table_name word exclude(/\W/) a table name +table_type word lowercase 
in('table', 'column', 'datatype') a table type +text any text TEXT xsd:string textarea +trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace +trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace +word nonspace exclude(/\W/) a single word: letters, numbers, underscore diff --git a/test/src/column.tsv b/test/src/column.tsv index 9c6c8256..05707f63 100644 --- a/test/src/column.tsv +++ b/test/src/column.tsv @@ -16,8 +16,7 @@ datatype transform empty word datatype condition empty datatype_condition datatype structure empty trimmed_line datatype description empty trimmed_text -datatype SQLite type empty trimmed_line -datatype PostgreSQL type empty trimmed_line +datatype SQL type empty trimmed_line datatype RDF type empty trimmed_line datatype HTML type empty datatype_name rule table table_name diff --git a/test/src/datatype.tsv b/test/src/datatype.tsv index b2079e3b..dd90b419 100644 --- a/test/src/datatype.tsv +++ b/test/src/datatype.tsv @@ -1,24 +1,24 @@ -datatype parent transform condition structure description SQLite type PostgreSQL type RDF type HTML type -CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE -IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI -column_name trimmed_line match(/\S([^\n]*\S)*/) a column name -datatype_condition line exclude(/\n/) a datatype condition specification -datatype_name word exclude(/\W/) a datatype name -description trimmed_text match(/\S(.*\S)*/) a brief description -empty text equals('') the empty string NULL NULL null -integer nonspace match(/-?\d+/) a positive or negative integer INTEGER INTEGER -label trimmed_line match(/\S([^\n]*\S)*/) -line text exclude(/\n/) a line of text input -natural_number integer match(/\d+/) a natural number, including zero INTEGER INTEGER -nonspace trimmed_line exclude(/\s/) text without whitespace -numeric nonspace match(/-?\d+(\.\d+)?/) a positive or negative number 
NUMERIC NUMERIC -path line exclude(/\n/) a path to a file -prefix word exclude(/\W/) a prefix for a CURIE -real nonspace match(/-?\d+(\.\d+)?/) a positive or negative real number REAL REAL -suffix word exclude(/\W/) a suffix for a CURIE -table_name word exclude(/\W/) a table name -table_type word lowercase in('table', 'column', 'datatype') a table type -text any text TEXT TEXT xsd:string textarea -trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace -trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace -word nonspace exclude(/\W/) a single word: letters, numbers, underscore +datatype parent transform condition structure description SQL type RDF type HTML type +CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE +IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI +column_name trimmed_line match(/\S([^\n]*\S)*/) a column name +datatype_condition line exclude(/\n/) a datatype condition specification +datatype_name word exclude(/\W/) a datatype name +description trimmed_text match(/\S(.*\S)*/) a brief description +empty text equals('') the empty string NULL null +integer nonspace match(/-?\d+/) a positive or negative integer INTEGER +label trimmed_line match(/\S([^\n]*\S)*/) +line text exclude(/\n/) a line of text input +natural_number integer match(/\d+/) a natural number, including zero INTEGER +nonspace trimmed_line exclude(/\s/) text without whitespace +numeric nonspace match(/-?\d+(\.\d+)?/) a positive or negative number NUMERIC +path line exclude(/\n/) a path to a file +prefix word exclude(/\W/) a prefix for a CURIE +real nonspace match(/-?\d+(\.\d+)?/) a positive or negative real number REAL +suffix word exclude(/\W/) a suffix for a CURIE +table_name word exclude(/\W/) a table name +table_type word lowercase in('table', 'column', 'datatype') a table type +text any text TEXT xsd:string textarea +trimmed_line line match(/\S([^\n]*\S)*/) 
a line of text that does not begin or end with whitespace +trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace +word nonspace exclude(/\W/) a single word: letters, numbers, underscore From 16b6d585ddc3ed2533d4115a2ed4443af195584a Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 14 Jan 2024 13:13:30 -0500 Subject: [PATCH 47/57] implement save_all_tables(), save_tables(), and save_table(), use new ValveError type in Results, change export.py to export_messages.py --- Makefile | 27 +- scripts/{export.py => export_messages.py} | 98 +--- src/api_test.rs | 32 +- src/lib.rs | 623 ++++++++++++---------- src/main.rs | 160 ++++-- src/validate.rs | 34 +- test/insert_update.sh | 10 +- test/round_trip.sh | 6 +- 8 files changed, 520 insertions(+), 470 deletions(-) rename scripts/{export.py => export_messages.py} (75%) diff --git a/Makefile b/Makefile index d5488499..b7d671b1 100644 --- a/Makefile +++ b/Makefile @@ -44,18 +44,17 @@ test/output: test: clean_test_db sqlite_test pg_test api_test random_test -tables_to_test = column datatype rule table table1 table2 table3 table4 table5 table6 table7 table8 \ - table9 table10 table11 +tables_to_test := $(shell cut -f 1 test/src/table.tsv) sqlite_test: build/valve.db test/src/table.tsv | test/output @echo "Testing valve on sqlite ..." 
test/round_trip.sh $^ - scripts/export.py messages $< $| $(tables_to_test) + scripts/export_messages.py $< $| $(tables_to_test) diff --strip-trailing-cr -q test/expected/messages.tsv test/output/messages.tsv - scripts/export.py messages --a1 $< $| $(tables_to_test) + scripts/export_messages.py --a1 $< $| $(tables_to_test) diff --strip-trailing-cr -q test/expected/messages_a1.tsv test/output/messages.tsv # The "pk" test is run on table7 only since it is the only table whose primary keys are all valid: - scripts/export.py messages --pk $< $| table7 + scripts/export_messages.py --pk $< $| table7 diff --strip-trailing-cr -q test/expected/messages_pk.tsv test/output/messages.tsv @echo "Test succeeded!" @@ -63,12 +62,12 @@ pg_test: valve test/src/table.tsv | test/output @echo "Testing valve on postgresql ..." ./$^ postgresql:///valve_postgres test/round_trip.sh postgresql:///valve_postgres $(word 2,$^) - scripts/export.py messages postgresql:///valve_postgres $| $(tables_to_test) + scripts/export_messages.py postgresql:///valve_postgres $| $(tables_to_test) diff --strip-trailing-cr -q test/expected/messages.tsv test/output/messages.tsv - scripts/export.py messages --a1 postgresql:///valve_postgres $| $(tables_to_test) + scripts/export_messages.py --a1 postgresql:///valve_postgres $| $(tables_to_test) diff --strip-trailing-cr -q test/expected/messages_a1.tsv test/output/messages.tsv # The "pk" test is run on table7 only since it is the only table whose primary keys are all valid: - scripts/export.py messages --pk postgresql:///valve_postgres $| table7 + scripts/export_messages.py --pk postgresql:///valve_postgres $| table7 diff --strip-trailing-cr -q test/expected/messages_pk.tsv test/output/messages.tsv @echo "Test succeeded!" @@ -77,8 +76,8 @@ api_test: sqlite_api_test pg_api_test sqlite_api_test: valve test/src/table.tsv build/valve.db test/insert_update.sh | test/output @echo "Testing API functions on sqlite ..." 
./$< --api_test $(word 2,$^) $(word 3,$^) - $(word 4,$^) $(word 3,$^) - scripts/export.py messages $(word 3,$^) $| $(tables_to_test) + $(word 4,$^) $(word 3,$^) $(word 2,$^) + scripts/export_messages.py $(word 3,$^) $| $(tables_to_test) diff --strip-trailing-cr -q test/expected/messages_after_api_test.tsv test/output/messages.tsv echo "select \"history_id\", \"table\", \"row\", \"from\", \"to\", \"summary\", \"user\", \"undone_by\" from history where history_id < 15 order by history_id" | sqlite3 -header -tabs build/valve.db > test/output/history.tsv diff --strip-trailing-cr -q test/expected/history.tsv test/output/history.tsv @@ -91,8 +90,8 @@ pg_api_test: valve test/src/table.tsv test/insert_update.sh | test/output @echo "Testing API functions on postgresql ..." ./$< $(word 2,$^) postgresql:///valve_postgres ./$< --api_test $(word 2,$^) postgresql:///valve_postgres - $(word 3,$^) postgresql:///valve_postgres - scripts/export.py messages postgresql:///valve_postgres $| $(tables_to_test) + $(word 3,$^) postgresql:///valve_postgres $(word 2,$^) + scripts/export_messages.py postgresql:///valve_postgres $| $(tables_to_test) diff --strip-trailing-cr -q test/expected/messages_after_api_test.tsv test/output/messages.tsv psql postgresql:///valve_postgres -c "COPY (select \"history_id\", \"table\", \"row\", \"from\", \"to\", \"summary\", \"user\", \"undone_by\" from history where history_id < 15 order by history_id) TO STDOUT WITH NULL AS ''" > test/output/history.tsv tail -n +2 test/expected/history.tsv | diff --strip-trailing-cr -q test/output/history.tsv - @@ -161,11 +160,11 @@ $(perf_test_db): valve perf_test_data $(perf_test_dir)/*.tsv | build $(perf_test time -p ./$< --verbose $(perf_test_dir)/table.tsv $@ sqlite_perf_test: build/valve_perf.db | test/output - time -p scripts/export.py messages $< $| $(tables_to_test) + time -p scripts/export_messages.py $< $| $(tables_to_test) pg_perf_test: valve $(perf_test_dir)/ontology | test/output time -p ./$< --verbose 
$(perf_test_dir)/table.tsv postgresql:///valve_postgres - time -p scripts/export.py messages postgresql:///valve_postgres $| $(tables_to_test) + time -p scripts/export_messages.py postgresql:///valve_postgres $| $(tables_to_test) perf_test: sqlite_perf_test pg_perf_test diff --git a/scripts/export.py b/scripts/export_messages.py similarity index 75% rename from scripts/export.py rename to scripts/export_messages.py index bc61c259..380d8497 100755 --- a/scripts/export.py +++ b/scripts/export_messages.py @@ -115,52 +115,6 @@ def get_column_order_and_info_for_sqlite(cursor, table): } -def export_data(cursor, is_sqlite, args): - """ - Given a database cursor, a flag indicating whether this is a sqlite or postgres db, and a - dictionary containing: an output directory, "output", and a list of tables, "tables": export all - of the given database tables to .tsv files in the output directory. - """ - output_dir = os.path.normpath(args["output_dir"]) - tables = args["tables"] - - for table in tables: - try: - if is_sqlite: - columns_info = get_column_order_and_info_for_sqlite(cursor, table) - else: - columns_info = get_column_order_and_info_for_postgres(cursor, table) - unsorted_columns = columns_info["unsorted_columns"] - - select = [f'"{column}"' for column in unsorted_columns] - select = ", ".join(select) - - # Fetch the rows from the table and write them to a corresponding TSV file in the - # output directory: - cursor.execute(f'SELECT {select} FROM "{table}_text_view" ORDER BY "row_number"') - colnames = [d[0] for d in cursor.description] - rows = map(lambda r: dict(zip(colnames, r)), cursor) - fieldnames = [c for c in colnames if c != "row_number"] - with open(f"{output_dir}/{table}.tsv", "w", newline="\n") as csvfile: - writer = csv.DictWriter( - csvfile, - fieldnames=fieldnames, - delimiter="\t", - doublequote=False, - strict=True, - lineterminator="\n", - quoting=csv.QUOTE_NONE, - escapechar=None, - quotechar=None, - ) - writer.writeheader() - for row in rows: - 
del row["row_number"] - writer.writerow(row) - except sqlite3.OperationalError as e: - print(f"ERROR while exporting {table}: {e}", file=sys.stderr) - - def export_messages(cursor, is_sqlite, args): """ Given a database cursor, a flag indicating whether this is a sqlite or postgres db, and a @@ -289,42 +243,24 @@ def col_to_a1(column, columns): if __name__ == "__main__": - prog_parser = ArgumentParser(description="Database table export utility") - sub_parsers = prog_parser.add_subparsers(help="Possible sub-commands") - - sub1 = sub_parsers.add_parser( - "data", - description="Export table data", - help="Export table data. For command-line options, run: `%(prog)s data --help`", + parser = ArgumentParser(description="Export Valve messages") + pgroup = parser.add_mutually_exclusive_group() + pgroup.add_argument("--a1", action="store_true", help="Output error messages in A1 format") + pgroup.add_argument("--pk", action="store_true", help="Identify rows using primary keys") + + parser.add_argument( + "db", + help="""Either a database connection URL or a path to a SQLite database file. In the + case of a URL, you must use one of the following schemes: potgresql:// + (for postgreSQL), sqlite:// or file: (for SQLite). + """, ) - - sub1.set_defaults(func=export_data) - - sub2 = sub_parsers.add_parser( - "messages", - description="Export error messages", - help="Export error messages. 
For command-line options, run: `%(prog)s messages --help`", + parser.add_argument("output_dir", help="The name of the directory in which to save TSV files") + parser.add_argument( + "tables", metavar="table", nargs="+", help="The name of a table to export to TSV" ) - sub2_group = sub2.add_mutually_exclusive_group() - sub2_group.add_argument("--a1", action="store_true", help="Output error messages in A1 format") - sub2_group.add_argument("--pk", action="store_true", help="Identify rows using primary keys") - sub2.set_defaults(func=export_messages) - - for sub in [sub1, sub2]: - sub.add_argument( - "db", - help="""Either a database connection URL or a path to a SQLite database file. In the - case of a URL, you must use one of the following schemes: potgresql:// - (for postgreSQL), sqlite:// or file: (for SQLite). - """, - ) - sub.add_argument("output_dir", help="The name of the directory in which to save TSV files") - sub.add_argument( - "tables", metavar="table", nargs="+", help="The name of a table to export to TSV" - ) - args = prog_parser.parse_args() - func = args.func + args = parser.parse_args() args = vars(args) if not os.path.isdir(args["output_dir"]): @@ -336,7 +272,7 @@ def col_to_a1(column, columns): if db.startswith("postgresql://"): with psycopg2.connect(db) as conn: cursor = conn.cursor() - func(cursor, False, args) + export_messages(cursor, False, args) else: m = re.search(r"(^(file:|sqlite://))?(.+?)(\?.+)?$", db) if m: @@ -348,7 +284,7 @@ def col_to_a1(column, columns): db = f"file:{path}{params}" with sqlite3.connect(db, uri=True) as conn: cursor = conn.cursor() - func(cursor, True, args) + export_messages(cursor, True, args) else: print(f"Could not parse database specification: {db}", file=sys.stderr) sys.exit(1) diff --git a/src/api_test.rs b/src/api_test.rs index 2030d61c..65d8039f 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -1,10 +1,10 @@ -use ontodev_valve::{SerdeMap, Valve}; +use ontodev_valve::{SerdeMap, Valve, ValveError}; use 
rand::distributions::{Alphanumeric, DistString, Distribution, Uniform}; use rand::{random, thread_rng}; use serde_json::json; -use sqlx::{any::AnyPool, query as sqlx_query, Error::Configuration as SqlxCErr, Row, ValueRef}; +use sqlx::{any::AnyPool, query as sqlx_query, Row, ValueRef}; -async fn test_matching(valve: &Valve) -> Result<(), sqlx::Error> { +async fn test_matching(valve: &Valve) -> Result<(), ValveError> { eprint!("Running test_matching() ... "); // Test the get_matching_values() function: @@ -39,7 +39,7 @@ async fn test_matching(valve: &Valve) -> Result<(), sqlx::Error> { Ok(()) } -async fn test_idempotent_validate_and_update(valve: &Valve) -> Result<(), sqlx::Error> { +async fn test_idempotent_validate_and_update(valve: &Valve) -> Result<(), ValveError> { eprint!("Running test_idempotent_validate_and_update() ... "); // We test that validate_row() is idempotent by running it multiple times on the same row: @@ -76,7 +76,7 @@ async fn test_idempotent_validate_and_update(valve: &Valve) -> Result<(), sqlx:: Ok(()) } -async fn test_validate_and_insert_1(valve: &Valve) -> Result<(), sqlx::Error> { +async fn test_validate_and_insert_1(valve: &Valve) -> Result<(), ValveError> { eprint!("Running test_validate_and_insert_1() ... "); // Validate and insert a new row: @@ -104,7 +104,7 @@ async fn test_validate_and_insert_1(valve: &Valve) -> Result<(), sqlx::Error> { Ok(()) } -async fn test_validate_and_update(valve: &Valve) -> Result<(), sqlx::Error> { +async fn test_validate_and_update(valve: &Valve) -> Result<(), ValveError> { eprint!("Running test_validate_and_update() ... "); // Validate and update an existing row: @@ -132,7 +132,7 @@ async fn test_validate_and_update(valve: &Valve) -> Result<(), sqlx::Error> { Ok(()) } -async fn test_validate_and_insert_2(valve: &Valve) -> Result<(), sqlx::Error> { +async fn test_validate_and_insert_2(valve: &Valve) -> Result<(), ValveError> { eprint!("Running test_validate_and_insert_2() ... 
"); // Validate and insert a new row: @@ -160,7 +160,7 @@ async fn test_validate_and_insert_2(valve: &Valve) -> Result<(), sqlx::Error> { Ok(()) } -async fn test_dependencies(valve: &Valve) -> Result<(), sqlx::Error> { +async fn test_dependencies(valve: &Valve) -> Result<(), ValveError> { eprint!("Running test_dependencies() ... "); // Test cases for updates/inserts/deletes with dependencies. @@ -211,7 +211,7 @@ enum DbOperation { Redo, } -async fn generate_operation_sequence(pool: &AnyPool) -> Result, sqlx::Error> { +async fn generate_operation_sequence(pool: &AnyPool) -> Result, ValveError> { /* Algorithm: ---------- @@ -308,7 +308,7 @@ async fn generate_operation_sequence(pool: &AnyPool) -> Result, Ok(operations) } -async fn test_randomized_api_test_with_undo_redo(valve: &Valve) -> Result<(), sqlx::Error> { +async fn test_randomized_api_test_with_undo_redo(valve: &Valve) -> Result<(), ValveError> { // Randomly generate a number of insert/update/delete operations, possibly followed by undos // and/or redos. eprint!("Running test_randomized_api_test_with_undo_redo() ... 
"); @@ -354,7 +354,7 @@ async fn test_randomized_api_test_with_undo_redo(valve: &Valve) -> Result<(), sq let sql_row = query.fetch_one(&valve.pool).await?; let raw_row_number = sql_row.try_get_raw("row_number")?; if raw_row_number.is_null() { - return Err(SqlxCErr("No rows in table1_view".into())); + return Err(ValveError::DataError("No rows in table1_view".into())); } else { let row_number: i64 = sql_row.get("row_number"); let row_number = row_number as u32; @@ -366,7 +366,7 @@ async fn test_randomized_api_test_with_undo_redo(valve: &Valve) -> Result<(), sq let sql_row = query.fetch_one(&valve.pool).await?; let raw_row_number = sql_row.try_get_raw("row_number")?; if raw_row_number.is_null() { - return Err(SqlxCErr("No rows in table1_view".into())); + return Err(ValveError::DataError("No rows in table1_view".into())); } else { let row_number: i64 = sql_row.get("row_number"); let row_number = row_number as u32; @@ -391,7 +391,7 @@ async fn test_randomized_api_test_with_undo_redo(valve: &Valve) -> Result<(), sq Ok(()) } -async fn test_undo_redo(valve: &Valve) -> Result<(), sqlx::Error> { +async fn test_undo_redo(valve: &Valve) -> Result<(), ValveError> { eprint!("Running test_undo_redo() ... 
"); // Undo/redo tests @@ -467,8 +467,8 @@ async fn test_undo_redo(valve: &Valve) -> Result<(), sqlx::Error> { Ok(()) } -pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Error> { - let valve = Valve::build(table, database, false, false, false).await?; +pub async fn run_api_tests(table: &str, database: &str) -> Result<(), ValveError> { + let valve = Valve::build(table, database, false, false).await?; // NOTE that you must use an external script to fetch the data from the database and run a diff // against a known good sample to verify that these tests yield the expected results: test_matching(&valve).await?; @@ -480,5 +480,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro test_undo_redo(&valve).await?; test_randomized_api_test_with_undo_redo(&valve).await?; + // TODO: Add some tests for the new API functions like save. + Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index f14f8997..785e6f99 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,9 +31,9 @@ use crate::{ast::Expression, valve_grammar::StartParser}; use async_recursion::async_recursion; use chrono::Utc; use crossbeam; -use csv::{ReaderBuilder, StringRecord, StringRecordsIter}; +use csv::{QuoteStyle, ReaderBuilder, StringRecord, StringRecordsIter, WriterBuilder}; use enquote::unquote; -use futures::executor::block_on; +use futures::{executor::block_on, TryStreamExt}; use indexmap::IndexMap; use indoc::indoc; use itertools::{IntoChunks, Itertools}; @@ -47,9 +47,7 @@ use regex::Regex; use serde_json::{json, Value as SerdeValue}; use sqlx::{ any::{AnyConnectOptions, AnyKind, AnyPool, AnyPoolOptions, AnyRow}, - query as sqlx_query, Acquire, Column, - Error::Configuration as SqlxCErr, - Row, Transaction, ValueRef, + query as sqlx_query, Acquire, Column, Row, Transaction, ValueRef, }; use std::{ collections::{BTreeMap, HashMap}, @@ -83,6 +81,7 @@ lazy_static! 
{ pub type SerdeMap = serde_json::Map; pub type ValveRow = serde_json::Map; +// TODO: Possibly replace these with the tracing library (see nanobot.rs). /// Write a debugging message to STDERR. #[macro_export] macro_rules! debug { @@ -174,52 +173,83 @@ impl std::fmt::Debug for ColumnRule { } } +/// Main entrypoint for the Valve API. #[derive(Debug)] pub struct Valve { - /// TODO: Add docstring here. + /// The valve configuration map. pub config: SerdeMap, - /// TODO: Add docstring here. + /// Pre-compiled datatype conditions. pub compiled_datatype_conditions: HashMap, - /// TODO: Add docstring here. + /// Pre-compiled rule conditions. pub compiled_rule_conditions: HashMap>>, - /// TODO: Add docstring here. + /// Parsed structure conditions: pub parsed_structure_conditions: HashMap, - /// TODO: Add docstring here. + /// Lists of tables that depend on a given table, indexed by table. pub table_dependencies_in: HashMap>, - /// TODO: Add docstring here. + /// Lists of tables that a given table depends on, indexed by table. pub table_dependencies_out: HashMap>, - /// TODO: Add docstring here. + /// The database connection pool. pub pool: AnyPool, - /// TODO: Add docstring here. + /// The user associated with this valve instance. pub user: String, - /// TODO: Add docstring here. + /// Produce more logging output. pub verbose: bool, - /// TODO: Add docstring here. Note that this field is CLI only. - pub interactive: bool, - /// TODO: Add docstring here. + /// Tune the database for initial loading. 
pub initial_load: bool, } #[derive(Debug)] -pub struct ConfigError { - // TODO: Read https://www.lpalmieri.com/posts/error-handling-rust/ - pub message: String, +pub enum ValveError { + /// An error in the Valve configuration: + ConfigError(String), + /// An error that occurred while reading or writing to a CSV/TSV: + CsvError(csv::Error), + /// An error involving the data: + DataError(String), + /// An error generated by the underlying database: + DatabaseError(sqlx::Error), + /// An error in the inputs to a function: + InputError(String), + /// An error that occurred while reading/writing to stdio: + IOError(std::io::Error), + /// An error that occurred while serialising or deserialising to/from JSON: + SerdeJsonError(serde_json::Error), +} + +impl From for ValveError { + fn from(e: csv::Error) -> Self { + Self::CsvError(e) + } +} + +impl From for ValveError { + fn from(e: sqlx::Error) -> Self { + Self::DatabaseError(e) + } +} + +impl From for ValveError { + fn from(e: serde_json::Error) -> Self { + Self::SerdeJsonError(e) + } +} + +impl From for ValveError { + fn from(e: std::io::Error) -> Self { + Self::IOError(e) + } } impl Valve { - /// Given a path to a table table, its name, a path to a database, a flag for verbose output, - /// and a flag indicating whether the Valve instance should be built for initial loading: - /// Set up a database connection, read the table table, configure VALVE, and return a new - /// Valve struct. + /// Given a path to a table table, a path to a database, a flag for verbose output, and a flag + /// indicating whether the database should be configured for initial loading: Set up a database + /// connection, configure VALVE, and return a new Valve struct. 
pub async fn build( table_path: &str, database: &str, verbose: bool, - interactive: bool, initial_load: bool, - ) -> Result { - // TODO: Error type should be ConfigError - + ) -> Result { let pool = get_pool_from_connection_string(database).await?; if pool.any_kind() == AnyKind::Sqlite { sqlx_query("PRAGMA foreign_keys = ON") @@ -298,7 +328,6 @@ impl Valve { pool: pool, user: String::from("VALVE"), verbose: verbose, - interactive: interactive, initial_load: initial_load, }) } @@ -306,26 +335,22 @@ impl Valve { /// Controls the maximum length of a username. const USERNAME_MAX_LEN: usize = 20; - /// Set the user name, which must be a short, trimmed, string without newlines, for this Valve + /// Sets the user name, which must be a short, trimmed, string without newlines, for this Valve /// instance. - pub fn set_user(&mut self, user: &str) -> Result<&mut Self, ConfigError> { + pub fn set_user(&mut self, user: &str) -> Result<&mut Self, ValveError> { if user.len() > Self::USERNAME_MAX_LEN { - return Err(ConfigError { - message: format!( - "Username '{}' is longer than {} characters.", - user, - Self::USERNAME_MAX_LEN - ), - }); + return Err(ValveError::ConfigError(format!( + "Username '{}' is longer than {} characters.", + user, + Self::USERNAME_MAX_LEN + ))); } else { let user_regex = Regex::new(r#"^\S([^\n]*\S)*$"#).unwrap(); if !user_regex.is_match(user) { - return Err(ConfigError { - message: format!( - "Username '{}' is not a short, trimmed, string without newlines.", - user, - ), - }); + return Err(ValveError::ConfigError(format!( + "Username '{}' is not a short, trimmed, string without newlines.", + user, + ))); } } self.user = user.to_string(); @@ -333,14 +358,13 @@ impl Valve { } /// Given a SQL string, execute it using the connection pool associated with the Valve instance. 
- async fn execute_sql(&self, sql: &str) -> Result<(), sqlx::Error> { - // DatabaseError - + async fn execute_sql(&self, sql: &str) -> Result<(), ValveError> { sqlx_query(&sql).execute(&self.pool).await?; Ok(()) } - /// TODO: Add docstring. + /// Return the list of configured tables in sorted order, or reverse sorted order if the + /// reverse flag is set. pub fn get_sorted_table_list(&self, reverse: bool) -> Vec<&str> { let mut sorted_tables = self .config @@ -358,13 +382,12 @@ impl Valve { /// Given the name of a table, determine whether its current instantiation in the database /// differs from the way it has been configured. The answer to this question is yes whenever /// (1) the number of columns or any of their names differs from their configured values, or - /// the order of database columns differs from the configured order; (2) The values in the - /// table table differ from their configured values; (3) The SQL type of one or more columns - /// does not match the configured SQL type for that column; (3) Some column with a 'unique', - /// 'primary', or 'from(table, column)' in its column configuration fails to be associated, in - /// the database, with a unique constraint, primary key, or foreign key, respectively; or vice - /// versa; (4) The table does not exist in the database. - async fn table_has_changed(&self, table: &str) -> Result { + /// the order of database columns differs from the configured order; (2) The SQL type of one or + /// more columns does not match the configured SQL type for that column; (3) Some column with a + /// 'unique', 'primary', or 'from(table, column)' in its column configuration fails to be + /// associated, in the database, with a unique constraint, primary key, or foreign key, + /// respectively; or vice versa; (4) The table does not exist in the database. 
+ async fn table_has_changed(&self, table: &str) -> Result { // A clojure that, given a parsed structure condition, a table and column name, and an // unsigned integer representing whether the given column, in the case of a SQLite database, // is a primary key (in the case of PostgreSQL, the sqlite_pk parameter is ignored): @@ -375,10 +398,10 @@ impl Valve { table: &str, column: &str, sqlite_pk: &u32| - -> Result { + -> Result { // A clojure to determine whether the given column has the given constraint type, which // can be one of 'UNIQUE', 'PRIMARY KEY', 'FOREIGN KEY': - let column_has_constraint_type = |constraint_type: &str| -> Result { + let column_has_constraint_type = |constraint_type: &str| -> Result { if self.pool.any_kind() == AnyKind::Postgres { let sql = format!( r#"SELECT 1 @@ -426,7 +449,7 @@ impl Valve { } Ok(false) } else { - return Err(SqlxCErr( + return Err(ValveError::InputError( format!("Unrecognized constraint type: '{}'", constraint_type).into(), )); } @@ -525,7 +548,7 @@ impl Valve { } } _ => { - return Err(SqlxCErr( + return Err(ValveError::InputError( format!("Unrecognized structure: {:?}", pstruct).into(), )); } @@ -724,8 +747,8 @@ impl Valve { Ok(false) } - /// TODO: Add docstring here - async fn get_setup_statements(&self) -> Result>, sqlx::Error> { + /// Generates and returns the DDL required to setup the database. + async fn get_setup_statements(&self) -> Result>, ValveError> { let tables_config = self .config .get("table") @@ -838,8 +861,8 @@ impl Valve { return Ok(setup_statements); } - /// TODO: Add docstring - pub async fn dump_schema(&self) -> Result<(), sqlx::Error> { + /// Writes the database schema to stdout. 
+ pub async fn dump_schema(&self) -> Result<(), ValveError> { let setup_statements = self.get_setup_statements().await?; for table in self.get_sorted_table_list(false) { let table_statements = setup_statements.get(table).unwrap(); @@ -850,11 +873,7 @@ impl Valve { } /// Create all configured database tables and views if they do not already exist as configured. - pub async fn create_all_tables(&self) -> Result<&Self, sqlx::Error> { - // DatabaseError - - // TODO: Add logging statements here. - + pub async fn create_all_tables(&self) -> Result<&Self, ValveError> { let setup_statements = self.get_setup_statements().await?; let sorted_table_list = self.get_sorted_table_list(false); for table in &sorted_table_list { @@ -870,8 +889,8 @@ impl Valve { Ok(self) } - /// TODO: Add docstring here. - pub async fn table_exists(&self, table: &str) -> Result { + /// Checks whether the given table exists in the database. + pub async fn table_exists(&self, table: &str) -> Result { let sql = { if self.pool.any_kind() == AnyKind::Sqlite { format!( @@ -896,7 +915,8 @@ impl Valve { return Ok(rows.len() > 0); } - /// TODO: Add docstring here. + /// Get all the incoming (tables that depend on it) or outgoing (tables it depends on) + /// dependencies of the given table. fn get_dependencies(&self, table: &str, incoming: bool) -> Vec { let mut dependent_tables = vec![]; if table != "message" && table != "history" { @@ -916,7 +936,9 @@ impl Valve { dependent_tables } - /// TODO: Add docstring here. + /// Given a list of tables, fill it in with any further tables that are dependent upon tables + /// in the given list. If deletion_order is true, the tables are sorted as required for + /// deleting them all sequentially, otherwise they are ordered in reverse. fn add_dependencies(&self, tables: &Vec<&str>, deletion_order: bool) -> Vec { let mut with_dups = vec![]; for table in tables { @@ -941,25 +963,32 @@ impl Valve { tables_in_order } - /// TODO: Add docstring here. 
- fn sort_tables(&self, table_subset: &Vec<&str>, reverse: bool) -> Result, String> { + /// Given a subset of the configured tables, return them in sorted dependency order, or in + /// reverse if `reverse` is set to true. + fn sort_tables( + &self, + table_subset: &Vec<&str>, + reverse: bool, + ) -> Result, ValveError> { let full_table_list = self.get_sorted_table_list(false); if !table_subset .iter() .all(|item| full_table_list.contains(item)) { - return Err(format!( + return Err(ValveError::InputError(format!( "[{}] contains tables that are not in the configured table list: [{}]", table_subset.join(", "), full_table_list.join(", ") - )); + ))); } let constraints_config = self .config .get("constraints") .and_then(|c| c.as_object()) - .ok_or("Unable to retrieve configured constraints.")?; + .ok_or(ValveError::ConfigError( + "Unable to retrieve configured constraints.".into(), + ))?; // Filter out message and history since they are not represented in the constraints config. // They will be added implicitly to the list returned by verify_table_deps_and_sort. @@ -986,7 +1015,8 @@ impl Valve { Ok(sorted_subset) } - /// TODO: Add docstring here. + /// Returns an IndexMap, indexed by configured table, containing lists of their dependencies. + /// If incoming is true, the lists are incoming dependencies, else they are outgoing. pub fn collect_dependencies(&self, incoming: bool) -> IndexMap> { let tables = self.get_sorted_table_list(false); let mut dependencies = IndexMap::new(); @@ -997,34 +1027,15 @@ impl Valve { } /// Drop all configured tables, in reverse dependency order. - pub async fn drop_all_tables(&self) -> Result<&Self, sqlx::Error> { - // DatabaseError - + pub async fn drop_all_tables(&self) -> Result<&Self, ValveError> { // Drop all of the database tables in the reverse of their sorted order: self.drop_tables(&self.get_sorted_table_list(true)).await?; Ok(self) } - /// Given a vector of table names, - /// drop those tables, in the given order. 
- /// Return an error on invalid table name or database problem. - pub async fn drop_tables(&self, tables: &Vec<&str>) -> Result<&Self, sqlx::Error> { - // DatabaseError - + /// Given a vector of table names, drop those tables, in the given order. + pub async fn drop_tables(&self, tables: &Vec<&str>) -> Result<&Self, ValveError> { let drop_list = self.add_dependencies(tables, true); - if self.interactive { - let auto_drops = drop_list - .iter() - .filter(|t| { - !tables.contains(&t.as_str()) && !block_on(self.table_exists(t)).unwrap() - }) - .collect::>(); - if auto_drops.len() > 0 { - // TODO: prompt the user to confirm whether she wants to automatically drop - // the dependent tables. - } - } - for table in &drop_list { if *table != "message" && *table != "history" { let sql = format!(r#"DROP VIEW IF EXISTS "{}_text_view""#, table); @@ -1042,35 +1053,16 @@ impl Valve { } /// Truncate all configured tables, in reverse dependency order. - pub async fn truncate_all_tables(&self) -> Result<&Self, sqlx::Error> { - // DatabaseError - + pub async fn truncate_all_tables(&self) -> Result<&Self, ValveError> { self.truncate_tables(&self.get_sorted_table_list(true)) .await?; Ok(self) } - /// Given a vector of table names, - /// truncate those tables, in the given order. - /// Return an error on invalid table name or database problem. - pub async fn truncate_tables(&self, tables: &Vec<&str>) -> Result<&Self, sqlx::Error> { - // ConfigOrDatabaseError - + /// Given a vector of table names, truncate those tables, in the given order. 
+ pub async fn truncate_tables(&self, tables: &Vec<&str>) -> Result<&Self, ValveError> { self.create_all_tables().await?; - let truncate_list = self.add_dependencies(tables, true); - if self.interactive { - let auto_truncates = truncate_list - .iter() - .filter(|t| { - !tables.contains(&t.as_str()) && !block_on(self.table_exists(t)).unwrap() - }) - .collect::>(); - if auto_truncates.len() > 0 { - // TODO: prompt the user to confirm whether she wants to automatically truncate - // the dependent tables. - } - } // We must use CASCADE in the case of PostgreSQL since we cannot truncate a table, T, that // depends on another table, T', even in the case where we have previously truncated T'. @@ -1096,13 +1088,9 @@ impl Valve { Ok(self) } - /// Load all configured tables in dependency order. - /// If `validate` is false, just try to insert all rows. - /// Return an error on database problem, - /// including database conflicts that prevent rows being inserted. - pub async fn load_all_tables(&self, validate: bool) -> Result<&Self, sqlx::Error> { - // DatabaseError - + /// Load all configured tables in dependency order. If `validate` is false, just try to insert + /// all rows, irrespective of whether they are valid or not or will possibly trigger a db error. + pub async fn load_all_tables(&self, validate: bool) -> Result<&Self, ValveError> { let table_list = self.get_sorted_table_list(false); if self.verbose { info!("Processing {} tables.", table_list.len()); @@ -1110,25 +1098,15 @@ impl Valve { self.load_tables(&table_list, validate).await } - /// Given a vector of table names, - /// load those tables in the given order. - /// If `validate` is false, just try to insert all rows. - /// Return an error on invalid table name or database problem. 
- /// Given a configuration map, a database connection pool, a parser, HashMaps representing - /// compiled datatype and rule conditions, and a HashMap representing parsed structure - /// conditions, read in the data TSV files corresponding to each configured table, then validate - /// and load all of the corresponding data rows. If the verbose flag is set to true, output - /// progress messages to stderr during load. + /// Given a vector of table names, load those tables in the given order. If `validate` is false, + /// just try to insert all rows, irrespective of whether they are valid or not or will possibly + /// trigger a db error. pub async fn load_tables( &self, table_list: &Vec<&str>, validate: bool, - ) -> Result<&Self, sqlx::Error> { - // ConfigOrDatabaseError - - let list_for_truncation = self - .sort_tables(table_list, true) - .map_err(|e| SqlxCErr(e.into()))?; + ) -> Result<&Self, ValveError> { + let list_for_truncation = self.sort_tables(table_list, true)?; self.truncate_tables( &list_for_truncation .iter() @@ -1297,33 +1275,126 @@ impl Valve { Ok(self) } - /// Save all configured tables to their 'path's. - /// Return an error on writing or database problem. - pub fn save_all_tables(&self) -> Result<&Self, sqlx::Error> { - // WriteOrDatabaseError - // TODO. See https://github.com/ontodev/nanobot.rs/pull/65 for hints. + /// Save all configured tables to their configured path's, unless save_dir is specified, + /// in which case save them there instead. + pub fn save_all_tables(&self, save_dir: &Option) -> Result<&Self, ValveError> { + let tables = self.get_sorted_table_list(false); + self.save_tables(&tables, save_dir)?; Ok(self) } - /// Given a vector of table names, - /// Save thosee tables to their 'path's, in the given order. - /// Return an error on writing or database problem. 
- pub fn save_tables(&self, _tables: Vec<&str>) -> Result<&Self, sqlx::Error> { - // WriteOrDatabaseError - // TODO + /// Given a vector of table names, save those tables to their configured path's, unless + /// save_dir is specified, in which case save them there instead. + pub fn save_tables( + &self, + tables: &Vec<&str>, + save_dir: &Option, + ) -> Result<&Self, ValveError> { + let table_paths: HashMap = self + .config + .get("table") + .unwrap() + .as_object() + .unwrap() + .iter() + .filter(|(k, v)| { + !["message", "history"].contains(&k.as_str()) + && tables.contains(&k.as_str()) + && v.get("path").is_some() + }) + .map(|(k, v)| { + ( + k.clone(), + v.get("path").unwrap().as_str().unwrap().to_string(), + ) + }) + .collect(); + + info!( + "Saving tables: {} ...", + table_paths + .keys() + .map(|k| k.to_string()) + .collect::>() + .join(", ") + ); + for (table, path) in table_paths.iter() { + let columns: Vec<&str> = self + .config + .get("table") + .and_then(|v| v.as_object()) + .and_then(|o| o.get(table)) + .and_then(|v| v.as_object()) + .and_then(|o| o.get("column_order")) + .and_then(|v| v.as_array()) + .and_then(|v| Some(v.iter().map(|i| i.as_str().unwrap()).collect())) + .unwrap(); + + let path = match save_dir { + Some(s) => format!( + "{}/{}", + s, + Path::new(path) + .file_name() + .and_then(|n| n.to_str()) + .unwrap() + ), + None => path.to_string(), + }; + self.save_table(table, &columns, &path)?; + } + Ok(self) } - /// Given a table name and a row as JSON, - /// return the validated row. - /// Return an error on database problem. + /// Save the given table with the given columns at the given path as a TSV file. + pub fn save_table( + &self, + table: &str, + columns: &Vec<&str>, + path: &str, + ) -> Result<&Self, ValveError> { + // TODO: Do some validation on the path. 
+ + let mut quoted_columns = vec!["\"row_number\"".to_string()]; + quoted_columns.append( + &mut columns + .iter() + .map(|v| enquote::enquote('"', v)) + .collect::>(), + ); + let text_view = format!("\"{}_text_view\"", table); + let sql = format!( + r#"SELECT {} from {} ORDER BY "row_number""#, + quoted_columns.join(", "), + text_view + ); + + let mut writer = WriterBuilder::new() + .delimiter(b'\t') + .quote_style(QuoteStyle::Never) + .from_path(path)?; + writer.write_record(columns)?; + let mut stream = sqlx_query(&sql).fetch(&self.pool); + while let Some(row) = block_on(stream.try_next()).unwrap() { + let mut record: Vec<&str> = vec![]; + for column in columns.iter() { + let cell = row.try_get::<&str, &str>(column).ok().unwrap_or_default(); + record.push(cell); + } + writer.write_record(record)?; + } + writer.flush()?; + + Ok(self) + } + + /// Given a table name and a row, return the validated row. pub async fn validate_row( &self, table_name: &str, row: &ValveRow, - ) -> Result { - // DatabaseError - + ) -> Result { validate_row_tx( &self.config, &self.compiled_datatype_conditions, @@ -1338,22 +1409,13 @@ impl Valve { .await } - /// Given a table name and a row as JSON, - /// add the row to the table in the database, - /// and return the validated row, including its new row_number. - /// Return an error invalid table name or database problem. - /// A wrapper around [insert_new_row_tx()] in which the following steps are also performed: - /// - A database transaction is created and then committed once the given new row has been - /// inserted. - /// - The row is validated before insertion and the update to the database is recorded to the - /// history table indicating that the given user is responsible for the change. + /// Given a table name and a row as JSON, add the row to the table in the database, and return + /// the validated row, including its new row_number. 
pub async fn insert_row( &self, table_name: &str, row: &ValveRow, - ) -> Result<(u32, ValveRow), sqlx::Error> { - // ConfigOrDatabaseError - + ) -> Result<(u32, ValveRow), ValveError> { let mut tx = self.pool.begin().await?; let row = validate_row_tx( @@ -1387,18 +1449,14 @@ impl Valve { Ok((rn, row)) } - /// Given a table name, a row number, and a row as JSON, - /// update the row in the database, - /// and return the validated row. - /// Return an error invalid table name or row number or database problem. + /// Given a table name, a row number, and a row, update the row in the database, and return the + /// validated row. pub async fn update_row( &self, table_name: &str, row_number: &u32, row: &ValveRow, - ) -> Result { - // ConfigOrDatabaseError - + ) -> Result { let mut tx = self.pool.begin().await?; // Get the old version of the row from the database so that we can later record it to the @@ -1448,11 +1506,8 @@ impl Valve { Ok(row) } - /// Given a table name and a row number, - /// delete that row from the table. - /// Return an error invalid table name or row number or database problem. - pub async fn delete_row(&self, table_name: &str, row_number: &u32) -> Result<(), sqlx::Error> { - // ConfigOrDatabaseError + /// Given a table name and a row number, delete that row from the table. + pub async fn delete_row(&self, table_name: &str, row_number: &u32) -> Result<(), ValveError> { let mut tx = self.pool.begin().await?; let row = @@ -1483,10 +1538,8 @@ impl Valve { Ok(()) } - /// Return the next change to undo, or None. - /// Return an error on database problem. - pub async fn get_record_to_undo(&self) -> Result, sqlx::Error> { - // DatabaseError + /// Return the next change that can be undone, or None if there isn't any. + pub async fn get_record_to_undo(&self) -> Result, ValveError> { // Look in the history table, get the row with the greatest ID, get the row number, // from, and to, and determine whether the last operation was a delete, insert, or update. 
let is_clause = if self.pool.any_kind() == AnyKind::Sqlite { @@ -1505,10 +1558,8 @@ impl Valve { Ok(result_row) } - /// Return the next change to redo, or None. - /// Return an error on database problem. - pub async fn get_record_to_redo(&self) -> Result, sqlx::Error> { - // DatabaseError + /// Return the next change that can be redone, or None if there isn't any. + pub async fn get_record_to_redo(&self) -> Result, ValveError> { // Look in the history table, get the row with the greatest ID, get the row number, // from, and to, and determine whether the last operation was a delete, insert, or update. let is_not_clause = if self.pool.any_kind() == AnyKind::Sqlite { @@ -1527,11 +1578,8 @@ impl Valve { Ok(result_row) } - /// Undo one change and return the change record - /// or None if there was no change to undo. - /// Return an error on database problem. - pub async fn undo(&self) -> Result, sqlx::Error> { - // DatabaseError + /// Undo one change and return the change record or None if there was no change to undo. + pub async fn undo(&self) -> Result, ValveError> { let last_change = match self.get_record_to_undo().await? { None => { warn!("Nothing to undo."); @@ -1549,7 +1597,7 @@ impl Valve { match (from, to) { (None, None) => { - return Err(SqlxCErr( + return Err(ValveError::DataError( "Cannot redo unknown operation from None to None".into(), )) } @@ -1618,11 +1666,8 @@ impl Valve { } } - /// Redo one change and return the change record - /// or None if there was no change to redo. - /// Return an error on database problem. - pub async fn redo(&self) -> Result, sqlx::Error> { - // DatabaseError + /// Redo one change and return the change record or None if there was no change to redo. + pub async fn redo(&self) -> Result, ValveError> { let last_undo = match self.get_record_to_redo().await? 
{ None => { warn!("Nothing to redo."); @@ -1647,7 +1692,7 @@ impl Valve { match (from, to) { (None, None) => { - return Err(SqlxCErr( + return Err(ValveError::DataError( "Cannot redo unknown operation from None to None".into(), )) } @@ -1716,17 +1761,17 @@ impl Valve { } } - /// Given a config map, a map of compiled datatype conditions, a database connection pool, a - /// table name, a column name, and (optionally) a string to match, return a JSON array of - /// possible valid values for the given column which contain the matching string as a substring - /// (or all of them if no matching string is given). The JSON array returned is formatted for - /// Typeahead, i.e., it takes the form: `[{"id": id, "label": label, "order": order}, ...]`. + /// Given a table name, a column name, and (optionally) a string to match, return a JSON array + /// of possible valid values for the given column which contain the matching string as a + /// substring (or all of them if no matching string is given). The JSON array returned is + /// formatted for Typeahead, i.e., it takes the form: + /// `[{"id": id, "label": label, "order": order}, ...]`. pub async fn get_matching_values( &self, table_name: &str, column_name: &str, matching_string: Option<&str>, - ) -> Result { + ) -> Result { let config = &self.config; let compiled_datatype_conditions = &self.compiled_datatype_conditions; let parsed_structure_conditions = &self.parsed_structure_conditions; @@ -1911,8 +1956,8 @@ impl Valve { } } -/// TODO: Add docstring here. -async fn get_pool_from_connection_string(database: &str) -> Result { +/// Given a string representing the location of a database, return a database connection pool. 
+async fn get_pool_from_connection_string(database: &str) -> Result { let connection_options; if database.starts_with("postgresql://") { connection_options = AnyConnectOptions::from_str(database)?; @@ -1923,7 +1968,7 @@ async fn get_pool_from_connection_string(database: &str) -> Result, -) -> Result, String> { +) -> Result, ValveError> { // Since the consequence of an update could involve currently invalid rows // (in the conflict table) becoming valid or vice versa, we need to check rows for // which the value of the column is the same as `value` @@ -2993,11 +3033,7 @@ async fn get_affected_rows( let query = sqlx_query(&sql); let mut table_rows = IndexMap::new(); - for row in query - .fetch_all(tx.acquire().await.map_err(|e| e.to_string())?) - .await - .map_err(|e| e.to_string())? - { + for row in query.fetch_all(tx.acquire().await?).await? { let mut table_row = ValveRow::new(); let mut row_number: Option = None; for column in row.columns() { @@ -3020,7 +3056,8 @@ async fn get_affected_rows( table_row.insert(cname.to_string(), json!(cell)); } } - let row_number = row_number.ok_or("Row: has no row number".to_string())?; + let row_number = + row_number.ok_or(ValveError::DataError("Row: has no row number".to_string()))?; table_rows.insert(row_number, table_row); } @@ -3036,7 +3073,7 @@ async fn get_row_from_db( tx: &mut Transaction<'_, sqlx::Any>, table: &str, row_number: &u32, -) -> Result { +) -> Result { let sql = format!( "{} WHERE row_number = {}", query_with_message_values(table, global_config, pool), @@ -3045,7 +3082,7 @@ async fn get_row_from_db( let query = sqlx_query(&sql); let rows = query.fetch_all(tx.acquire().await?).await?; if rows.len() == 0 { - return Err(SqlxCErr( + return Err(ValveError::DataError( format!( "In get_row_from_db(). 
No rows found for row_number: {}", row_number @@ -3062,9 +3099,13 @@ async fn get_row_from_db( } else { let messages: &str = sql_row.get("message"); match serde_json::from_str::(messages) { - Err(e) => return Err(SqlxCErr(e.into())), + Err(e) => return Err(ValveError::SerdeJsonError(e.into())), Ok(SerdeValue::Array(m)) => m, - _ => return Err(SqlxCErr(format!("{} is not an array.", messages).into())), + _ => { + return Err(ValveError::DataError( + format!("{} is not an array.", messages).into(), + )) + } } } }; @@ -3110,7 +3151,7 @@ async fn get_db_value( row_number: &u32, pool: &AnyPool, tx: &mut Transaction<'_, sqlx::Any>, -) -> Result { +) -> Result { let is_clause = if pool.any_kind() == AnyKind::Sqlite { "IS" } else { @@ -3144,14 +3185,14 @@ async fn get_db_value( ); let query = sqlx_query(&sql); - let rows = query - .fetch_all(tx.acquire().await.map_err(|e| e.to_string())?) - .await - .map_err(|e| e.to_string())?; + let rows = query.fetch_all(tx.acquire().await?).await?; if rows.len() == 0 { - return Err(format!( - "In get_db_value(). No rows found for row_number: {}", - row_number + return Err(ValveError::DataError( + format!( + "In get_db_value(). 
No rows found for row_number: {}", + row_number + ) + .into(), )); } let result_row = &rows[0]; @@ -3175,16 +3216,19 @@ async fn get_rows_to_update( IndexMap>, IndexMap>, ), - String, + ValveError, > { - fn get_cell_value(row: &ValveRow, column: &str) -> Result { + fn get_cell_value(row: &ValveRow, column: &str) -> Result { match row.get(column).and_then(|cell| cell.get("value")) { Some(SerdeValue::String(s)) => Ok(format!("{}", s)), Some(SerdeValue::Number(n)) => Ok(format!("{}", n)), Some(SerdeValue::Bool(b)) => Ok(format!("{}", b)), - _ => Err(format!( - "Value missing or of unknown type in column {} of row to update: {:?}", - column, row + _ => Err(ValveError::DataError( + format!( + "Value missing or of unknown type in column {} of row to update: {:?}", + column, row + ) + .into(), )), } } @@ -3384,7 +3428,7 @@ async fn process_updates( updates: &IndexMap>, query_as_if: &QueryAsIf, do_not_recurse: bool, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { for (update_table, rows_to_update) in updates { for (row_number, row) in rows_to_update { // Validate each row 'counterfactually': @@ -3431,9 +3475,9 @@ async fn record_row_change( from: Option<&ValveRow>, to: Option<&ValveRow>, user: &str, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { if let (None, None) = (from, to) { - return Err(SqlxCErr( + return Err(ValveError::InputError( "Arguments 'from' and 'to' to function record_row_change() cannot both be None".into(), )); } @@ -3460,7 +3504,7 @@ async fn record_row_change( } } - fn summarize(from: Option<&ValveRow>, to: Option<&ValveRow>) -> Result { + fn summarize(from: Option<&ValveRow>, to: Option<&ValveRow>) -> Result { // Constructs a summary of the form: // { // "column":"bar", @@ -3483,7 +3527,9 @@ async fn record_row_change( SerdeValue::Bool(b) => Some(format!("{}", b)), _ => None, }) - .ok_or(format!("No value in {}", cell))?; + .ok_or(ValveError::DataError( + format!("No value in {}", cell).into(), + ))?; let new_value = to 
.get(column) .and_then(|v| v.get("value")) @@ -3493,7 +3539,9 @@ async fn record_row_change( SerdeValue::Bool(b) => Some(format!("{}", b)), _ => None, }) - .ok_or(format!("No value for column: {} in {:?}", column, to))?; + .ok_or(ValveError::DataError( + format!("No value for column: {} in {:?}", column, to).into(), + ))?; if new_value != old_value { let mut column_summary = SerdeMap::new(); column_summary.insert("column".to_string(), json!(column)); @@ -3517,7 +3565,7 @@ async fn record_row_change( } } - let summary = summarize(from, to).map_err(|e| SqlxCErr(e.into()))?; + let summary = summarize(from, to)?; let (from, to) = (to_text(from, true), to_text(to, true)); let sql = format!( r#"INSERT INTO "history" ("table", "row", "from", "to", "summary", "user") @@ -3562,7 +3610,7 @@ async fn switch_undone_state( undone_state: bool, tx: &mut Transaction<'_, sqlx::Any>, pool: &AnyPool, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { // Set the history record to undone: let timestamp = { if pool.any_kind() == AnyKind::Sqlite { @@ -3713,7 +3761,7 @@ async fn insert_new_row_tx( row: &ValveRow, new_row_number: Option, skip_validation: bool, -) -> Result { +) -> Result { // Send the row through the row validator to determine if any fields are problematic and // to mark them with appropriate messages: let row = if !skip_validation { @@ -3777,20 +3825,26 @@ async fn insert_new_row_tx( for (column, cell) in row.iter() { insert_columns.append(&mut vec![format!(r#""{}""#, column)]); - let cell = cell - .as_object() - .ok_or(SqlxCErr(format!("Cell {:?} is not an object", cell).into()))?; - let valid = cell.get("valid").and_then(|v| v.as_bool()).ok_or(SqlxCErr( - format!("No bool named 'valid' in {:?}", cell).into(), - ))?; - let value = cell.get("value").and_then(|v| v.as_str()).ok_or(SqlxCErr( - format!("No string named 'value' in {:?}", cell).into(), + let cell = cell.as_object().ok_or(ValveError::InputError( + format!("Cell {:?} is not an object", cell).into(), 
))?; + let valid = cell + .get("valid") + .and_then(|v| v.as_bool()) + .ok_or(ValveError::InputError( + format!("No bool named 'valid' in {:?}", cell).into(), + ))?; + let value = cell + .get("value") + .and_then(|v| v.as_str()) + .ok_or(ValveError::InputError( + format!("No string named 'value' in {:?}", cell).into(), + ))?; let messages = sort_messages( &sorted_datatypes, cell.get("messages") .and_then(|m| m.as_array()) - .ok_or(SqlxCErr( + .ok_or(ValveError::InputError( format!("No array named 'messages' in {:?}", cell).into(), ))?, ); @@ -3801,21 +3855,23 @@ async fn insert_new_row_tx( "value": value, "level": message.get("level").and_then(|s| s.as_str()) .ok_or( - SqlxCErr(format!("No 'level' in {:?}", message).into()) + ValveError::InputError(format!("No 'level' in {:?}", message).into()) )?, "rule": message.get("rule").and_then(|s| s.as_str()) .ok_or( - SqlxCErr(format!("No 'rule' in {:?}", message).into()) + ValveError::InputError(format!("No 'rule' in {:?}", message).into()) )?, "message": message.get("message").and_then(|s| s.as_str()) .ok_or( - SqlxCErr(format!("No 'message' in {:?}", message).into()) + ValveError::InputError(format!("No 'message' in {:?}", message).into()) )?, })); } let sql_type = get_sql_type_from_global_config(global_config, table, column, pool).ok_or( - SqlxCErr(format!("Could not get SQL type for {}.{}", table, column).into()), + ValveError::ConfigError( + format!("Could not get SQL type for {}.{}", table, column).into(), + ), )?; if is_sql_type_error(&sql_type, value) { insert_values.push(String::from("NULL")); @@ -3841,9 +3897,8 @@ async fn insert_new_row_tx( // Look through the valve config to see which tables are dependent on this table // and find the rows that need to be updated: - let (_, updates_after, _) = get_rows_to_update(global_config, pool, tx, table, &query_as_if) - .await - .map_err(|e| SqlxCErr(e.into()))?; + let (_, updates_after, _) = + get_rows_to_update(global_config, pool, tx, table, &query_as_if).await?; // 
Check it to see if the row should be redirected to the conflict table: let table_to_write = { @@ -3916,7 +3971,7 @@ async fn delete_row_tx( tx: &mut Transaction, table: &str, row_number: &u32, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { // Used to validate the given row, counterfactually, "as if" the row did not exist in the // database: let query_as_if = QueryAsIf { @@ -3931,9 +3986,7 @@ async fn delete_row_tx( // rows that need to be updated. Since this is a delete there will only be rows to update // before and none after the delete: let (updates_before, _, updates_intra) = - get_rows_to_update(global_config, pool, tx, table, &query_as_if) - .await - .map_err(|e| SqlxCErr(e.into()))?; + get_rows_to_update(global_config, pool, tx, table, &query_as_if).await?; // Process the updates that need to be performed before the update of the target row: process_updates( @@ -4003,7 +4056,7 @@ async fn update_row_tx( row_number: &u32, skip_validation: bool, do_not_recurse: bool, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { // First, look through the valve config to see which tables are dependent on this table and find // the rows that need to be updated. The variable query_as_if is used to validate the given row, // counterfactually, "as if" the version of the row in the database currently were replaced with @@ -4019,9 +4072,7 @@ async fn update_row_tx( if do_not_recurse { (IndexMap::new(), IndexMap::new(), IndexMap::new()) } else { - get_rows_to_update(global_config, pool, tx, table, &query_as_if) - .await - .map_err(|e| SqlxCErr(e.into()))? + get_rows_to_update(global_config, pool, tx, table, &query_as_if).await? } }; @@ -4682,7 +4733,8 @@ fn verify_table_deps_and_sort( }; } -/// TODO: Add doc string here. +/// Given table configuration map and a datatype configuration map, a parser, a table name, and a +/// database connection pool, return a configuration map representing all of the table constraints. 
fn get_table_constraints( tables_config: &SerdeMap, datatypes_config: &SerdeMap, @@ -4867,7 +4919,9 @@ fn get_table_constraints( return table_constraints; } -// TODO: Add docstring here +/// Given table configuration map and a datatype configuration map, a parser, a table name, and a +/// database connection pool, return a list of DDL statements that can be used to create the +/// database tables. fn get_table_ddl( tables_config: &SerdeMap, datatypes_config: &SerdeMap, @@ -5185,7 +5239,7 @@ async fn make_inserts( String, Vec, ), - sqlx::Error, + ValveError, > { fn is_conflict_row(row: &ResultRow, conflict_columns: &Vec) -> bool { for (column, cell) in &row.contents { @@ -5390,8 +5444,9 @@ async fn insert_chunk( messages_stats: &mut HashMap, verbose: bool, validate: bool, -) -> Result<(), sqlx::Error> { - // First, do the tree validation: +) -> Result<(), ValveError> { + // First, do the tree validation. TODO: I don't remember why this needs to be done first, but + // it does. Add a comment here explaining why. 
if validate { validate_rows_trees(config, pool, table_name, rows).await?; } @@ -5503,7 +5558,7 @@ async fn insert_chunk( } message_query.execute(pool).await?; } else { - return Err(e); + return Err(ValveError::DatabaseError(e)); } } }; @@ -5527,7 +5582,7 @@ async fn insert_chunks( messages_stats: &mut HashMap, verbose: bool, validate: bool, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { if !MULTI_THREADED { for (chunk_number, chunk) in chunks.into_iter().enumerate() { let mut rows: Vec<_> = chunk.collect(); diff --git a/src/main.rs b/src/main.rs index 3205f4b7..569938bd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,18 +4,17 @@ use crate::api_test::run_api_tests; use argparse::{ArgumentParser, Store, StoreTrue}; -use ontodev_valve::Valve; +use ontodev_valve::{Valve, ValveError}; use serde_json::{from_str, Value as SerdeValue}; use std::{env, process}; -fn cli_args_valid(source: &str, destination: &str, dump_config: bool) -> bool { - source != "" && (dump_config || destination != "") -} - #[async_std::main] -async fn main() -> Result<(), sqlx::Error> { +async fn main() -> Result<(), ValveError> { + // Command line parameters and their default values. See below for descriptions. Note that some + // of these are mutually exclusive. This is accounted for below. + // TODO: Use a more powerful command-line parser library that can automatically take care of + // things like mutually exclusive options, since argparse doesn't seem to be able to do it. let mut verbose = false; - let mut yes = false; let mut api_test = false; let mut dump_config = false; let mut dump_schema = false; @@ -25,65 +24,63 @@ async fn main() -> Result<(), sqlx::Error> { let mut drop_all = false; let mut create_only = false; let mut initial_load = false; + let mut save = String::new(); + let mut save_all = false; + let mut save_dir = String::new(); let mut source = String::new(); let mut destination = String::new(); + // TODO: Add a "dry_run" parameter. 
{ // this block limits scope of borrows by ap.refer() method let mut ap = ArgumentParser::new(); - ap.set_description( - r#"A lightweight validation engine written in rust. If neither - --api_test nor --dump_config is specified, the configuration referred - to by SOURCE will be read and a new database will be created and loaded - with the indicated data."#, - ); + ap.set_description(r#"Valve is a lightweight validation engine written in rust."#); ap.refer(&mut verbose).add_option( &["--verbose"], StoreTrue, - r#"While loading the database, write progress messages to stderr."#, - ); - ap.refer(&mut yes).add_option( - &["--yes"], - StoreTrue, - r#"Do not prompt the user to confirm dropping/truncating tables."#, + r#"Write informative messages about what Valve is doing to stderr."#, ); ap.refer(&mut api_test).add_option( &["--api_test"], StoreTrue, - r#"Read the configuration referred to by SOURCE and test the functions that - are callable externally on the existing, pre-loaded database indicated by - DESTINATION."#, + r#"Read the configuration referred to by SOURCE and run a set of predefined tests on the + existing, pre-loaded database indicated by DESTINATION."#, ); ap.refer(&mut dump_config).add_option( &["--dump_config"], StoreTrue, - r#"Read the configuration referred to by SOURCE and send it to stdout as a - JSON-formatted string."#, + r#"Read the configuration referred to by SOURCE and print it as a JSON-formatted + string."#, ); ap.refer(&mut dump_schema).add_option( &["--dump_schema"], StoreTrue, - r#"Write the SQL used to create the database to stdout."#, + r#"Read the configuration referred to by SOURCE and print the SQL that will be used to + create the database to stdout."#, ); ap.refer(&mut table_order).add_option( &["--table_order"], StoreTrue, - r#"Display the order in which tables must be created or dropped."#, + r#"Read the configuration referred to by SOURCE and print the order in which the + configured tables will be created, as determined by 
their dependency relations."#, ); ap.refer(&mut show_deps_in).add_option( &["--show_deps_in"], StoreTrue, - r#"Display the incoming dependencies for each configured table."#, + r#"Read the configuration referred to by SOURCE and print the incoming dependencies + for each configured table."#, ); ap.refer(&mut show_deps_out).add_option( &["--show_deps_out"], StoreTrue, - r#"Display the outgoing dependencies for each configured table."#, + r#"Read the configuration referred to by SOURCE and print the outgoing dependencies + for each configured table."#, ); ap.refer(&mut drop_all).add_option( &["--drop_all"], StoreTrue, - r#"Drop all tables in the database."#, + r#"Read the configuration referred to by SOURCE and drop all of the configured tables + in the given database."#, ); ap.refer(&mut create_only).add_option( &["--create_only"], @@ -99,20 +96,39 @@ async fn main() -> Result<(), sqlx::Error> { only, as data integrity will not be guaranteed in the case of an interrupted transaction."#, ); + ap.refer(&mut save).add_option( + &["--save"], + Store, + r#"Read the configuration referred to by SOURCE and save the configured data tables + from the given list as TSV files to their configured paths (as specified in the table + configuration). Optionally, specify --save_dir to save the files at an alternative + location."#, + ); + ap.refer(&mut save_all).add_option( + &["--save_all"], + StoreTrue, + r#"Read the configuration referred to by SOURCE and save all of the configured data tables + as TSV files to their configured paths (as specified in the table configuration). + Optionally, specify --save_dir to save the files at an alternative location."#, + ); + ap.refer(&mut save_dir).add_option( + &["--save_dir"], + Store, + r#"Ignored if neither --save nor --save_all has been specified. Saves the tables to the + given path instead of to their configured paths."#, + ); ap.refer(&mut source).add_argument( "SOURCE", Store, - r#"(Required.)
The location of the valve configuration entrypoint. Can be - one of (A) A URL of the form `postgresql://...` or `sqlite://...` indicating a - database connection where the valve configuration can be read from a table named - "table"; (B) The filename (including path) of the table file (usually called - table.tsv)."#, + r#"The location of the valve configuration entrypoint. Can be one of (A) A URL of the + form `postgresql://...` or `sqlite://...` indicating a database connection where + the valve configuration can be read from a table named "table"; (B) The filename + (including path) of the table file (usually called table.tsv)."#, ); ap.refer(&mut destination).add_argument( "DESTINATION", Store, - r#"(Required unless the --dump_config option has been specified.) Can be - one of (A) A URL of the form `postgresql://...` or `sqlite://...` + r#"Can be one of (A) A URL of the form `postgresql://...` or `sqlite://...` (B) The filename (including path) of a sqlite database."#, ); @@ -121,21 +137,63 @@ async fn main() -> Result<(), sqlx::Error> { let args: Vec = env::args().collect(); let program_name = &args[0]; - if !cli_args_valid(&source, &destination, dump_config) { - if source == "" { - eprintln!("Parameter SOURCE is required."); - } else if destination == "" { - eprintln!("Parameter DESTINATION is required."); - } - eprintln!("To see command-line usage, run {} --help", program_name); + let advice = format!("Run `{} --help` for command line usage.", program_name); + + let mutually_exclusive_options = vec![ + api_test, + dump_config, + dump_schema, + table_order, + show_deps_in, + show_deps_out, + drop_all, + create_only, + save != "" || save_all, + ]; + + if mutually_exclusive_options + .iter() + .filter(|&i| *i == true) + .count() + > 1 + { + eprintln!( + "More than one mutually exclusive option specified. 
{}.", + advice + ); + process::exit(1); + } + + let destination_optional = + dump_config || dump_schema || table_order || show_deps_in || show_deps_out; + + if source == "" { + eprintln!("Parameter SOURCE is required. {}", advice); + process::exit(1); + } else if !destination_optional && destination == "" { + eprintln!("Parameter DESTINATION is required. {}", advice); process::exit(1); } - let interactive = !yes; if api_test { run_api_tests(&source, &destination).await?; + } else if save_all || save != "" { + let valve = Valve::build(&source, &destination, verbose, initial_load).await?; + let save_dir = { + if save_dir == "" { + None + } else { + Some(save_dir.clone()) + } + }; + if save_all { + valve.save_all_tables(&save_dir).unwrap(); + } else { + let tables = save.split(',').collect::>(); + valve.save_tables(&tables, &save_dir).unwrap(); + } } else if dump_config { - let valve = Valve::build(&source, &destination, verbose, initial_load, interactive).await?; + let valve = Valve::build(&source, &destination, verbose, initial_load).await?; let mut config = valve.config.clone(); let datatype_conditions = format!("{:?}", valve.compiled_datatype_conditions).replace(r"\", r"\\"); @@ -154,14 +212,14 @@ async fn main() -> Result<(), sqlx::Error> { let config = serde_json::to_string(&config).unwrap(); println!("{}", config); } else if dump_schema { - let valve = Valve::build(&source, &destination, verbose, initial_load, interactive).await?; + let valve = Valve::build(&source, &destination, verbose, initial_load).await?; valve.dump_schema().await?; } else if table_order { - let valve = Valve::build(&source, &destination, verbose, initial_load, interactive).await?; + let valve = Valve::build(&source, &destination, verbose, initial_load).await?; let sorted_table_list = valve.get_sorted_table_list(false); println!("{}", sorted_table_list.join(", ")); } else if show_deps_in || show_deps_out { - let valve = Valve::build(&source, &destination, verbose, initial_load, 
interactive).await?; + let valve = Valve::build(&source, &destination, verbose, initial_load).await?; let dependencies = valve.collect_dependencies(show_deps_in); for (table, deps) in dependencies.iter() { let deps = { @@ -182,13 +240,13 @@ async fn main() -> Result<(), sqlx::Error> { println!("{}: {}", preamble, deps); } } else if drop_all { - let valve = Valve::build(&source, &destination, verbose, initial_load, interactive).await?; + let valve = Valve::build(&source, &destination, verbose, initial_load).await?; valve.drop_all_tables().await?; } else if create_only { - let valve = Valve::build(&source, &destination, verbose, initial_load, interactive).await?; + let valve = Valve::build(&source, &destination, verbose, initial_load).await?; valve.create_all_tables().await?; } else { - let valve = Valve::build(&source, &destination, verbose, initial_load, interactive).await?; + let valve = Valve::build(&source, &destination, verbose, initial_load).await?; valve.load_all_tables(true).await?; } diff --git a/src/validate.rs b/src/validate.rs index ae0552a8..b58d639a 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -1,15 +1,13 @@ use chrono::Utc; use indexmap::IndexMap; use serde_json::{json, Value as SerdeValue}; -use sqlx::{ - any::AnyPool, query as sqlx_query, Acquire, Error::Configuration as SqlxCErr, Row, Transaction, - ValueRef, -}; +use sqlx::{any::AnyPool, query as sqlx_query, Acquire, Row, Transaction, ValueRef}; use std::collections::HashMap; use crate::{ cast_sql_param_from_text, error, get_column_value, get_sql_type_from_global_config, - is_sql_type_error, local_sql_syntax, ColumnRule, CompiledCondition, SerdeMap, ValveRow, + is_sql_type_error, local_sql_syntax, ColumnRule, CompiledCondition, SerdeMap, ValveError, + ValveRow, }; /// Represents a particular cell in a particular row of data with vaildation results. 
@@ -64,7 +62,7 @@ pub async fn validate_row_tx( row: &ValveRow, row_number: Option, query_as_if: Option<&QueryAsIf>, -) -> Result { +) -> Result { // Fallback to a default transaction if it is not given. Since we do not commit before it falls // out of scope the transaction will be rolled back at the end of this function. And since this // function is read-only the rollback is trivial and therefore inconsequential. @@ -85,7 +83,7 @@ pub async fn validate_row_tx( None => None, Some(SerdeValue::String(s)) => Some(s.to_string()), _ => { - return Err(SqlxCErr( + return Err(ValveError::DataError( format!("No string 'nulltype' in cell: {:?}.", cell).into(), )) } @@ -94,7 +92,7 @@ pub async fn validate_row_tx( Some(SerdeValue::String(s)) => s.to_string(), Some(SerdeValue::Number(n)) => format!("{}", n), _ => { - return Err(SqlxCErr( + return Err(ValveError::DataError( format!("No string/number 'value' in cell: {:#?}.", cell).into(), )) } @@ -102,7 +100,7 @@ pub async fn validate_row_tx( let valid = match cell.get("valid").and_then(|v| v.as_bool()) { Some(b) => b, None => { - return Err(SqlxCErr( + return Err(ValveError::DataError( format!("No bool 'valid' in cell: {:?}.", cell).into(), )) } @@ -110,7 +108,7 @@ pub async fn validate_row_tx( let messages = match cell.get("messages").and_then(|m| m.as_array()) { Some(a) => a.to_vec(), None => { - return Err(SqlxCErr( + return Err(ValveError::DataError( format!("No array 'messages' in cell: {:?}.", cell).into(), )) } @@ -252,7 +250,7 @@ pub async fn validate_under( mut tx: Option<&mut Transaction<'_, sqlx::Any>>, table_name: &String, extra_row: Option<&ResultRow>, -) -> Result, sqlx::Error> { +) -> Result, ValveError> { let mut results = vec![]; let ukeys = config .get("constraints") @@ -444,7 +442,7 @@ pub async fn validate_tree_foreign_keys( mut tx: Option<&mut Transaction<'_, sqlx::Any>>, table_name: &String, extra_row: Option<&ResultRow>, -) -> Result, sqlx::Error> { +) -> Result, ValveError> { let tkeys = config 
.get("constraints") .and_then(|c| c.as_object()) @@ -547,7 +545,7 @@ pub async fn validate_rows_trees( pool: &AnyPool, table_name: &String, rows: &mut Vec, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { let column_names = config .get("table") .and_then(|t| t.get(table_name)) @@ -607,7 +605,7 @@ pub async fn validate_rows_constraints( pool: &AnyPool, table_name: &String, rows: &mut Vec, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { let column_names = config .get("table") .and_then(|t| t.get(table_name)) @@ -762,7 +760,7 @@ pub fn validate_rows_intra( /// Given a row represented as a ValveRow, remove any duplicate messages from the row's cells, so /// that no cell has messages with the same level, rule, and message text. -fn remove_duplicate_messages(row: &ValveRow) -> Result { +fn remove_duplicate_messages(row: &ValveRow) -> Result { let mut deduped_row = ValveRow::new(); for (column_name, cell) in row.iter() { let mut messages = cell @@ -1288,7 +1286,7 @@ async fn validate_cell_foreign_constraints( column_name: &String, cell: &mut ResultCell, query_as_if: Option<&QueryAsIf>, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { let fkeys = config .get("constraints") .and_then(|c| c.as_object()) @@ -1427,7 +1425,7 @@ async fn validate_cell_trees( cell: &mut ResultCell, context: &ResultRow, prev_results: &Vec, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { // If the current column is the parent column of a tree, validate that adding the current value // will not result in a cycle between this and the parent column: let tkeys = config @@ -1609,7 +1607,7 @@ async fn validate_cell_unique_constraints( cell: &mut ResultCell, prev_results: &Vec, row_number: Option, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { // If the column has a primary or unique key constraint, or if it is the child associated with // a tree, then if the value of the cell is a duplicate either of one of the previously // 
validated rows in the batch, or a duplicate of a validated row that has already been inserted diff --git a/test/insert_update.sh b/test/insert_update.sh index 698c5c06..0c30e71d 100755 --- a/test/insert_update.sh +++ b/test/insert_update.sh @@ -1,20 +1,20 @@ #!/usr/bin/env bash -if [[ $# -lt 1 ]] +if [[ $# -lt 2 ]] then - echo "Usage: $(basename $0) DATABASE" + echo "Usage: $(basename $0) DATABASE TABLE_CONFIG" exit 1 fi db=$1 -shift +table_defs=$2 +shift 2 if [[ $# -gt 0 ]] then echo "Warning: Extra arguments: '$*' will be ignored" fi pwd=$(dirname $(readlink -f $0)) -export_script=$pwd/../scripts/export.py output_dir=$pwd/output expected_dir=$pwd/expected @@ -25,7 +25,7 @@ do table_path=$pwd/output/$table_path table_file=$(basename $table_path) table=${table_file%.*} - ${export_script} data $db $output_dir $table + ./valve --save $table --save_dir $output_dir $table_defs $db diff -q $expected_dir/${table}.tsv ${table_path} ret_value=$(expr $ret_value + $?) done diff --git a/test/round_trip.sh b/test/round_trip.sh index fb93e7a5..82368470 100755 --- a/test/round_trip.sh +++ b/test/round_trip.sh @@ -15,8 +15,11 @@ then fi pwd=$(dirname $(readlink -f $0)) -export_script=$pwd/../scripts/export.py output_dir=$pwd/output +valve="./valve" + +# Use valve to save all of the configured tables: +${valve} --save_all --save_dir ${output_dir} ${table_defs} $db num_tables=$(expr $(cat $table_defs | wc -l) - 1) table_paths=$(tail -$num_tables $table_defs | cut -f 2) @@ -28,7 +31,6 @@ do table_path=$pwd/$table_path table_file=$(basename $table_path) table=${table_file%.*} - ${export_script} data $db $output_dir $table diff --strip-trailing-cr -q ${table_path} $output_dir/${table}.tsv ret_value=$(expr $ret_value + $?)
done From cd21a4ea318f2c807e77c6409a9966ddcf6cfc8f Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 14 Jan 2024 15:28:26 -0500 Subject: [PATCH 48/57] make most library functions public --- src/lib.rs | 93 +++++++++++++++++++++++++------------------------ src/validate.rs | 20 +++++------ 2 files changed, 58 insertions(+), 55 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 785e6f99..4b961e48 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -387,7 +387,7 @@ impl Valve { /// 'unique', 'primary', or 'from(table, column)' in its column configuration fails to be /// associated, in the database, with a unique constraint, primary key, or foreign key, /// respectively; or vice versa; (4) The table does not exist in the database. - async fn table_has_changed(&self, table: &str) -> Result { + pub async fn table_has_changed(&self, table: &str) -> Result { // A clojure that, given a parsed structure condition, a table and column name, and an // unsigned integer representing whether the given column, in the case of a SQLite database, // is a primary key (in the case of PostgreSQL, the sqlite_pk parameter is ignored): @@ -748,7 +748,7 @@ impl Valve { } /// Generates and returns the DDL required to setup the database. - async fn get_setup_statements(&self) -> Result>, ValveError> { + pub async fn get_setup_statements(&self) -> Result>, ValveError> { let tables_config = self .config .get("table") @@ -917,7 +917,7 @@ impl Valve { /// Get all the incoming (tables that depend on it) or outgoing (tables it depends on) /// dependencies of the given table. - fn get_dependencies(&self, table: &str, incoming: bool) -> Vec { + pub fn get_dependencies(&self, table: &str, incoming: bool) -> Vec { let mut dependent_tables = vec![]; if table != "message" && table != "history" { let direct_deps = { @@ -939,7 +939,7 @@ impl Valve { /// Given a list of tables, fill it in with any further tables that are dependent upon tables /// in the given list. 
If deletion_order is true, the tables are sorted as required for /// deleting them all sequentially, otherwise they are ordered in reverse. - fn add_dependencies(&self, tables: &Vec<&str>, deletion_order: bool) -> Vec { + pub fn add_dependencies(&self, tables: &Vec<&str>, deletion_order: bool) -> Vec { let mut with_dups = vec![]; for table in tables { let dependent_tables = self.get_dependencies(table, true); @@ -965,7 +965,7 @@ impl Valve { /// Given a subset of the configured tables, return them in sorted dependency order, or in /// reverse if `reverse` is set to true. - fn sort_tables( + pub fn sort_tables( &self, table_subset: &Vec<&str>, reverse: bool, @@ -1957,7 +1957,7 @@ impl Valve { } /// Given a string representing the location of a database, return a database connection pool. -async fn get_pool_from_connection_string(database: &str) -> Result { +pub async fn get_pool_from_connection_string(database: &str) -> Result { let connection_options; if database.starts_with("postgresql://") { connection_options = AnyConnectOptions::from_str(database)?; @@ -1983,7 +1983,7 @@ async fn get_pool_from_connection_string(database: &str) -> Result HashMap { @@ -2592,7 +2592,7 @@ fn get_compiled_datatype_conditions( /// ... /// } /// ``` -fn get_compiled_rule_conditions( +pub fn get_compiled_rule_conditions( config: &SerdeMap, compiled_datatype_conditions: HashMap, parser: &StartParser, @@ -2670,7 +2670,7 @@ fn get_compiled_rule_conditions( /// Given the global config map and a parser, parse all of the structure conditions, add them to /// a hash map whose keys are given by the text versions of the conditions and whose values are /// given by the parsed versions, and finally return the hashmap. -fn get_parsed_structure_conditions( +pub fn get_parsed_structure_conditions( config: &SerdeMap, parser: &StartParser, ) -> HashMap { @@ -2718,7 +2718,7 @@ fn get_parsed_structure_conditions( /// contained in the message and history tables. 
The SQL generated is in the form of a tuple of /// Strings, with the first string being a SQL statement for dropping the view, and the second /// string being a SQL statement for creating it. -fn get_sql_for_standard_view(table: &str, pool: &AnyPool) -> String { +pub fn get_sql_for_standard_view(table: &str, pool: &AnyPool) -> String { let message_t; if pool.any_kind() == AnyKind::Postgres { message_t = format!( @@ -2829,7 +2829,7 @@ fn get_sql_for_standard_view(table: &str, pool: &AnyPool) -> String { /// errors. Like the function for generating a standard view, the SQL generated by this function is /// returned in the form of a tuple of Strings, with the first string being a SQL statement /// for dropping the view, and the second string being a SQL statement for creating it. -fn get_sql_for_text_view(tables_config: &SerdeMap, table: &str, pool: &AnyPool) -> String { +pub fn get_sql_for_text_view(tables_config: &SerdeMap, table: &str, pool: &AnyPool) -> String { let is_clause = if pool.any_kind() == AnyKind::Sqlite { "IS" } else { @@ -2920,7 +2920,7 @@ fn get_sql_for_text_view(tables_config: &SerdeMap, table: &str, pool: &AnyPool) /// value of the column, such that when the value of a given column is null, the query attempts to /// extract it from the message table. Returns a String representing the SQL to retrieve the value /// of the column. -fn query_column_with_message_value(table: &str, column: &str, pool: &AnyPool) -> String { +pub fn query_column_with_message_value(table: &str, column: &str, pool: &AnyPool) -> String { let is_clause = if pool.any_kind() == AnyKind::Sqlite { "IS" } else { @@ -2954,7 +2954,7 @@ fn query_column_with_message_value(table: &str, column: &str, pool: &AnyPool) -> /// SQL query that one can use to get the logical contents of the table, such that when the value /// of a given column is null, the query attempts to extract it from the message table. Returns a /// String representing the query. 
-fn query_with_message_values(table: &str, global_config: &SerdeMap, pool: &AnyPool) -> String { +pub fn query_with_message_values(table: &str, global_config: &SerdeMap, pool: &AnyPool) -> String { let real_columns = global_config .get("table") .and_then(|t| t.get(table)) @@ -3004,7 +3004,7 @@ fn query_with_message_values(table: &str, global_config: &SerdeMap, pool: &AnyPo /// column name, and a value for that column: get the rows, other than the one indicated by /// `except`, that would need to be revalidated if the given value were to replace the actual /// value of the column in that row. -async fn get_affected_rows( +pub async fn get_affected_rows( table: &str, column: &str, value: &str, @@ -3067,7 +3067,7 @@ async fn get_affected_rows( /// Given a global configuration map, a database connection pool, a database transaction, a table /// name and a row number, get the logical contents of that row (whether or not it is valid), /// including any messages, from the database. -async fn get_row_from_db( +pub async fn get_row_from_db( global_config: &SerdeMap, pool: &AnyPool, tx: &mut Transaction<'_, sqlx::Any>, @@ -3145,7 +3145,7 @@ async fn get_row_from_db( /// Given a database connection pool, a database transaction, a table name, a column name, and a row /// number, get the current value of the given column in the database. -async fn get_db_value( +pub async fn get_db_value( table: &str, column: &str, row_number: &u32, @@ -3204,7 +3204,7 @@ async fn get_db_value( /// and a [QueryAsIf] struct representing a custom modification to the query of the table, get /// the rows that will potentially be affected by the database change to the row indicated in /// query_as_if. 
-async fn get_rows_to_update( +pub async fn get_rows_to_update( global_config: &SerdeMap, pool: &AnyPool, tx: &mut Transaction<'_, sqlx::Any>, @@ -3419,7 +3419,7 @@ async fn get_rows_to_update( /// a database transaction, a number of updates to process, a [QueryAsIf] struct indicating how /// we should modify 'in thought' the current state of the database, and a flag indicating whether /// we should allow recursive updates, validate and then update each row indicated in `updates`. -async fn process_updates( +pub async fn process_updates( global_config: &SerdeMap, compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, @@ -3468,7 +3468,7 @@ async fn process_updates( /// are going to change it from, optionally: the version of the row we are going to change it to, /// and the name of the user making the change, record the change to the history table in the /// database. Note that `from` and `to` cannot both be None. -async fn record_row_change( +pub async fn record_row_change( tx: &mut Transaction<'_, sqlx::Any>, table: &str, row_number: &u32, @@ -3579,7 +3579,7 @@ async fn record_row_change( } /// Given a row and a column name, extract the contents of the row as a JSON object and return it. -fn get_json_from_row(row: &AnyRow, column: &str) -> Option { +pub fn get_json_from_row(row: &AnyRow, column: &str) -> Option { let raw_value = row.try_get_raw(column).unwrap(); if !raw_value.is_null() { let value: &str = row.get(column); @@ -3604,7 +3604,7 @@ fn get_json_from_row(row: &AnyRow, column: &str) -> Option { /// (otherwise). When setting the record to undone, user is used for the 'undone_by' field of the /// history table, otherwise undone_by is set to NULL and the user is indicated as the one /// responsible for the change (instead of whoever made the change originally). 
-async fn switch_undone_state( +pub async fn switch_undone_state( user: &str, history_id: u16, undone_state: bool, @@ -3638,7 +3638,7 @@ async fn switch_undone_state( /// Given a global config map and a table name, return a list of the columns from the table /// that may potentially result in database conflicts. -fn get_conflict_columns(global_config: &SerdeMap, table_name: &str) -> Vec { +pub fn get_conflict_columns(global_config: &SerdeMap, table_name: &str) -> Vec { let mut conflict_columns = vec![]; let primaries = global_config .get("constraints") @@ -3718,7 +3718,7 @@ fn get_conflict_columns(global_config: &SerdeMap, table_name: &str) -> Vec bool { +pub fn is_sql_type_error(sql_type: &str, value: &str) -> bool { let sql_type = sql_type.to_lowercase(); if sql_type == "numeric" { // f64 @@ -3751,7 +3751,7 @@ fn is_sql_type_error(sql_type: &str, value: &str) -> bool { /// insert it to the database using the given transaction, then return the new row number. /// If skip_validation is set to true, omit the implicit call to [validate_row_tx()]. #[async_recursion] -async fn insert_new_row_tx( +pub async fn insert_new_row_tx( global_config: &SerdeMap, compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, @@ -3963,7 +3963,7 @@ async fn insert_new_row_tx( /// Given a global config map, maps of datatype and rule conditions, a database connection pool, a /// database transaction, a table name, and a row number, delete the given row from the database. #[async_recursion] -async fn delete_row_tx( +pub async fn delete_row_tx( global_config: &SerdeMap, compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, @@ -4045,7 +4045,7 @@ async fn delete_row_tx( /// [validate_row_tx()]. If do_not_recurse, is set, do not look for rows which could be affected by /// this update. 
#[async_recursion] -async fn update_row_tx( +pub async fn update_row_tx( global_config: &SerdeMap, compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, @@ -4166,7 +4166,7 @@ async fn update_row_tx( /// Given a path, read a TSV file and return a vector of rows represented as ValveRows. /// Note: Use this function to read "small" TSVs only. In particular, use this for the special /// configuration tables. -fn read_tsv_into_vector(path: &str) -> Vec { +pub fn read_tsv_into_vector(path: &str) -> Vec { let mut rdr = ReaderBuilder::new() .delimiter(b'\t') @@ -4207,7 +4207,7 @@ fn read_tsv_into_vector(path: &str) -> Vec { /// Given a database at the specified location, query the given table and return a vector of rows /// represented as ValveRows. -fn read_db_table_into_vector(database: &str, config_table: &str) -> Vec { +pub fn read_db_table_into_vector(database: &str, config_table: &str) -> Vec { let connection_options; if database.starts_with("postgresql://") { connection_options = AnyConnectOptions::from_str(database).unwrap(); @@ -4254,7 +4254,7 @@ fn read_db_table_into_vector(database: &str, config_table: &str) -> Vec, parser: &StartParser, compiled_datatype_conditions: &HashMap, @@ -4397,7 +4397,7 @@ fn compile_condition( /// Given the config map, the name of a datatype, and a database connection pool used to determine /// the database type, climb the datatype tree (as required), and return the first 'SQL type' found. -fn get_sql_type(dt_config: &SerdeMap, datatype: &String, pool: &AnyPool) -> Option { +pub fn get_sql_type(dt_config: &SerdeMap, datatype: &String, pool: &AnyPool) -> Option { if !dt_config.contains_key(datatype) { return None; } @@ -4417,7 +4417,7 @@ fn get_sql_type(dt_config: &SerdeMap, datatype: &String, pool: &AnyPool) -> Opti /// Given the global config map, a table name, a column name, and a database connection pool /// used to determine the database type return the column's SQL type. 
-fn get_sql_type_from_global_config( +pub fn get_sql_type_from_global_config( global_config: &SerdeMap, table: &str, column: &str, @@ -4441,7 +4441,7 @@ fn get_sql_type_from_global_config( /// Given a SQL type, return the appropriate CAST(...) statement for casting the SQL_PARAM /// from a TEXT column. -fn cast_sql_param_from_text(sql_type: &str) -> String { +pub fn cast_sql_param_from_text(sql_type: &str) -> String { let s = sql_type.to_lowercase(); if s == "numeric" { format!("CAST(NULLIF({}, '') AS NUMERIC)", SQL_PARAM) @@ -4456,7 +4456,7 @@ fn cast_sql_param_from_text(sql_type: &str) -> String { /// Given a SQL type, return the appropriate CAST(...) statement for casting the SQL_PARAM /// to a TEXT column. -fn cast_column_sql_to_text(column: &str, sql_type: &str) -> String { +pub fn cast_column_sql_to_text(column: &str, sql_type: &str) -> String { if sql_type.to_lowercase() == "text" { format!(r#""{}""#, column) } else { @@ -4466,7 +4466,7 @@ fn cast_column_sql_to_text(column: &str, sql_type: &str) -> String { /// Given a database row, the name of a column, and it's SQL type, return the value of that column /// from the given row as a String. -fn get_column_value(row: &AnyRow, column: &str, sql_type: &str) -> String { +pub fn get_column_value(row: &AnyRow, column: &str, sql_type: &str) -> String { let s = sql_type.to_lowercase(); if s == "numeric" { let value: f64 = row.get(format!(r#"{}"#, column).as_str()); @@ -4487,7 +4487,7 @@ fn get_column_value(row: &AnyRow, column: &str, sql_type: &str) -> String { /// SQL_PARAM, and given a database pool, if the pool is of type Sqlite, then change the syntax used /// for unbound parameters to Sqlite syntax, which uses "?", otherwise use Postgres syntax, which /// uses numbered parameters, i.e., $1, $2, ... -fn local_sql_syntax(pool: &AnyPool, sql: &String) -> String { +pub fn local_sql_syntax(pool: &AnyPool, sql: &String) -> String { // Do not replace instances of SQL_PARAM if they are within quotation marks. 
let rx = Regex::new(&format!( r#"('[^'\\]*(?:\\.[^'\\]*)*'|"[^"\\]*(?:\\.[^"\\]*)*")|\b{}\b"#, @@ -4522,7 +4522,7 @@ fn local_sql_syntax(pool: &AnyPool, sql: &String) -> String { /// under dependencies, returns the list of tables sorted according to their foreign key /// dependencies, such that if table_a depends on table_b, then table_b comes before table_a in the /// list that is returned. -fn verify_table_deps_and_sort( +pub fn verify_table_deps_and_sort( table_list: &Vec, constraints: &SerdeMap, ) -> ( @@ -4735,7 +4735,7 @@ fn verify_table_deps_and_sort( /// Given table configuration map and a datatype configuration map, a parser, a table name, and a /// database connection pool, return a configuration map representing all of the table constraints. -fn get_table_constraints( +pub fn get_table_constraints( tables_config: &SerdeMap, datatypes_config: &SerdeMap, parser: &StartParser, @@ -4922,7 +4922,7 @@ fn get_table_constraints( /// Given table configuration map and a datatype configuration map, a parser, a table name, and a /// database connection pool, return a list of DDL statements that can be used to create the /// database tables. -fn get_table_ddl( +pub fn get_table_ddl( tables_config: &SerdeMap, datatypes_config: &SerdeMap, parser: &StartParser, @@ -5101,7 +5101,7 @@ fn get_table_ddl( /// Given a list of messages and a HashMap, messages_stats, with which to collect counts of /// message types, count the various message types encountered in the list and increment the counts /// in messages_stats accordingly. 
-fn add_message_counts(messages: &Vec, messages_stats: &mut HashMap) { +pub fn add_message_counts(messages: &Vec, messages_stats: &mut HashMap) { for message in messages { let message = message.as_object().unwrap(); let level = message.get("level").unwrap(); @@ -5122,7 +5122,7 @@ fn add_message_counts(messages: &Vec, messages_stats: &mut HashMap Vec<&str> { +pub fn get_sorted_datatypes(global_config: &SerdeMap) -> Vec<&str> { let mut graph = DiGraphMap::<&str, ()>::new(); let dt_config = global_config .get("datatype") @@ -5172,7 +5172,10 @@ fn get_sorted_datatypes(global_config: &SerdeMap) -> Vec<&str> { /// `sorted_datatypes`, followed by: /// 2. Messages pertaining to violations of one of the rules in the rule table, followed by: /// 3. Messages pertaining to structure violations. -fn sort_messages(sorted_datatypes: &Vec<&str>, cell_messages: &Vec) -> Vec { +pub fn sort_messages( + sorted_datatypes: &Vec<&str>, + cell_messages: &Vec, +) -> Vec { let mut datatype_messages = vec![]; let mut structure_messages = vec![]; let mut rule_messages = vec![]; @@ -5222,7 +5225,7 @@ fn sort_messages(sorted_datatypes: &Vec<&str>, cell_messages: &Vec) /// to bind to that SQL statement. If the verbose flag is set, the number of errors, warnings, /// and information messages generated are added to messages_stats, the contents of which will /// later be written to stderr. -async fn make_inserts( +pub async fn make_inserts( config: &SerdeMap, table_name: &String, rows: &mut Vec, @@ -5435,7 +5438,7 @@ async fn make_inserts( /// and the chunk number corresponding to the rows, do inter-row validation on the rows and insert /// them to the table. If the verbose flag is set to true, error/warning/info stats will be /// collected in messages_stats and later written to stderr. 
-async fn insert_chunk( +pub async fn insert_chunk( config: &SerdeMap, pool: &AnyPool, table_name: &String, @@ -5571,7 +5574,7 @@ async fn insert_chunk( /// and the headers of the rows to be inserted, validate each chunk and insert the validated rows /// to the table. If the verbose flag is set to true, error/warning/info stats will be collected in /// messages_stats and later written to stderr. -async fn insert_chunks( +pub async fn insert_chunks( config: &SerdeMap, pool: &AnyPool, compiled_datatype_conditions: &HashMap, diff --git a/src/validate.rs b/src/validate.rs index b58d639a..a8aeae03 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -760,7 +760,7 @@ pub fn validate_rows_intra( /// Given a row represented as a ValveRow, remove any duplicate messages from the row's cells, so /// that no cell has messages with the same level, rule, and message text. -fn remove_duplicate_messages(row: &ValveRow) -> Result { +pub fn remove_duplicate_messages(row: &ValveRow) -> Result { let mut deduped_row = ValveRow::new(); for (column_name, cell) in row.iter() { let mut messages = cell @@ -797,7 +797,7 @@ fn remove_duplicate_messages(row: &ValveRow) -> Result { /// Given a result row, convert it to a ValveRow and return it. /// Note that if the incoming result row has an associated row_number, this is ignored. -fn result_row_to_config_map(incoming: &ResultRow) -> ValveRow { +pub fn result_row_to_config_map(incoming: &ResultRow) -> ValveRow { let mut outgoing = ValveRow::new(); for (column, cell) in incoming.contents.iter() { let mut cell_map = ValveRow::new(); @@ -823,7 +823,7 @@ fn result_row_to_config_map(incoming: &ResultRow) -> ValveRow { /// Generate a SQL Select clause that is a union of: (a) the literal values of the given extra row, /// and (b) a Select statement over `table_name` of all the fields in the extra row. 
-fn select_with_extra_row( +pub fn select_with_extra_row( config: &SerdeMap, extra_row: &ResultRow, table: &str, @@ -925,7 +925,7 @@ pub fn with_tree_sql( /// validate, validate the cell's nulltype condition. If the cell's value is one of the allowable /// nulltype values for this column, then fill in the cell's nulltype value before returning the /// cell. -fn validate_cell_nulltype( +pub fn validate_cell_nulltype( config: &SerdeMap, compiled_datatype_conditions: &HashMap, table_name: &String, @@ -952,7 +952,7 @@ fn validate_cell_nulltype( /// Given a config map, compiled datatype conditions, a table name, a column name, and a cell to /// validate, validate the cell's datatype and return the validated cell. -fn validate_cell_datatype( +pub fn validate_cell_datatype( config: &SerdeMap, compiled_datatype_conditions: &HashMap, table_name: &String, @@ -1068,7 +1068,7 @@ fn validate_cell_datatype( /// Given a config map, compiled rule conditions, a table name, a column name, the row context, /// and the cell to validate, look in the rule table (if it exists) and validate the cell according /// to any applicable rules. -fn validate_cell_rules( +pub fn validate_cell_rules( config: &SerdeMap, compiled_rules: &HashMap>>, table_name: &String, @@ -1159,7 +1159,7 @@ fn validate_cell_rules( /// Generates an SQL fragment representing the "as if" portion of a query that will be used for /// counterfactual validation. -fn as_if_to_sql( +pub fn as_if_to_sql( global_config: &SerdeMap, pool: &AnyPool, as_if: &QueryAsIf, @@ -1278,7 +1278,7 @@ fn as_if_to_sql( /// check the cell value against any foreign keys that have been defined for the column. If there is /// a violation, indicate it with an error message attached to the cell. Optionally, if a /// transaction is given, use that instead of the pool for database access. 
-async fn validate_cell_foreign_constraints( +pub async fn validate_cell_foreign_constraints( config: &SerdeMap, pool: &AnyPool, mut tx: Option<&mut Transaction<'_, sqlx::Any>>, @@ -1416,7 +1416,7 @@ async fn validate_cell_foreign_constraints( /// validate that none of the "tree" constraints on the column are violated, and indicate any /// violations by attaching error messages to the cell. Optionally, if a transaction is /// given, use that instead of the pool for database access. -async fn validate_cell_trees( +pub async fn validate_cell_trees( config: &SerdeMap, pool: &AnyPool, mut tx: Option<&mut Transaction<'_, sqlx::Any>>, @@ -1598,7 +1598,7 @@ async fn validate_cell_trees( /// `row_number` is set to None, then no row corresponding to the given cell is assumed to exist /// in the table. Optionally, if a transaction is given, use that instead of the pool for database /// access. -async fn validate_cell_unique_constraints( +pub async fn validate_cell_unique_constraints( config: &SerdeMap, pool: &AnyPool, mut tx: Option<&mut Transaction<'_, sqlx::Any>>, From 260ce0f659b88ac99bcb0ec0803320cfc90247f2 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 15 Jan 2024 08:55:50 -0500 Subject: [PATCH 49/57] update README --- README.md | 3 +++ src/lib.rs | 3 +++ 2 files changed, 6 insertions(+) diff --git a/README.md b/README.md index 02113b60..183c8a1d 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,9 @@ valve --help ``` to see command line options. +### API +See [Valve] + ### Python bindings See [valve.py](https://github.com/ontodev/valve.py) diff --git a/src/lib.rs b/src/lib.rs index 4b961e48..2a671b76 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,6 +12,9 @@ //! ``` //! to see command line options. //! +//! ## API +//! See [Valve] +//! //! ## Python bindings //! 
See [valve.py](https://github.com/ontodev/valve.py) From 7058cbf25c672124636cca1699501d3ab4e2479d Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 22 Jan 2024 10:45:49 -0500 Subject: [PATCH 50/57] add get_path() --- src/lib.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 2a671b76..417ba9f1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -335,6 +335,20 @@ impl Valve { }) } + /// Convenience function to retrieve the path to Valve's "table table", the main entrypoint + /// to Valve's configuration. + pub fn get_path(&self) -> String { + self.config + .get("table") + .and_then(|t| t.as_object()) + .and_then(|t| t.get("table")) + .and_then(|t| t.as_object()) + .and_then(|t| t.get("path")) + .and_then(|p| p.as_str()) + .unwrap() + .to_string() + } + /// Controls the maximum length of a username. const USERNAME_MAX_LEN: usize = 20; From 5d251510a689bfe4a7fa4a750e8af1b55a836882 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 22 Jan 2024 11:36:41 -0500 Subject: [PATCH 51/57] make Valve cloneable --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 417ba9f1..543066e5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -177,7 +177,7 @@ impl std::fmt::Debug for ColumnRule { } /// Main entrypoint for the Valve API. -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct Valve { /// The valve configuration map. 
pub config: SerdeMap, From 602673f7d097d1104113f8a2e43a9f5a333e4427 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Tue, 23 Jan 2024 13:17:47 -0500 Subject: [PATCH 52/57] add optional row number parameter to validate_row() --- src/api_test.rs | 12 ++++++------ src/lib.rs | 3 ++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/api_test.rs b/src/api_test.rs index 65d8039f..eba2acd5 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -58,13 +58,13 @@ async fn test_idempotent_validate_and_update(valve: &Valve) -> Result<(), ValveE }); let result_row_1 = valve - .validate_row("table2", row.as_object().unwrap()) + .validate_row("table2", row.as_object().unwrap(), None) .await?; - let result_row_2 = valve.validate_row("table2", &result_row_1).await?; + let result_row_2 = valve.validate_row("table2", &result_row_1, None).await?; assert_eq!(result_row_1, result_row_2); - let result_row = valve.validate_row("table2", &result_row_2).await?; + let result_row = valve.validate_row("table2", &result_row_2, None).await?; assert_eq!(result_row, result_row_2); // Update the row we constructed and validated above in the database: @@ -95,7 +95,7 @@ async fn test_validate_and_insert_1(valve: &Valve) -> Result<(), ValveError> { }); let result_row = valve - .validate_row("table3", row.as_object().unwrap()) + .validate_row("table3", row.as_object().unwrap(), None) .await?; let (_new_row_num, _new_row) = valve.insert_row("table3", &result_row).await?; @@ -123,7 +123,7 @@ async fn test_validate_and_update(valve: &Valve) -> Result<(), ValveError> { }); let result_row = valve - .validate_row("table6", row.as_object().unwrap()) + .validate_row("table6", row.as_object().unwrap(), None) .await?; valve.update_row("table6", &1, &result_row).await?; @@ -151,7 +151,7 @@ async fn test_validate_and_insert_2(valve: &Valve) -> Result<(), ValveError> { }); let result_row = valve - .validate_row("table6", row.as_object().unwrap()) + .validate_row("table6", 
row.as_object().unwrap(), None) .await?; let (_new_row_num, _new_row) = valve.insert_row("table6", &result_row).await?; diff --git a/src/lib.rs b/src/lib.rs index 543066e5..56c7f8af 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1411,6 +1411,7 @@ impl Valve { &self, table_name: &str, row: &ValveRow, + row_number: Option, ) -> Result { validate_row_tx( &self.config, @@ -1420,7 +1421,7 @@ impl Valve { None, table_name, row, - None, + row_number, None, ) .await From b769910d9d3ec6b2586df1d4a826319d6fed57e1 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Wed, 24 Jan 2024 16:27:29 -0500 Subject: [PATCH 53/57] remove initial load flag --- src/lib.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 56c7f8af..51983c95 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -197,8 +197,6 @@ pub struct Valve { pub user: String, /// Produce more logging output. pub verbose: bool, - /// Tune the database for initial loading. - pub initial_load: bool, } #[derive(Debug)] @@ -331,7 +329,6 @@ impl Valve { pool: pool, user: String::from("VALVE"), verbose: verbose, - initial_load: initial_load, }) } From 08620dbba8d47e39cf0b58c9253c9f0c4b5d9162 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Tue, 30 Jan 2024 10:15:31 -0500 Subject: [PATCH 54/57] add new file valve.rs and refactor --- src/api_test.rs | 5 +- src/lib.rs | 1817 +--------------------------------------------- src/main.rs | 4 +- src/validate.rs | 12 +- src/valve.rs | 1828 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 1852 insertions(+), 1814 deletions(-) create mode 100644 src/valve.rs diff --git a/src/api_test.rs b/src/api_test.rs index eba2acd5..e551a1da 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -1,4 +1,7 @@ -use ontodev_valve::{SerdeMap, Valve, ValveError}; +use ontodev_valve::{ + valve::{Valve, ValveError}, + SerdeMap, +}; use rand::distributions::{Alphanumeric, DistString, Distribution, Uniform}; use rand::{random, thread_rng}; use serde_json::json; diff 
--git a/src/lib.rs b/src/lib.rs index 51983c95..e498c14a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,20 +23,25 @@ extern crate lalrpop_util; pub mod ast; pub mod validate; +pub mod valve; lalrpop_mod!(pub valve_grammar); -use crate::validate::{ - validate_row_tx, validate_rows_constraints, validate_rows_intra, validate_rows_trees, - validate_tree_foreign_keys, validate_under, with_tree_sql, QueryAsIf, QueryAsIfKind, ResultRow, +use crate::{ + ast::Expression, + validate::{ + validate_row_tx, validate_rows_constraints, validate_rows_intra, validate_rows_trees, + QueryAsIf, QueryAsIfKind, ResultRow, + }, + valve::ValveError, + valve::ValveRow, + valve_grammar::StartParser, }; -use crate::{ast::Expression, valve_grammar::StartParser}; use async_recursion::async_recursion; use chrono::Utc; use crossbeam; -use csv::{QuoteStyle, ReaderBuilder, StringRecord, StringRecordsIter, WriterBuilder}; -use enquote::unquote; -use futures::{executor::block_on, TryStreamExt}; +use csv::{ReaderBuilder, StringRecord, StringRecordsIter}; +use futures::executor::block_on; use indexmap::IndexMap; use indoc::indoc; use itertools::{IntoChunks, Itertools}; @@ -78,11 +83,10 @@ lazy_static! { static ref SQL_TYPES: Vec<&'static str> = vec!["text", "varchar", "numeric", "integer", "real"]; } -/// Aliases for [serde_json::Map](..//serde_json/struct.Map.html). +/// Alias for [serde_json::Map](..//serde_json/struct.Map.html). // Note: serde_json::Map is // [backed by a BTreeMap by default](https://docs.serde.rs/serde_json/map/index.html) pub type SerdeMap = serde_json::Map; -pub type ValveRow = serde_json::Map; // TODO: Possibly replace these with the tracing library (see nanobot.rs). /// Write a debugging message to STDERR. @@ -176,1801 +180,6 @@ impl std::fmt::Debug for ColumnRule { } } -/// Main entrypoint for the Valve API. -#[derive(Clone, Debug)] -pub struct Valve { - /// The valve configuration map. - pub config: SerdeMap, - /// Pre-compiled datatype conditions. 
- pub compiled_datatype_conditions: HashMap, - /// Pre-compiled rule conditions. - pub compiled_rule_conditions: HashMap>>, - /// Parsed structure conditions: - pub parsed_structure_conditions: HashMap, - /// Lists of tables that depend on a given table, indexed by table. - pub table_dependencies_in: HashMap>, - /// Lists of tables that a given table depends on, indexed by table. - pub table_dependencies_out: HashMap>, - /// The database connection pool. - pub pool: AnyPool, - /// The user associated with this valve instance. - pub user: String, - /// Produce more logging output. - pub verbose: bool, -} - -#[derive(Debug)] -pub enum ValveError { - /// An error in the Valve configuration: - ConfigError(String), - /// An error that occurred while reading or writing to a CSV/TSV: - CsvError(csv::Error), - /// An error involving the data: - DataError(String), - /// An error generated by the underlying database: - DatabaseError(sqlx::Error), - /// An error in the inputs to a function: - InputError(String), - /// An error that occurred while reading/writing to stdio: - IOError(std::io::Error), - /// An error that occurred while serialising or deserialising to/from JSON: - SerdeJsonError(serde_json::Error), -} - -impl From for ValveError { - fn from(e: csv::Error) -> Self { - Self::CsvError(e) - } -} - -impl From for ValveError { - fn from(e: sqlx::Error) -> Self { - Self::DatabaseError(e) - } -} - -impl From for ValveError { - fn from(e: serde_json::Error) -> Self { - Self::SerdeJsonError(e) - } -} - -impl From for ValveError { - fn from(e: std::io::Error) -> Self { - Self::IOError(e) - } -} - -impl Valve { - /// Given a path to a table table, a path to a database, a flag for verbose output, and a flag - /// indicating whether the database should be configured for initial loading: Set up a database - /// connection, configure VALVE, and return a new Valve struct. 
- pub async fn build( - table_path: &str, - database: &str, - verbose: bool, - initial_load: bool, - ) -> Result { - let pool = get_pool_from_connection_string(database).await?; - if pool.any_kind() == AnyKind::Sqlite { - sqlx_query("PRAGMA foreign_keys = ON") - .execute(&pool) - .await?; - if initial_load { - // These pragmas are unsafe but they are used during initial loading since data - // integrity is not a priority in this case. - sqlx_query("PRAGMA journal_mode = OFF") - .execute(&pool) - .await?; - sqlx_query("PRAGMA synchronous = 0").execute(&pool).await?; - sqlx_query("PRAGMA cache_size = 1000000") - .execute(&pool) - .await?; - sqlx_query("PRAGMA temp_store = MEMORY") - .execute(&pool) - .await?; - } - } - - let parser = StartParser::new(); - let ( - specials_config, - tables_config, - datatypes_config, - rules_config, - constraints_config, - sorted_table_list, - table_dependencies_in, - table_dependencies_out, - ) = read_config_files(table_path, &parser, &pool); - - let mut config = SerdeMap::new(); - config.insert( - String::from("special"), - SerdeValue::Object(specials_config.clone()), - ); - config.insert( - String::from("table"), - SerdeValue::Object(tables_config.clone()), - ); - config.insert( - String::from("datatype"), - SerdeValue::Object(datatypes_config.clone()), - ); - config.insert( - String::from("rule"), - SerdeValue::Object(rules_config.clone()), - ); - config.insert( - String::from("constraints"), - SerdeValue::Object(constraints_config.clone()), - ); - let mut sorted_table_serdevalue_list: Vec = vec![]; - for table in &sorted_table_list { - sorted_table_serdevalue_list.push(SerdeValue::String(table.to_string())); - } - config.insert( - String::from("sorted_table_list"), - SerdeValue::Array(sorted_table_serdevalue_list), - ); - - let compiled_datatype_conditions = get_compiled_datatype_conditions(&config, &parser); - let compiled_rule_conditions = - get_compiled_rule_conditions(&config, compiled_datatype_conditions.clone(), &parser); - 
let parsed_structure_conditions = get_parsed_structure_conditions(&config, &parser); - - Ok(Self { - config: config, - compiled_datatype_conditions: compiled_datatype_conditions, - compiled_rule_conditions: compiled_rule_conditions, - parsed_structure_conditions: parsed_structure_conditions, - table_dependencies_in: table_dependencies_in, - table_dependencies_out: table_dependencies_out, - pool: pool, - user: String::from("VALVE"), - verbose: verbose, - }) - } - - /// Convenience function to retrieve the path to Valve's "table table", the main entrypoint - /// to Valve's configuration. - pub fn get_path(&self) -> String { - self.config - .get("table") - .and_then(|t| t.as_object()) - .and_then(|t| t.get("table")) - .and_then(|t| t.as_object()) - .and_then(|t| t.get("path")) - .and_then(|p| p.as_str()) - .unwrap() - .to_string() - } - - /// Controls the maximum length of a username. - const USERNAME_MAX_LEN: usize = 20; - - /// Sets the user name, which must be a short, trimmed, string without newlines, for this Valve - /// instance. - pub fn set_user(&mut self, user: &str) -> Result<&mut Self, ValveError> { - if user.len() > Self::USERNAME_MAX_LEN { - return Err(ValveError::ConfigError(format!( - "Username '{}' is longer than {} characters.", - user, - Self::USERNAME_MAX_LEN - ))); - } else { - let user_regex = Regex::new(r#"^\S([^\n]*\S)*$"#).unwrap(); - if !user_regex.is_match(user) { - return Err(ValveError::ConfigError(format!( - "Username '{}' is not a short, trimmed, string without newlines.", - user, - ))); - } - } - self.user = user.to_string(); - Ok(self) - } - - /// Given a SQL string, execute it using the connection pool associated with the Valve instance. - async fn execute_sql(&self, sql: &str) -> Result<(), ValveError> { - sqlx_query(&sql).execute(&self.pool).await?; - Ok(()) - } - - /// Return the list of configured tables in sorted order, or reverse sorted order if the - /// reverse flag is set. 
- pub fn get_sorted_table_list(&self, reverse: bool) -> Vec<&str> { - let mut sorted_tables = self - .config - .get("sorted_table_list") - .and_then(|l| l.as_array()) - .and_then(|l| Some(l.iter().map(|i| i.as_str().unwrap()))) - .and_then(|l| Some(l.collect::>())) - .unwrap(); - if reverse { - sorted_tables.reverse(); - } - sorted_tables - } - - /// Given the name of a table, determine whether its current instantiation in the database - /// differs from the way it has been configured. The answer to this question is yes whenever - /// (1) the number of columns or any of their names differs from their configured values, or - /// the order of database columns differs from the configured order; (2) The SQL type of one or - /// more columns does not match the configured SQL type for that column; (3) Some column with a - /// 'unique', 'primary', or 'from(table, column)' in its column configuration fails to be - /// associated, in the database, with a unique constraint, primary key, or foreign key, - /// respectively; or vice versa; (4) The table does not exist in the database. - pub async fn table_has_changed(&self, table: &str) -> Result { - // A clojure that, given a parsed structure condition, a table and column name, and an - // unsigned integer representing whether the given column, in the case of a SQLite database, - // is a primary key (in the case of PostgreSQL, the sqlite_pk parameter is ignored): - // determine whether the structure of the column is properly reflected in the db. E.g., a - // `from(table.column)` struct should be associated with a foreign key, `primary` with a - // primary key, `unique` with a unique constraint. 
- let structure_has_changed = |pstruct: &Expression, - table: &str, - column: &str, - sqlite_pk: &u32| - -> Result { - // A clojure to determine whether the given column has the given constraint type, which - // can be one of 'UNIQUE', 'PRIMARY KEY', 'FOREIGN KEY': - let column_has_constraint_type = |constraint_type: &str| -> Result { - if self.pool.any_kind() == AnyKind::Postgres { - let sql = format!( - r#"SELECT 1 - FROM information_schema.table_constraints tco - JOIN information_schema.key_column_usage kcu - ON kcu.constraint_name = tco.constraint_name - AND kcu.constraint_schema = tco.constraint_schema - AND kcu.table_name = '{}' - WHERE tco.constraint_type = '{}' - AND kcu.column_name = '{}'"#, - table, constraint_type, column - ); - let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; - if rows.len() > 1 { - unreachable!(); - } - Ok(rows.len() == 1) - } else { - if constraint_type == "PRIMARY KEY" { - return Ok(*sqlite_pk == 1); - } else if constraint_type == "UNIQUE" { - let sql = format!(r#"PRAGMA INDEX_LIST("{}")"#, table); - for row in block_on(sqlx_query(&sql).fetch_all(&self.pool))? { - let idx_name = row.get::("name"); - let unique = row.get::("unique") as u8; - if unique == 1 { - let sql = format!(r#"PRAGMA INDEX_INFO("{}")"#, idx_name); - let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; - if rows.len() == 1 { - let cname = rows[0].get::("name"); - if cname == column { - return Ok(true); - } - } - } - } - Ok(false) - } else if constraint_type == "FOREIGN KEY" { - let sql = format!(r#"PRAGMA FOREIGN_KEY_LIST("{}")"#, table); - for row in block_on(sqlx_query(&sql).fetch_all(&self.pool))? 
{ - let cname = row.get::("from"); - if cname == column { - return Ok(true); - } - } - Ok(false) - } else { - return Err(ValveError::InputError( - format!("Unrecognized constraint type: '{}'", constraint_type).into(), - )); - } - } - }; - - // Check if there is a change to whether this column is a primary/unique key: - let is_primary = match pstruct { - Expression::Label(label) if label == "primary" => true, - _ => false, - }; - if is_primary != column_has_constraint_type("PRIMARY KEY")? { - return Ok(true); - } else if !is_primary { - let is_unique = match pstruct { - Expression::Label(label) if label == "unique" => true, - _ => false, - }; - let unique_in_db = column_has_constraint_type("UNIQUE")?; - if is_unique != unique_in_db { - // A child of a tree constraint implies a unique db constraint, so if there is a - // unique constraint in the db that is not configured, that is the explanation, - // and in that case we do not count this as a change to the column. - if !unique_in_db { - return Ok(true); - } else { - let trees = - self.config - .get("constraints") - .and_then(|c| c.as_object()) - .and_then(|o| o.get("tree")) - .and_then(|t| t.as_object()) - .and_then(|o| o.get(table)) - .and_then(|t| t.as_array()) - .and_then(|a| { - Some(a.iter().map(|o| { - o.as_object().and_then(|o| o.get("child")).unwrap() - })) - }) - .unwrap() - .collect::>(); - if !trees.contains(&&SerdeValue::String(column.to_string())) { - return Ok(true); - } - } - } - } - - match pstruct { - Expression::Function(name, args) if name == "from" => { - match &*args[0] { - Expression::Field(cfg_ftable, cfg_fcolumn) => { - if self.pool.any_kind() == AnyKind::Sqlite { - let sql = format!(r#"PRAGMA FOREIGN_KEY_LIST("{}")"#, table); - for row in block_on(sqlx_query(&sql).fetch_all(&self.pool))? 
{ - let from = row.get::("from"); - if from == column { - let db_ftable = row.get::("table"); - let db_fcolumn = row.get::("to"); - if *cfg_ftable != db_ftable || *cfg_fcolumn != db_fcolumn { - return Ok(true); - } - } - } - } else { - let sql = format!( - r#"SELECT - ccu.table_name AS foreign_table_name, - ccu.column_name AS foreign_column_name - FROM information_schema.table_constraints AS tc - JOIN information_schema.key_column_usage AS kcu - ON tc.constraint_name = kcu.constraint_name - AND tc.table_schema = kcu.table_schema - JOIN information_schema.constraint_column_usage AS ccu - ON ccu.constraint_name = tc.constraint_name - WHERE tc.constraint_type = 'FOREIGN KEY' - AND tc.table_name = '{}' - AND kcu.column_name = '{}'"#, - table, column - ); - let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; - if rows.len() == 0 { - // If the table doesn't even exist return true. - return Ok(true); - } else if rows.len() > 1 { - // This seems impossible given how PostgreSQL works: - unreachable!(); - } else { - let row = &rows[0]; - let db_ftable = row.get::("foreign_table_name"); - let db_fcolumn = row.get::("foreign_column_name"); - if *cfg_ftable != db_ftable || *cfg_fcolumn != db_fcolumn { - return Ok(true); - } - } - } - } - _ => { - return Err(ValveError::InputError( - format!("Unrecognized structure: {:?}", pstruct).into(), - )); - } - }; - } - _ => (), - }; - - Ok(false) - }; - - let (columns_config, configured_column_order) = { - let table_config = self - .config - .get("table") - .and_then(|tc| tc.get(table)) - .and_then(|t| t.as_object()) - .unwrap(); - let columns_config = table_config - .get("column") - .and_then(|c| c.as_object()) - .unwrap(); - let configured_column_order = { - let mut configured_column_order = { - if table == "message" { - vec!["message_id".to_string()] - } else if table == "history" { - vec!["history_id".to_string()] - } else { - vec!["row_number".to_string()] - } - }; - configured_column_order.append( - &mut table_config - 
.get("column_order") - .and_then(|c| c.as_array()) - .and_then(|a| Some(a.iter())) - .and_then(|a| Some(a.map(|c| c.as_str().unwrap().to_string()))) - .and_then(|a| Some(a.collect::>())) - .unwrap(), - ); - configured_column_order - }; - - (columns_config, configured_column_order) - }; - - let db_columns_in_order = { - if self.pool.any_kind() == AnyKind::Sqlite { - let sql = format!( - r#"SELECT 1 FROM sqlite_master WHERE "type" = 'table' AND "name" = '{}'"#, - table - ); - let rows = sqlx_query(&sql).fetch_all(&self.pool).await?; - if rows.len() == 0 { - if self.verbose { - info!( - "The table '{}' will be recreated as it does not exist in the database", - table - ); - } - return Ok(true); - } else if rows.len() == 1 { - // Otherwise send another query to the db to get the column info: - let sql = format!(r#"PRAGMA TABLE_INFO("{}")"#, table); - let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; - rows.iter() - .map(|r| { - ( - r.get::("name"), - r.get::("type"), - r.get::("pk") as u32, - ) - }) - .collect::>() - } else { - unreachable!(); - } - } else { - let sql = format!( - r#"SELECT "column_name", "data_type" - FROM "information_schema"."columns" - WHERE "table_name" = '{}' - ORDER BY "ordinal_position""#, - table, - ); - let rows = sqlx_query(&sql).fetch_all(&self.pool).await?; - if rows.len() == 0 { - if self.verbose { - info!( - "The table '{}' will be recreated as it does not exist in the database", - table - ); - } - return Ok(true); - } - // Otherwise we get the column name: - rows.iter() - .map(|r| { - ( - r.get::("column_name"), - r.get::("data_type"), - // The third entry is just a dummy so that the datatypes in the two - // wings of this if/else block match. 
- 0, - ) - }) - .collect::>() - } - }; - - // Check if the order of the configured columns matches the order of the columns in the - // database: - let db_column_order = db_columns_in_order - .iter() - .map(|c| c.0.clone()) - .collect::>(); - if db_column_order != configured_column_order { - if self.verbose { - info!( - "The table '{}' will be recreated since the database columns: {:?} \ - and/or their order does not match the configured columns: {:?}", - table, db_column_order, configured_column_order - ); - } - return Ok(true); - } - - // Check, for all tables, whether their column configuration matches the contents of the - // database: - for (cname, ctype, pk) in &db_columns_in_order { - // Do not consider these special columns: - if (table == "message" && cname == "message_id") - || (table == "message" && cname == "row") - || (table == "history" && cname == "history_id") - || (table == "history" && cname == "timestamp") - || (table == "history" && cname == "row") - || cname == "row_number" - { - continue; - } - let column_config = columns_config - .get(cname) - .and_then(|c| c.as_object()) - .unwrap(); - let sql_type = - get_sql_type_from_global_config(&self.config, table, &cname, &self.pool).unwrap(); - - // Check the column's SQL type: - if sql_type.to_lowercase() != ctype.to_lowercase() { - let s = sql_type.to_lowercase(); - let c = ctype.to_lowercase(); - // CHARACTER VARYING and VARCHAR are synonyms so we ignore this difference. 
- if !((s.starts_with("varchar") || s.starts_with("character varying")) - && (c.starts_with("varchar") || c.starts_with("character varying"))) - { - if self.verbose { - info!( - "The table '{}' will be recreated because the SQL type of column '{}', \ - {}, does not match the configured value: {}", - table, - cname, - ctype, - sql_type - ); - } - return Ok(true); - } - } - - // Check the column's structure: - let structure = column_config.get("structure").and_then(|d| d.as_str()); - match structure { - Some(structure) if structure != "" => { - let parsed_structure = self - .parsed_structure_conditions - .get(structure) - .and_then(|p| Some(p.parsed.clone())) - .unwrap(); - if structure_has_changed(&parsed_structure, table, &cname, &pk)? { - if self.verbose { - info!( - "The table '{}' will be recreated because the database \ - constraints for column '{}' do not match the configured \ - structure, '{}'", - table, cname, structure - ); - } - return Ok(true); - } - } - _ => (), - }; - } - - Ok(false) - } - - /// Generates and returns the DDL required to setup the database. - pub async fn get_setup_statements(&self) -> Result>, ValveError> { - let tables_config = self - .config - .get("table") - .and_then(|t| t.as_object()) - .unwrap() - .clone(); - let datatypes_config = self - .config - .get("datatype") - .and_then(|d| d.as_object()) - .unwrap() - .clone(); - - let parser = StartParser::new(); - - // Begin by reading in the TSV files corresponding to the tables defined in tables_config, - // and use that information to create the associated database tables, while saving - // constraint information to constrains_config. 
- let mut setup_statements = HashMap::new(); - for table_name in tables_config.keys().cloned().collect::>() { - // Generate the statements for creating the table and its corresponding conflict table: - let mut table_statements = vec![]; - for table in vec![table_name.to_string(), format!("{}_conflict", table_name)] { - let mut statements = get_table_ddl( - &tables_config, - &datatypes_config, - &parser, - &table, - &self.pool, - ); - table_statements.append(&mut statements); - } - - let create_view_sql = get_sql_for_standard_view(&table_name, &self.pool); - let create_text_view_sql = - get_sql_for_text_view(&tables_config, &table_name, &self.pool); - table_statements.push(create_view_sql); - table_statements.push(create_text_view_sql); - - setup_statements.insert(table_name.to_string(), table_statements); - } - - let text_type = get_sql_type(&datatypes_config, &"text".to_string(), &self.pool).unwrap(); - - // Generate DDL for the history table: - let mut history_statements = vec![]; - history_statements.push(format!( - indoc! {r#" - CREATE TABLE "history" ( - {history_id} - "table" {text_type}, - "row" BIGINT, - "from" {text_type}, - "to" {text_type}, - "summary" {text_type}, - "user" {text_type}, - "undone_by" {text_type}, - {timestamp} - ); - "#}, - history_id = { - if self.pool.any_kind() == AnyKind::Sqlite { - "\"history_id\" INTEGER PRIMARY KEY," - } else { - "\"history_id\" SERIAL PRIMARY KEY," - } - }, - text_type = text_type, - timestamp = { - if self.pool.any_kind() == AnyKind::Sqlite { - "\"timestamp\" TIMESTAMP DEFAULT(STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW'))" - } else { - "\"timestamp\" TIMESTAMP DEFAULT CURRENT_TIMESTAMP" - } - }, - )); - history_statements - .push(r#"CREATE INDEX "history_tr_idx" ON "history"("table", "row");"#.to_string()); - setup_statements.insert("history".to_string(), history_statements); - - // Generate DDL for the message table: - let mut message_statements = vec![]; - message_statements.push(format!( - indoc! 
{r#" - CREATE TABLE "message" ( - {message_id} - "table" {text_type}, - "row" BIGINT, - "column" {text_type}, - "value" {text_type}, - "level" {text_type}, - "rule" {text_type}, - "message" {text_type} - ); - "#}, - message_id = { - if self.pool.any_kind() == AnyKind::Sqlite { - "\"message_id\" INTEGER PRIMARY KEY," - } else { - "\"message_id\" SERIAL PRIMARY KEY," - } - }, - text_type = text_type, - )); - message_statements.push( - r#"CREATE INDEX "message_trc_idx" ON "message"("table", "row", "column");"#.to_string(), - ); - setup_statements.insert("message".to_string(), message_statements); - - return Ok(setup_statements); - } - - /// Writes the database schema to stdout. - pub async fn dump_schema(&self) -> Result<(), ValveError> { - let setup_statements = self.get_setup_statements().await?; - for table in self.get_sorted_table_list(false) { - let table_statements = setup_statements.get(table).unwrap(); - let output = String::from(table_statements.join("\n")); - println!("{}\n", output); - } - Ok(()) - } - - /// Create all configured database tables and views if they do not already exist as configured. - pub async fn create_all_tables(&self) -> Result<&Self, ValveError> { - let setup_statements = self.get_setup_statements().await?; - let sorted_table_list = self.get_sorted_table_list(false); - for table in &sorted_table_list { - if self.table_has_changed(*table).await? { - self.drop_tables(&vec![table]).await?; - let table_statements = setup_statements.get(*table).unwrap(); - for stmt in table_statements { - self.execute_sql(stmt).await?; - } - } - } - - Ok(self) - } - - /// Checks whether the given table exists in the database. 
- pub async fn table_exists(&self, table: &str) -> Result { - let sql = { - if self.pool.any_kind() == AnyKind::Sqlite { - format!( - r#"SELECT 1 - FROM "sqlite_master" - WHERE "type" = 'table' AND name = '{}' - LIMIT 1"#, - table - ) - } else { - format!( - r#"SELECT 1 - FROM "information_schema"."tables" - WHERE "table_schema" = 'public' - AND "table_name" = '{}'"#, - table - ) - } - }; - let query = sqlx_query(&sql); - let rows = query.fetch_all(&self.pool).await?; - return Ok(rows.len() > 0); - } - - /// Get all the incoming (tables that depend on it) or outgoing (tables it depends on) - /// dependencies of the given table. - pub fn get_dependencies(&self, table: &str, incoming: bool) -> Vec { - let mut dependent_tables = vec![]; - if table != "message" && table != "history" { - let direct_deps = { - if incoming { - self.table_dependencies_in.get(table).unwrap().to_vec() - } else { - self.table_dependencies_out.get(table).unwrap().to_vec() - } - }; - for direct_dep in direct_deps { - let mut indirect_deps = self.get_dependencies(&direct_dep, incoming); - dependent_tables.append(&mut indirect_deps); - dependent_tables.push(direct_dep); - } - } - dependent_tables - } - - /// Given a list of tables, fill it in with any further tables that are dependent upon tables - /// in the given list. If deletion_order is true, the tables are sorted as required for - /// deleting them all sequentially, otherwise they are ordered in reverse. - pub fn add_dependencies(&self, tables: &Vec<&str>, deletion_order: bool) -> Vec { - let mut with_dups = vec![]; - for table in tables { - let dependent_tables = self.get_dependencies(table, true); - for dep_table in dependent_tables { - with_dups.push(dep_table.to_string()); - } - with_dups.push(table.to_string()); - } - // The algorithm above gives the tables in the order needed for deletion. But we want - // this function to return the creation order by default so we reverse it unless - // the deletion_order flag is set to true. 
- if !deletion_order { - with_dups.reverse(); - } - - // Remove the duplicates from the returned table list: - let mut tables_in_order = vec![]; - for table in with_dups.iter().unique() { - tables_in_order.push(table.to_string()); - } - tables_in_order - } - - /// Given a subset of the configured tables, return them in sorted dependency order, or in - /// reverse if `reverse` is set to true. - pub fn sort_tables( - &self, - table_subset: &Vec<&str>, - reverse: bool, - ) -> Result, ValveError> { - let full_table_list = self.get_sorted_table_list(false); - if !table_subset - .iter() - .all(|item| full_table_list.contains(item)) - { - return Err(ValveError::InputError(format!( - "[{}] contains tables that are not in the configured table list: [{}]", - table_subset.join(", "), - full_table_list.join(", ") - ))); - } - - let constraints_config = self - .config - .get("constraints") - .and_then(|c| c.as_object()) - .ok_or(ValveError::ConfigError( - "Unable to retrieve configured constraints.".into(), - ))?; - - // Filter out message and history since they are not represented in the constraints config. - // They will be added implicitly to the list returned by verify_table_deps_and_sort. - let filtered_subset = table_subset - .iter() - .filter(|m| **m != "history" && **m != "message") - .map(|s| s.to_string()) - .collect::>(); - - let (sorted_subset, _, _) = - verify_table_deps_and_sort(&filtered_subset, &constraints_config); - - // Since the result of verify_table_deps_and_sort() will include dependencies of the tables - // in its input list, we filter those out here: - let mut sorted_subset = sorted_subset - .iter() - .filter(|m| table_subset.contains(&m.as_str())) - .map(|s| s.to_string()) - .collect::>(); - - if reverse { - sorted_subset.reverse(); - } - Ok(sorted_subset) - } - - /// Returns an IndexMap, indexed by configured table, containing lists of their dependencies. - /// If incoming is true, the lists are incoming dependencies, else they are outgoing. 
- pub fn collect_dependencies(&self, incoming: bool) -> IndexMap> { - let tables = self.get_sorted_table_list(false); - let mut dependencies = IndexMap::new(); - for table in tables { - dependencies.insert(table.to_string(), self.get_dependencies(table, incoming)); - } - dependencies - } - - /// Drop all configured tables, in reverse dependency order. - pub async fn drop_all_tables(&self) -> Result<&Self, ValveError> { - // Drop all of the database tables in the reverse of their sorted order: - self.drop_tables(&self.get_sorted_table_list(true)).await?; - Ok(self) - } - - /// Given a vector of table names, drop those tables, in the given order. - pub async fn drop_tables(&self, tables: &Vec<&str>) -> Result<&Self, ValveError> { - let drop_list = self.add_dependencies(tables, true); - for table in &drop_list { - if *table != "message" && *table != "history" { - let sql = format!(r#"DROP VIEW IF EXISTS "{}_text_view""#, table); - self.execute_sql(&sql).await?; - let sql = format!(r#"DROP VIEW IF EXISTS "{}_view""#, table); - self.execute_sql(&sql).await?; - let sql = format!(r#"DROP TABLE IF EXISTS "{}_conflict""#, table); - self.execute_sql(&sql).await?; - } - let sql = format!(r#"DROP TABLE IF EXISTS "{}""#, table); - self.execute_sql(&sql).await?; - } - - Ok(self) - } - - /// Truncate all configured tables, in reverse dependency order. - pub async fn truncate_all_tables(&self) -> Result<&Self, ValveError> { - self.truncate_tables(&self.get_sorted_table_list(true)) - .await?; - Ok(self) - } - - /// Given a vector of table names, truncate those tables, in the given order. - pub async fn truncate_tables(&self, tables: &Vec<&str>) -> Result<&Self, ValveError> { - self.create_all_tables().await?; - let truncate_list = self.add_dependencies(tables, true); - - // We must use CASCADE in the case of PostgreSQL since we cannot truncate a table, T, that - // depends on another table, T', even in the case where we have previously truncated T'. - // SQLite does not need this. 
However SQLite does require that the tables be truncated in - // deletion order (which means that it must be checking that T' is empty). - let truncate_sql = |table: &str| -> String { - if self.pool.any_kind() == AnyKind::Postgres { - format!(r#"TRUNCATE TABLE "{}" RESTART IDENTITY CASCADE"#, table) - } else { - format!(r#"DELETE FROM "{}""#, table) - } - }; - - for table in &truncate_list { - let sql = truncate_sql(&table); - self.execute_sql(&sql).await?; - if *table != "message" && *table != "history" { - let sql = truncate_sql(&format!("{}_conflict", table)); - self.execute_sql(&sql).await?; - } - } - - Ok(self) - } - - /// Load all configured tables in dependency order. If `validate` is false, just try to insert - /// all rows, irrespective of whether they are valid or not or will possibly trigger a db error. - pub async fn load_all_tables(&self, validate: bool) -> Result<&Self, ValveError> { - let table_list = self.get_sorted_table_list(false); - if self.verbose { - info!("Processing {} tables.", table_list.len()); - } - self.load_tables(&table_list, validate).await - } - - /// Given a vector of table names, load those tables in the given order. If `validate` is false, - /// just try to insert all rows, irrespective of whether they are valid or not or will possibly - /// trigger a db error. 
- pub async fn load_tables( - &self, - table_list: &Vec<&str>, - validate: bool, - ) -> Result<&Self, ValveError> { - let list_for_truncation = self.sort_tables(table_list, true)?; - self.truncate_tables( - &list_for_truncation - .iter() - .map(|i| i.as_str()) - .collect::>(), - ) - .await?; - - let num_tables = table_list.len(); - let mut total_errors = 0; - let mut total_warnings = 0; - let mut total_infos = 0; - let mut table_num = 1; - for table_name in table_list { - if *table_name == "message" || *table_name == "history" { - continue; - } - let table_name = table_name.to_string(); - let path = String::from( - self.config - .get("table") - .and_then(|t| t.as_object()) - .and_then(|o| o.get(&table_name)) - .and_then(|n| n.get("path")) - .and_then(|p| p.as_str()) - .unwrap(), - ); - let mut rdr = { - match File::open(path.clone()) { - Err(e) => { - warn!("Unable to open '{}': {}", path.clone(), e); - continue; - } - Ok(table_file) => ReaderBuilder::new() - .has_headers(false) - .delimiter(b'\t') - .from_reader(table_file), - } - }; - if self.verbose { - info!("Loading table {}/{}: {}", table_num, num_tables, table_name); - } - table_num += 1; - - // Extract the headers, which we will need later: - let mut records = rdr.records(); - let headers; - if let Some(result) = records.next() { - headers = result.unwrap(); - } else { - panic!("'{}' is empty", path); - } - - for header in headers.iter() { - if header.trim().is_empty() { - panic!( - "One or more of the header fields is empty for table '{}'", - table_name - ); - } - } - - // HashMap used to report info about the number of error/warning/info messages for this - // table when the verbose flag is set to true: - let mut messages_stats = HashMap::new(); - messages_stats.insert("error".to_string(), 0); - messages_stats.insert("warning".to_string(), 0); - messages_stats.insert("info".to_string(), 0); - - // Split the data into chunks of size CHUNK_SIZE before passing them to the validation - // logic: - let chunks 
= records.chunks(CHUNK_SIZE); - insert_chunks( - &self.config, - &self.pool, - &self.compiled_datatype_conditions, - &self.compiled_rule_conditions, - &table_name, - &chunks, - &headers, - &mut messages_stats, - self.verbose, - validate, - ) - .await?; - - if validate { - // We need to wait until all of the rows for a table have been loaded before - // validating the "foreign" constraints on a table's trees, since this checks if the - // values of one column (the tree's parent) are all contained in another column (the - // tree's child). We also need to wait before validating a table's "under" - // constraints. Although the tree associated with such a constraint need not be - // defined on the same table, it can be. - let mut recs_to_update = - validate_tree_foreign_keys(&self.config, &self.pool, None, &table_name, None) - .await?; - recs_to_update.append( - &mut validate_under(&self.config, &self.pool, None, &table_name, None).await?, - ); - - for record in recs_to_update { - let row_number = record.get("row_number").unwrap(); - let column_name = record.get("column").and_then(|s| s.as_str()).unwrap(); - let value = record.get("value").and_then(|s| s.as_str()).unwrap(); - let level = record.get("level").and_then(|s| s.as_str()).unwrap(); - let rule = record.get("rule").and_then(|s| s.as_str()).unwrap(); - let message = record.get("message").and_then(|s| s.as_str()).unwrap(); - - let sql = local_sql_syntax( - &self.pool, - &format!( - r#"INSERT INTO "message" - ("table", "row", "column", "value", "level", "rule", "message") - VALUES ({}, {}, {}, {}, {}, {}, {})"#, - SQL_PARAM, - row_number, - SQL_PARAM, - SQL_PARAM, - SQL_PARAM, - SQL_PARAM, - SQL_PARAM - ), - ); - let mut query = sqlx_query(&sql); - query = query.bind(&table_name); - query = query.bind(&column_name); - query = query.bind(&value); - query = query.bind(&level); - query = query.bind(&rule); - query = query.bind(&message); - query.execute(&self.pool).await?; - - if self.verbose { - // Add the generated 
message to messages_stats: - let messages = vec![json!({ - "message": message, - "level": level, - })]; - add_message_counts(&messages, &mut messages_stats); - } - } - } - - if self.verbose { - // Output a report on the messages generated to stderr: - let errors = messages_stats.get("error").unwrap(); - let warnings = messages_stats.get("warning").unwrap(); - let infos = messages_stats.get("info").unwrap(); - let status_message = format!( - "{} errors, {} warnings, and {} information messages generated for {}", - errors, warnings, infos, table_name - ); - info!("{}", status_message); - total_errors += errors; - total_warnings += warnings; - total_infos += infos; - } - } - - if self.verbose { - info!( - "Loading complete with {} errors, {} warnings, and {} information messages", - total_errors, total_warnings, total_infos - ); - } - Ok(self) - } - - /// Save all configured tables to their configured path's, unless save_dir is specified, - /// in which case save them there instead. - pub fn save_all_tables(&self, save_dir: &Option) -> Result<&Self, ValveError> { - let tables = self.get_sorted_table_list(false); - self.save_tables(&tables, save_dir)?; - Ok(self) - } - - /// Given a vector of table names, save those tables to their configured path's, unless - /// save_dir is specified, in which case save them there instead. 
- pub fn save_tables( - &self, - tables: &Vec<&str>, - save_dir: &Option, - ) -> Result<&Self, ValveError> { - let table_paths: HashMap = self - .config - .get("table") - .unwrap() - .as_object() - .unwrap() - .iter() - .filter(|(k, v)| { - !["message", "history"].contains(&k.as_str()) - && tables.contains(&k.as_str()) - && v.get("path").is_some() - }) - .map(|(k, v)| { - ( - k.clone(), - v.get("path").unwrap().as_str().unwrap().to_string(), - ) - }) - .collect(); - - info!( - "Saving tables: {} ...", - table_paths - .keys() - .map(|k| k.to_string()) - .collect::>() - .join(", ") - ); - for (table, path) in table_paths.iter() { - let columns: Vec<&str> = self - .config - .get("table") - .and_then(|v| v.as_object()) - .and_then(|o| o.get(table)) - .and_then(|v| v.as_object()) - .and_then(|o| o.get("column_order")) - .and_then(|v| v.as_array()) - .and_then(|v| Some(v.iter().map(|i| i.as_str().unwrap()).collect())) - .unwrap(); - - let path = match save_dir { - Some(s) => format!( - "{}/{}", - s, - Path::new(path) - .file_name() - .and_then(|n| n.to_str()) - .unwrap() - ), - None => path.to_string(), - }; - self.save_table(table, &columns, &path)?; - } - - Ok(self) - } - - /// Save the given table with the given columns at the given path as a TSV file. - pub fn save_table( - &self, - table: &str, - columns: &Vec<&str>, - path: &str, - ) -> Result<&Self, ValveError> { - // TODO: Do some validation on the path. 
- - let mut quoted_columns = vec!["\"row_number\"".to_string()]; - quoted_columns.append( - &mut columns - .iter() - .map(|v| enquote::enquote('"', v)) - .collect::>(), - ); - let text_view = format!("\"{}_text_view\"", table); - let sql = format!( - r#"SELECT {} from {} ORDER BY "row_number""#, - quoted_columns.join(", "), - text_view - ); - - let mut writer = WriterBuilder::new() - .delimiter(b'\t') - .quote_style(QuoteStyle::Never) - .from_path(path)?; - writer.write_record(columns)?; - let mut stream = sqlx_query(&sql).fetch(&self.pool); - while let Some(row) = block_on(stream.try_next()).unwrap() { - let mut record: Vec<&str> = vec![]; - for column in columns.iter() { - let cell = row.try_get::<&str, &str>(column).ok().unwrap_or_default(); - record.push(cell); - } - writer.write_record(record)?; - } - writer.flush()?; - - Ok(self) - } - - /// Given a table name and a row, return the validated row. - pub async fn validate_row( - &self, - table_name: &str, - row: &ValveRow, - row_number: Option, - ) -> Result { - validate_row_tx( - &self.config, - &self.compiled_datatype_conditions, - &self.compiled_rule_conditions, - &self.pool, - None, - table_name, - row, - row_number, - None, - ) - .await - } - - /// Given a table name and a row as JSON, add the row to the table in the database, and return - /// the validated row, including its new row_number. 
- pub async fn insert_row( - &self, - table_name: &str, - row: &ValveRow, - ) -> Result<(u32, ValveRow), ValveError> { - let mut tx = self.pool.begin().await?; - - let row = validate_row_tx( - &self.config, - &self.compiled_datatype_conditions, - &self.compiled_rule_conditions, - &self.pool, - Some(&mut tx), - table_name, - row, - None, - None, - ) - .await?; - - let rn = insert_new_row_tx( - &self.config, - &self.compiled_datatype_conditions, - &self.compiled_rule_conditions, - &self.pool, - &mut tx, - table_name, - &row, - None, - true, - ) - .await?; - - record_row_change(&mut tx, table_name, &rn, None, Some(&row), &self.user).await?; - tx.commit().await?; - Ok((rn, row)) - } - - /// Given a table name, a row number, and a row, update the row in the database, and return the - /// validated row. - pub async fn update_row( - &self, - table_name: &str, - row_number: &u32, - row: &ValveRow, - ) -> Result { - let mut tx = self.pool.begin().await?; - - // Get the old version of the row from the database so that we can later record it to the - // history table: - let old_row = - get_row_from_db(&self.config, &self.pool, &mut tx, table_name, &row_number).await?; - - let row = validate_row_tx( - &self.config, - &self.compiled_datatype_conditions, - &self.compiled_rule_conditions, - &self.pool, - Some(&mut tx), - table_name, - row, - Some(*row_number), - None, - ) - .await?; - - update_row_tx( - &self.config, - &self.compiled_datatype_conditions, - &self.compiled_rule_conditions, - &self.pool, - &mut tx, - table_name, - &row, - row_number, - true, - false, - ) - .await?; - - // Record the row update in the history table: - record_row_change( - &mut tx, - table_name, - row_number, - Some(&old_row), - Some(&row), - &self.user, - ) - .await?; - - tx.commit().await?; - Ok(row) - } - - /// Given a table name and a row number, delete that row from the table. 
- pub async fn delete_row(&self, table_name: &str, row_number: &u32) -> Result<(), ValveError> { - let mut tx = self.pool.begin().await?; - - let row = - get_row_from_db(&self.config, &self.pool, &mut tx, &table_name, row_number).await?; - - record_row_change( - &mut tx, - &table_name, - row_number, - Some(&row), - None, - &self.user, - ) - .await?; - - delete_row_tx( - &self.config, - &self.compiled_datatype_conditions, - &self.compiled_rule_conditions, - &self.pool, - &mut tx, - table_name, - row_number, - ) - .await?; - - tx.commit().await?; - Ok(()) - } - - /// Return the next change that can be undone, or None if there isn't any. - pub async fn get_record_to_undo(&self) -> Result, ValveError> { - // Look in the history table, get the row with the greatest ID, get the row number, - // from, and to, and determine whether the last operation was a delete, insert, or update. - let is_clause = if self.pool.any_kind() == AnyKind::Sqlite { - "IS" - } else { - "IS NOT DISTINCT FROM" - }; - let sql = format!( - r#"SELECT * FROM "history" - WHERE "undone_by" {} NULL - ORDER BY "history_id" DESC LIMIT 1"#, - is_clause - ); - let query = sqlx_query(&sql); - let result_row = query.fetch_optional(&self.pool).await?; - Ok(result_row) - } - - /// Return the next change that can be redone, or None if there isn't any. - pub async fn get_record_to_redo(&self) -> Result, ValveError> { - // Look in the history table, get the row with the greatest ID, get the row number, - // from, and to, and determine whether the last operation was a delete, insert, or update. 
- let is_not_clause = if self.pool.any_kind() == AnyKind::Sqlite { - "IS NOT" - } else { - "IS DISTINCT FROM" - }; - let sql = format!( - r#"SELECT * FROM "history" - WHERE "undone_by" {} NULL - ORDER BY "timestamp" DESC LIMIT 1"#, - is_not_clause - ); - let query = sqlx_query(&sql); - let result_row = query.fetch_optional(&self.pool).await?; - Ok(result_row) - } - - /// Undo one change and return the change record or None if there was no change to undo. - pub async fn undo(&self) -> Result, ValveError> { - let last_change = match self.get_record_to_undo().await? { - None => { - warn!("Nothing to undo."); - return Ok(None); - } - Some(r) => r, - }; - let history_id: i32 = last_change.get("history_id"); - let history_id = history_id as u16; - let table: &str = last_change.get("table"); - let row_number: i64 = last_change.get("row"); - let row_number = row_number as u32; - let from = get_json_from_row(&last_change, "from"); - let to = get_json_from_row(&last_change, "to"); - - match (from, to) { - (None, None) => { - return Err(ValveError::DataError( - "Cannot redo unknown operation from None to None".into(), - )) - } - (None, Some(_)) => { - // Undo an insert: - let mut tx = self.pool.begin().await?; - - delete_row_tx( - &self.config, - &self.compiled_datatype_conditions, - &self.compiled_rule_conditions, - &self.pool, - &mut tx, - table, - &row_number, - ) - .await?; - - switch_undone_state(&self.user, history_id, true, &mut tx, &self.pool).await?; - tx.commit().await?; - Ok(None) - } - (Some(from), None) => { - // Undo a delete: - let mut tx = self.pool.begin().await?; - - insert_new_row_tx( - &self.config, - &self.compiled_datatype_conditions, - &self.compiled_rule_conditions, - &self.pool, - &mut tx, - table, - &from, - Some(row_number), - false, - ) - .await?; - - switch_undone_state(&self.user, history_id, true, &mut tx, &self.pool).await?; - tx.commit().await?; - Ok(Some(from)) - } - (Some(from), Some(_)) => { - // Undo an an update: - let mut tx = 
self.pool.begin().await?; - - update_row_tx( - &self.config, - &self.compiled_datatype_conditions, - &self.compiled_rule_conditions, - &self.pool, - &mut tx, - table, - &from, - &row_number, - false, - false, - ) - .await?; - - switch_undone_state(&self.user, history_id, true, &mut tx, &self.pool).await?; - tx.commit().await?; - Ok(Some(from)) - } - } - } - - /// Redo one change and return the change record or None if there was no change to redo. - pub async fn redo(&self) -> Result, ValveError> { - let last_undo = match self.get_record_to_redo().await? { - None => { - warn!("Nothing to redo."); - return Ok(None); - } - Some(last_undo) => { - let undone_by = last_undo.try_get_raw("undone_by")?; - if undone_by.is_null() { - warn!("Nothing to redo."); - return Ok(None); - } - last_undo - } - }; - let history_id: i32 = last_undo.get("history_id"); - let history_id = history_id as u16; - let table: &str = last_undo.get("table"); - let row_number: i64 = last_undo.get("row"); - let row_number = row_number as u32; - let from = get_json_from_row(&last_undo, "from"); - let to = get_json_from_row(&last_undo, "to"); - - match (from, to) { - (None, None) => { - return Err(ValveError::DataError( - "Cannot redo unknown operation from None to None".into(), - )) - } - (None, Some(to)) => { - // Redo an insert: - let mut tx = self.pool.begin().await?; - - insert_new_row_tx( - &self.config, - &self.compiled_datatype_conditions, - &self.compiled_rule_conditions, - &self.pool, - &mut tx, - table, - &to, - Some(row_number), - false, - ) - .await?; - - switch_undone_state(&self.user, history_id, false, &mut tx, &self.pool).await?; - tx.commit().await?; - Ok(Some(to)) - } - (Some(_), None) => { - // Redo a delete: - let mut tx = self.pool.begin().await?; - - delete_row_tx( - &self.config, - &self.compiled_datatype_conditions, - &self.compiled_rule_conditions, - &self.pool, - &mut tx, - table, - &row_number, - ) - .await?; - - switch_undone_state(&self.user, history_id, false, &mut tx, 
&self.pool).await?; - tx.commit().await?; - Ok(None) - } - (Some(_), Some(to)) => { - // Redo an an update: - let mut tx = self.pool.begin().await?; - - update_row_tx( - &self.config, - &self.compiled_datatype_conditions, - &self.compiled_rule_conditions, - &self.pool, - &mut tx, - table, - &to, - &row_number, - false, - false, - ) - .await?; - - switch_undone_state(&self.user, history_id, false, &mut tx, &self.pool).await?; - tx.commit().await?; - Ok(Some(to)) - } - } - } - - /// Given a table name, a column name, and (optionally) a string to match, return a JSON array - /// of possible valid values for the given column which contain the matching string as a - /// substring (or all of them if no matching string is given). The JSON array returned is - /// formatted for Typeahead, i.e., it takes the form: - /// `[{"id": id, "label": label, "order": order}, ...]`. - pub async fn get_matching_values( - &self, - table_name: &str, - column_name: &str, - matching_string: Option<&str>, - ) -> Result { - let config = &self.config; - let compiled_datatype_conditions = &self.compiled_datatype_conditions; - let parsed_structure_conditions = &self.parsed_structure_conditions; - let pool = &self.pool; - let dt_name = config - .get("table") - .and_then(|t| t.as_object()) - .and_then(|t| t.get(table_name)) - .and_then(|t| t.as_object()) - .and_then(|t| t.get("column")) - .and_then(|c| c.as_object()) - .and_then(|c| c.get(column_name)) - .and_then(|c| c.as_object()) - .and_then(|c| c.get("datatype")) - .and_then(|d| d.as_str()) - .unwrap(); - - let dt_condition = compiled_datatype_conditions - .get(dt_name) - .and_then(|d| Some(d.parsed.clone())); - - let mut values = vec![]; - match dt_condition { - Some(Expression::Function(name, args)) if name == "in" => { - for arg in args { - if let Expression::Label(arg) = *arg { - // Remove the enclosing quotes from the values being returned: - let label = unquote(&arg).unwrap_or_else(|_| arg); - if let Some(s) = matching_string { - if 
label.contains(s) { - values.push(label); - } - } - } - } - } - _ => { - // If the datatype for the column does not correspond to an `in(...)` function, then - // we check the column's structure constraints. If they include a - // `from(foreign_table.foreign_column)` condition, then the values are taken from - // the foreign column. Otherwise if the structure includes an - // `under(tree_table.tree_column, value)` condition, then get the values from the - // tree column that are under `value`. - let structure = parsed_structure_conditions.get( - config - .get("table") - .and_then(|t| t.as_object()) - .and_then(|t| t.get(table_name)) - .and_then(|t| t.as_object()) - .and_then(|t| t.get("column")) - .and_then(|c| c.as_object()) - .and_then(|c| c.get(column_name)) - .and_then(|c| c.as_object()) - .and_then(|c| c.get("structure")) - .and_then(|d| d.as_str()) - .unwrap_or_else(|| ""), - ); - - let sql_type = - get_sql_type_from_global_config(&config, table_name, &column_name, &pool) - .unwrap(); - - match structure { - Some(ParsedStructure { original, parsed }) => { - let matching_string = { - match matching_string { - None => "%".to_string(), - Some(s) => format!("%{}%", s), - } - }; - - match parsed { - Expression::Function(name, args) if name == "from" => { - let foreign_key = &args[0]; - if let Expression::Field(ftable, fcolumn) = &**foreign_key { - let fcolumn_text = cast_column_sql_to_text(&fcolumn, &sql_type); - let sql = local_sql_syntax( - &pool, - &format!( - r#"SELECT "{}" FROM "{}" WHERE {} LIKE {}"#, - fcolumn, ftable, fcolumn_text, SQL_PARAM - ), - ); - let rows = sqlx_query(&sql) - .bind(&matching_string) - .fetch_all(pool) - .await?; - for row in rows.iter() { - values.push(get_column_value(&row, &fcolumn, &sql_type)); - } - } - } - Expression::Function(name, args) - if name == "under" || name == "tree" => - { - let mut tree_col = "not set"; - let mut under_val = Some("not set".to_string()); - if name == "under" { - if let Expression::Field(_, column) = 
&**&args[0] { - tree_col = column; - } - if let Expression::Label(label) = &**&args[1] { - under_val = Some(label.to_string()); - } - } else { - let tree_key = &args[0]; - if let Expression::Label(label) = &**tree_key { - tree_col = label; - under_val = None; - } - } - - let tree = config - .get("constraints") - .and_then(|c| c.as_object()) - .and_then(|c| c.get("tree")) - .and_then(|t| t.as_object()) - .and_then(|t| t.get(table_name)) - .and_then(|t| t.as_array()) - .and_then(|t| { - t.iter().find(|o| o.get("child").unwrap() == tree_col) - }) - .expect( - format!("No tree: '{}.{}' found", table_name, tree_col) - .as_str(), - ) - .as_object() - .unwrap(); - let child_column = - tree.get("child").and_then(|c| c.as_str()).unwrap(); - - let (tree_sql, mut params) = with_tree_sql( - &config, - tree, - &table_name.to_string(), - &table_name.to_string(), - under_val.as_ref(), - None, - &pool, - ); - let child_column_text = - cast_column_sql_to_text(&child_column, &sql_type); - let sql = local_sql_syntax( - &pool, - &format!( - r#"{} SELECT "{}" FROM "tree" WHERE {} LIKE {}"#, - tree_sql, child_column, child_column_text, SQL_PARAM - ), - ); - params.push(matching_string); - - let mut query = sqlx_query(&sql); - for param in ¶ms { - query = query.bind(param); - } - - let rows = query.fetch_all(pool).await?; - for row in rows.iter() { - values.push(get_column_value(&row, &child_column, &sql_type)); - } - } - _ => panic!("Unrecognised structure: {}", original), - }; - } - None => (), - }; - } - }; - - let mut typeahead_values = vec![]; - for (i, v) in values.iter().enumerate() { - // enumerate() begins at 0 but we need to begin at 1: - let i = i + 1; - typeahead_values.push(json!({ - "id": v, - "label": v, - "order": i, - })); - } - - Ok(json!(typeahead_values)) - } -} - /// Given a string representing the location of a database, return a database connection pool. 
pub async fn get_pool_from_connection_string(database: &str) -> Result { let connection_options; diff --git a/src/main.rs b/src/main.rs index 569938bd..413b42f6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,8 @@ mod api_test; use crate::api_test::run_api_tests; - use argparse::{ArgumentParser, Store, StoreTrue}; - -use ontodev_valve::{Valve, ValveError}; +use ontodev_valve::{valve::Valve, valve::ValveError}; use serde_json::{from_str, Value as SerdeValue}; use std::{env, process}; diff --git a/src/validate.rs b/src/validate.rs index a8aeae03..26c70f53 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -1,15 +1,15 @@ +use crate::{ + cast_sql_param_from_text, error, get_column_value, get_sql_type_from_global_config, + is_sql_type_error, local_sql_syntax, + valve::{ValveError, ValveRow}, + ColumnRule, CompiledCondition, SerdeMap, +}; use chrono::Utc; use indexmap::IndexMap; use serde_json::{json, Value as SerdeValue}; use sqlx::{any::AnyPool, query as sqlx_query, Acquire, Row, Transaction, ValueRef}; use std::collections::HashMap; -use crate::{ - cast_sql_param_from_text, error, get_column_value, get_sql_type_from_global_config, - is_sql_type_error, local_sql_syntax, ColumnRule, CompiledCondition, SerdeMap, ValveError, - ValveRow, -}; - /// Represents a particular cell in a particular row of data with vaildation results. 
#[derive(Clone, Debug)] pub struct ResultCell { diff --git a/src/valve.rs b/src/valve.rs new file mode 100644 index 00000000..9866390a --- /dev/null +++ b/src/valve.rs @@ -0,0 +1,1828 @@ +use crate::{ + add_message_counts, + ast::Expression, + cast_column_sql_to_text, delete_row_tx, get_column_value, get_compiled_datatype_conditions, + get_compiled_rule_conditions, get_json_from_row, get_parsed_structure_conditions, + get_pool_from_connection_string, get_row_from_db, get_sql_for_standard_view, + get_sql_for_text_view, get_sql_type, get_sql_type_from_global_config, get_table_ddl, info, + insert_chunks, insert_new_row_tx, local_sql_syntax, read_config_files, record_row_change, + switch_undone_state, update_row_tx, + validate::{validate_row_tx, validate_tree_foreign_keys, validate_under, with_tree_sql}, + valve_grammar::StartParser, + verify_table_deps_and_sort, warn, ColumnRule, CompiledCondition, ParsedStructure, SerdeMap, + CHUNK_SIZE, SQL_PARAM, +}; +use chrono::Utc; +use csv::{QuoteStyle, ReaderBuilder, WriterBuilder}; +use enquote::unquote; +use futures::{executor::block_on, TryStreamExt}; +use indexmap::IndexMap; +use indoc::indoc; +use itertools::Itertools; +use regex::Regex; +use serde_json::{json, Value as SerdeValue}; +use sqlx::{ + any::{AnyKind, AnyPool, AnyRow}, + query as sqlx_query, Row, ValueRef, +}; +use std::{collections::HashMap, fs::File, path::Path}; + +/// Alias for [serde_json::Map](..//serde_json/struct.Map.html). +// Note: serde_json::Map is +// [backed by a BTreeMap by default](https://docs.serde.rs/serde_json/map/index.html) +pub type ValveRow = serde_json::Map; + +/// Main entrypoint for the Valve API. +#[derive(Clone, Debug)] +pub struct Valve { + /// The valve configuration map. + pub config: SerdeMap, + /// Pre-compiled datatype conditions. + pub compiled_datatype_conditions: HashMap, + /// Pre-compiled rule conditions. 
+ pub compiled_rule_conditions: HashMap>>, + /// Parsed structure conditions: + pub parsed_structure_conditions: HashMap, + /// Lists of tables that depend on a given table, indexed by table. + pub table_dependencies_in: HashMap>, + /// Lists of tables that a given table depends on, indexed by table. + pub table_dependencies_out: HashMap>, + /// The database connection pool. + pub pool: AnyPool, + /// The user associated with this valve instance. + pub user: String, + /// Produce more logging output. + pub verbose: bool, +} + +#[derive(Debug)] +pub enum ValveError { + /// An error in the Valve configuration: + ConfigError(String), + /// An error that occurred while reading or writing to a CSV/TSV: + CsvError(csv::Error), + /// An error involving the data: + DataError(String), + /// An error generated by the underlying database: + DatabaseError(sqlx::Error), + /// An error in the inputs to a function: + InputError(String), + /// An error that occurred while reading/writing to stdio: + IOError(std::io::Error), + /// An error that occurred while serialising or deserialising to/from JSON: + SerdeJsonError(serde_json::Error), +} + +impl From for ValveError { + fn from(e: csv::Error) -> Self { + Self::CsvError(e) + } +} + +impl From for ValveError { + fn from(e: sqlx::Error) -> Self { + Self::DatabaseError(e) + } +} + +impl From for ValveError { + fn from(e: serde_json::Error) -> Self { + Self::SerdeJsonError(e) + } +} + +impl From for ValveError { + fn from(e: std::io::Error) -> Self { + Self::IOError(e) + } +} + +impl Valve { + /// Given a path to a table table, a path to a database, a flag for verbose output, and a flag + /// indicating whether the database should be configured for initial loading: Set up a database + /// connection, configure VALVE, and return a new Valve struct. 
+ pub async fn build( + table_path: &str, + database: &str, + verbose: bool, + initial_load: bool, + ) -> Result { + let pool = get_pool_from_connection_string(database).await?; + if pool.any_kind() == AnyKind::Sqlite { + sqlx_query("PRAGMA foreign_keys = ON") + .execute(&pool) + .await?; + if initial_load { + // These pragmas are unsafe but they are used during initial loading since data + // integrity is not a priority in this case. + sqlx_query("PRAGMA journal_mode = OFF") + .execute(&pool) + .await?; + sqlx_query("PRAGMA synchronous = 0").execute(&pool).await?; + sqlx_query("PRAGMA cache_size = 1000000") + .execute(&pool) + .await?; + sqlx_query("PRAGMA temp_store = MEMORY") + .execute(&pool) + .await?; + } + } + + let parser = StartParser::new(); + let ( + specials_config, + tables_config, + datatypes_config, + rules_config, + constraints_config, + sorted_table_list, + table_dependencies_in, + table_dependencies_out, + ) = read_config_files(table_path, &parser, &pool); + + let mut config = SerdeMap::new(); + config.insert( + String::from("special"), + SerdeValue::Object(specials_config.clone()), + ); + config.insert( + String::from("table"), + SerdeValue::Object(tables_config.clone()), + ); + config.insert( + String::from("datatype"), + SerdeValue::Object(datatypes_config.clone()), + ); + config.insert( + String::from("rule"), + SerdeValue::Object(rules_config.clone()), + ); + config.insert( + String::from("constraints"), + SerdeValue::Object(constraints_config.clone()), + ); + let mut sorted_table_serdevalue_list: Vec = vec![]; + for table in &sorted_table_list { + sorted_table_serdevalue_list.push(SerdeValue::String(table.to_string())); + } + config.insert( + String::from("sorted_table_list"), + SerdeValue::Array(sorted_table_serdevalue_list), + ); + + let compiled_datatype_conditions = get_compiled_datatype_conditions(&config, &parser); + let compiled_rule_conditions = + get_compiled_rule_conditions(&config, compiled_datatype_conditions.clone(), &parser); + 
let parsed_structure_conditions = get_parsed_structure_conditions(&config, &parser); + + Ok(Self { + config: config, + compiled_datatype_conditions: compiled_datatype_conditions, + compiled_rule_conditions: compiled_rule_conditions, + parsed_structure_conditions: parsed_structure_conditions, + table_dependencies_in: table_dependencies_in, + table_dependencies_out: table_dependencies_out, + pool: pool, + user: String::from("VALVE"), + verbose: verbose, + }) + } + + /// Convenience function to retrieve the path to Valve's "table table", the main entrypoint + /// to Valve's configuration. + pub fn get_path(&self) -> String { + self.config + .get("table") + .and_then(|t| t.as_object()) + .and_then(|t| t.get("table")) + .and_then(|t| t.as_object()) + .and_then(|t| t.get("path")) + .and_then(|p| p.as_str()) + .unwrap() + .to_string() + } + + /// Controls the maximum length of a username. + const USERNAME_MAX_LEN: usize = 20; + + /// Sets the user name, which must be a short, trimmed, string without newlines, for this Valve + /// instance. + pub fn set_user(&mut self, user: &str) -> Result<&mut Self, ValveError> { + if user.len() > Self::USERNAME_MAX_LEN { + return Err(ValveError::ConfigError(format!( + "Username '{}' is longer than {} characters.", + user, + Self::USERNAME_MAX_LEN + ))); + } else { + let user_regex = Regex::new(r#"^\S([^\n]*\S)*$"#).unwrap(); + if !user_regex.is_match(user) { + return Err(ValveError::ConfigError(format!( + "Username '{}' is not a short, trimmed, string without newlines.", + user, + ))); + } + } + self.user = user.to_string(); + Ok(self) + } + + /// Given a SQL string, execute it using the connection pool associated with the Valve instance. + async fn execute_sql(&self, sql: &str) -> Result<(), ValveError> { + sqlx_query(&sql).execute(&self.pool).await?; + Ok(()) + } + + /// Return the list of configured tables in sorted order, or reverse sorted order if the + /// reverse flag is set. 
+ pub fn get_sorted_table_list(&self, reverse: bool) -> Vec<&str> { + let mut sorted_tables = self + .config + .get("sorted_table_list") + .and_then(|l| l.as_array()) + .and_then(|l| Some(l.iter().map(|i| i.as_str().unwrap()))) + .and_then(|l| Some(l.collect::>())) + .unwrap(); + if reverse { + sorted_tables.reverse(); + } + sorted_tables + } + + /// Given the name of a table, determine whether its current instantiation in the database + /// differs from the way it has been configured. The answer to this question is yes whenever + /// (1) the number of columns or any of their names differs from their configured values, or + /// the order of database columns differs from the configured order; (2) The SQL type of one or + /// more columns does not match the configured SQL type for that column; (3) Some column with a + /// 'unique', 'primary', or 'from(table, column)' in its column configuration fails to be + /// associated, in the database, with a unique constraint, primary key, or foreign key, + /// respectively; or vice versa; (4) The table does not exist in the database. + pub async fn table_has_changed(&self, table: &str) -> Result { + // A clojure that, given a parsed structure condition, a table and column name, and an + // unsigned integer representing whether the given column, in the case of a SQLite database, + // is a primary key (in the case of PostgreSQL, the sqlite_pk parameter is ignored): + // determine whether the structure of the column is properly reflected in the db. E.g., a + // `from(table.column)` struct should be associated with a foreign key, `primary` with a + // primary key, `unique` with a unique constraint. 
+ let structure_has_changed = |pstruct: &Expression, + table: &str, + column: &str, + sqlite_pk: &u32| + -> Result { + // A clojure to determine whether the given column has the given constraint type, which + // can be one of 'UNIQUE', 'PRIMARY KEY', 'FOREIGN KEY': + let column_has_constraint_type = |constraint_type: &str| -> Result { + if self.pool.any_kind() == AnyKind::Postgres { + let sql = format!( + r#"SELECT 1 + FROM information_schema.table_constraints tco + JOIN information_schema.key_column_usage kcu + ON kcu.constraint_name = tco.constraint_name + AND kcu.constraint_schema = tco.constraint_schema + AND kcu.table_name = '{}' + WHERE tco.constraint_type = '{}' + AND kcu.column_name = '{}'"#, + table, constraint_type, column + ); + let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; + if rows.len() > 1 { + unreachable!(); + } + Ok(rows.len() == 1) + } else { + if constraint_type == "PRIMARY KEY" { + return Ok(*sqlite_pk == 1); + } else if constraint_type == "UNIQUE" { + let sql = format!(r#"PRAGMA INDEX_LIST("{}")"#, table); + for row in block_on(sqlx_query(&sql).fetch_all(&self.pool))? { + let idx_name = row.get::("name"); + let unique = row.get::("unique") as u8; + if unique == 1 { + let sql = format!(r#"PRAGMA INDEX_INFO("{}")"#, idx_name); + let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; + if rows.len() == 1 { + let cname = rows[0].get::("name"); + if cname == column { + return Ok(true); + } + } + } + } + Ok(false) + } else if constraint_type == "FOREIGN KEY" { + let sql = format!(r#"PRAGMA FOREIGN_KEY_LIST("{}")"#, table); + for row in block_on(sqlx_query(&sql).fetch_all(&self.pool))? 
{ + let cname = row.get::("from"); + if cname == column { + return Ok(true); + } + } + Ok(false) + } else { + return Err(ValveError::InputError( + format!("Unrecognized constraint type: '{}'", constraint_type).into(), + )); + } + } + }; + + // Check if there is a change to whether this column is a primary/unique key: + let is_primary = match pstruct { + Expression::Label(label) if label == "primary" => true, + _ => false, + }; + if is_primary != column_has_constraint_type("PRIMARY KEY")? { + return Ok(true); + } else if !is_primary { + let is_unique = match pstruct { + Expression::Label(label) if label == "unique" => true, + _ => false, + }; + let unique_in_db = column_has_constraint_type("UNIQUE")?; + if is_unique != unique_in_db { + // A child of a tree constraint implies a unique db constraint, so if there is a + // unique constraint in the db that is not configured, that is the explanation, + // and in that case we do not count this as a change to the column. + if !unique_in_db { + return Ok(true); + } else { + let trees = + self.config + .get("constraints") + .and_then(|c| c.as_object()) + .and_then(|o| o.get("tree")) + .and_then(|t| t.as_object()) + .and_then(|o| o.get(table)) + .and_then(|t| t.as_array()) + .and_then(|a| { + Some(a.iter().map(|o| { + o.as_object().and_then(|o| o.get("child")).unwrap() + })) + }) + .unwrap() + .collect::>(); + if !trees.contains(&&SerdeValue::String(column.to_string())) { + return Ok(true); + } + } + } + } + + match pstruct { + Expression::Function(name, args) if name == "from" => { + match &*args[0] { + Expression::Field(cfg_ftable, cfg_fcolumn) => { + if self.pool.any_kind() == AnyKind::Sqlite { + let sql = format!(r#"PRAGMA FOREIGN_KEY_LIST("{}")"#, table); + for row in block_on(sqlx_query(&sql).fetch_all(&self.pool))? 
{ + let from = row.get::("from"); + if from == column { + let db_ftable = row.get::("table"); + let db_fcolumn = row.get::("to"); + if *cfg_ftable != db_ftable || *cfg_fcolumn != db_fcolumn { + return Ok(true); + } + } + } + } else { + let sql = format!( + r#"SELECT + ccu.table_name AS foreign_table_name, + ccu.column_name AS foreign_column_name + FROM information_schema.table_constraints AS tc + JOIN information_schema.key_column_usage AS kcu + ON tc.constraint_name = kcu.constraint_name + AND tc.table_schema = kcu.table_schema + JOIN information_schema.constraint_column_usage AS ccu + ON ccu.constraint_name = tc.constraint_name + WHERE tc.constraint_type = 'FOREIGN KEY' + AND tc.table_name = '{}' + AND kcu.column_name = '{}'"#, + table, column + ); + let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; + if rows.len() == 0 { + // If the table doesn't even exist return true. + return Ok(true); + } else if rows.len() > 1 { + // This seems impossible given how PostgreSQL works: + unreachable!(); + } else { + let row = &rows[0]; + let db_ftable = row.get::("foreign_table_name"); + let db_fcolumn = row.get::("foreign_column_name"); + if *cfg_ftable != db_ftable || *cfg_fcolumn != db_fcolumn { + return Ok(true); + } + } + } + } + _ => { + return Err(ValveError::InputError( + format!("Unrecognized structure: {:?}", pstruct).into(), + )); + } + }; + } + _ => (), + }; + + Ok(false) + }; + + let (columns_config, configured_column_order) = { + let table_config = self + .config + .get("table") + .and_then(|tc| tc.get(table)) + .and_then(|t| t.as_object()) + .unwrap(); + let columns_config = table_config + .get("column") + .and_then(|c| c.as_object()) + .unwrap(); + let configured_column_order = { + let mut configured_column_order = { + if table == "message" { + vec!["message_id".to_string()] + } else if table == "history" { + vec!["history_id".to_string()] + } else { + vec!["row_number".to_string()] + } + }; + configured_column_order.append( + &mut table_config + 
.get("column_order") + .and_then(|c| c.as_array()) + .and_then(|a| Some(a.iter())) + .and_then(|a| Some(a.map(|c| c.as_str().unwrap().to_string()))) + .and_then(|a| Some(a.collect::>())) + .unwrap(), + ); + configured_column_order + }; + + (columns_config, configured_column_order) + }; + + let db_columns_in_order = { + if self.pool.any_kind() == AnyKind::Sqlite { + let sql = format!( + r#"SELECT 1 FROM sqlite_master WHERE "type" = 'table' AND "name" = '{}'"#, + table + ); + let rows = sqlx_query(&sql).fetch_all(&self.pool).await?; + if rows.len() == 0 { + if self.verbose { + info!( + "The table '{}' will be recreated as it does not exist in the database", + table + ); + } + return Ok(true); + } else if rows.len() == 1 { + // Otherwise send another query to the db to get the column info: + let sql = format!(r#"PRAGMA TABLE_INFO("{}")"#, table); + let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; + rows.iter() + .map(|r| { + ( + r.get::("name"), + r.get::("type"), + r.get::("pk") as u32, + ) + }) + .collect::>() + } else { + unreachable!(); + } + } else { + let sql = format!( + r#"SELECT "column_name", "data_type" + FROM "information_schema"."columns" + WHERE "table_name" = '{}' + ORDER BY "ordinal_position""#, + table, + ); + let rows = sqlx_query(&sql).fetch_all(&self.pool).await?; + if rows.len() == 0 { + if self.verbose { + info!( + "The table '{}' will be recreated as it does not exist in the database", + table + ); + } + return Ok(true); + } + // Otherwise we get the column name: + rows.iter() + .map(|r| { + ( + r.get::("column_name"), + r.get::("data_type"), + // The third entry is just a dummy so that the datatypes in the two + // wings of this if/else block match. 
+ 0, + ) + }) + .collect::>() + } + }; + + // Check if the order of the configured columns matches the order of the columns in the + // database: + let db_column_order = db_columns_in_order + .iter() + .map(|c| c.0.clone()) + .collect::>(); + if db_column_order != configured_column_order { + if self.verbose { + info!( + "The table '{}' will be recreated since the database columns: {:?} \ + and/or their order does not match the configured columns: {:?}", + table, db_column_order, configured_column_order + ); + } + return Ok(true); + } + + // Check, for all tables, whether their column configuration matches the contents of the + // database: + for (cname, ctype, pk) in &db_columns_in_order { + // Do not consider these special columns: + if (table == "message" && cname == "message_id") + || (table == "message" && cname == "row") + || (table == "history" && cname == "history_id") + || (table == "history" && cname == "timestamp") + || (table == "history" && cname == "row") + || cname == "row_number" + { + continue; + } + let column_config = columns_config + .get(cname) + .and_then(|c| c.as_object()) + .unwrap(); + let sql_type = + get_sql_type_from_global_config(&self.config, table, &cname, &self.pool).unwrap(); + + // Check the column's SQL type: + if sql_type.to_lowercase() != ctype.to_lowercase() { + let s = sql_type.to_lowercase(); + let c = ctype.to_lowercase(); + // CHARACTER VARYING and VARCHAR are synonyms so we ignore this difference. 
+ if !((s.starts_with("varchar") || s.starts_with("character varying")) + && (c.starts_with("varchar") || c.starts_with("character varying"))) + { + if self.verbose { + info!( + "The table '{}' will be recreated because the SQL type of column '{}', \ + {}, does not match the configured value: {}", + table, + cname, + ctype, + sql_type + ); + } + return Ok(true); + } + } + + // Check the column's structure: + let structure = column_config.get("structure").and_then(|d| d.as_str()); + match structure { + Some(structure) if structure != "" => { + let parsed_structure = self + .parsed_structure_conditions + .get(structure) + .and_then(|p| Some(p.parsed.clone())) + .unwrap(); + if structure_has_changed(&parsed_structure, table, &cname, &pk)? { + if self.verbose { + info!( + "The table '{}' will be recreated because the database \ + constraints for column '{}' do not match the configured \ + structure, '{}'", + table, cname, structure + ); + } + return Ok(true); + } + } + _ => (), + }; + } + + Ok(false) + } + + /// Generates and returns the DDL required to setup the database. + pub async fn get_setup_statements(&self) -> Result>, ValveError> { + let tables_config = self + .config + .get("table") + .and_then(|t| t.as_object()) + .unwrap() + .clone(); + let datatypes_config = self + .config + .get("datatype") + .and_then(|d| d.as_object()) + .unwrap() + .clone(); + + let parser = StartParser::new(); + + // Begin by reading in the TSV files corresponding to the tables defined in tables_config, + // and use that information to create the associated database tables, while saving + // constraint information to constrains_config. 
+ let mut setup_statements = HashMap::new(); + for table_name in tables_config.keys().cloned().collect::>() { + // Generate the statements for creating the table and its corresponding conflict table: + let mut table_statements = vec![]; + for table in vec![table_name.to_string(), format!("{}_conflict", table_name)] { + let mut statements = get_table_ddl( + &tables_config, + &datatypes_config, + &parser, + &table, + &self.pool, + ); + table_statements.append(&mut statements); + } + + let create_view_sql = get_sql_for_standard_view(&table_name, &self.pool); + let create_text_view_sql = + get_sql_for_text_view(&tables_config, &table_name, &self.pool); + table_statements.push(create_view_sql); + table_statements.push(create_text_view_sql); + + setup_statements.insert(table_name.to_string(), table_statements); + } + + let text_type = get_sql_type(&datatypes_config, &"text".to_string(), &self.pool).unwrap(); + + // Generate DDL for the history table: + let mut history_statements = vec![]; + history_statements.push(format!( + indoc! {r#" + CREATE TABLE "history" ( + {history_id} + "table" {text_type}, + "row" BIGINT, + "from" {text_type}, + "to" {text_type}, + "summary" {text_type}, + "user" {text_type}, + "undone_by" {text_type}, + {timestamp} + ); + "#}, + history_id = { + if self.pool.any_kind() == AnyKind::Sqlite { + "\"history_id\" INTEGER PRIMARY KEY," + } else { + "\"history_id\" SERIAL PRIMARY KEY," + } + }, + text_type = text_type, + timestamp = { + if self.pool.any_kind() == AnyKind::Sqlite { + "\"timestamp\" TIMESTAMP DEFAULT(STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW'))" + } else { + "\"timestamp\" TIMESTAMP DEFAULT CURRENT_TIMESTAMP" + } + }, + )); + history_statements + .push(r#"CREATE INDEX "history_tr_idx" ON "history"("table", "row");"#.to_string()); + setup_statements.insert("history".to_string(), history_statements); + + // Generate DDL for the message table: + let mut message_statements = vec![]; + message_statements.push(format!( + indoc! 
{r#" + CREATE TABLE "message" ( + {message_id} + "table" {text_type}, + "row" BIGINT, + "column" {text_type}, + "value" {text_type}, + "level" {text_type}, + "rule" {text_type}, + "message" {text_type} + ); + "#}, + message_id = { + if self.pool.any_kind() == AnyKind::Sqlite { + "\"message_id\" INTEGER PRIMARY KEY," + } else { + "\"message_id\" SERIAL PRIMARY KEY," + } + }, + text_type = text_type, + )); + message_statements.push( + r#"CREATE INDEX "message_trc_idx" ON "message"("table", "row", "column");"#.to_string(), + ); + setup_statements.insert("message".to_string(), message_statements); + + return Ok(setup_statements); + } + + /// Writes the database schema to stdout. + pub async fn dump_schema(&self) -> Result<(), ValveError> { + let setup_statements = self.get_setup_statements().await?; + for table in self.get_sorted_table_list(false) { + let table_statements = setup_statements.get(table).unwrap(); + let output = String::from(table_statements.join("\n")); + println!("{}\n", output); + } + Ok(()) + } + + /// Create all configured database tables and views if they do not already exist as configured. + pub async fn create_all_tables(&self) -> Result<&Self, ValveError> { + let setup_statements = self.get_setup_statements().await?; + let sorted_table_list = self.get_sorted_table_list(false); + for table in &sorted_table_list { + if self.table_has_changed(*table).await? { + self.drop_tables(&vec![table]).await?; + let table_statements = setup_statements.get(*table).unwrap(); + for stmt in table_statements { + self.execute_sql(stmt).await?; + } + } + } + + Ok(self) + } + + /// Checks whether the given table exists in the database. 
+ pub async fn table_exists(&self, table: &str) -> Result { + let sql = { + if self.pool.any_kind() == AnyKind::Sqlite { + format!( + r#"SELECT 1 + FROM "sqlite_master" + WHERE "type" = 'table' AND name = '{}' + LIMIT 1"#, + table + ) + } else { + format!( + r#"SELECT 1 + FROM "information_schema"."tables" + WHERE "table_schema" = 'public' + AND "table_name" = '{}'"#, + table + ) + } + }; + let query = sqlx_query(&sql); + let rows = query.fetch_all(&self.pool).await?; + return Ok(rows.len() > 0); + } + + /// Get all the incoming (tables that depend on it) or outgoing (tables it depends on) + /// dependencies of the given table. + pub fn get_dependencies(&self, table: &str, incoming: bool) -> Vec { + let mut dependent_tables = vec![]; + if table != "message" && table != "history" { + let direct_deps = { + if incoming { + self.table_dependencies_in.get(table).unwrap().to_vec() + } else { + self.table_dependencies_out.get(table).unwrap().to_vec() + } + }; + for direct_dep in direct_deps { + let mut indirect_deps = self.get_dependencies(&direct_dep, incoming); + dependent_tables.append(&mut indirect_deps); + dependent_tables.push(direct_dep); + } + } + dependent_tables + } + + /// Given a list of tables, fill it in with any further tables that are dependent upon tables + /// in the given list. If deletion_order is true, the tables are sorted as required for + /// deleting them all sequentially, otherwise they are ordered in reverse. + pub fn add_dependencies(&self, tables: &Vec<&str>, deletion_order: bool) -> Vec { + let mut with_dups = vec![]; + for table in tables { + let dependent_tables = self.get_dependencies(table, true); + for dep_table in dependent_tables { + with_dups.push(dep_table.to_string()); + } + with_dups.push(table.to_string()); + } + // The algorithm above gives the tables in the order needed for deletion. But we want + // this function to return the creation order by default so we reverse it unless + // the deletion_order flag is set to true. 
+ if !deletion_order { + with_dups.reverse(); + } + + // Remove the duplicates from the returned table list: + let mut tables_in_order = vec![]; + for table in with_dups.iter().unique() { + tables_in_order.push(table.to_string()); + } + tables_in_order + } + + /// Given a subset of the configured tables, return them in sorted dependency order, or in + /// reverse if `reverse` is set to true. + pub fn sort_tables( + &self, + table_subset: &Vec<&str>, + reverse: bool, + ) -> Result, ValveError> { + let full_table_list = self.get_sorted_table_list(false); + if !table_subset + .iter() + .all(|item| full_table_list.contains(item)) + { + return Err(ValveError::InputError(format!( + "[{}] contains tables that are not in the configured table list: [{}]", + table_subset.join(", "), + full_table_list.join(", ") + ))); + } + + let constraints_config = self + .config + .get("constraints") + .and_then(|c| c.as_object()) + .ok_or(ValveError::ConfigError( + "Unable to retrieve configured constraints.".into(), + ))?; + + // Filter out message and history since they are not represented in the constraints config. + // They will be added implicitly to the list returned by verify_table_deps_and_sort. + let filtered_subset = table_subset + .iter() + .filter(|m| **m != "history" && **m != "message") + .map(|s| s.to_string()) + .collect::>(); + + let (sorted_subset, _, _) = + verify_table_deps_and_sort(&filtered_subset, &constraints_config); + + // Since the result of verify_table_deps_and_sort() will include dependencies of the tables + // in its input list, we filter those out here: + let mut sorted_subset = sorted_subset + .iter() + .filter(|m| table_subset.contains(&m.as_str())) + .map(|s| s.to_string()) + .collect::>(); + + if reverse { + sorted_subset.reverse(); + } + Ok(sorted_subset) + } + + /// Returns an IndexMap, indexed by configured table, containing lists of their dependencies. + /// If incoming is true, the lists are incoming dependencies, else they are outgoing. 
+ pub fn collect_dependencies(&self, incoming: bool) -> IndexMap> { + let tables = self.get_sorted_table_list(false); + let mut dependencies = IndexMap::new(); + for table in tables { + dependencies.insert(table.to_string(), self.get_dependencies(table, incoming)); + } + dependencies + } + + /// Drop all configured tables, in reverse dependency order. + pub async fn drop_all_tables(&self) -> Result<&Self, ValveError> { + // Drop all of the database tables in the reverse of their sorted order: + self.drop_tables(&self.get_sorted_table_list(true)).await?; + Ok(self) + } + + /// Given a vector of table names, drop those tables, in the given order. + pub async fn drop_tables(&self, tables: &Vec<&str>) -> Result<&Self, ValveError> { + let drop_list = self.add_dependencies(tables, true); + for table in &drop_list { + if *table != "message" && *table != "history" { + let sql = format!(r#"DROP VIEW IF EXISTS "{}_text_view""#, table); + self.execute_sql(&sql).await?; + let sql = format!(r#"DROP VIEW IF EXISTS "{}_view""#, table); + self.execute_sql(&sql).await?; + let sql = format!(r#"DROP TABLE IF EXISTS "{}_conflict""#, table); + self.execute_sql(&sql).await?; + } + let sql = format!(r#"DROP TABLE IF EXISTS "{}""#, table); + self.execute_sql(&sql).await?; + } + + Ok(self) + } + + /// Truncate all configured tables, in reverse dependency order. + pub async fn truncate_all_tables(&self) -> Result<&Self, ValveError> { + self.truncate_tables(&self.get_sorted_table_list(true)) + .await?; + Ok(self) + } + + /// Given a vector of table names, truncate those tables, in the given order. + pub async fn truncate_tables(&self, tables: &Vec<&str>) -> Result<&Self, ValveError> { + self.create_all_tables().await?; + let truncate_list = self.add_dependencies(tables, true); + + // We must use CASCADE in the case of PostgreSQL since we cannot truncate a table, T, that + // depends on another table, T', even in the case where we have previously truncated T'. + // SQLite does not need this. 
However SQLite does require that the tables be truncated in + // deletion order (which means that it must be checking that T' is empty). + let truncate_sql = |table: &str| -> String { + if self.pool.any_kind() == AnyKind::Postgres { + format!(r#"TRUNCATE TABLE "{}" RESTART IDENTITY CASCADE"#, table) + } else { + format!(r#"DELETE FROM "{}""#, table) + } + }; + + for table in &truncate_list { + let sql = truncate_sql(&table); + self.execute_sql(&sql).await?; + if *table != "message" && *table != "history" { + let sql = truncate_sql(&format!("{}_conflict", table)); + self.execute_sql(&sql).await?; + } + } + + Ok(self) + } + + /// Load all configured tables in dependency order. If `validate` is false, just try to insert + /// all rows, irrespective of whether they are valid or not or will possibly trigger a db error. + pub async fn load_all_tables(&self, validate: bool) -> Result<&Self, ValveError> { + let table_list = self.get_sorted_table_list(false); + if self.verbose { + info!("Processing {} tables.", table_list.len()); + } + self.load_tables(&table_list, validate).await + } + + /// Given a vector of table names, load those tables in the given order. If `validate` is false, + /// just try to insert all rows, irrespective of whether they are valid or not or will possibly + /// trigger a db error. 
+ pub async fn load_tables( + &self, + table_list: &Vec<&str>, + validate: bool, + ) -> Result<&Self, ValveError> { + let list_for_truncation = self.sort_tables(table_list, true)?; + self.truncate_tables( + &list_for_truncation + .iter() + .map(|i| i.as_str()) + .collect::>(), + ) + .await?; + + let num_tables = table_list.len(); + let mut total_errors = 0; + let mut total_warnings = 0; + let mut total_infos = 0; + let mut table_num = 1; + for table_name in table_list { + if *table_name == "message" || *table_name == "history" { + continue; + } + let table_name = table_name.to_string(); + let path = String::from( + self.config + .get("table") + .and_then(|t| t.as_object()) + .and_then(|o| o.get(&table_name)) + .and_then(|n| n.get("path")) + .and_then(|p| p.as_str()) + .unwrap(), + ); + let mut rdr = { + match File::open(path.clone()) { + Err(e) => { + warn!("Unable to open '{}': {}", path.clone(), e); + continue; + } + Ok(table_file) => ReaderBuilder::new() + .has_headers(false) + .delimiter(b'\t') + .from_reader(table_file), + } + }; + if self.verbose { + info!("Loading table {}/{}: {}", table_num, num_tables, table_name); + } + table_num += 1; + + // Extract the headers, which we will need later: + let mut records = rdr.records(); + let headers; + if let Some(result) = records.next() { + headers = result.unwrap(); + } else { + panic!("'{}' is empty", path); + } + + for header in headers.iter() { + if header.trim().is_empty() { + panic!( + "One or more of the header fields is empty for table '{}'", + table_name + ); + } + } + + // HashMap used to report info about the number of error/warning/info messages for this + // table when the verbose flag is set to true: + let mut messages_stats = HashMap::new(); + messages_stats.insert("error".to_string(), 0); + messages_stats.insert("warning".to_string(), 0); + messages_stats.insert("info".to_string(), 0); + + // Split the data into chunks of size CHUNK_SIZE before passing them to the validation + // logic: + let chunks 
= records.chunks(CHUNK_SIZE); + insert_chunks( + &self.config, + &self.pool, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &table_name, + &chunks, + &headers, + &mut messages_stats, + self.verbose, + validate, + ) + .await?; + + if validate { + // We need to wait until all of the rows for a table have been loaded before + // validating the "foreign" constraints on a table's trees, since this checks if the + // values of one column (the tree's parent) are all contained in another column (the + // tree's child). We also need to wait before validating a table's "under" + // constraints. Although the tree associated with such a constraint need not be + // defined on the same table, it can be. + let mut recs_to_update = + validate_tree_foreign_keys(&self.config, &self.pool, None, &table_name, None) + .await?; + recs_to_update.append( + &mut validate_under(&self.config, &self.pool, None, &table_name, None).await?, + ); + + for record in recs_to_update { + let row_number = record.get("row_number").unwrap(); + let column_name = record.get("column").and_then(|s| s.as_str()).unwrap(); + let value = record.get("value").and_then(|s| s.as_str()).unwrap(); + let level = record.get("level").and_then(|s| s.as_str()).unwrap(); + let rule = record.get("rule").and_then(|s| s.as_str()).unwrap(); + let message = record.get("message").and_then(|s| s.as_str()).unwrap(); + + let sql = local_sql_syntax( + &self.pool, + &format!( + r#"INSERT INTO "message" + ("table", "row", "column", "value", "level", "rule", "message") + VALUES ({}, {}, {}, {}, {}, {}, {})"#, + SQL_PARAM, + row_number, + SQL_PARAM, + SQL_PARAM, + SQL_PARAM, + SQL_PARAM, + SQL_PARAM + ), + ); + let mut query = sqlx_query(&sql); + query = query.bind(&table_name); + query = query.bind(&column_name); + query = query.bind(&value); + query = query.bind(&level); + query = query.bind(&rule); + query = query.bind(&message); + query.execute(&self.pool).await?; + + if self.verbose { + // Add the generated 
message to messages_stats: + let messages = vec![json!({ + "message": message, + "level": level, + })]; + add_message_counts(&messages, &mut messages_stats); + } + } + } + + if self.verbose { + // Output a report on the messages generated to stderr: + let errors = messages_stats.get("error").unwrap(); + let warnings = messages_stats.get("warning").unwrap(); + let infos = messages_stats.get("info").unwrap(); + let status_message = format!( + "{} errors, {} warnings, and {} information messages generated for {}", + errors, warnings, infos, table_name + ); + info!("{}", status_message); + total_errors += errors; + total_warnings += warnings; + total_infos += infos; + } + } + + if self.verbose { + info!( + "Loading complete with {} errors, {} warnings, and {} information messages", + total_errors, total_warnings, total_infos + ); + } + Ok(self) + } + + /// Save all configured tables to their configured path's, unless save_dir is specified, + /// in which case save them there instead. + pub fn save_all_tables(&self, save_dir: &Option) -> Result<&Self, ValveError> { + let tables = self.get_sorted_table_list(false); + self.save_tables(&tables, save_dir)?; + Ok(self) + } + + /// Given a vector of table names, save those tables to their configured path's, unless + /// save_dir is specified, in which case save them there instead. 
+ pub fn save_tables( + &self, + tables: &Vec<&str>, + save_dir: &Option, + ) -> Result<&Self, ValveError> { + let table_paths: HashMap = self + .config + .get("table") + .unwrap() + .as_object() + .unwrap() + .iter() + .filter(|(k, v)| { + !["message", "history"].contains(&k.as_str()) + && tables.contains(&k.as_str()) + && v.get("path").is_some() + }) + .map(|(k, v)| { + ( + k.clone(), + v.get("path").unwrap().as_str().unwrap().to_string(), + ) + }) + .collect(); + + info!( + "Saving tables: {} ...", + table_paths + .keys() + .map(|k| k.to_string()) + .collect::>() + .join(", ") + ); + for (table, path) in table_paths.iter() { + let columns: Vec<&str> = self + .config + .get("table") + .and_then(|v| v.as_object()) + .and_then(|o| o.get(table)) + .and_then(|v| v.as_object()) + .and_then(|o| o.get("column_order")) + .and_then(|v| v.as_array()) + .and_then(|v| Some(v.iter().map(|i| i.as_str().unwrap()).collect())) + .unwrap(); + + let path = match save_dir { + Some(s) => format!( + "{}/{}", + s, + Path::new(path) + .file_name() + .and_then(|n| n.to_str()) + .unwrap() + ), + None => path.to_string(), + }; + self.save_table(table, &columns, &path)?; + } + + Ok(self) + } + + /// Save the given table with the given columns at the given path as a TSV file. + pub fn save_table( + &self, + table: &str, + columns: &Vec<&str>, + path: &str, + ) -> Result<&Self, ValveError> { + // TODO: Do some validation on the path. 
+ + let mut quoted_columns = vec!["\"row_number\"".to_string()]; + quoted_columns.append( + &mut columns + .iter() + .map(|v| enquote::enquote('"', v)) + .collect::>(), + ); + let text_view = format!("\"{}_text_view\"", table); + let sql = format!( + r#"SELECT {} from {} ORDER BY "row_number""#, + quoted_columns.join(", "), + text_view + ); + + let mut writer = WriterBuilder::new() + .delimiter(b'\t') + .quote_style(QuoteStyle::Never) + .from_path(path)?; + writer.write_record(columns)?; + let mut stream = sqlx_query(&sql).fetch(&self.pool); + while let Some(row) = block_on(stream.try_next()).unwrap() { + let mut record: Vec<&str> = vec![]; + for column in columns.iter() { + let cell = row.try_get::<&str, &str>(column).ok().unwrap_or_default(); + record.push(cell); + } + writer.write_record(record)?; + } + writer.flush()?; + + Ok(self) + } + + /// Given a table name and a row, return the validated row. + pub async fn validate_row( + &self, + table_name: &str, + row: &ValveRow, + row_number: Option, + ) -> Result { + validate_row_tx( + &self.config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + None, + table_name, + row, + row_number, + None, + ) + .await + } + + /// Given a table name and a row as JSON, add the row to the table in the database, and return + /// the validated row, including its new row_number. 
+ pub async fn insert_row( + &self, + table_name: &str, + row: &ValveRow, + ) -> Result<(u32, ValveRow), ValveError> { + let mut tx = self.pool.begin().await?; + + let row = validate_row_tx( + &self.config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + Some(&mut tx), + table_name, + row, + None, + None, + ) + .await?; + + let rn = insert_new_row_tx( + &self.config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + &mut tx, + table_name, + &row, + None, + true, + ) + .await?; + + record_row_change(&mut tx, table_name, &rn, None, Some(&row), &self.user).await?; + tx.commit().await?; + Ok((rn, row)) + } + + /// Given a table name, a row number, and a row, update the row in the database, and return the + /// validated row. + pub async fn update_row( + &self, + table_name: &str, + row_number: &u32, + row: &ValveRow, + ) -> Result { + let mut tx = self.pool.begin().await?; + + // Get the old version of the row from the database so that we can later record it to the + // history table: + let old_row = + get_row_from_db(&self.config, &self.pool, &mut tx, table_name, &row_number).await?; + + let row = validate_row_tx( + &self.config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + Some(&mut tx), + table_name, + row, + Some(*row_number), + None, + ) + .await?; + + update_row_tx( + &self.config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + &mut tx, + table_name, + &row, + row_number, + true, + false, + ) + .await?; + + // Record the row update in the history table: + record_row_change( + &mut tx, + table_name, + row_number, + Some(&old_row), + Some(&row), + &self.user, + ) + .await?; + + tx.commit().await?; + Ok(row) + } + + /// Given a table name and a row number, delete that row from the table. 
+ pub async fn delete_row(&self, table_name: &str, row_number: &u32) -> Result<(), ValveError> { + let mut tx = self.pool.begin().await?; + + let row = + get_row_from_db(&self.config, &self.pool, &mut tx, &table_name, row_number).await?; + + record_row_change( + &mut tx, + &table_name, + row_number, + Some(&row), + None, + &self.user, + ) + .await?; + + delete_row_tx( + &self.config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + &mut tx, + table_name, + row_number, + ) + .await?; + + tx.commit().await?; + Ok(()) + } + + /// Return the next change that can be undone, or None if there isn't any. + pub async fn get_record_to_undo(&self) -> Result, ValveError> { + // Look in the history table, get the row with the greatest ID, get the row number, + // from, and to, and determine whether the last operation was a delete, insert, or update. + let is_clause = if self.pool.any_kind() == AnyKind::Sqlite { + "IS" + } else { + "IS NOT DISTINCT FROM" + }; + let sql = format!( + r#"SELECT * FROM "history" + WHERE "undone_by" {} NULL + ORDER BY "history_id" DESC LIMIT 1"#, + is_clause + ); + let query = sqlx_query(&sql); + let result_row = query.fetch_optional(&self.pool).await?; + Ok(result_row) + } + + /// Return the next change that can be redone, or None if there isn't any. + pub async fn get_record_to_redo(&self) -> Result, ValveError> { + // Look in the history table, get the row with the greatest ID, get the row number, + // from, and to, and determine whether the last operation was a delete, insert, or update. 
+ let is_not_clause = if self.pool.any_kind() == AnyKind::Sqlite { + "IS NOT" + } else { + "IS DISTINCT FROM" + }; + let sql = format!( + r#"SELECT * FROM "history" + WHERE "undone_by" {} NULL + ORDER BY "timestamp" DESC LIMIT 1"#, + is_not_clause + ); + let query = sqlx_query(&sql); + let result_row = query.fetch_optional(&self.pool).await?; + Ok(result_row) + } + + /// Undo one change and return the change record or None if there was no change to undo. + pub async fn undo(&self) -> Result, ValveError> { + let last_change = match self.get_record_to_undo().await? { + None => { + warn!("Nothing to undo."); + return Ok(None); + } + Some(r) => r, + }; + let history_id: i32 = last_change.get("history_id"); + let history_id = history_id as u16; + let table: &str = last_change.get("table"); + let row_number: i64 = last_change.get("row"); + let row_number = row_number as u32; + let from = get_json_from_row(&last_change, "from"); + let to = get_json_from_row(&last_change, "to"); + + match (from, to) { + (None, None) => { + return Err(ValveError::DataError( + "Cannot redo unknown operation from None to None".into(), + )) + } + (None, Some(_)) => { + // Undo an insert: + let mut tx = self.pool.begin().await?; + + delete_row_tx( + &self.config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + &mut tx, + table, + &row_number, + ) + .await?; + + switch_undone_state(&self.user, history_id, true, &mut tx, &self.pool).await?; + tx.commit().await?; + Ok(None) + } + (Some(from), None) => { + // Undo a delete: + let mut tx = self.pool.begin().await?; + + insert_new_row_tx( + &self.config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + &mut tx, + table, + &from, + Some(row_number), + false, + ) + .await?; + + switch_undone_state(&self.user, history_id, true, &mut tx, &self.pool).await?; + tx.commit().await?; + Ok(Some(from)) + } + (Some(from), Some(_)) => { + // Undo an an update: + let mut tx = 
self.pool.begin().await?; + + update_row_tx( + &self.config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + &mut tx, + table, + &from, + &row_number, + false, + false, + ) + .await?; + + switch_undone_state(&self.user, history_id, true, &mut tx, &self.pool).await?; + tx.commit().await?; + Ok(Some(from)) + } + } + } + + /// Redo one change and return the change record or None if there was no change to redo. + pub async fn redo(&self) -> Result, ValveError> { + let last_undo = match self.get_record_to_redo().await? { + None => { + warn!("Nothing to redo."); + return Ok(None); + } + Some(last_undo) => { + let undone_by = last_undo.try_get_raw("undone_by")?; + if undone_by.is_null() { + warn!("Nothing to redo."); + return Ok(None); + } + last_undo + } + }; + let history_id: i32 = last_undo.get("history_id"); + let history_id = history_id as u16; + let table: &str = last_undo.get("table"); + let row_number: i64 = last_undo.get("row"); + let row_number = row_number as u32; + let from = get_json_from_row(&last_undo, "from"); + let to = get_json_from_row(&last_undo, "to"); + + match (from, to) { + (None, None) => { + return Err(ValveError::DataError( + "Cannot redo unknown operation from None to None".into(), + )) + } + (None, Some(to)) => { + // Redo an insert: + let mut tx = self.pool.begin().await?; + + insert_new_row_tx( + &self.config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + &mut tx, + table, + &to, + Some(row_number), + false, + ) + .await?; + + switch_undone_state(&self.user, history_id, false, &mut tx, &self.pool).await?; + tx.commit().await?; + Ok(Some(to)) + } + (Some(_), None) => { + // Redo a delete: + let mut tx = self.pool.begin().await?; + + delete_row_tx( + &self.config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + &mut tx, + table, + &row_number, + ) + .await?; + + switch_undone_state(&self.user, history_id, false, &mut tx, 
&self.pool).await?; + tx.commit().await?; + Ok(None) + } + (Some(_), Some(to)) => { + // Redo an an update: + let mut tx = self.pool.begin().await?; + + update_row_tx( + &self.config, + &self.compiled_datatype_conditions, + &self.compiled_rule_conditions, + &self.pool, + &mut tx, + table, + &to, + &row_number, + false, + false, + ) + .await?; + + switch_undone_state(&self.user, history_id, false, &mut tx, &self.pool).await?; + tx.commit().await?; + Ok(Some(to)) + } + } + } + + /// Given a table name, a column name, and (optionally) a string to match, return a JSON array + /// of possible valid values for the given column which contain the matching string as a + /// substring (or all of them if no matching string is given). The JSON array returned is + /// formatted for Typeahead, i.e., it takes the form: + /// `[{"id": id, "label": label, "order": order}, ...]`. + pub async fn get_matching_values( + &self, + table_name: &str, + column_name: &str, + matching_string: Option<&str>, + ) -> Result { + let config = &self.config; + let compiled_datatype_conditions = &self.compiled_datatype_conditions; + let parsed_structure_conditions = &self.parsed_structure_conditions; + let pool = &self.pool; + let dt_name = config + .get("table") + .and_then(|t| t.as_object()) + .and_then(|t| t.get(table_name)) + .and_then(|t| t.as_object()) + .and_then(|t| t.get("column")) + .and_then(|c| c.as_object()) + .and_then(|c| c.get(column_name)) + .and_then(|c| c.as_object()) + .and_then(|c| c.get("datatype")) + .and_then(|d| d.as_str()) + .unwrap(); + + let dt_condition = compiled_datatype_conditions + .get(dt_name) + .and_then(|d| Some(d.parsed.clone())); + + let mut values = vec![]; + match dt_condition { + Some(Expression::Function(name, args)) if name == "in" => { + for arg in args { + if let Expression::Label(arg) = *arg { + // Remove the enclosing quotes from the values being returned: + let label = unquote(&arg).unwrap_or_else(|_| arg); + if let Some(s) = matching_string { + if 
label.contains(s) { + values.push(label); + } + } + } + } + } + _ => { + // If the datatype for the column does not correspond to an `in(...)` function, then + // we check the column's structure constraints. If they include a + // `from(foreign_table.foreign_column)` condition, then the values are taken from + // the foreign column. Otherwise if the structure includes an + // `under(tree_table.tree_column, value)` condition, then get the values from the + // tree column that are under `value`. + let structure = parsed_structure_conditions.get( + config + .get("table") + .and_then(|t| t.as_object()) + .and_then(|t| t.get(table_name)) + .and_then(|t| t.as_object()) + .and_then(|t| t.get("column")) + .and_then(|c| c.as_object()) + .and_then(|c| c.get(column_name)) + .and_then(|c| c.as_object()) + .and_then(|c| c.get("structure")) + .and_then(|d| d.as_str()) + .unwrap_or_else(|| ""), + ); + + let sql_type = + get_sql_type_from_global_config(&config, table_name, &column_name, &pool) + .unwrap(); + + match structure { + Some(ParsedStructure { original, parsed }) => { + let matching_string = { + match matching_string { + None => "%".to_string(), + Some(s) => format!("%{}%", s), + } + }; + + match parsed { + Expression::Function(name, args) if name == "from" => { + let foreign_key = &args[0]; + if let Expression::Field(ftable, fcolumn) = &**foreign_key { + let fcolumn_text = cast_column_sql_to_text(&fcolumn, &sql_type); + let sql = local_sql_syntax( + &pool, + &format!( + r#"SELECT "{}" FROM "{}" WHERE {} LIKE {}"#, + fcolumn, ftable, fcolumn_text, SQL_PARAM + ), + ); + let rows = sqlx_query(&sql) + .bind(&matching_string) + .fetch_all(pool) + .await?; + for row in rows.iter() { + values.push(get_column_value(&row, &fcolumn, &sql_type)); + } + } + } + Expression::Function(name, args) + if name == "under" || name == "tree" => + { + let mut tree_col = "not set"; + let mut under_val = Some("not set".to_string()); + if name == "under" { + if let Expression::Field(_, column) = 
&**&args[0] { + tree_col = column; + } + if let Expression::Label(label) = &**&args[1] { + under_val = Some(label.to_string()); + } + } else { + let tree_key = &args[0]; + if let Expression::Label(label) = &**tree_key { + tree_col = label; + under_val = None; + } + } + + let tree = config + .get("constraints") + .and_then(|c| c.as_object()) + .and_then(|c| c.get("tree")) + .and_then(|t| t.as_object()) + .and_then(|t| t.get(table_name)) + .and_then(|t| t.as_array()) + .and_then(|t| { + t.iter().find(|o| o.get("child").unwrap() == tree_col) + }) + .expect( + format!("No tree: '{}.{}' found", table_name, tree_col) + .as_str(), + ) + .as_object() + .unwrap(); + let child_column = + tree.get("child").and_then(|c| c.as_str()).unwrap(); + + let (tree_sql, mut params) = with_tree_sql( + &config, + tree, + &table_name.to_string(), + &table_name.to_string(), + under_val.as_ref(), + None, + &pool, + ); + let child_column_text = + cast_column_sql_to_text(&child_column, &sql_type); + let sql = local_sql_syntax( + &pool, + &format!( + r#"{} SELECT "{}" FROM "tree" WHERE {} LIKE {}"#, + tree_sql, child_column, child_column_text, SQL_PARAM + ), + ); + params.push(matching_string); + + let mut query = sqlx_query(&sql); + for param in ¶ms { + query = query.bind(param); + } + + let rows = query.fetch_all(pool).await?; + for row in rows.iter() { + values.push(get_column_value(&row, &child_column, &sql_type)); + } + } + _ => panic!("Unrecognised structure: {}", original), + }; + } + None => (), + }; + } + }; + + let mut typeahead_values = vec![]; + for (i, v) in values.iter().enumerate() { + // enumerate() begins at 0 but we need to begin at 1: + let i = i + 1; + typeahead_values.push(json!({ + "id": v, + "label": v, + "order": i, + })); + } + + Ok(json!(typeahead_values)) + } +} From 844d65675cc079671b8f0f3ab63c9aae02c9a1ee Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 5 Feb 2024 11:01:26 -0500 Subject: [PATCH 55/57] better error message when not able to determine the sql 
type --- src/lib.rs | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index e498c14a..1a14ad7b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3000,15 +3000,13 @@ pub fn get_table_constraints( } for row in colvals { - let sql_type = get_sql_type( - datatypes_config, - &row.get("datatype") - .and_then(|d| d.as_str()) - .and_then(|s| Some(s.to_string())) - .unwrap(), - pool, - ) - .unwrap(); + let datatype = row + .get("datatype") + .and_then(|d| d.as_str()) + .and_then(|s| Some(s.to_string())) + .unwrap(); + let sql_type = get_sql_type(datatypes_config, &datatype, pool) + .expect(&format!("Unable to determine SQL type for {}", datatype)); let column_name = row.get("column").and_then(|s| s.as_str()).unwrap(); let structure = row.get("structure").and_then(|s| s.as_str()); if let Some(structure) = structure { From c857bd78cd3dfbf90100d3a321c6cb6632b5e94a Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 5 Feb 2024 14:16:27 -0500 Subject: [PATCH 56/57] default to sql type: TEXT for unknown datatypes --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 1a14ad7b..6e0a8622 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2623,7 +2623,7 @@ pub fn compile_condition( /// the database type, climb the datatype tree (as required), and return the first 'SQL type' found. 
pub fn get_sql_type(dt_config: &SerdeMap, datatype: &String, pool: &AnyPool) -> Option { if !dt_config.contains_key(datatype) { - return None; + return Some("TEXT".to_string()); } if let Some(sql_type) = dt_config.get(datatype).and_then(|d| d.get("SQL type")) { From 341c1cb1bbaa7e70b6aaf13edb375274022cd924 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 5 Feb 2024 15:05:38 -0500 Subject: [PATCH 57/57] redo changes that git failed to merge --- src/api_test.rs | 21 --------------------- test/expected/history.tsv | 28 ++++++++++++++-------------- 2 files changed, 14 insertions(+), 35 deletions(-) diff --git a/src/api_test.rs b/src/api_test.rs index ae4e71d8..e551a1da 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -409,9 +409,6 @@ async fn test_undo_redo(valve: &Valve) -> Result<(), ValveError> { "numeric_foreign_column": {"messages": [], "valid": true, "value": "11"}, }); - // Our initial undo/redo state: - verify_undo_redo(pool, false, false).await?; - // Undo/redo test 1: let (_rn, _r) = valve .insert_row("table10", &row_1.as_object().unwrap()) @@ -423,8 +420,6 @@ async fn test_undo_redo(valve: &Valve) -> Result<(), ValveError> { valve.undo().await?; - verify_undo_redo(pool, false, true).await?; - // Undo/redo test 2: valve .update_row("table10", &8, &row_2.as_object().unwrap()) @@ -436,8 +431,6 @@ async fn test_undo_redo(valve: &Valve) -> Result<(), ValveError> { valve.undo().await?; - verify_undo_redo(pool, false, true).await?; - // Undo/redo test 3: valve.delete_row("table10", &8).await?; @@ -447,8 +440,6 @@ async fn test_undo_redo(valve: &Valve) -> Result<(), ValveError> { valve.undo().await?; - verify_undo_redo(pool, false, true).await?; - // Undo/redo test 4: let (rn, _row) = valve .insert_row("table10", &row_1.as_object().unwrap()) @@ -458,35 +449,23 @@ async fn test_undo_redo(valve: &Valve) -> Result<(), ValveError> { .update_row("table10", &rn, &row_2.as_object().unwrap()) .await?; - verify_undo_redo(pool, true, false).await?; - // Undo 
update: valve.undo().await?; - verify_undo_redo(pool, true, true).await?; - // Redo update: valve.redo().await?; valve.delete_row("table10", &rn).await?; - verify_undo_redo(pool, true, false).await?; - // Undo delete: valve.undo().await?; - verify_undo_redo(pool, true, true).await?; - // Undo update: valve.undo().await?; - verify_undo_redo(pool, true, true).await?; - // Undo insert: valve.undo().await?; - verify_undo_redo(pool, false, true).await?; - eprintln!("done."); Ok(()) } diff --git a/test/expected/history.tsv b/test/expected/history.tsv index 86afe795..23f15d77 100644 --- a/test/expected/history.tsv +++ b/test/expected/history.tsv @@ -1,15 +1,15 @@ history_id table row from to summary user undone_by -1 table10 9 {"foreign_column":{"messages":[],"valid":true,"value":"j"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"10"},"other_foreign_column":{"messages":[],"valid":true,"value":"j"}} VALVE VALVE -2 table10 8 {"foreign_column":{"messages":[],"valid":true,"value":"h"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"8"},"other_foreign_column":{"messages":[],"valid":true,"value":"h"}} {"foreign_column":{"messages":[],"valid":true,"value":"k"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"11"},"other_foreign_column":{"messages":[],"valid":true,"value":"k"}} [{"column":"foreign_column","level":"update","message":"Value changed from 'h' to 'k'","old_value":"h","value":"k"},{"column":"numeric_foreign_column","level":"update","message":"Value changed from 8 to 11","old_value":"8","value":"11"},{"column":"other_foreign_column","level":"update","message":"Value changed from 'h' to 'k'","old_value":"h","value":"k"}] VALVE VALVE -3 table10 8 {"foreign_column":{"messages":[],"valid":true,"value":"h"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"8"},"other_foreign_column":{"messages":[],"valid":true,"value":"h"}} VALVE VALVE -4 table10 10 
{"foreign_column":{"messages":[],"valid":true,"value":"j"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"10"},"other_foreign_column":{"messages":[],"valid":true,"value":"j"}} VALVE VALVE -5 table10 10 {"foreign_column":{"messages":[],"valid":true,"value":"j"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"10"},"other_foreign_column":{"messages":[],"valid":true,"value":"j"}} {"foreign_column":{"messages":[],"valid":true,"value":"k"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"11"},"other_foreign_column":{"messages":[],"valid":true,"value":"k"}} [{"column":"foreign_column","level":"update","message":"Value changed from 'j' to 'k'","old_value":"j","value":"k"},{"column":"numeric_foreign_column","level":"update","message":"Value changed from 10 to 11","old_value":"10","value":"11"},{"column":"other_foreign_column","level":"update","message":"Value changed from 'j' to 'k'","old_value":"j","value":"k"}] VALVE VALVE -6 table10 10 {"foreign_column":{"messages":[],"valid":true,"value":"k"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"11"},"other_foreign_column":{"messages":[],"valid":true,"value":"k"}} VALVE VALVE -7 table2 1 {"bar":{"messages":[],"valid":true,"value":""},"child":{"messages":[],"valid":true,"value":"a"},"foo":{"messages":[{"column":"foo","level":"error","message":"bar cannot be null if foo is not null","rule":"rule:foo-2","value":"5"},{"column":"foo","level":"error","message":"bar must be 'y' or 'z' if foo = 5","rule":"rule:foo-4","value":"5"}],"valid":false,"value":"5"},"parent":{"messages":[],"valid":true,"value":"b"},"xyzzy":{"messages":[],"valid":true,"value":"d"}} {"bar":{"messages":[{"level":"error","message":"An unrelated error","rule":"custom:unrelated"}],"valid":false,"value":"B"},"child":{"messages":[{"level":"error","message":"Values of child must be 
unique","rule":"tree:child-unique"}],"valid":false,"value":"b"},"foo":{"messages":[],"valid":true,"value":"1"},"parent":{"messages":[],"valid":true,"value":"f"},"xyzzy":{"messages":[{"level":"error","message":"Value 'w' of column xyzzy is not in table2.child","rule":"under:not-in-tree"}],"valid":false,"value":"w"}} [{"column":"bar","level":"update","message":"Value changed from '' to 'B'","old_value":"","value":"B"},{"column":"child","level":"update","message":"Value changed from 'a' to 'b'","old_value":"a","value":"b"},{"column":"foo","level":"update","message":"Value changed from 5 to 1","old_value":"5","value":"1"},{"column":"parent","level":"update","message":"Value changed from 'b' to 'f'","old_value":"b","value":"f"},{"column":"xyzzy","level":"update","message":"Value changed from 'd' to 'w'","old_value":"d","value":"w"}] VALVE -8 table3 11 {"id":{"messages":[],"valid":true,"value":"BFO:0000027"},"label":{"messages":[],"valid":true,"value":"bazaar"},"parent":{"messages":[{"level":"error","message":"An unrelated error","rule":"custom:unrelated"},{"level":"error","message":"Value 'barrie' of column parent is not in column label","rule":"tree:foreign"}],"valid":false,"value":"barrie"},"source":{"messages":[{"level":"error","message":"Value 'BFOBBER' of column source is not in table1.prefix","rule":"key:foreign"}],"valid":false,"value":"BFOBBER"},"type":{"messages":[],"valid":true,"value":"owl:Class"}} VALVE -9 table6 1 {"bar":{"messages":[],"valid":true,"value":""},"child":{"messages":[],"valid":true,"value":"1"},"foo":{"messages":[{"column":"foo","level":"error","message":"bar cannot be null if foo is not null","rule":"rule:foo-2","value":"e"},{"column":"foo","level":"error","message":"bar must be 25 or 26 if foo = 'e'","rule":"rule:foo-4","value":"e"}],"valid":false,"value":"e"},"parent":{"messages":[],"valid":true,"value":"2"},"xyzzy":{"messages":[],"valid":true,"value":"4"}} {"bar":{"messages":[{"level":"error","message":"An unrelated 
error","rule":"custom:unrelated"}],"valid":false,"value":"2"},"child":{"messages":[{"level":"error","message":"Values of child must be unique","rule":"tree:child-unique"}],"valid":false,"value":"2"},"foo":{"messages":[],"valid":true,"value":"a"},"parent":{"messages":[],"valid":true,"value":"6"},"xyzzy":{"messages":[{"level":"error","message":"Value '23' of column xyzzy is not in table6.child","rule":"under:not-in-tree"}],"valid":false,"value":"23"}} [{"column":"bar","level":"update","message":"Value changed from '' to 2","old_value":"","value":"2"},{"column":"child","level":"update","message":"Value changed from 1 to 2","old_value":"1","value":"2"},{"column":"foo","level":"update","message":"Value changed from 'e' to 'a'","old_value":"e","value":"a"},{"column":"parent","level":"update","message":"Value changed from 2 to 6","old_value":"2","value":"6"},{"column":"xyzzy","level":"update","message":"Value changed from 4 to 23","old_value":"4","value":"23"}] VALVE -10 table6 10 {"bar":{"messages":[{"level":"error","message":"An unrelated error","rule":"custom:unrelated"}],"valid":false,"value":"2"},"child":{"messages":[{"level":"error","message":"Values of child must be unique","rule":"tree:child-unique"}],"valid":false,"value":"2"},"foo":{"messages":[],"valid":true,"value":"a"},"parent":{"messages":[],"valid":true,"value":"6"},"xyzzy":{"messages":[{"level":"error","message":"Value '23' of column xyzzy is not in table6.child","rule":"under:not-in-tree"}],"valid":false,"value":"23"}} VALVE -11 table10 1 {"foreign_column":{"messages":[],"valid":true,"value":"a"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"1"},"other_foreign_column":{"messages":[],"valid":true,"value":"a"}} {"foreign_column":{"messages":[],"valid":true,"value":"w"},"numeric_foreign_column":{"messages":[{"level":"error","message":"numeric_foreign_column should be a positive or negative integer","rule":"datatype:integer"},{"level":"error","message":"numeric_foreign_column should be a line 
of text that does not begin or end with whitespace","rule":"datatype:trimmed_line"}],"valid":false,"value":""},"other_foreign_column":{"messages":[],"valid":true,"value":"z"}} [{"column":"foreign_column","level":"update","message":"Value changed from 'a' to 'w'","old_value":"a","value":"w"},{"column":"numeric_foreign_column","level":"update","message":"Value changed from 1 to ''","old_value":"1","value":""},{"column":"other_foreign_column","level":"update","message":"Value changed from 'a' to 'z'","old_value":"a","value":"z"}] VALVE -12 table11 2 {"bar":{"messages":[],"valid":true,"value":"f"},"child":{"messages":[],"valid":true,"value":"b"},"foo":{"messages":[],"valid":true,"value":"e"},"parent":{"messages":[],"valid":true,"value":"c"},"xyzzy":{"messages":[],"valid":true,"value":"d"}} {"bar":{"messages":[],"valid":true,"value":"f"},"child":{"messages":[],"valid":true,"value":"b"},"foo":{"messages":[{"level":"error","message":"Values of foo must be unique","rule":"key:primary"}],"valid":false,"value":"d"},"parent":{"messages":[],"valid":true,"value":"c"},"xyzzy":{"messages":[],"valid":true,"value":"d"}} [{"column":"foo","level":"update","message":"Value changed from 'e' to 'd'","old_value":"e","value":"d"}] VALVE -13 table11 4 {"bar":{"messages":[],"valid":true,"value":"z"},"child":{"messages":[],"valid":true,"value":"f"},"foo":{"messages":[],"valid":true,"value":"e"},"parent":{"messages":[],"valid":true,"value":"g"},"xyzzy":{"messages":[],"valid":true,"value":"x"}} VALVE -14 table10 11 {"foreign_column":{"messages":[],"valid":true,"value":"i"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"9"},"other_foreign_column":{"messages":[],"valid":true,"value":"i"}} VALVE +1 table2 1 {"bar":{"messages":[],"valid":true,"value":""},"child":{"messages":[],"valid":true,"value":"a"},"foo":{"messages":[{"column":"foo","level":"error","message":"bar cannot be null if foo is not null","rule":"rule:foo-2","value":"5"},{"column":"foo","level":"error","message":"bar 
must be 'y' or 'z' if foo = 5","rule":"rule:foo-4","value":"5"}],"valid":false,"value":"5"},"parent":{"messages":[],"valid":true,"value":"b"},"xyzzy":{"messages":[],"valid":true,"value":"d"}} {"bar":{"messages":[{"level":"error","message":"An unrelated error","rule":"custom:unrelated"}],"valid":false,"value":"B"},"child":{"messages":[{"level":"error","message":"Values of child must be unique","rule":"tree:child-unique"}],"valid":false,"value":"b"},"foo":{"messages":[],"valid":true,"value":"1"},"parent":{"messages":[],"valid":true,"value":"f"},"xyzzy":{"messages":[{"level":"error","message":"Value 'w' of column xyzzy is not in table2.child","rule":"under:not-in-tree"}],"valid":false,"value":"w"}} [{"column":"bar","level":"update","message":"Value changed from '' to 'B'","old_value":"","value":"B"},{"column":"child","level":"update","message":"Value changed from 'a' to 'b'","old_value":"a","value":"b"},{"column":"foo","level":"update","message":"Value changed from 5 to 1","old_value":"5","value":"1"},{"column":"parent","level":"update","message":"Value changed from 'b' to 'f'","old_value":"b","value":"f"},{"column":"xyzzy","level":"update","message":"Value changed from 'd' to 'w'","old_value":"d","value":"w"}] VALVE +2 table3 11 {"id":{"messages":[],"valid":true,"value":"BFO:0000027"},"label":{"messages":[],"valid":true,"value":"bazaar"},"parent":{"messages":[{"level":"error","message":"An unrelated error","rule":"custom:unrelated"},{"level":"error","message":"Value 'barrie' of column parent is not in column label","rule":"tree:foreign"}],"valid":false,"value":"barrie"},"source":{"messages":[{"level":"error","message":"Value 'BFOBBER' of column source is not in table1.prefix","rule":"key:foreign"}],"valid":false,"value":"BFOBBER"},"type":{"messages":[],"valid":true,"value":"owl:Class"}} VALVE +3 table6 1 {"bar":{"messages":[],"valid":true,"value":""},"child":{"messages":[],"valid":true,"value":"1"},"foo":{"messages":[{"column":"foo","level":"error","message":"bar 
cannot be null if foo is not null","rule":"rule:foo-2","value":"e"},{"column":"foo","level":"error","message":"bar must be 25 or 26 if foo = 'e'","rule":"rule:foo-4","value":"e"}],"valid":false,"value":"e"},"parent":{"messages":[],"valid":true,"value":"2"},"xyzzy":{"messages":[],"valid":true,"value":"4"}} {"bar":{"messages":[{"level":"error","message":"An unrelated error","rule":"custom:unrelated"}],"valid":false,"value":"2"},"child":{"messages":[{"level":"error","message":"Values of child must be unique","rule":"tree:child-unique"}],"valid":false,"value":"2"},"foo":{"messages":[],"valid":true,"value":"a"},"parent":{"messages":[],"valid":true,"value":"6"},"xyzzy":{"messages":[{"level":"error","message":"Value '23' of column xyzzy is not in table6.child","rule":"under:not-in-tree"}],"valid":false,"value":"23"}} [{"column":"bar","level":"update","message":"Value changed from '' to 2","old_value":"","value":"2"},{"column":"child","level":"update","message":"Value changed from 1 to 2","old_value":"1","value":"2"},{"column":"foo","level":"update","message":"Value changed from 'e' to 'a'","old_value":"e","value":"a"},{"column":"parent","level":"update","message":"Value changed from 2 to 6","old_value":"2","value":"6"},{"column":"xyzzy","level":"update","message":"Value changed from 4 to 23","old_value":"4","value":"23"}] VALVE +4 table6 10 {"bar":{"messages":[{"level":"error","message":"An unrelated error","rule":"custom:unrelated"}],"valid":false,"value":"2"},"child":{"messages":[{"level":"error","message":"Values of child must be unique","rule":"tree:child-unique"}],"valid":false,"value":"2"},"foo":{"messages":[],"valid":true,"value":"a"},"parent":{"messages":[],"valid":true,"value":"6"},"xyzzy":{"messages":[{"level":"error","message":"Value '23' of column xyzzy is not in table6.child","rule":"under:not-in-tree"}],"valid":false,"value":"23"}} VALVE +5 table10 1 
{"foreign_column":{"messages":[],"valid":true,"value":"a"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"1"},"other_foreign_column":{"messages":[],"valid":true,"value":"a"}} {"foreign_column":{"messages":[],"valid":true,"value":"w"},"numeric_foreign_column":{"messages":[{"level":"error","message":"numeric_foreign_column should be a positive or negative integer","rule":"datatype:integer"},{"level":"error","message":"numeric_foreign_column should be a line of text that does not begin or end with whitespace","rule":"datatype:trimmed_line"}],"valid":false,"value":""},"other_foreign_column":{"messages":[],"valid":true,"value":"z"}} [{"column":"foreign_column","level":"update","message":"Value changed from 'a' to 'w'","old_value":"a","value":"w"},{"column":"numeric_foreign_column","level":"update","message":"Value changed from 1 to ''","old_value":"1","value":""},{"column":"other_foreign_column","level":"update","message":"Value changed from 'a' to 'z'","old_value":"a","value":"z"}] VALVE +6 table11 2 {"bar":{"messages":[],"valid":true,"value":"f"},"child":{"messages":[],"valid":true,"value":"b"},"foo":{"messages":[],"valid":true,"value":"e"},"parent":{"messages":[],"valid":true,"value":"c"},"xyzzy":{"messages":[],"valid":true,"value":"d"}} {"bar":{"messages":[],"valid":true,"value":"f"},"child":{"messages":[],"valid":true,"value":"b"},"foo":{"messages":[{"level":"error","message":"Values of foo must be unique","rule":"key:primary"}],"valid":false,"value":"d"},"parent":{"messages":[],"valid":true,"value":"c"},"xyzzy":{"messages":[],"valid":true,"value":"d"}} [{"column":"foo","level":"update","message":"Value changed from 'e' to 'd'","old_value":"e","value":"d"}] VALVE +7 table11 4 {"bar":{"messages":[],"valid":true,"value":"z"},"child":{"messages":[],"valid":true,"value":"f"},"foo":{"messages":[],"valid":true,"value":"e"},"parent":{"messages":[],"valid":true,"value":"g"},"xyzzy":{"messages":[],"valid":true,"value":"x"}} VALVE +8 table10 9 
{"foreign_column":{"messages":[],"valid":true,"value":"i"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"9"},"other_foreign_column":{"messages":[],"valid":true,"value":"i"}} VALVE +9 table10 10 {"foreign_column":{"messages":[],"valid":true,"value":"j"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"10"},"other_foreign_column":{"messages":[],"valid":true,"value":"j"}} VALVE VALVE +10 table10 8 {"foreign_column":{"messages":[],"valid":true,"value":"h"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"8"},"other_foreign_column":{"messages":[],"valid":true,"value":"h"}} {"foreign_column":{"messages":[],"valid":true,"value":"k"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"11"},"other_foreign_column":{"messages":[],"valid":true,"value":"k"}} [{"column":"foreign_column","level":"update","message":"Value changed from 'h' to 'k'","old_value":"h","value":"k"},{"column":"numeric_foreign_column","level":"update","message":"Value changed from 8 to 11","old_value":"8","value":"11"},{"column":"other_foreign_column","level":"update","message":"Value changed from 'h' to 'k'","old_value":"h","value":"k"}] VALVE VALVE +11 table10 8 {"foreign_column":{"messages":[],"valid":true,"value":"h"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"8"},"other_foreign_column":{"messages":[],"valid":true,"value":"h"}} VALVE VALVE +12 table10 11 {"foreign_column":{"messages":[],"valid":true,"value":"j"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"10"},"other_foreign_column":{"messages":[],"valid":true,"value":"j"}} VALVE VALVE +13 table10 11 {"foreign_column":{"messages":[],"valid":true,"value":"j"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"10"},"other_foreign_column":{"messages":[],"valid":true,"value":"j"}} 
{"foreign_column":{"messages":[],"valid":true,"value":"k"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"11"},"other_foreign_column":{"messages":[],"valid":true,"value":"k"}} [{"column":"foreign_column","level":"update","message":"Value changed from 'j' to 'k'","old_value":"j","value":"k"},{"column":"numeric_foreign_column","level":"update","message":"Value changed from 10 to 11","old_value":"10","value":"11"},{"column":"other_foreign_column","level":"update","message":"Value changed from 'j' to 'k'","old_value":"j","value":"k"}] VALVE VALVE +14 table10 11 {"foreign_column":{"messages":[],"valid":true,"value":"k"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"11"},"other_foreign_column":{"messages":[],"valid":true,"value":"k"}} VALVE VALVE