From 757a3a8778d249779ef9ce31b5d240ed39e0119e Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Wed, 21 Jun 2023 11:54:47 -0400 Subject: [PATCH 01/31] call validate_row() implicitly when inserting or updating an individual row. Fix SQL-syntax-related bugs --- src/api_test.rs | 76 +++++++++++++++++++++-- src/lib.rs | 39 +++++++++++- src/validate.rs | 43 +++++++++---- test/expected/messages.tsv | 26 ++++---- test/expected/messages_a1.tsv | 26 ++++---- test/expected/messages_after_api_test.tsv | 36 +++++------ 6 files changed, 184 insertions(+), 62 deletions(-) diff --git a/src/api_test.rs b/src/api_test.rs index 1622639e..1ef2de14 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -116,7 +116,8 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro }, }); - let result_row = validate_row( + // We test that validate_row() is idempotent by running it multiple times on the same row: + let result_row_1 = validate_row( &config, &compiled_datatype_conditions, &compiled_rule_conditions, @@ -127,8 +128,46 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro Some(1), ) .await?; - update_row(&config, &pool, "table2", &result_row, 1).await?; + let result_row_2 = validate_row( + &config, + &compiled_datatype_conditions, + &compiled_rule_conditions, + &pool, + "table2", + &result_row_1, + true, + Some(1), + ) + .await?; + assert_eq!(result_row_1, result_row_2); + + let result_row = validate_row( + &config, + &compiled_datatype_conditions, + &compiled_rule_conditions, + &pool, + "table2", + &result_row_2, + true, + Some(1), + ) + .await?; + assert_eq!(result_row, result_row_2); + + // Now update the database with the validated row: + update_row( + &config, + &compiled_datatype_conditions, + &compiled_rule_conditions, + &pool, + "table2", + &result_row, + 1, + ) + .await?; + + // Validate and insert a new row: let row = json!({ "id": {"messages": [], "valid": true, "value": "BFO:0000027"}, "label": {"messages": [], 
"valid": true, "value": "bazaar"}, @@ -154,8 +193,17 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro None, ) .await?; - let _new_row_num = insert_new_row(&config, &pool, "table3", &result_row).await?; + let _new_row_num = insert_new_row( + &config, + &compiled_datatype_conditions, + &compiled_rule_conditions, + &pool, + "table3", + &result_row, + ) + .await?; + // Validate and update: let row = json!({ "child": {"messages": [], "valid": true, "value": 2}, "parent": {"messages": [], "valid": true, "value": 6}, @@ -181,8 +229,18 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro Some(1), ) .await?; - update_row(&config, &pool, "table6", &result_row, 1).await?; + update_row( + &config, + &compiled_datatype_conditions, + &compiled_rule_conditions, + &pool, + "table6", + &result_row, + 1, + ) + .await?; + // Validate and insert let row = json!({ "child": {"messages": [], "valid": true, "value": 2}, "parent": {"messages": [], "valid": true, "value": 6}, @@ -207,7 +265,15 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro None, ) .await?; - let _new_row_num = insert_new_row(&config, &pool, "table6", &result_row).await?; + let _new_row_num = insert_new_row( + &config, + &compiled_datatype_conditions, + &compiled_rule_conditions, + &pool, + "table6", + &result_row, + ) + .await?; Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index c28818e1..452fd5e1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,7 +24,7 @@ pub mod validate; lalrpop_mod!(pub valve_grammar); use crate::validate::{ - validate_rows_constraints, validate_rows_intra, validate_rows_trees, + validate_row, validate_rows_constraints, validate_rows_intra, validate_rows_trees, validate_tree_foreign_keys, validate_under, ResultRow, }; use crate::{ast::Expression, valve_grammar::StartParser}; @@ -1058,10 +1058,28 @@ pub async fn valve( /// row number to the row and insert it to the database, then return the new row 
number. pub async fn insert_new_row( global_config: &SerdeMap, + compiled_datatype_conditions: &HashMap, + compiled_rule_conditions: &HashMap>>, pool: &AnyPool, table_name: &str, row: &SerdeMap, ) -> Result { + // First, send the row through the row validator to determine if any fields are problematic and + // to mark them with appropriate messages: + let row = validate_row( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + table_name, + row, + false, + None, + ) + .await?; + + // Now prepare the row and messages for insertion to the database. + // The new row number to insert is the current highest row number + 1. let sql = format!( r#"SELECT MAX("row_number") AS "row_number" FROM "{}_view""#, @@ -1143,6 +1161,7 @@ pub async fn insert_new_row( let level = m.get("level").and_then(|c| c.as_str()).unwrap(); let rule = m.get("rule").and_then(|c| c.as_str()).unwrap(); let message = m.get("message").and_then(|c| c.as_str()).unwrap(); + let message = message.replace("'", "''"); let message_sql = format!( r#"INSERT INTO "message" ("table", "row", "column", "value", "level", "rule", "message") @@ -1160,11 +1179,28 @@ pub async fn insert_new_row( /// update, update the corresponding row in the database with new values as specified by `row`. 
pub async fn update_row( global_config: &SerdeMap, + compiled_datatype_conditions: &HashMap, + compiled_rule_conditions: &HashMap>>, pool: &AnyPool, table_name: &str, row: &SerdeMap, row_number: u32, ) -> Result<(), sqlx::Error> { + // First, send the row through the row validator to determine if any fields are problematic and + // to mark them with appropriate messages: + let row = validate_row( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + table_name, + row, + true, + Some(row_number), + ) + .await?; + + // Now prepare the row and messages for the database update: let mut assignments = vec![]; let mut params = vec![]; let mut messages = vec![]; @@ -1305,6 +1341,7 @@ pub async fn update_row( let level = m.get("level").and_then(|c| c.as_str()).unwrap(); let rule = m.get("rule").and_then(|c| c.as_str()).unwrap(); let message = m.get("message").and_then(|c| c.as_str()).unwrap(); + let message = message.replace("'", "''"); let insert_sql = format!( r#"INSERT INTO "message" ("table", "row", "column", "value", "level", "rule", "message") diff --git a/src/validate.rs b/src/validate.rs index 56e9d274..f3ea5bb0 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -28,7 +28,8 @@ pub struct ResultRow { /// Given a config map, maps of compiled datatype and rule conditions, a database connection /// pool, a table name, a row to validate, and a row number in case the row already exists, -/// perform both intra- and inter-row validation and return the validated row. +/// perform both intra- and inter-row validation and return the validated row. Note that this +/// function is idempotent. 
pub async fn validate_row( config: &SerdeMap, compiled_datatype_conditions: &HashMap, @@ -169,8 +170,7 @@ pub async fn validate_row( } } - let result_row = result_row_to_config_map(&result_row); - + let result_row = remove_duplicate_messages(&result_row_to_config_map(&result_row))?; Ok(result_row) } @@ -573,7 +573,7 @@ pub async fn validate_under( "value": column_val, "level": "error", "rule": "under:not-in-tree", - "message": format!("Value {} of column {} is not in {}.{}", + "message": format!("Value '{}' of column {} is not in {}.{}", column_val, column, tree_table, tree_child).as_str(), })); } else if is_under == 0 { @@ -746,7 +746,7 @@ pub async fn validate_tree_foreign_keys( "value": parent_val, "level": "error", "rule": "tree:foreign", - "message": format!("Value {} of column {} is not in column {}", + "message": format!("Value '{}' of column {} is not in column {}", parent_val, parent_col, child_col).as_str(), })); } @@ -956,6 +956,28 @@ pub fn validate_rows_intra( result_rows } +/// Given a row represented as a SerdeMap, remove any duplicate messages from the row's cells, so +/// that no cell has messages with the same level, rule, and message text. +fn remove_duplicate_messages(row: &SerdeMap) -> Result { + let mut deduped_row = SerdeMap::new(); + for (column_name, cell) in row.iter() { + let mut messages = cell + .get("messages") + .and_then(|m| m.as_array()) + .unwrap_or(&vec![]) + .clone(); + messages.dedup_by(|a, b| { + a.get("level").unwrap() == b.get("level").unwrap() + && a.get("rule").unwrap() == b.get("rule").unwrap() + && a.get("message").unwrap() == b.get("message").unwrap() + }); + let mut cell = cell.as_object().unwrap().clone(); + cell.insert("messages".to_string(), json!(messages)); + deduped_row.insert(column_name.to_string(), json!(cell)); + } + Ok(deduped_row) +} + /// Given a result row, convert it to a SerdeMap and return it. /// Note that if the incoming result row has an associated row_number, this is ignored. 
fn result_row_to_config_map(incoming: &ResultRow) -> SerdeMap { @@ -1213,7 +1235,7 @@ fn validate_cell_datatype( let dt_condition = &compiled_datatype_conditions.get(dt_name).unwrap().compiled; if !dt_condition(&cell.value) { let message = if dt_description == "" { - format!("{} should be of datatype `{}'", column_name, dt_name) + format!("{} should be of datatype {}", column_name, dt_name) } else { format!("{} should be {}", column_name, dt_description) }; @@ -1227,10 +1249,7 @@ fn validate_cell_datatype( } let message = if primary_dt_description == "" { - format!( - "{} should be of datatype `{}'", - column_name, primary_dt_name - ) + format!("{} should be of datatype {}", column_name, primary_dt_name) } else { format!("{} should be {}", column_name, primary_dt_description) }; @@ -1402,7 +1421,7 @@ async fn validate_cell_foreign_constraints( m.insert( "message".to_string(), SerdeValue::String(format!( - "Value {} of column {} is not in {}.{}", + "Value '{}' of column {} is not in {}.{}", cell.value, column_name, ftable, fcolumn )), ) @@ -1412,7 +1431,7 @@ async fn validate_cell_foreign_constraints( m.insert( "message".to_string(), SerdeValue::String(format!( - "Value {} of column {} exists only in {}_conflict.{}", + "Value '{}' of column {} exists only in {}_conflict.{}", cell.value, column_name, ftable, fcolumn )), ) diff --git a/test/expected/messages.tsv b/test/expected/messages.tsv index 79addd17..13f7d413 100644 --- a/test/expected/messages.tsv +++ b/test/expected/messages.tsv @@ -12,18 +12,18 @@ table2 3 xyzzy error under:not-under Value 'f' of column xyzzy is not under 'd' table2 4 foo error rule:foo-4 bar must be 'y' or 'z' if foo = 5 5 table2 4 xyzzy error under:not-under Value 'g' of column xyzzy is not under 'd' g table2 5 xyzzy error under:not-under Value 'h' of column xyzzy is not under 'd' h -table2 7 xyzzy error under:not-in-tree Value z of column xyzzy is not in table2.child z -table2 9 child error key:foreign Value i of column child exists only 
in table4_conflict.other_foreign_column i -table3 1 label error datatype:label label should be of datatype `label' mobecular entity +table2 7 xyzzy error under:not-in-tree Value 'z' of column xyzzy is not in table2.child z +table2 9 child error key:foreign Value 'i' of column child exists only in table4_conflict.other_foreign_column i +table3 1 label error datatype:label label should be of datatype label mobecular entity table3 1 label error datatype:trimmed_line label should be a line of text that does not begin or end with whitespace mobecular entity -table3 1 source error key:foreign Value MOB of column source is not in table1.prefix MOB -table3 2 source error key:foreign Value ZOB of column source is not in table1.prefix ZOB -table3 3 source error key:foreign Value JOB of column source is not in table1.prefix JOB +table3 1 source error key:foreign Value 'MOB' of column source is not in table1.prefix MOB +table3 2 source error key:foreign Value 'ZOB' of column source is not in table1.prefix ZOB +table3 3 source error key:foreign Value 'JOB' of column source is not in table1.prefix JOB table3 4 parent error tree:cycle Cyclic dependency: (label: bar, parent: car), (label: car, parent: foo), (label: foo, parent: bar) for tree(parent) of label bar -table3 4 source error key:foreign Value SOB of column source is not in table1.prefix SOB -table3 5 parent error tree:foreign Value jafar of column parent is not in column label jafar -table3 5 source error key:foreign Value YOB of column source is not in table1.prefix YOB -table3 6 parent error tree:foreign Value owl:Thing of column parent is not in column label owl:Thing +table3 4 source error key:foreign Value 'SOB' of column source is not in table1.prefix SOB +table3 5 parent error tree:foreign Value 'jafar' of column parent is not in column label jafar +table3 5 source error key:foreign Value 'YOB' of column source is not in table1.prefix YOB +table3 6 parent error tree:foreign Value 'owl:Thing' of column parent is 
not in column label owl:Thing table3 7 source error datatype:nonspace source should be text without whitespace CO B table3 7 source error datatype:prefix source should be a prefix for a CURIE CO B table3 7 source error datatype:word source should be a single word: letters, numbers, underscore CO B @@ -31,7 +31,7 @@ table3 8 id error key:unique Values of id must be unique COB:0000013 table3 10 id error key:unique Values of id must be unique VO:0000001 table3 10 label error key:primary Values of label must be unique vaccine table3 10 label error tree:child-unique Values of label must be unique vaccine -table3 10 source error key:foreign Value BOB of column source is not in table1.prefix BOB +table3 10 source error key:foreign Value 'BOB' of column source is not in table1.prefix BOB table4 9 foreign_column error key:unique Values of foreign_column must be unique a table4 10 foreign_column error key:unique Values of foreign_column must be unique b table4 10 numeric_foreign_column error key:primary Values of numeric_foreign_column must be unique 9 @@ -43,6 +43,6 @@ table6 3 xyzzy error under:not-under Value '6' of column xyzzy is not under '4' table6 4 foo error rule:foo-4 bar must be 25 or 26 if foo = 'e' e table6 4 xyzzy error under:not-under Value '7' of column xyzzy is not under '4' 7 table6 5 xyzzy error under:not-under Value '8' of column xyzzy is not under '4' 8 -table6 7 xyzzy error under:not-in-tree Value 26 of column xyzzy is not in table6.child 26 -table6 9 child error key:foreign Value 9 of column child exists only in table4_conflict.numeric_foreign_column 9 +table6 7 xyzzy error under:not-in-tree Value '26' of column xyzzy is not in table6.child 26 +table6 9 child error key:foreign Value '9' of column child exists only in table4_conflict.numeric_foreign_column 9 table7 3 planetfall error datatype:integer planetfall should be a positive or negative integer e diff --git a/test/expected/messages_a1.tsv b/test/expected/messages_a1.tsv index 493f3188..1f3d8b44 
100644 --- a/test/expected/messages_a1.tsv +++ b/test/expected/messages_a1.tsv @@ -12,18 +12,18 @@ table2 C3 error under:not-under Value 'f' of column xyzzy is not under 'd' f table2 D4 error rule:foo-4 bar must be 'y' or 'z' if foo = 5 5 table2 C4 error under:not-under Value 'g' of column xyzzy is not under 'd' g table2 C5 error under:not-under Value 'h' of column xyzzy is not under 'd' h -table2 C7 error under:not-in-tree Value z of column xyzzy is not in table2.child z -table2 A9 error key:foreign Value i of column child exists only in table4_conflict.other_foreign_column i -table3 C1 error datatype:label label should be of datatype `label' mobecular entity +table2 C7 error under:not-in-tree Value 'z' of column xyzzy is not in table2.child z +table2 A9 error key:foreign Value 'i' of column child exists only in table4_conflict.other_foreign_column i +table3 C1 error datatype:label label should be of datatype label mobecular entity table3 C1 error datatype:trimmed_line label should be a line of text that does not begin or end with whitespace mobecular entity -table3 A1 error key:foreign Value MOB of column source is not in table1.prefix MOB -table3 A2 error key:foreign Value ZOB of column source is not in table1.prefix ZOB -table3 A3 error key:foreign Value JOB of column source is not in table1.prefix JOB +table3 A1 error key:foreign Value 'MOB' of column source is not in table1.prefix MOB +table3 A2 error key:foreign Value 'ZOB' of column source is not in table1.prefix ZOB +table3 A3 error key:foreign Value 'JOB' of column source is not in table1.prefix JOB table3 E4 error tree:cycle Cyclic dependency: (label: bar, parent: car), (label: car, parent: foo), (label: foo, parent: bar) for tree(parent) of label bar -table3 A4 error key:foreign Value SOB of column source is not in table1.prefix SOB -table3 E5 error tree:foreign Value jafar of column parent is not in column label jafar -table3 A5 error key:foreign Value YOB of column source is not in table1.prefix YOB 
-table3 E6 error tree:foreign Value owl:Thing of column parent is not in column label owl:Thing +table3 A4 error key:foreign Value 'SOB' of column source is not in table1.prefix SOB +table3 E5 error tree:foreign Value 'jafar' of column parent is not in column label jafar +table3 A5 error key:foreign Value 'YOB' of column source is not in table1.prefix YOB +table3 E6 error tree:foreign Value 'owl:Thing' of column parent is not in column label owl:Thing table3 A7 error datatype:nonspace source should be text without whitespace CO B table3 A7 error datatype:prefix source should be a prefix for a CURIE CO B table3 A7 error datatype:word source should be a single word: letters, numbers, underscore CO B @@ -31,7 +31,7 @@ table3 B8 error key:unique Values of id must be unique COB:0000013 table3 B10 error key:unique Values of id must be unique VO:0000001 table3 C10 error key:primary Values of label must be unique vaccine table3 C10 error tree:child-unique Values of label must be unique vaccine -table3 A10 error key:foreign Value BOB of column source is not in table1.prefix BOB +table3 A10 error key:foreign Value 'BOB' of column source is not in table1.prefix BOB table4 A9 error key:unique Values of foreign_column must be unique a table4 A10 error key:unique Values of foreign_column must be unique b table4 C10 error key:primary Values of numeric_foreign_column must be unique 9 @@ -43,6 +43,6 @@ table6 C3 error under:not-under Value '6' of column xyzzy is not under '4' 6 table6 D4 error rule:foo-4 bar must be 25 or 26 if foo = 'e' e table6 C4 error under:not-under Value '7' of column xyzzy is not under '4' 7 table6 C5 error under:not-under Value '8' of column xyzzy is not under '4' 8 -table6 C7 error under:not-in-tree Value 26 of column xyzzy is not in table6.child 26 -table6 A9 error key:foreign Value 9 of column child exists only in table4_conflict.numeric_foreign_column 9 +table6 C7 error under:not-in-tree Value '26' of column xyzzy is not in table6.child 26 +table6 A9 
error key:foreign Value '9' of column child exists only in table4_conflict.numeric_foreign_column 9 table7 B3 error datatype:integer planetfall should be a positive or negative integer e diff --git a/test/expected/messages_after_api_test.tsv b/test/expected/messages_after_api_test.tsv index f4675e25..3da25719 100644 --- a/test/expected/messages_after_api_test.tsv +++ b/test/expected/messages_after_api_test.tsv @@ -11,25 +11,25 @@ table2 1 child error tree:child-unique Values of child must be unique b table2 1 foo update rule:update Value changed from '5' to '1' 1 table2 1 parent update rule:update Value changed from 'b' to 'f' f table2 1 xyzzy update rule:update Value changed from 'd' to 'w' w -table2 1 xyzzy error under:not-in-tree Value w of column xyzzy is not in table2.child w +table2 1 xyzzy error under:not-in-tree Value 'w' of column xyzzy is not in table2.child w table2 2 foo error rule:foo-1 bar must be null whenever foo is null table2 2 xyzzy error under:not-under Value 'e' of column xyzzy is not under 'd' e table2 3 xyzzy error under:not-under Value 'f' of column xyzzy is not under 'd' f table2 4 foo error rule:foo-4 bar must be 'y' or 'z' if foo = 5 5 table2 4 xyzzy error under:not-under Value 'g' of column xyzzy is not under 'd' g table2 5 xyzzy error under:not-under Value 'h' of column xyzzy is not under 'd' h -table2 7 xyzzy error under:not-in-tree Value z of column xyzzy is not in table2.child z -table2 9 child error key:foreign Value i of column child exists only in table4_conflict.other_foreign_column i -table3 1 label error datatype:label label should be of datatype `label' mobecular entity +table2 7 xyzzy error under:not-in-tree Value 'z' of column xyzzy is not in table2.child z +table2 9 child error key:foreign Value 'i' of column child exists only in table4_conflict.other_foreign_column i +table3 1 label error datatype:label label should be of datatype label mobecular entity table3 1 label error datatype:trimmed_line label should be a line of 
text that does not begin or end with whitespace mobecular entity -table3 1 source error key:foreign Value MOB of column source is not in table1.prefix MOB -table3 2 source error key:foreign Value ZOB of column source is not in table1.prefix ZOB -table3 3 source error key:foreign Value JOB of column source is not in table1.prefix JOB +table3 1 source error key:foreign Value 'MOB' of column source is not in table1.prefix MOB +table3 2 source error key:foreign Value 'ZOB' of column source is not in table1.prefix ZOB +table3 3 source error key:foreign Value 'JOB' of column source is not in table1.prefix JOB table3 4 parent error tree:cycle Cyclic dependency: (label: bar, parent: car), (label: car, parent: foo), (label: foo, parent: bar) for tree(parent) of label bar -table3 4 source error key:foreign Value SOB of column source is not in table1.prefix SOB -table3 5 parent error tree:foreign Value jafar of column parent is not in column label jafar -table3 5 source error key:foreign Value YOB of column source is not in table1.prefix YOB -table3 6 parent error tree:foreign Value owl:Thing of column parent is not in column label owl:Thing +table3 4 source error key:foreign Value 'SOB' of column source is not in table1.prefix SOB +table3 5 parent error tree:foreign Value 'jafar' of column parent is not in column label jafar +table3 5 source error key:foreign Value 'YOB' of column source is not in table1.prefix YOB +table3 6 parent error tree:foreign Value 'owl:Thing' of column parent is not in column label owl:Thing table3 7 source error datatype:nonspace source should be text without whitespace CO B table3 7 source error datatype:prefix source should be a prefix for a CURIE CO B table3 7 source error datatype:word source should be a single word: letters, numbers, underscore CO B @@ -37,10 +37,10 @@ table3 8 id error key:unique Values of id must be unique COB:0000013 table3 10 id error key:unique Values of id must be unique VO:0000001 table3 10 label error key:primary 
Values of label must be unique vaccine table3 10 label error tree:child-unique Values of label must be unique vaccine -table3 10 source error key:foreign Value BOB of column source is not in table1.prefix BOB +table3 10 source error key:foreign Value 'BOB' of column source is not in table1.prefix BOB table3 11 parent error custom:unrelated An unrelated error barrie -table3 11 parent error tree:foreign Value barrie of column parent is not in column label barrie -table3 11 source error key:foreign Value BFOBBER of column source is not in table1.prefix BFOBBER +table3 11 parent error tree:foreign Value 'barrie' of column parent is not in column label barrie +table3 11 source error key:foreign Value 'BFOBBER' of column source is not in table1.prefix BFOBBER table4 9 foreign_column error key:unique Values of foreign_column must be unique a table4 10 foreign_column error key:unique Values of foreign_column must be unique b table4 10 numeric_foreign_column error key:primary Values of numeric_foreign_column must be unique 9 @@ -51,16 +51,16 @@ table6 1 child error tree:child-unique Values of child must be unique 2 table6 1 foo update rule:update Value changed from 'e' to 'a' a table6 1 parent update rule:update Value changed from '2' to '6' 6 table6 1 xyzzy update rule:update Value changed from '4' to '23' 23 -table6 1 xyzzy error under:not-in-tree Value 23 of column xyzzy is not in table6.child 23 +table6 1 xyzzy error under:not-in-tree Value '23' of column xyzzy is not in table6.child 23 table6 2 foo error rule:foo-1 bar must be null whenever foo is null table6 2 xyzzy error under:not-under Value '5' of column xyzzy is not under '4' 5 table6 3 xyzzy error under:not-under Value '6' of column xyzzy is not under '4' 6 table6 4 foo error rule:foo-4 bar must be 25 or 26 if foo = 'e' e table6 4 xyzzy error under:not-under Value '7' of column xyzzy is not under '4' 7 table6 5 xyzzy error under:not-under Value '8' of column xyzzy is not under '4' 8 -table6 7 xyzzy error 
under:not-in-tree Value 26 of column xyzzy is not in table6.child 26 -table6 9 child error key:foreign Value 9 of column child exists only in table4_conflict.numeric_foreign_column 9 +table6 7 xyzzy error under:not-in-tree Value '26' of column xyzzy is not in table6.child 26 +table6 9 child error key:foreign Value '9' of column child exists only in table4_conflict.numeric_foreign_column 9 table6 10 bar error custom:unrelated An unrelated error 2 table6 10 child error tree:child-unique Values of child must be unique 2 -table6 10 xyzzy error under:not-in-tree Value 23 of column xyzzy is not in table6.child 23 +table6 10 xyzzy error under:not-in-tree Value '23' of column xyzzy is not in table6.child 23 table7 3 planetfall error datatype:integer planetfall should be a positive or negative integer e From 9fdca903dcf8a3972c9d1e897511a02366e743ff Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Wed, 21 Jun 2023 12:30:48 -0400 Subject: [PATCH 02/31] small change to update_row() signature --- src/api_test.rs | 4 ++-- src/lib.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/api_test.rs b/src/api_test.rs index 1ef2de14..26dd11ca 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -163,7 +163,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro &pool, "table2", &result_row, - 1, + &1, ) .await?; @@ -236,7 +236,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro &pool, "table6", &result_row, - 1, + &1, ) .await?; diff --git a/src/lib.rs b/src/lib.rs index 452fd5e1..a1ea7327 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1184,7 +1184,7 @@ pub async fn update_row( pool: &AnyPool, table_name: &str, row: &SerdeMap, - row_number: u32, + row_number: &u32, ) -> Result<(), sqlx::Error> { // First, send the row through the row validator to determine if any fields are problematic and // to mark them with appropriate messages: @@ -1196,7 +1196,7 @@ pub async fn update_row( table_name, row, 
true, - Some(row_number), + Some(*row_number), ) .await?; From 5a1203d7e3cf7341e7de9f24549e449a3547e506 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sat, 24 Jun 2023 11:56:24 -0400 Subject: [PATCH 03/31] instruct user to explicitly delete the performance databse before rebuilding it --- Makefile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Makefile b/Makefile index f434f9c6..3c832e7b 100644 --- a/Makefile +++ b/Makefile @@ -111,6 +111,12 @@ test/perf_test_data/ontology: test/generate_random_test_data.py ./$< 1 10000 5 $@ build/valve_perf.db: valve | test/perf_test_data/ontology build + @if [ -f $@ ]; \ + then \ + echo "'$@' exists but is out of date. To rebuild '$@', run \`make cleanperfdb\`" \ + "before running \`make $@\`" ; \ + false; \ + fi time -p ./$< --verbose test/perf_test_data/table.tsv $@ .PHONY: sqlite_perf_test From 77bdc5a4a37020c45ba9277c66378d0e9b8d8cd4 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 26 Jun 2023 20:20:32 -0400 Subject: [PATCH 04/31] (WIP) implement dependency checking during updates and deletes --- scripts/export.py | 1 + src/api_test.rs | 14 +- src/lib.rs | 320 +++++++++++++++++++++++++++++++++++++++++++++- src/validate.rs | 26 +--- 4 files changed, 328 insertions(+), 33 deletions(-) diff --git a/scripts/export.py b/scripts/export.py index 1d0e4579..7b300a31 100755 --- a/scripts/export.py +++ b/scripts/export.py @@ -148,6 +148,7 @@ def export_data(cursor, is_sqlite, args): WHERE "row" = "row_number" AND "column" = '{column}' AND "table" = '{table}' + ORDER BY "message_id" DESC LIMIT 1 ) ELSE "{column}"{cast} diff --git a/src/api_test.rs b/src/api_test.rs index 26dd11ca..483653d1 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -124,8 +124,8 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro &pool, "table2", row.as_object().unwrap(), - true, Some(1), + None, ) .await?; @@ -136,8 +136,8 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), 
sqlx::Erro &pool, "table2", &result_row_1, - true, Some(1), + None, ) .await?; assert_eq!(result_row_1, result_row_2); @@ -149,8 +149,8 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro &pool, "table2", &result_row_2, - true, Some(1), + None, ) .await?; assert_eq!(result_row, result_row_2); @@ -162,7 +162,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro &compiled_rule_conditions, &pool, "table2", - &result_row, + &row.as_object().unwrap(), &1, ) .await?; @@ -189,7 +189,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro &pool, "table3", row.as_object().unwrap(), - false, + None, None, ) .await?; @@ -225,8 +225,8 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro &pool, "table6", row.as_object().unwrap(), - true, Some(1), + None, ) .await?; update_row( @@ -261,7 +261,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro &pool, "table6", row.as_object().unwrap(), - false, + None, None, ) .await?; diff --git a/src/lib.rs b/src/lib.rs index a1ea7327..19456ab8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,6 +31,7 @@ use crate::{ast::Expression, valve_grammar::StartParser}; use chrono::Utc; use crossbeam; use futures::executor::block_on; +use indexmap::IndexMap; use indoc::indoc; use itertools::{IntoChunks, Itertools}; use lazy_static::lazy_static; @@ -43,7 +44,9 @@ use regex::Regex; use serde_json::{json, Value as SerdeValue}; use sqlx::{ any::{AnyConnectOptions, AnyKind, AnyPool, AnyPoolOptions, AnyRow}, - query as sqlx_query, Column, Row, ValueRef, + query as sqlx_query, Column, + Error::Configuration, + Row, ValueRef, }; use std::{ collections::{BTreeMap, HashMap}, @@ -1073,7 +1076,7 @@ pub async fn insert_new_row( pool, table_name, row, - false, + None, None, ) .await?; @@ -1175,6 +1178,255 @@ pub async fn insert_new_row( Ok(new_row_number) } +pub async fn get_updates( + global_config: &SerdeMap, 
+ pool: &AnyPool, + table: &str, + row: &SerdeMap, + row_number: &u32, +) -> Result>, String> { + // eprintln!("GETTING UPDATES FOR ROW {} OF {}", row_number, table); + + fn get_cell_value(row: &SerdeMap, column: &str) -> Result { + match row.get(column).and_then(|cell| cell.get("value")) { + Some(SerdeValue::String(s)) => Ok(format!("{}", s)), + Some(SerdeValue::Number(n)) => Ok(format!("{}", n)), + Some(SerdeValue::Bool(b)) => Ok(format!("{}", b)), + _ => Err(format!( + "Value missing or of unknown type in column {} of row to update: {:?}", + column, row + )), + } + } + + async fn get_current_value( + table: &str, + column: &str, + row_number: &u32, + pool: &AnyPool, + ) -> Result { + let (is_clause, cast) = if pool.any_kind() == AnyKind::Sqlite { + ("IS", "") + } else { + ("IS NOT DISTINCT FROM", "::TEXT") + }; + let sql = format!( + r#"SELECT + CASE + WHEN "{column}" {is_clause} NULL THEN ( + SELECT value + FROM "message" + WHERE "row" = "row_number" + AND "column" = '{column}' + AND "table" = '{table}' + ORDER BY "message_id" DESC + LIMIT 1 + ) + ELSE "{column}"{cast} + END AS "{column}" + FROM "{table}_view" WHERE "row_number" = {row_number} + "#, + column = column, + is_clause = is_clause, + table = table, + cast = cast, + row_number = row_number, + ); + // eprintln!("SQL FOR CURRENT VALUE:\n{}", sql); + + let query = sqlx_query(&sql); + let result_row = query.fetch_one(pool).await.map_err(|e| e.to_string())?; + let value: &str = result_row.try_get(column).unwrap(); + Ok(value.to_string()) + } + + // TODO: Make this an outer function and make it public. 
+ async fn get_affected_rows( + table: &str, + column: &str, + current_value: &str, + new_value: &str, + global_config: &SerdeMap, + pool: &AnyPool, + ) -> Result, String> { + // eprintln!("GETTING AFFECTED ROWS FOR {}.{}", table, column); + + let sql_type = + get_sql_type_from_global_config(&global_config, &table, &column, pool).unwrap(); + + let sql = { + let is_clause = if pool.any_kind() == AnyKind::Sqlite { + "IS" + } else { + "IS NOT DISTINCT FROM" + }; + + let select_columns = global_config + .get("table") + .and_then(|t| t.get(table)) + .and_then(|t| t.as_object()) + .and_then(|t| t.get("column")) + .and_then(|t| t.as_object()) + .and_then(|t| Some(t.keys())) + .and_then(|k| Some(k.map(|k| k.to_string()))) + .and_then(|t| Some(t.collect::>())) + .unwrap(); + + let mut select_columns = select_columns + .iter() + .map(|c| { + format!( + r#"CASE + WHEN "{column}" {is_clause} NULL THEN ( + SELECT value + FROM "message" + WHERE "row" = "row_number" + AND "column" = '{column}' + AND "table" = '{table}' + ORDER BY "message_id" DESC + LIMIT 1 + ) + ELSE {casted_column} + END AS "{column}""#, + casted_column = if pool.any_kind() == AnyKind::Sqlite { + cast_column_sql_to_text(c, "non-text") + } else { + format!("\"{}\"::TEXT", c) + }, + column = c, + table = table, + ) + }) + .collect::>(); + + let select_columns = { + let mut v = vec!["row_number".to_string()]; + v.append(&mut select_columns); + v + }; + + // Since the consequence of an update could involve currently invalid rows + // (in the conflict table) becoming valid or vice versa, we need to check rows for + // which the value of the column is either new_value or the current value. 
+ format!( + "SELECT {} FROM \"{}_view\" \ + WHERE {} IN ('{}', '{}')", + select_columns.join(", "), + table, + cast_column_sql_to_text(column, &sql_type), + current_value, + new_value + ) + }; + // eprintln!("SQL: {}", sql); + + let query = sqlx_query(&sql); + let mut table_rows = IndexMap::new(); + for row in query.fetch_all(pool).await.map_err(|e| e.to_string())? { + let mut table_row = SerdeMap::new(); + let mut row_number: Option = None; + for column in row.columns() { + let cname = column.name(); + if cname == "row_number" { + row_number = Some(row.get::("row_number") as u32); + } else { + let raw_value = row.try_get_raw(format!(r#"{}"#, cname).as_str()).unwrap(); + let value; + if !raw_value.is_null() { + value = get_column_value(&row, &cname, "text"); + } else { + value = String::from(""); + } + let cell = json!({ + "value": value, + "valid": true, + "messages": json!([]), + }); + table_row.insert(cname.to_string(), json!(cell)); + } + } + let row_number = row_number.ok_or("Row: has no row number".to_string())?; + table_rows.insert(row_number, table_row); + } + + Ok(table_rows) + } + + let foreign_dependencies = { + let mut foreign_dependencies = vec![]; + let global_fconstraints = global_config + .get("constraints") + .and_then(|c| c.get("foreign")) + .and_then(|c| c.as_object()) + .unwrap(); + for (dependent_table, fconstraints) in global_fconstraints { + for entry in fconstraints.as_array().unwrap() { + let ftable = entry.get("ftable").and_then(|c| c.as_str()).unwrap(); + if ftable == table { + let mut fdep = entry.as_object().unwrap().clone(); + fdep.insert("table".to_string(), json!(dependent_table)); + foreign_dependencies.push(fdep); + } + } + } + foreign_dependencies + }; + + //eprintln!( + // "FOREIGN KEYS THAT DEPEND ON {}: {:#?}", + // table, foreign_dependencies + //); + + let mut updates = IndexMap::new(); + for fdep in &foreign_dependencies { + let dependent_table = fdep.get("table").and_then(|c| c.as_str()).unwrap(); + let dependent_column = 
fdep.get("column").and_then(|c| c.as_str()).unwrap(); + let target_column = fdep.get("fcolumn").and_then(|c| c.as_str()).unwrap(); + let target_table = fdep.get("ftable").and_then(|c| c.as_str()).unwrap(); + + // Fetch the cell corresponding to `column` from `row`, and the value of that cell, + // which is the new value for the row. + let new_value = get_cell_value(row, target_column)?; + //eprintln!( + // "NEW VALUE OF {}.{}: {}", + // target_table, target_column, new_value + //); + + // Query the database using `row_number` to get the current value of the column for + // the row. + let current_value = + get_current_value(target_table, target_column, row_number, pool).await?; + //eprintln!( + // "CURRENT VALUE OF {}.{}: {}", + // target_table, target_column, current_value + //); + + // Query dependent_table.dependent_column for the rows that will be affected by the change + // from the current to the new value: + let affected_rows = get_affected_rows( + dependent_table, + dependent_column, + &current_value, + &new_value, + global_config, + pool, + ) + .await?; + //eprintln!("AFFECTED ROWS: {:#?}", affected_rows); + + updates.insert(dependent_table.to_string(), affected_rows); + } + + // TODO (later): tree. Add more rows to the IndexMap for tree constraints. + + // TODO (later): under. Add more rows to the IndexMap for under constraints. + + // TODO: I think this reverse is unneeded but let's keep it commented for now. + // We reverse here because the deepest dependencies need to be updated first. + // updates.reverse(); + Ok(updates) +} + /// Given global config map, a database connection pool, a table name, a row, and the row number to /// update, update the corresponding row in the database with new values as specified by `row`. pub async fn update_row( @@ -1186,6 +1438,10 @@ pub async fn update_row( row: &SerdeMap, row_number: &u32, ) -> Result<(), sqlx::Error> { + // eprintln!("***** In update_row().
Got row: {:#?}", row); + + // TODO: If possible use BEGIN and END TRANSACTION here and ROLLBACK in case of an error. + // First, send the row through the row validator to determine if any fields are problematic and // to mark them with appropriate messages: let row = validate_row( @@ -1195,10 +1451,11 @@ pub async fn update_row( pool, table_name, row, - true, Some(*row_number), + None, ) .await?; + //eprintln!("***** In update_row(). Row after validation: {:#?}", row); // Now prepare the row and messages for the database update: let mut assignments = vec![]; @@ -1314,16 +1571,69 @@ pub async fn update_row( } // Update the given row in the table: - let mut update_stmt = format!(r#"UPDATE "{}" SET "#, table_name); + let mut update_stmt = format!(r#"GARBAGE-REMOVE-THIS-LATER UPDATE "{}" SET "#, table_name); update_stmt.push_str(&assignments.join(", ")); update_stmt.push_str(&format!(r#" WHERE "row_number" = {}"#, row_number)); let update_stmt = local_sql_syntax(&pool, &update_stmt); + //eprintln!( + // "***** In update_row(). Running update statement: {} with params: {:?}", + // update_stmt, params, + //); let mut query = sqlx_query(&update_stmt); for param in &params { query = query.bind(param); } - query.execute(pool).await?; + match query.execute(pool).await { + Ok(_) => (), + Err(e) => { + // Overview: + // --------- + // We need to call something similar to validate_row() on every affected row in the + // dependent table, with the hitch that we need to validate them with the target row + // of the target table in the database replaced, somehow, with the modified version + // of it represented by `row`. (In the case of delete_row() we would simply have to + // ignore the target row in the database somehow (maybe by excluding its row number + // using a CTE). Those re-validated rows should then be sent to update_row() which will + // try to insert them into the database one by one.
In the case of a database error + // this code (i.e., in this block) will be triggered again, and so on, recursively + // until everything succeeds. + // + // Note also that we might want to run ANALYZE (or the sqlite equivalent) after + // the update has completed. + + // Step 1: + // ------ + // Look through the valve config to see which tables are dependent on this table + // and find the rows that need to be updated. + + let updates = get_updates(global_config, pool, table_name, &row, row_number) + .await + .map_err(|e| Configuration(e.into()))?; + eprintln!("UPDATES: {:#?}", updates); + + // YOU ARE HERE. What we now need to do, for each of the rows returned in `updates` + // (one at a time), is to (i) validate it 'counterfactually' (see above), (ii) Call + // update_row() on the result. Note that for (ii) to work as intended we are going to + // have to add some way to bypass the implicit re-validation that happens at the + // beginning of that function./ I think that might be all. + + // Try this: For counterfactual validation, we could define a new struct like: + // struct AsIf { + // type: AsIfType, // AsIfType::Update, AsIfType::Delete + // row_number, + // row: Option, + // } + // and pass it to validate_row() as an optional parameter. For now we will allow only + // one row to be counterfactually modified in this way, but maybe we might want to + // eventually support more than one row by changing the paramater to a vector. + + // TODO: remove this error return. Do something smarter above instead. + return Err(Configuration( + format!("Arghh!! 
Got database error: {}", e).into(), + )); + } + }; // Then delete any messages that had been previously inserted to the message table for the old // version of this row (other than any 'update'-level messages): diff --git a/src/validate.rs b/src/validate.rs index f3ea5bb0..61be6e23 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -37,15 +37,9 @@ pub async fn validate_row( pool: &AnyPool, table_name: &str, row: &SerdeMap, - existing_row: bool, row_number: Option, + rows_to_ignore: Option>>, ) -> Result { - // If existing_row is false, then override any row number provided with None: - let mut row_number = row_number.clone(); - if !existing_row { - row_number = None; - } - // Initialize the result row with the values from the given row: let mut result_row = ResultRow { row_number: row_number, @@ -136,7 +130,6 @@ pub async fn validate_row( column_name, cell, &vec![], - existing_row, row_number, ) .await?; @@ -852,7 +845,6 @@ pub async fn validate_rows_constraints( &column_name, cell, &result_rows, - false, None, ) .await?; @@ -1616,8 +1608,8 @@ async fn validate_cell_trees( /// the row, `context`, to which the cell belongs, and a list of previously validated rows, /// check the cell value against any unique-type keys that have been defined for the column. /// If there is a violation, indicate it with an error message attached to the cell. If -/// the `existing_row` flag is set to True, then checks will be made as if the given `row_number` -/// does not exist in the table. +/// `row_number` is set to None, then no row corresponding to the given cell is assumed to exist +/// in the table. 
async fn validate_cell_unique_constraints( config: &SerdeMap, pool: &AnyPool, @@ -1625,14 +1617,8 @@ async fn validate_cell_unique_constraints( column_name: &String, cell: &mut ResultCell, prev_results: &Vec, - existing_row: bool, row_number: Option, ) -> Result<(), sqlx::Error> { - // If existing_row is false, then override any row number provided with None: - let mut row_number = row_number.clone(); - if !existing_row { - row_number = None; - } // If the column has a primary or unique key constraint, or if it is the child associated with // a tree, then if the value of the cell is a duplicate either of one of the previously // validated rows in the batch, or a duplicate of a validated row that has already been inserted @@ -1684,15 +1670,13 @@ async fn validate_cell_unique_constraints( if is_primary || is_unique || is_tree_child { let mut with_sql = String::new(); let except_table = format!("{}_exc", table_name); - if existing_row { + if let Some(row_number) = row_number { with_sql = format!( r#"WITH "{}" AS ( SELECT * FROM "{}" WHERE "row_number" != {} ) "#, - except_table, - table_name, - row_number.unwrap() + except_table, table_name, row_number ); } From dfd27ebd48bb631200fa934b537668249b285286 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Tue, 27 Jun 2023 10:32:11 -0400 Subject: [PATCH 05/31] (WIP II) implement dependency checking during updates and deletes; pass optional references instead of optional objects in some functions --- src/lib.rs | 66 ++++++++++++++++++++++++++++++++----------------- src/validate.rs | 66 +++++++++++++++++++++++++++++++++++-------------- 2 files changed, 91 insertions(+), 41 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 19456ab8..de9e9927 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,7 +25,7 @@ lalrpop_mod!(pub valve_grammar); use crate::validate::{ validate_row, validate_rows_constraints, validate_rows_intra, validate_rows_trees, - validate_tree_foreign_keys, validate_under, ResultRow, + 
validate_tree_foreign_keys, validate_under, QueryAsIf, QueryAsIfKind, ResultRow, }; use crate::{ast::Expression, valve_grammar::StartParser}; use chrono::Utc; @@ -1178,7 +1178,7 @@ pub async fn insert_new_row( Ok(new_row_number) } -pub async fn get_updates( +pub async fn get_rows_to_update( global_config: &SerdeMap, pool: &AnyPool, table: &str, @@ -1377,7 +1377,7 @@ pub async fn get_updates( // table, foreign_dependencies //); - let mut updates = IndexMap::new(); + let mut rows_to_update = IndexMap::new(); for fdep in &foreign_dependencies { let dependent_table = fdep.get("table").and_then(|c| c.as_str()).unwrap(); let dependent_column = fdep.get("column").and_then(|c| c.as_str()).unwrap(); @@ -1414,7 +1414,7 @@ pub async fn get_updates( .await?; //eprintln!("AFFECTED ROWS: {:#?}", affected_rows); - updates.insert(dependent_table.to_string(), affected_rows); + rows_to_update.insert(dependent_table.to_string(), affected_rows); } // TODO (later): tree. Add more rows to the IndexMap for tree constraints. @@ -1423,8 +1423,8 @@ pub async fn get_updates( // TODO: I think this reverse is unneeded but let's keep it commented for now. // We reverse here because the deepest dependencies need to be updated first. - // updates.reverse(); - Ok(updates) + // rows_to_update.reverse(); + Ok(rows_to_update) } /// Given global config map, a database connection pool, a table name, a row, and the row number to @@ -1593,7 +1593,7 @@ pub async fn update_row( // dependent table, with the hitch that we need to validate them with the target row // of the target table in the database replaced, somehow, with the modified version // of it represented by `row`. (In the case of delete_row() we would simply have to - // ignore the target row in the database somehow (maybe by excluding its row number + // ignore the target row in the database somehow, maybe by excluding its row number // using a CTE). 
Those re-validated rows should then be sent to update_row() which will // try to insert them into the database one by one. In the case of a database error // this code (i.e., in this block) will be triggered again, and so on, recursively @@ -1607,26 +1607,46 @@ pub async fn update_row( // Look through the valve config to see which tables are dependent on this table // and find the rows that need to be updated. - let updates = get_updates(global_config, pool, table_name, &row, row_number) + let updates = get_rows_to_update(global_config, pool, table_name, &row, row_number) .await .map_err(|e| Configuration(e.into()))?; eprintln!("UPDATES: {:#?}", updates); - // YOU ARE HERE. What we now need to do, for each of the rows returned in `updates` - // (one at a time), is to (i) validate it 'counterfactually' (see above), (ii) Call - // update_row() on the result. Note that for (ii) to work as intended we are going to - // have to add some way to bypass the implicit re-validation that happens at the - // beginning of that function./ I think that might be all. - - // Try this: For counterfactual validation, we could define a new struct like: - // struct AsIf { - // type: AsIfType, // AsIfType::Update, AsIfType::Delete - // row_number, - // row: Option, - // } - // and pass it to validate_row() as an optional parameter. For now we will allow only - // one row to be counterfactually modified in this way, but maybe we might want to - // eventually support more than one row by changing the paramater to a vector. 
+ let query_as_if = QueryAsIf { + kind: QueryAsIfKind::Update, + table: table_name.to_string(), + alias: format!("{}_as_if", table_name), + row_number: *row_number, + row: Some(row), + }; + + // Validate each row 'counterfactually' (see above): + let mut validated_rows = vec![]; + for (update_table, rows_to_update) in &updates { + for (row_number, row) in rows_to_update { + eprintln!("ROW NUMBER: {}, ROW: {:#?}", row_number, row); + let vrow = validate_row( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + update_table, + row, + Some(*row_number), + Some(&query_as_if), + ) + .await?; + eprintln!("VALIDATED ROW: {:#?}", vrow); + validated_rows.push(vrow); + } + } + + // TODO: Call update_row() on the each of the validated rows. Note that for this to work + // as intended we are going to have to add some way to bypass the implicit re-validation + // that happens at the beginning of that function. + + // TODO: Finally, retry the update_row() call that resulted in our being in the Err + // branch to begin with. // TODO: remove this error return. Do something smarter above instead. return Err(Configuration( diff --git a/src/validate.rs b/src/validate.rs index 61be6e23..ee207b59 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -26,6 +26,24 @@ pub struct ResultRow { pub contents: IndexMap, } +/// TODO: Add a docstring here +#[derive(Clone, Debug)] +pub enum QueryAsIfKind { + Update, + Ignore, +} + +/// Used for counterfactual validation. 
+#[derive(Clone, Debug)] +pub struct QueryAsIf { + pub kind: QueryAsIfKind, + pub table: String, + // SQLite does not allow a CTE named 'foo' to reference a table named 'foo' so we need an alias: + pub alias: String, + pub row_number: u32, + pub row: Option, +} + /// Given a config map, maps of compiled datatype and rule conditions, a database connection /// pool, a table name, a row to validate, and a row number in case the row already exists, /// perform both intra- and inter-row validation and return the validated row. Note that this @@ -38,13 +56,14 @@ pub async fn validate_row( table_name: &str, row: &SerdeMap, row_number: Option, - rows_to_ignore: Option>>, + query_as_if: Option<&QueryAsIf>, ) -> Result { // Initialize the result row with the values from the given row: let mut result_row = ResultRow { row_number: row_number, contents: IndexMap::new(), }; + for (column, cell) in row.iter() { let result_cell = ResultCell { nulltype: cell @@ -137,11 +156,21 @@ pub async fn validate_row( } } - let mut violations = - validate_tree_foreign_keys(config, pool, &table_name.to_string(), Some(context.clone())) - .await?; + let mut violations = validate_tree_foreign_keys( + config, + pool, + &table_name.to_string(), + Some(&context.clone()), + ) + .await?; violations.append( - &mut validate_under(config, pool, &table_name.to_string(), Some(context.clone())).await?, + &mut validate_under( + config, + pool, + &table_name.to_string(), + Some(&context.clone()), + ) + .await?, ); for violation in violations.iter_mut() { @@ -309,7 +338,7 @@ pub async fn get_matching_values( tree, &table_name.to_string(), &table_name.to_string(), - under_val, + under_val.as_ref(), None, pool, ); @@ -362,7 +391,7 @@ pub async fn validate_under( config: &SerdeMap, pool: &AnyPool, table_name: &String, - extra_row: Option, + extra_row: Option<&ResultRow>, ) -> Result, sqlx::Error> { let mut results = vec![]; let ukeys = config @@ -442,7 +471,7 @@ pub async fn validate_under( tree, &table_name, 
&effective_tree, - Some(uval.clone()), + Some(&uval.clone()), None, pool, ); @@ -594,7 +623,7 @@ pub async fn validate_tree_foreign_keys( config: &SerdeMap, pool: &AnyPool, table_name: &String, - extra_row: Option, + extra_row: Option<&ResultRow>, ) -> Result, sqlx::Error> { let tkeys = config .get("constraints") @@ -1062,11 +1091,12 @@ fn with_tree_sql( tree: &SerdeMap, table_name: &str, effective_table_name: &str, - root: Option, - extra_clause: Option, + root: Option<&String>, + extra_clause: Option<&String>, pool: &AnyPool, ) -> (String, Vec) { - let extra_clause = extra_clause.unwrap_or(String::new()); + let empty_string = String::new(); + let extra_clause = extra_clause.unwrap_or(&empty_string); let child_col = tree.get("child").and_then(|c| c.as_str()).unwrap(); let parent_col = tree.get("parent").and_then(|c| c.as_str()).unwrap(); @@ -1153,14 +1183,14 @@ fn validate_cell_datatype( config: &SerdeMap, compiled_datatype_conditions: &HashMap, primary_dt_name: &str, - dt_name: Option, + dt_name: Option<&String>, ) -> Vec { let mut datatypes = vec![]; if let Some(dt_name) = dt_name { let datatype = config .get("datatype") .and_then(|d| d.as_object()) - .and_then(|o| o.get(&dt_name)) + .and_then(|o| o.get(dt_name)) .and_then(|d| d.as_object()) .unwrap(); let dt_name = datatype.get("datatype").and_then(|d| d.as_str()).unwrap(); @@ -1178,7 +1208,7 @@ fn validate_cell_datatype( config, compiled_datatype_conditions, primary_dt_name, - dt_parent, + dt_parent.as_ref(), ); datatypes.append(&mut more_datatypes); } @@ -1212,7 +1242,7 @@ fn validate_cell_datatype( config, compiled_datatype_conditions, primary_dt_name, - Some(primary_dt_name.to_string()), + Some(&primary_dt_name.to_string()), ); // If this datatype has any parents, check them beginning from the most general to the // most specific. 
We use while and pop instead of a for loop so as to check the @@ -1535,8 +1565,8 @@ async fn validate_cell_trees( &tkey, &table_name, &table_name_ext, - Some(parent_val.clone()), - Some(extra_clause), + Some(&parent_val.clone()), + Some(&extra_clause), pool, ); params.append(&mut tree_sql_params); From 309d2155820b00edc6f55ffbadb57e9298b1efd2 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Tue, 27 Jun 2023 17:12:29 -0400 Subject: [PATCH 06/31] (WIP) implement dependency checking during updates and deletes: handle foreign constraints --- src/lib.rs | 83 +++++++++++++++++----------- src/validate.rs | 142 ++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 184 insertions(+), 41 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index de9e9927..c2dca98d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1417,9 +1417,11 @@ pub async fn get_rows_to_update( rows_to_update.insert(dependent_table.to_string(), affected_rows); } - // TODO (later): tree. Add more rows to the IndexMap for tree constraints. + // TODO (later): tree. - // TODO (later): under. Add more rows to the IndexMap for under constraints. + // TODO (later): under. + + // TODO (later): unique and primary. // TODO: I think this reverse is unneeded but let's keep it commented for now. // We reverse here because the deepest dependencies need to be updated first. @@ -1437,6 +1439,7 @@ pub async fn update_row( table_name: &str, row: &SerdeMap, row_number: &u32, + skip_validation: bool, ) -> Result<(), sqlx::Error> { // eprintln!("***** In update_row(). 
Got row: {:#?}", row); @@ -1444,17 +1447,21 @@ pub async fn update_row( // First, send the row through the row validator to determine if any fields are problematic and // to mark them with appropriate messages: - let row = validate_row( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - table_name, - row, - Some(*row_number), - None, - ) - .await?; + let row = if !skip_validation { + validate_row( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + table_name, + row, + Some(*row_number), + None, + ) + .await? + } else { + row.clone() + }; //eprintln!("***** In update_row(). Row after validation: {:#?}", row); // Now prepare the row and messages for the database update: @@ -1571,7 +1578,7 @@ pub async fn update_row( } // Update the given row in the table: - let mut update_stmt = format!(r#"GARBAGE-REMOVE-THIS-LATER UPDATE "{}" SET "#, table_name); + let mut update_stmt = format!(r#"UPDATE "{}" SET "#, table_name); update_stmt.push_str(&assignments.join(", ")); update_stmt.push_str(&format!(r#" WHERE "row_number" = {}"#, row_number)); let update_stmt = local_sql_syntax(&pool, &update_stmt); @@ -1610,21 +1617,23 @@ pub async fn update_row( let updates = get_rows_to_update(global_config, pool, table_name, &row, row_number) .await .map_err(|e| Configuration(e.into()))?; - eprintln!("UPDATES: {:#?}", updates); + //eprintln!("UPDATES: {:#?}", updates); let query_as_if = QueryAsIf { kind: QueryAsIfKind::Update, table: table_name.to_string(), alias: format!("{}_as_if", table_name), row_number: *row_number, - row: Some(row), + row: Some(row.clone()), }; - // Validate each row 'counterfactually' (see above): - let mut validated_rows = vec![]; for (update_table, rows_to_update) in &updates { for (row_number, row) in rows_to_update { - eprintln!("ROW NUMBER: {}, ROW: {:#?}", row_number, row); + eprintln!( + "VALIDATING ROW NUMBER {} OF {}, ROW: {:#?}", + row_number, update_table, row + ); + // Validate each 
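row 'counterfactually' (see above): let vrow = validate_row( global_config, compiled_datatype_conditions, @@ -1637,21 +1646,33 @@ pub async fn update_row( ) .await?; eprintln!("VALIDATED ROW: {:#?}", vrow); - validated_rows.push(vrow); + // Update the row in the database: + block_on(update_row( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + update_table, + &vrow, + row_number, + true, + ))?; } } - - // TODO: Call update_row() on the each of the validated rows. Note that for this to work - // as intended we are going to have to add some way to bypass the implicit re-validation - // that happens at the beginning of that function. - - // TODO: Finally, retry the update_row() call that resulted in our being in the Err + // Finally, retry the update_row() call that resulted in our being in the Err // branch to begin with. - - // TODO: remove this error return. Do something smarter above instead. - return Err(Configuration( - format!("Arghh!! Got database error: {}", e).into(), - )); + let mut query = sqlx_query(&update_stmt); + for param in &params { + query = query.bind(param); + } + match query.execute(pool).await { + Ok(_) => eprintln!("It worked!!"), + Err(e) => { + return Err(Configuration( + format!("Arghh!! Got schmatabase error: {}", e).into(), + )) + } + }; } }; diff --git a/src/validate.rs b/src/validate.rs index ee207b59..d5497466 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -27,7 +27,7 @@ pub struct ResultRow { } /// TODO: Add a docstring here -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub enum QueryAsIfKind { Update, Ignore, @@ -38,7 +38,8 @@ pub enum QueryAsIfKind { pub struct QueryAsIf { pub kind: QueryAsIfKind, pub table: String, - // SQLite does not allow a CTE named 'foo' to reference a table named 'foo' so we need an alias: + // Although PostgreSQL allows it, SQLite does not allow a CTE named 'foo' to refer to a table + // named 'foo' so we need to use an alias: pub alias: String, pub row_number: u32, pub row: Option, @@ -124,6 +125,7 @@ pub async fn validate_row( // they can result in database errors when, for instance, we compare a numeric with a // non-numeric type. if cell.valid || !contains_dt_violation(&cell.messages) { + // TODO: Pass query_as_if here: validate_cell_trees( config, pool, @@ -134,14 +136,17 @@ pub async fn validate_row( &vec![], ) .await?; + // DONE. (TODO: Remove this comment later.) validate_cell_foreign_constraints( config, pool, &table_name.to_string(), column_name, cell, + query_as_if, ) .await?; + // TODO: Pass query_as_if here: validate_cell_unique_constraints( config, pool, @@ -156,6 +161,7 @@ pub async fn validate_row( } } + // TODO: Pass query_as_if here: let mut violations = validate_tree_foreign_keys( config, pool, @@ -164,6 +170,7 @@ pub async fn validate_row( ) .await?; violations.append( + // TODO: Pass query_as_if here: &mut validate_under( config, pool, @@ -261,7 +268,7 @@ pub async fn get_matching_values( .and_then(|c| c.as_object()) .and_then(|c| c.get("structure")) .and_then(|d| d.as_str()) - .unwrap(), + .unwrap_or(""), ); let sql_type = @@ -864,8 +871,15 @@ pub async fn validate_rows_constraints( // that have datatype violations.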
Got schmatabase error: {}", e).into(), + )) + } + }; } }; diff --git a/src/validate.rs b/src/validate.rs index ee207b59..d5497466 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -27,7 +27,7 @@ pub struct ResultRow { } /// TODO: Add a docstring here -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq)] pub enum QueryAsIfKind { Update, Ignore, @@ -38,7 +38,8 @@ pub enum QueryAsIfKind { pub struct QueryAsIf { pub kind: QueryAsIfKind, pub table: String, - // SQLite does not allow a CTE named 'foo' to reference a table named 'foo' so we need an alias: + // Although PostgreSQL allows it, SQLite does not allow a CTE named 'foo' to refer to a table + // named 'foo' so we need to use an alias: pub alias: String, pub row_number: u32, pub row: Option, @@ -124,6 +125,7 @@ pub async fn validate_row( // they can result in database errors when, for instance, we compare a numeric with a // non-numeric type. if cell.valid || !contains_dt_violation(&cell.messages) { + // TODO: Pass query_as_if here: validate_cell_trees( config, pool, @@ -134,14 +136,17 @@ pub async fn validate_row( &vec![], ) .await?; + // DONE. (TODO: Remove this comment later.) validate_cell_foreign_constraints( config, pool, &table_name.to_string(), column_name, cell, + query_as_if, ) .await?; + // TODO: Pass query_as_if here: validate_cell_unique_constraints( config, pool, @@ -156,6 +161,7 @@ pub async fn validate_row( } } + // TODO: Pass query_as_if here: let mut violations = validate_tree_foreign_keys( config, pool, @@ -164,6 +170,7 @@ pub async fn validate_row( ) .await?; violations.append( + // TODO: Pass query_as_if here: &mut validate_under( config, pool, @@ -261,7 +268,7 @@ pub async fn get_matching_values( .and_then(|c| c.as_object()) .and_then(|c| c.get("structure")) .and_then(|d| d.as_str()) - .unwrap(), + .unwrap_or(""), ); let sql_type = @@ -864,8 +871,15 @@ pub async fn validate_rows_constraints( // that have datatype violations. 
We exclude the latter because they can result in // database errors when, for instance, we compare a numeric with a non-numeric type. if cell.nulltype == None && (cell.valid || !contains_dt_violation(&cell.messages)) { - validate_cell_foreign_constraints(config, pool, table_name, &column_name, cell) - .await?; + validate_cell_foreign_constraints( + config, + pool, + table_name, + &column_name, + cell, + None, + ) + .await?; validate_cell_unique_constraints( config, @@ -1377,6 +1391,84 @@ fn validate_cell_rules( } } +/// TODO: Add docstring here +fn as_if_to_sql( + global_config: &SerdeMap, + pool: &AnyPool, + as_if: &QueryAsIf, + conflict_table: bool, +) -> String { + let sql = { + let suffix = { + if conflict_table { + "_conflict" + } else { + "" + } + }; + + if as_if.kind == QueryAsIfKind::Ignore { + format!( + r#""{table_alias}{suffix}" AS ( + SELECT * FROM "{table_name}{suffix}" WHERE "row_number" <> {row_number} + )"#, + table_alias = as_if.alias, + suffix = suffix, + table_name = as_if.table, + row_number = as_if.row_number, + ) + } else { + let row = as_if.row.as_ref().unwrap(); + let columns = row.keys().cloned().collect::>(); + let values = { + let mut values = vec![]; + for column in &columns { + let value = row + .get(column) + .and_then(|c| c.get("value")) + .and_then(|v| v.as_str()) + .unwrap(); + + let sql_type = get_sql_type_from_global_config( + &global_config, + &as_if.table, + &column, + pool, + ) + .unwrap(); + + if sql_type.to_lowercase() == "text" { + values.push(format!("'{}'", value)); + } else { + values.push(value.to_string()); + } + } + values.join(", ") + }; + format!( + r#""{table_alias}{suffix}" AS ( + SELECT "row_number", {columns} + FROM "{table_name}{suffix}" + WHERE "row_number" <> {row_number} + UNION ALL + SELECT {row_number}, {values} + )"#, + columns = columns + .iter() + .map(|c| format!("\"{}\"", c)) + .collect::>() + .join(", "), + table_alias = as_if.alias, + table_name = as_if.table, + row_number = as_if.row_number, + 
values = values, + ) + } + }; + + sql +} + /// Given a config map, a db connection pool, a table name, a column name, and a cell to validate, /// check the cell value against any foreign keys that have been defined for the column. If there is /// a violation, indicate it with an error message attached to the cell. @@ -1386,6 +1478,7 @@ async fn validate_cell_foreign_constraints( table_name: &String, column_name: &String, cell: &mut ResultCell, + query_as_if: Option<&QueryAsIf>, ) -> Result<(), sqlx::Error> { let fkeys = config .get("constraints") @@ -1405,20 +1498,40 @@ async fn validate_cell_foreign_constraints( .map(|v| v.as_object().unwrap()) .collect::>(); + let as_if_clause = match query_as_if { + Some(query_as_if) => { + format!("WITH {} ", as_if_to_sql(config, pool, &query_as_if, false)) + } + None => "".to_string(), + }; + let as_if_clause_for_conflict = match query_as_if { + Some(query_as_if) => { + format!("WITH {} ", as_if_to_sql(config, pool, &query_as_if, true)) + } + None => "".to_string(), + }; + for fkey in fkeys { let ftable = fkey.get("ftable").and_then(|t| t.as_str()).unwrap(); + let (as_if_clause, ftable_alias) = match query_as_if { + Some(query_as_if) if ftable == query_as_if.table => { + (as_if_clause.to_string(), query_as_if.alias.to_string()) + } + _ => ("".to_string(), ftable.to_string()), + }; let fcolumn = fkey.get("fcolumn").and_then(|c| c.as_str()).unwrap(); let sql_type = get_sql_type_from_global_config(&config, &ftable, &fcolumn, pool).unwrap(); let sql_param = cast_sql_param_from_text(&sql_type); let fsql = local_sql_syntax( &pool, &format!( - r#"SELECT 1 FROM "{}" WHERE "{}" = {} LIMIT 1"#, - ftable, fcolumn, sql_param + r#"{}SELECT 1 FROM "{}" WHERE "{}" = {} LIMIT 1"#, + as_if_clause, ftable_alias, fcolumn, sql_param ), ); - let frows = sqlx_query(&fsql).bind(&cell.value).fetch_all(pool).await?; + //eprintln!("FSQL FOR {:?}: {}", fkey, fsql); + let frows = sqlx_query(&fsql).bind(&cell.value).fetch_all(pool).await?; if 
frows.is_empty() { cell.valid = false; let mut message = json!({ @@ -1426,13 +1539,22 @@ async fn validate_cell_foreign_constraints( "level": "error", }); + let (as_if_clause_for_conflict, ftable_alias) = match query_as_if { + Some(query_as_if) if ftable == query_as_if.table => ( + as_if_clause_for_conflict.to_string(), + query_as_if.alias.to_string(), + ), + _ => ("".to_string(), ftable.to_string()), + }; + let fsql = local_sql_syntax( &pool, &format!( - r#"SELECT 1 FROM "{}_conflict" WHERE "{}" = {} LIMIT 1"#, - ftable, fcolumn, sql_param + r#"{}SELECT 1 FROM "{}_conflict" WHERE "{}" = {} LIMIT 1"#, + as_if_clause_for_conflict, ftable_alias, fcolumn, sql_param ), ); + //eprintln!("CONFLICT FSQL FOR {:?}: {}", fkey, fsql); let frows = sqlx_query(&fsql) .bind(cell.value.clone()) .fetch_all(pool) From ee149e8e0196a546df8fea722815bacf0275bc4c Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Wed, 28 Jun 2023 11:04:19 -0400 Subject: [PATCH 07/31] divide dependency updates into those that must be done before the target row and those that must be done after --- src/lib.rs | 121 +++++++++++++++++++++++++++++++++++++----------- src/validate.rs | 11 ++--- 2 files changed, 98 insertions(+), 34 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index c2dca98d..75e5eef5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1184,7 +1184,13 @@ pub async fn get_rows_to_update( table: &str, row: &SerdeMap, row_number: &u32, -) -> Result>, String> { +) -> Result< + ( + IndexMap>, + IndexMap>, + ), + String, +> { // eprintln!("GETTING UPDATES FOR ROW {} OF {}", row_number, table); fn get_cell_value(row: &SerdeMap, column: &str) -> Result { @@ -1244,8 +1250,7 @@ pub async fn get_rows_to_update( async fn get_affected_rows( table: &str, column: &str, - current_value: &str, - new_value: &str, + value: &str, global_config: &SerdeMap, pool: &AnyPool, ) -> Result, String> { @@ -1287,7 +1292,7 @@ pub async fn get_rows_to_update( LIMIT 1 ) ELSE {casted_column} - END AS "{column}""#, + END AS 
"{column}_extended""#, casted_column = if pool.any_kind() == AnyKind::Sqlite { cast_column_sql_to_text(c, "non-text") } else { @@ -1307,16 +1312,20 @@ pub async fn get_rows_to_update( // Since the consequence of an update could involve currently invalid rows // (in the conflict table) becoming valid or vice versa, we need to check rows for - // which the value of the column is either new_value or the current value. + // which the value of the column is the same as `value` + + //let mike_sql = format!( - "SELECT {} FROM \"{}_view\" \ - WHERE {} IN ('{}', '{}')", - select_columns.join(", "), - table, - cast_column_sql_to_text(column, &sql_type), - current_value, - new_value + "SELECT {columns} FROM \"{table}_view\" \ + WHERE \"{column}_extended\" = '{value}'", + columns = select_columns.join(", "), + table = table, + column = column, + value = value ) + //; + //eprintln!("MIKE SQL: {}", mike_sql); + //mike_sql }; // eprintln!("SQL: {}", sql); @@ -1342,6 +1351,7 @@ pub async fn get_rows_to_update( "valid": true, "messages": json!([]), }); + let cname = cname.strip_suffix("_extended").unwrap(); table_row.insert(cname.to_string(), json!(cell)); } } @@ -1377,7 +1387,8 @@ pub async fn get_rows_to_update( // table, foreign_dependencies //); - let mut rows_to_update = IndexMap::new(); + let mut rows_to_update_before = IndexMap::new(); + let mut rows_to_update_after = IndexMap::new(); for fdep in &foreign_dependencies { let dependent_table = fdep.get("table").and_then(|c| c.as_str()).unwrap(); let dependent_column = fdep.get("column").and_then(|c| c.as_str()).unwrap(); @@ -1403,18 +1414,32 @@ pub async fn get_rows_to_update( // Query dependent_table.dependent_column for the rows that will be affected by the change // from the current to the new value: - let affected_rows = get_affected_rows( + //eprintln!("LOOKING FOR UPDATES BEFORE IN {} USING VALUE: '{}'", + // dependent_table, current_value); + let updates_before = get_affected_rows( dependent_table, dependent_column, 
¤t_value, + global_config, + pool, + ) + .await?; + //eprintln!("UPDATES BEFORE ARE: {:#?}", updates_before); + + //eprintln!("LOOKING FOR UPDATES AFTER IN {} USING VALUE: '{}'", + // dependent_table, new_value); + let updates_after = get_affected_rows( + dependent_table, + dependent_column, &new_value, global_config, pool, ) .await?; - //eprintln!("AFFECTED ROWS: {:#?}", affected_rows); + //eprintln!("UPDATES AFTER ARE: {:#?}", updates_after); - rows_to_update.insert(dependent_table.to_string(), affected_rows); + rows_to_update_before.insert(dependent_table.to_string(), updates_before); + rows_to_update_after.insert(dependent_table.to_string(), updates_after); } // TODO (later): tree. @@ -1423,10 +1448,7 @@ pub async fn get_rows_to_update( // TODO (later): unique and primary. - // TODO: I think this reverse is unneeded but let's keep it commented for now. - // We reverse here because the deepest dependencies need to be updated first. - // rows_to_update.reverse(); - Ok(rows_to_update) + Ok((rows_to_update_before, rows_to_update_after)) } /// Given global config map, a database connection pool, a table name, a row, and the row number to @@ -1477,6 +1499,10 @@ pub async fn update_row( // Begin by adding an extra 'update' row to the message table indicating that the value of // this column has been updated (if that is the case). + // TODO: We should be able to do this in one query instead of two. See the SQL code in + // get_affected_rows() where we solve the problem this two-query approach is meant to solve + // simply by re-aliasing the subquery as _extended. + // In some cases the current value of the column will have to retrieved from the last // generated message, so we retrieve that from the database: let last_msg_val = { @@ -1614,20 +1640,24 @@ pub async fn update_row( // Look through the valve config to see which tables are dependent on this table // and find the rows that need to be updated. 
- let updates = get_rows_to_update(global_config, pool, table_name, &row, row_number) - .await - .map_err(|e| Configuration(e.into()))?; - //eprintln!("UPDATES: {:#?}", updates); + let (updates_before, updates_after) = + get_rows_to_update(global_config, pool, table_name, &row, row_number) + .await + .map_err(|e| Configuration(e.into()))?; + eprintln!("UPDATES_BEFORE: {:#?}", updates_before); + eprintln!("UPDATES_AFTER: {:#?}", updates_after); let query_as_if = QueryAsIf { - kind: QueryAsIfKind::Update, + kind: QueryAsIfKind::Replace, table: table_name.to_string(), alias: format!("{}_as_if", table_name), row_number: *row_number, row: Some(row.clone()), }; - for (update_table, rows_to_update) in &updates { + // TODO: Factor this code out into a function instead of repeating it twice for + // updates_after and updates_before + for (update_table, rows_to_update) in &updates_before { for (row_number, row) in rows_to_update { eprintln!( "VALIDATING ROW NUMBER {} OF {}, ROW: {:#?}", @@ -1666,13 +1696,48 @@ pub async fn update_row( query = query.bind(param); } match query.execute(pool).await { - Ok(_) => eprintln!("It worked!!"), + Ok(_) => eprintln!("Before updates done! Now trying after updates."), Err(e) => { + // TODO NEXT: Instead of returning an error here, we should try to insert to the + // conflict table like we do during bulk loading. return Err(Configuration( format!("Arghh!! 
Got schmatabase error: {}", e).into(), - )) + )); } }; + + for (update_table, rows_to_update) in &updates_after { + for (row_number, row) in rows_to_update { + eprintln!( + "VALIDATING ROW NUMBER {} OF {}, ROW: {:#?}", + row_number, update_table, row + ); + // Validate each row 'counterfactually' (see above): + let vrow = validate_row( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + update_table, + row, + Some(*row_number), + Some(&query_as_if), + ) + .await?; + eprintln!("VALIDATED ROW: {:#?}", vrow); + // Update the row in the database: + block_on(update_row( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + update_table, + &vrow, + row_number, + true, + ))?; + } + } } }; diff --git a/src/validate.rs b/src/validate.rs index d5497466..12e0a395 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -29,7 +29,7 @@ pub struct ResultRow { /// TODO: Add a docstring here #[derive(Clone, Debug, PartialEq)] pub enum QueryAsIfKind { - Update, + Replace, Ignore, } @@ -125,7 +125,6 @@ pub async fn validate_row( // they can result in database errors when, for instance, we compare a numeric with a // non-numeric type. if cell.valid || !contains_dt_violation(&cell.messages) { - // TODO: Pass query_as_if here: validate_cell_trees( config, pool, @@ -136,7 +135,6 @@ pub async fn validate_row( &vec![], ) .await?; - // DONE. (TODO: Remove this comment later.) validate_cell_foreign_constraints( config, pool, @@ -146,7 +144,6 @@ pub async fn validate_row( query_as_if, ) .await?; - // TODO: Pass query_as_if here: validate_cell_unique_constraints( config, pool, @@ -161,7 +158,7 @@ pub async fn validate_row( } } - // TODO: Pass query_as_if here: + // TODO. let mut violations = validate_tree_foreign_keys( config, pool, @@ -170,7 +167,7 @@ pub async fn validate_row( ) .await?; violations.append( - // TODO: Pass query_as_if here: + // TODO. 
&mut validate_under( config, pool, @@ -1166,6 +1163,8 @@ fn validate_cell_nulltype( column_name: &String, cell: &mut ResultCell, ) { + //eprintln!("GETTING COLUMN CONFIG FOR {}.{}", table_name, column_name); + let column = config .get("table") .and_then(|t| t.as_object()) From 2cab8f7fb531af1331fb0f86d997877987dca968 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Wed, 28 Jun 2023 13:12:15 -0400 Subject: [PATCH 08/31] support postgresql --- src/lib.rs | 55 +++++++++++++++++++++++++++++-------------------- src/validate.rs | 6 +++++- 2 files changed, 38 insertions(+), 23 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 75e5eef5..6f06aed4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1191,7 +1191,7 @@ pub async fn get_rows_to_update( ), String, > { - // eprintln!("GETTING UPDATES FOR ROW {} OF {}", row_number, table); + eprintln!("GETTING UPDATES FOR ROW {} OF {}", row_number, table); fn get_cell_value(row: &SerdeMap, column: &str) -> Result { match row.get(column).and_then(|cell| cell.get("value")) { @@ -1254,11 +1254,6 @@ pub async fn get_rows_to_update( global_config: &SerdeMap, pool: &AnyPool, ) -> Result, String> { - // eprintln!("GETTING AFFECTED ROWS FOR {}.{}", table, column); - - let sql_type = - get_sql_type_from_global_config(&global_config, &table, &column, pool).unwrap(); - let sql = { let is_clause = if pool.any_kind() == AnyKind::Sqlite { "IS" @@ -1266,7 +1261,7 @@ pub async fn get_rows_to_update( "IS NOT DISTINCT FROM" }; - let select_columns = global_config + let real_columns = global_config .get("table") .and_then(|t| t.get(table)) .and_then(|t| t.as_object()) @@ -1277,7 +1272,7 @@ pub async fn get_rows_to_update( .and_then(|t| Some(t.collect::>())) .unwrap(); - let mut select_columns = select_columns + let mut inner_columns = real_columns .iter() .map(|c| { format!( @@ -1304,9 +1299,20 @@ pub async fn get_rows_to_update( }) .collect::>(); - let select_columns = { + let mut outer_columns = real_columns + .iter() + .map(|c| 
format!("t.\"{}_extended\"", c)) + .collect::>(); + + let inner_columns = { let mut v = vec!["row_number".to_string()]; - v.append(&mut select_columns); + v.append(&mut inner_columns); + v + }; + + let outer_columns = { + let mut v = vec!["t.row_number".to_string()]; + v.append(&mut outer_columns); v }; @@ -1314,18 +1320,19 @@ pub async fn get_rows_to_update( // (in the conflict table) becoming valid or vice versa, we need to check rows for // which the value of the column is the same as `value` - //let mike_sql = format!( - "SELECT {columns} FROM \"{table}_view\" \ - WHERE \"{column}_extended\" = '{value}'", - columns = select_columns.join(", "), + r#"SELECT {outer_columns} + FROM ( + SELECT {inner_columns} + FROM "{table}_view" + ) t + WHERE "{column}_extended" = '{value}'"#, + outer_columns = outer_columns.join(", "), + inner_columns = inner_columns.join(", "), table = table, column = column, value = value ) - //; - //eprintln!("MIKE SQL: {}", mike_sql); - //mike_sql }; // eprintln!("SQL: {}", sql); @@ -1414,8 +1421,10 @@ pub async fn get_rows_to_update( // Query dependent_table.dependent_column for the rows that will be affected by the change // from the current to the new value: - //eprintln!("LOOKING FOR UPDATES BEFORE IN {} USING VALUE: '{}'", - // dependent_table, current_value); + //eprintln!( + // "LOOKING FOR UPDATES BEFORE IN {} USING VALUE: '{}'", + // dependent_table, current_value + //); let updates_before = get_affected_rows( dependent_table, dependent_column, @@ -1426,8 +1435,10 @@ pub async fn get_rows_to_update( .await?; //eprintln!("UPDATES BEFORE ARE: {:#?}", updates_before); - //eprintln!("LOOKING FOR UPDATES AFTER IN {} USING VALUE: '{}'", - // dependent_table, new_value); + //eprintln!( + // "LOOKING FOR UPDATES AFTER IN {} USING VALUE: '{}'", + // dependent_table, new_value + //); let updates_after = get_affected_rows( dependent_table, dependent_column, @@ -1619,7 +1630,7 @@ pub async fn update_row( } match query.execute(pool).await { Ok(_) 
=> (), - Err(e) => { + Err(_) => { // Overview: // --------- // We need to call something similar to validate_row() on every affected row in the diff --git a/src/validate.rs b/src/validate.rs index 12e0a395..7d3bc4c9 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -1436,7 +1436,9 @@ fn as_if_to_sql( ) .unwrap(); - if sql_type.to_lowercase() == "text" { + if sql_type.to_lowercase() == "text" + || sql_type.to_lowercase().starts_with("varchar(") + { values.push(format!("'{}'", value)); } else { values.push(value.to_string()); @@ -1529,8 +1531,10 @@ async fn validate_cell_foreign_constraints( ), ); //eprintln!("FSQL FOR {:?}: {}", fkey, fsql); + //eprintln!("FSQL: {}", fsql); let frows = sqlx_query(&fsql).bind(&cell.value).fetch_all(pool).await?; + //eprintln!("SUCCEEDED!!!"); if frows.is_empty() { cell.valid = false; let mut message = json!({ From 51a1a1440e9f437c01cee4b221aa31ce97052f09 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Wed, 28 Jun 2023 16:15:09 -0400 Subject: [PATCH 09/31] in update_row(), write to the conflict instead of the normal table when appropriate --- Cargo.lock | 53 +++++--- Cargo.toml | 1 + src/lib.rs | 386 +++++++++++++++++++++++++++++++---------------------- 3 files changed, 267 insertions(+), 173 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 49c62db2..868a223c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -53,7 +53,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3203e79f4dd9bdda415ed03cf14dae5a2bf775c683a00f94e9cd1faf0f596e5" dependencies = [ "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -144,6 +144,17 @@ dependencies = [ "windows-sys 0.42.0", ] +[[package]] +name = "async-recursion" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.22", +] + [[package]] name = "async-scoped" version = "0.7.1" @@ -485,7 +496,7 @@ 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" dependencies = [ "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -512,7 +523,7 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn", + "syn 1.0.109", ] [[package]] @@ -529,7 +540,7 @@ checksum = "34fa7e395dc1c001083c7eed28c8f0f0b5a225610f3b6284675f444af6fab86b" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -765,7 +776,7 @@ checksum = "bdfb8ce053d86b91919aad980c220b1fb8401a9394410e1c289ed7e66b61835d" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1205,6 +1216,7 @@ name = "ontodev_valve" version = "0.2.2" dependencies = [ "argparse", + "async-recursion", "async-scoped", "async-std", "chrono", @@ -1334,7 +1346,7 @@ checksum = "069bdb1e05adc7a8990dce9cc75370895fbe4e3d58b9b73bf1aee56359344a55" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1383,18 +1395,18 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" [[package]] name = "proc-macro2" -version = "1.0.47" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" +checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.21" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" dependencies = [ "proc-macro2", ] @@ -1573,7 +1585,7 @@ checksum = "a55492425aa53521babf6137309e7d34c20bbfbbfcfe2c7f3a047fd1f6b92c0c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -1765,7 +1777,7 @@ dependencies = [ "sha2", 
"sqlx-core", "sqlx-rt", - "syn", + "syn 1.0.109", "url", ] @@ -1819,6 +1831,17 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syn" +version = "2.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2efbeae7acf4eabd6bcdcbd11c92f45231ddda7539edc7806bd1a04a03b24616" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "term" version = "0.7.0" @@ -1856,7 +1879,7 @@ checksum = "982d17546b47146b28f7c22e3d08465f6b8903d0ea13c1660d9d84a6e7adcdbb" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -2023,7 +2046,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 1.0.109", "wasm-bindgen-shared", ] @@ -2057,7 +2080,7 @@ checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", "wasm-bindgen-backend", "wasm-bindgen-shared", ] diff --git a/Cargo.toml b/Cargo.toml index 823bd736..8ea487f0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ lalrpop = { version = "0.19.9", features = ["lexer"] } [dependencies] argparse = "0.2.2" async-std = { version = "1", features = [ "attributes" ] } +async-recursion = "1.0.4" async-scoped = { version = "0.7.0", features = [ "use-async-std" ] } chrono = "0.4.23" crossbeam = "0.8.1" diff --git a/src/lib.rs b/src/lib.rs index 6f06aed4..97f3dec9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,6 +28,7 @@ use crate::validate::{ validate_tree_foreign_keys, validate_under, QueryAsIf, QueryAsIfKind, ResultRow, }; use crate::{ast::Expression, valve_grammar::StartParser}; +use async_recursion::async_recursion; use chrono::Utc; use crossbeam; use futures::executor::block_on; @@ -1058,7 +1059,8 @@ pub async fn valve( } /// Given a global config map, a database connection pool, a table name, and a row, assign a new -/// row number to the row and insert it to the database, then return the new row number. 
+/// row number to the row and insert it to the database, then return the new row number. Optionally, +/// if row_number is provided, use that to identify the new row. pub async fn insert_new_row( global_config: &SerdeMap, compiled_datatype_conditions: &HashMap, @@ -1066,38 +1068,51 @@ pub async fn insert_new_row( pool: &AnyPool, table_name: &str, row: &SerdeMap, + new_row_number: Option<&u32>, + skip_validation: bool, ) -> Result { // First, send the row through the row validator to determine if any fields are problematic and // to mark them with appropriate messages: - let row = validate_row( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - table_name, - row, - None, - None, - ) - .await?; + + let row = if !skip_validation { + validate_row( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + table_name, + row, + None, + None, + ) + .await? + } else { + row.clone() + }; // Now prepare the row and messages for insertion to the database. - // The new row number to insert is the current highest row number + 1. - let sql = format!( - r#"SELECT MAX("row_number") AS "row_number" FROM "{}_view""#, - table_name - ); - let query = sqlx_query(&sql); - let result_row = query.fetch_one(pool).await?; - let result = result_row.try_get_raw("row_number").unwrap(); - let new_row_number: i64; - if result.is_null() { - new_row_number = 1; - } else { - new_row_number = result_row.get("row_number"); - } - let new_row_number = new_row_number as u32 + 1; + let new_row_number = match new_row_number { + Some(n) => *n, + None => { + // The new row number to insert is the current highest row number + 1. 
+ let sql = format!( + r#"SELECT MAX("row_number") AS "row_number" FROM "{}_view""#, + table_name + ); + let query = sqlx_query(&sql); + let result_row = query.fetch_one(pool).await?; + let result = result_row.try_get_raw("row_number").unwrap(); + let new_row_number: i64; + if result.is_null() { + new_row_number = 1; + } else { + new_row_number = result_row.get("row_number"); + } + let new_row_number = new_row_number as u32 + 1; + new_row_number + } + }; let mut insert_columns = vec![]; let mut insert_values = vec![]; @@ -1464,6 +1479,7 @@ pub async fn get_rows_to_update( /// Given global config map, a database connection pool, a table name, a row, and the row number to /// update, update the corresponding row in the database with new values as specified by `row`. +#[async_recursion] pub async fn update_row( global_config: &SerdeMap, compiled_datatype_conditions: &HashMap, @@ -1614,143 +1630,197 @@ pub async fn update_row( } } - // Update the given row in the table: - let mut update_stmt = format!(r#"UPDATE "{}" SET "#, table_name); - update_stmt.push_str(&assignments.join(", ")); - update_stmt.push_str(&format!(r#" WHERE "row_number" = {}"#, row_number)); - let update_stmt = local_sql_syntax(&pool, &update_stmt); - //eprintln!( - // "***** In update_row(). Running update statement: {} with params: {:?}", - // update_stmt, params, - //); + // Overview: + // --------- + // We need to call something similar to validate_row() on every affected row in the + // dependent table, with the hitch that we need to validate them with the target row + // of the target table in the database replaced, somehow, with the modified version + // of it represented by `row`. (In the case of delete_row() we would simply have to + // ignore the target row in the database somehow, maybe by excluding its row number + // using a CTE). Those re-validated rows should then be sent to update_row() which will + // try to insert them into the database one by one. 
In the case of a database error + // this code (i.e., in this block) will be triggered again, and so on, recursively + // until everything succeeds. + // + // Note also that we might want to run ANALYZE (or the sqlite equivalent) after + // the update has completed. + + // Step 1: + // ------ + // Look through the valve config to see which tables are dependent on this table + // and find the rows that need to be updated. + + let (updates_before, updates_after) = + get_rows_to_update(global_config, pool, table_name, &row, row_number) + .await + .map_err(|e| Configuration(e.into()))?; + eprintln!("UPDATES_BEFORE: {:#?}", updates_before); + eprintln!("UPDATES_AFTER: {:#?}", updates_after); + + let query_as_if = QueryAsIf { + kind: QueryAsIfKind::Replace, + table: table_name.to_string(), + alias: format!("{}_as_if", table_name), + row_number: *row_number, + row: Some(row.clone()), + }; - let mut query = sqlx_query(&update_stmt); - for param in ¶ms { - query = query.bind(param); + // TODO: Factor this code out into a function instead of repeating it twice for + // updates_after and updates_before (see below) + for (update_table, rows_to_update) in &updates_before { + for (row_number, row) in rows_to_update { + eprintln!( + "VALIDATING ROW NUMBER {} OF {}, ROW: {:#?}", + row_number, update_table, row + ); + // Validate each row 'counterfactually' (see above): + let vrow = validate_row( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + update_table, + row, + Some(*row_number), + Some(&query_as_if), + ) + .await?; + eprintln!("VALIDATED ROW: {:#?}", vrow); + // Update the row in the database: + update_row( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + update_table, + &vrow, + row_number, + true, + ) + .await?; + } } - match query.execute(pool).await { - Ok(_) => (), - Err(_) => { - // Overview: - // --------- - // We need to call something similar to validate_row() on every affected row in the - 
// dependent table, with the hitch that we need to validate them with the target row - // of the target table in the database replaced, somehow, with the modified version - // of it represented by `row`. (In the case of delete_row() we would simply have to - // ignore the target row in the database somehow, maybe by excluding its row number - // using a CTE). Those re-validated rows should then be sent to update_row() which will - // try to insert them into the database one by one. In the case of a database error - // this code (i.e., in this block) will be triggered again, and so on, recursively - // until everything succeeds. - // - // Note also that we might want to run ANALYZE (or the sqlite equivalent) after - // the update has completed. - - // Step 1: - // ------ - // Look through the valve config to see which tables are dependent on this table - // and find the rows that need to be updated. - - let (updates_before, updates_after) = - get_rows_to_update(global_config, pool, table_name, &row, row_number) - .await - .map_err(|e| Configuration(e.into()))?; - eprintln!("UPDATES_BEFORE: {:#?}", updates_before); - eprintln!("UPDATES_AFTER: {:#?}", updates_after); - - let query_as_if = QueryAsIf { - kind: QueryAsIfKind::Replace, - table: table_name.to_string(), - alias: format!("{}_as_if", table_name), - row_number: *row_number, - row: Some(row.clone()), - }; - // TODO: Factor this code out into a function instead of repeating it twice for - // updates_after and updates_before - for (update_table, rows_to_update) in &updates_before { - for (row_number, row) in rows_to_update { - eprintln!( - "VALIDATING ROW NUMBER {} OF {}, ROW: {:#?}", - row_number, update_table, row - ); - // Validate each row 'counterfactually' (see above): - let vrow = validate_row( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - update_table, - row, - Some(*row_number), - Some(&query_as_if), - ) - .await?; - eprintln!("VALIDATED ROW: {:#?}", vrow); - // 
Update the row in the database: - block_on(update_row( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - update_table, - &vrow, - row_number, - true, - ))?; - } - } - // Finally, retry the update_row() call that resulted in our being in the Err - // branch to begin with. - let mut query = sqlx_query(&update_stmt); - for param in ¶ms { - query = query.bind(param); - } - match query.execute(pool).await { - Ok(_) => eprintln!("Before updates done! Now trying after updates."), - Err(e) => { - // TODO NEXT: Instead of returning an error here, we should try to insert to the - // conflict table like we do during bulk loading. - return Err(Configuration( - format!("Arghh!! Got schmatabase error: {}", e).into(), - )); - } - }; + // Update the target row - for (update_table, rows_to_update) in &updates_after { - for (row_number, row) in rows_to_update { - eprintln!( - "VALIDATING ROW NUMBER {} OF {}, ROW: {:#?}", - row_number, update_table, row - ); - // Validate each row 'counterfactually' (see above): - let vrow = validate_row( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - update_table, - row, - Some(*row_number), - Some(&query_as_if), - ) - .await?; - eprintln!("VALIDATED ROW: {:#?}", vrow); - // Update the row in the database: - block_on(update_row( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - update_table, - &vrow, - row_number, - true, - ))?; + // Figure out where the row currently is: + let sql = format!( + "SELECT 1 FROM \"{}\" WHERE row_number = {}", + table_name, row_number + ); + let query = sqlx_query(&sql); + let rows = query.fetch_all(pool).await?; + let mut current_table = String::from(table_name); + if rows.len() == 0 { + current_table.push_str("_conflict"); + } + + // Figure out where the row needs to go: + let mut table_to_write = String::from(table_name); + for (column, cell) in row.iter() { + let valid = cell.get("valid").unwrap(); + if 
valid == false { + let structure = global_config + .get("table") + .and_then(|t| t.as_object()) + .and_then(|t| t.get(table_name)) + .and_then(|t| t.as_object()) + .and_then(|t| t.get("column")) + .and_then(|c| c.as_object()) + .and_then(|c| c.get(column)) + .and_then(|c| c.as_object()) + .and_then(|c| c.get("structure")) + .and_then(|s| s.as_str()) + .unwrap_or(""); + if vec!["primary", "unique"].contains(&structure) || structure.starts_with("tree(") { + let messages = cell.get("messages").and_then(|m| m.as_array()).unwrap(); + for msg in messages { + let level = msg.get("level").and_then(|l| l.as_str()).unwrap(); + eprintln!("LEVEL: {}", level); + if level == "error" { + table_to_write.push_str("_conflict"); + break; + } } } } - }; + } + + if table_to_write == current_table { + let mut update_stmt = format!(r#"UPDATE "{}" SET "#, table_to_write); + update_stmt.push_str(&assignments.join(", ")); + update_stmt.push_str(&format!(r#" WHERE "row_number" = {}"#, row_number)); + let update_stmt = local_sql_syntax(&pool, &update_stmt); + eprintln!( + "Table_to_write is current_table. Running update statement: {} with params: {:?}", + update_stmt, params, + ); + + let mut query = sqlx_query(&update_stmt); + for param in ¶ms { + query = query.bind(param); + } + query.execute(pool).await?; + } else { + let sql = format!( + "DELETE FROM \"{}\" WHERE row_number = {}", + current_table, row_number + ); + eprintln!( + "Table to write is not the same as current table. 
Running {}", + sql + ); + let query = sqlx_query(&sql); + query.execute(pool).await?; + eprintln!("INSERTING NEW ROW"); + insert_new_row( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + &table_to_write, + &row, + Some(row_number), + true, + ) + .await?; + } + + // TODO: Factor this code out into a function instead of repeating it twice for + // updates_after and updates_before (see below) + for (update_table, rows_to_update) in &updates_after { + for (row_number, row) in rows_to_update { + eprintln!( + "VALIDATING ROW NUMBER {} OF {}, ROW: {:#?}", + row_number, update_table, row + ); + // Validate each row 'counterfactually' (see above): + let vrow = validate_row( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + update_table, + row, + Some(*row_number), + Some(&query_as_if), + ) + .await?; + eprintln!("VALIDATED ROW: {:#?}", vrow); + // Update the row in the database: + update_row( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + update_table, + &vrow, + row_number, + true, + ) + .await?; + } + } // Then delete any messages that had been previously inserted to the message table for the old // version of this row (other than any 'update'-level messages): @@ -2044,7 +2114,7 @@ fn get_sql_type(dt_config: &SerdeMap, datatype: &String, pool: &AnyPool) -> Opti /// Given the global config map, a table name, a column name, and a database connection pool /// used to determine the database type return the column's SQL type. 
-fn get_sql_type_from_global_config( +pub fn get_sql_type_from_global_config( global_config: &SerdeMap, table: &str, column: &str, From b304c626c6d5b6276396d6f2a2f9bd7093ad9aba Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Wed, 28 Jun 2023 17:20:28 -0400 Subject: [PATCH 10/31] cleanup and refactor --- src/lib.rs | 429 ++++++++++++++++++++++-------------------------- src/validate.rs | 9 +- 2 files changed, 204 insertions(+), 234 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 97f3dec9..f5a75fd5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1193,6 +1193,128 @@ pub async fn insert_new_row( Ok(new_row_number) } +pub async fn get_affected_rows( + table: &str, + column: &str, + value: &str, + global_config: &SerdeMap, + pool: &AnyPool, +) -> Result, String> { + let sql = { + let is_clause = if pool.any_kind() == AnyKind::Sqlite { + "IS" + } else { + "IS NOT DISTINCT FROM" + }; + + let real_columns = global_config + .get("table") + .and_then(|t| t.get(table)) + .and_then(|t| t.as_object()) + .and_then(|t| t.get("column")) + .and_then(|t| t.as_object()) + .and_then(|t| Some(t.keys())) + .and_then(|k| Some(k.map(|k| k.to_string()))) + .and_then(|t| Some(t.collect::>())) + .unwrap(); + + let mut inner_columns = real_columns + .iter() + .map(|c| { + format!( + r#"CASE + WHEN "{column}" {is_clause} NULL THEN ( + SELECT value + FROM "message" + WHERE "row" = "row_number" + AND "column" = '{column}' + AND "table" = '{table}' + ORDER BY "message_id" DESC + LIMIT 1 + ) + ELSE {casted_column} + END AS "{column}_extended""#, + casted_column = if pool.any_kind() == AnyKind::Sqlite { + cast_column_sql_to_text(c, "non-text") + } else { + format!("\"{}\"::TEXT", c) + }, + column = c, + table = table, + ) + }) + .collect::>(); + + let mut outer_columns = real_columns + .iter() + .map(|c| format!("t.\"{}_extended\"", c)) + .collect::>(); + + let inner_columns = { + let mut v = vec!["row_number".to_string()]; + v.append(&mut inner_columns); + v + }; + + let outer_columns = { 
+ let mut v = vec!["t.row_number".to_string()]; + v.append(&mut outer_columns); + v + }; + + // Since the consequence of an update could involve currently invalid rows + // (in the conflict table) becoming valid or vice versa, we need to check rows for + // which the value of the column is the same as `value` + + format!( + r#"SELECT {outer_columns} + FROM ( + SELECT {inner_columns} + FROM "{table}_view" + ) t + WHERE "{column}_extended" = '{value}'"#, + outer_columns = outer_columns.join(", "), + inner_columns = inner_columns.join(", "), + table = table, + column = column, + value = value + ) + }; + // eprintln!("SQL: {}", sql); + + let query = sqlx_query(&sql); + let mut table_rows = IndexMap::new(); + for row in query.fetch_all(pool).await.map_err(|e| e.to_string())? { + let mut table_row = SerdeMap::new(); + let mut row_number: Option = None; + for column in row.columns() { + let cname = column.name(); + if cname == "row_number" { + row_number = Some(row.get::("row_number") as u32); + } else { + let raw_value = row.try_get_raw(format!(r#"{}"#, cname).as_str()).unwrap(); + let value; + if !raw_value.is_null() { + value = get_column_value(&row, &cname, "text"); + } else { + value = String::from(""); + } + let cell = json!({ + "value": value, + "valid": true, + "messages": json!([]), + }); + let cname = cname.strip_suffix("_extended").unwrap(); + table_row.insert(cname.to_string(), json!(cell)); + } + } + let row_number = row_number.ok_or("Row: has no row number".to_string())?; + table_rows.insert(row_number, table_row); + } + + Ok(table_rows) +} + pub async fn get_rows_to_update( global_config: &SerdeMap, pool: &AnyPool, @@ -1206,8 +1328,6 @@ pub async fn get_rows_to_update( ), String, > { - eprintln!("GETTING UPDATES FOR ROW {} OF {}", row_number, table); - fn get_cell_value(row: &SerdeMap, column: &str) -> Result { match row.get(column).and_then(|cell| cell.get("value")) { Some(SerdeValue::String(s)) => Ok(format!("{}", s)), @@ -1253,7 +1373,6 @@ pub async fn 
get_rows_to_update( cast = cast, row_number = row_number, ); - // eprintln!("SQL FOR CURRENT VALUE:\n{}", sql); let query = sqlx_query(&sql); let result_row = query.fetch_one(pool).await.map_err(|e| e.to_string())?; @@ -1261,129 +1380,6 @@ pub async fn get_rows_to_update( Ok(value.to_string()) } - // TODO: Make this an outer function and make it public. - async fn get_affected_rows( - table: &str, - column: &str, - value: &str, - global_config: &SerdeMap, - pool: &AnyPool, - ) -> Result, String> { - let sql = { - let is_clause = if pool.any_kind() == AnyKind::Sqlite { - "IS" - } else { - "IS NOT DISTINCT FROM" - }; - - let real_columns = global_config - .get("table") - .and_then(|t| t.get(table)) - .and_then(|t| t.as_object()) - .and_then(|t| t.get("column")) - .and_then(|t| t.as_object()) - .and_then(|t| Some(t.keys())) - .and_then(|k| Some(k.map(|k| k.to_string()))) - .and_then(|t| Some(t.collect::>())) - .unwrap(); - - let mut inner_columns = real_columns - .iter() - .map(|c| { - format!( - r#"CASE - WHEN "{column}" {is_clause} NULL THEN ( - SELECT value - FROM "message" - WHERE "row" = "row_number" - AND "column" = '{column}' - AND "table" = '{table}' - ORDER BY "message_id" DESC - LIMIT 1 - ) - ELSE {casted_column} - END AS "{column}_extended""#, - casted_column = if pool.any_kind() == AnyKind::Sqlite { - cast_column_sql_to_text(c, "non-text") - } else { - format!("\"{}\"::TEXT", c) - }, - column = c, - table = table, - ) - }) - .collect::>(); - - let mut outer_columns = real_columns - .iter() - .map(|c| format!("t.\"{}_extended\"", c)) - .collect::>(); - - let inner_columns = { - let mut v = vec!["row_number".to_string()]; - v.append(&mut inner_columns); - v - }; - - let outer_columns = { - let mut v = vec!["t.row_number".to_string()]; - v.append(&mut outer_columns); - v - }; - - // Since the consequence of an update could involve currently invalid rows - // (in the conflict table) becoming valid or vice versa, we need to check rows for - // which the value 
of the column is the same as `value` - - format!( - r#"SELECT {outer_columns} - FROM ( - SELECT {inner_columns} - FROM "{table}_view" - ) t - WHERE "{column}_extended" = '{value}'"#, - outer_columns = outer_columns.join(", "), - inner_columns = inner_columns.join(", "), - table = table, - column = column, - value = value - ) - }; - // eprintln!("SQL: {}", sql); - - let query = sqlx_query(&sql); - let mut table_rows = IndexMap::new(); - for row in query.fetch_all(pool).await.map_err(|e| e.to_string())? { - let mut table_row = SerdeMap::new(); - let mut row_number: Option = None; - for column in row.columns() { - let cname = column.name(); - if cname == "row_number" { - row_number = Some(row.get::("row_number") as u32); - } else { - let raw_value = row.try_get_raw(format!(r#"{}"#, cname).as_str()).unwrap(); - let value; - if !raw_value.is_null() { - value = get_column_value(&row, &cname, "text"); - } else { - value = String::from(""); - } - let cell = json!({ - "value": value, - "valid": true, - "messages": json!([]), - }); - let cname = cname.strip_suffix("_extended").unwrap(); - table_row.insert(cname.to_string(), json!(cell)); - } - } - let row_number = row_number.ok_or("Row: has no row number".to_string())?; - table_rows.insert(row_number, table_row); - } - - Ok(table_rows) - } - let foreign_dependencies = { let mut foreign_dependencies = vec![]; let global_fconstraints = global_config @@ -1404,11 +1400,6 @@ pub async fn get_rows_to_update( foreign_dependencies }; - //eprintln!( - // "FOREIGN KEYS THAT DEPEND ON {}: {:#?}", - // table, foreign_dependencies - //); - let mut rows_to_update_before = IndexMap::new(); let mut rows_to_update_after = IndexMap::new(); for fdep in &foreign_dependencies { @@ -1420,26 +1411,14 @@ pub async fn get_rows_to_update( // Fetch the cell corresponding to `column` from `row`, and the value of that cell, // which is the new value for the row. 
let new_value = get_cell_value(row, target_column)?; - //eprintln!( - // "NEW VALUE OF {}.{}: {}", - // target_table, target_column, new_value - //); // Query the database using `row_number` to get the current value of the column for // the row. let current_value = get_current_value(target_table, target_column, row_number, pool).await?; - //eprintln!( - // "CURRENT VALUE OF {}.{}: {}", - // target_table, target_column, current_value - //); // Query dependent_table.dependent_column for the rows that will be affected by the change // from the current to the new value: - //eprintln!( - // "LOOKING FOR UPDATES BEFORE IN {} USING VALUE: '{}'", - // dependent_table, current_value - //); let updates_before = get_affected_rows( dependent_table, dependent_column, @@ -1448,12 +1427,7 @@ pub async fn get_rows_to_update( pool, ) .await?; - //eprintln!("UPDATES BEFORE ARE: {:#?}", updates_before); - //eprintln!( - // "LOOKING FOR UPDATES AFTER IN {} USING VALUE: '{}'", - // dependent_table, new_value - //); let updates_after = get_affected_rows( dependent_table, dependent_column, @@ -1462,17 +1436,16 @@ pub async fn get_rows_to_update( pool, ) .await?; - //eprintln!("UPDATES AFTER ARE: {:#?}", updates_after); rows_to_update_before.insert(dependent_table.to_string(), updates_before); rows_to_update_after.insert(dependent_table.to_string(), updates_after); } - // TODO (later): tree. - - // TODO (later): under. + // TODO: Collect the dependencies for tree constraints similarly to the way we + // collect foreign constraints (see just above). - // TODO (later): unique and primary. + // TODO: Collect the dependencies for under constraints similarly to the way we + // collect foreign constraints (see just above). Ok((rows_to_update_before, rows_to_update_after)) } @@ -1493,6 +1466,7 @@ pub async fn update_row( // eprintln!("***** In update_row(). Got row: {:#?}", row); // TODO: If possible use BEGIN and END TRANSACTION here and ROLLBACK in case of an error. 
+ // Maybe we need a wrapper function for this. // First, send the row through the row validator to determine if any fields are problematic and // to mark them with appropriate messages: @@ -1511,7 +1485,6 @@ pub async fn update_row( } else { row.clone() }; - //eprintln!("***** In update_row(). Row after validation: {:#?}", row); // Now prepare the row and messages for the database update: let mut assignments = vec![]; @@ -1645,8 +1618,52 @@ pub async fn update_row( // Note also that we might want to run ANALYZE (or the sqlite equivalent) after // the update has completed. - // Step 1: - // ------ + async fn process_updates( + global_config: &SerdeMap, + compiled_datatype_conditions: &HashMap, + compiled_rule_conditions: &HashMap>>, + pool: &AnyPool, + updates: &IndexMap>, + query_as_if: &QueryAsIf, + ) -> Result<(), sqlx::Error> { + // TODO: Factor this code out into a function instead of repeating it twice for + // updates_after and updates_before (see below) + for (update_table, rows_to_update) in updates { + for (row_number, row) in rows_to_update { + eprintln!( + "VALIDATING ROW NUMBER {} OF {}, ROW: {:#?}", + row_number, update_table, row + ); + // Validate each row 'counterfactually' (see above): + let vrow = validate_row( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + update_table, + row, + Some(*row_number), + Some(&query_as_if), + ) + .await?; + eprintln!("VALIDATED ROW: {:#?}", vrow); + // Update the row in the database: + update_row( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + update_table, + &vrow, + row_number, + true, + ) + .await?; + } + } + Ok(()) + } + // Look through the valve config to see which tables are dependent on this table // and find the rows that need to be updated. 
@@ -1665,45 +1682,20 @@ pub async fn update_row( row: Some(row.clone()), }; - // TODO: Factor this code out into a function instead of repeating it twice for - // updates_after and updates_before (see below) - for (update_table, rows_to_update) in &updates_before { - for (row_number, row) in rows_to_update { - eprintln!( - "VALIDATING ROW NUMBER {} OF {}, ROW: {:#?}", - row_number, update_table, row - ); - // Validate each row 'counterfactually' (see above): - let vrow = validate_row( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - update_table, - row, - Some(*row_number), - Some(&query_as_if), - ) - .await?; - eprintln!("VALIDATED ROW: {:#?}", vrow); - // Update the row in the database: - update_row( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - update_table, - &vrow, - row_number, - true, - ) - .await?; - } - } + // Process the updates that need to be performed before the update of the target row: + process_updates( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + &updates_before, + &query_as_if, + ) + .await?; - // Update the target row + // Update the target row. 
- // Figure out where the row currently is: + // First, figure out where the row currently is: let sql = format!( "SELECT 1 FROM \"{}\" WHERE row_number = {}", table_name, row_number @@ -1715,7 +1707,7 @@ pub async fn update_row( current_table.push_str("_conflict"); } - // Figure out where the row needs to go: + // Next, figure out where the row needs to go: let mut table_to_write = String::from(table_name); for (column, cell) in row.iter() { let valid = cell.get("valid").unwrap(); @@ -1746,6 +1738,8 @@ pub async fn update_row( } } + // Update or insert, dependeing on whether the table to write to is the same as the table that + // the row is currently in: if table_to_write == current_table { let mut update_stmt = format!(r#"UPDATE "{}" SET "#, table_to_write); update_stmt.push_str(&assignments.join(", ")); @@ -1786,43 +1780,18 @@ pub async fn update_row( .await?; } - // TODO: Factor this code out into a function instead of repeating it twice for - // updates_after and updates_before (see below) - for (update_table, rows_to_update) in &updates_after { - for (row_number, row) in rows_to_update { - eprintln!( - "VALIDATING ROW NUMBER {} OF {}, ROW: {:#?}", - row_number, update_table, row - ); - // Validate each row 'counterfactually' (see above): - let vrow = validate_row( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - update_table, - row, - Some(*row_number), - Some(&query_as_if), - ) - .await?; - eprintln!("VALIDATED ROW: {:#?}", vrow); - // Update the row in the database: - update_row( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - update_table, - &vrow, - row_number, - true, - ) - .await?; - } - } + // Process the updates that need to be performed after the update of the target row: + process_updates( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + &updates_after, + &query_as_if, + ) + .await?; - // Then delete any messages that had been 
previously inserted to the message table for the old + // Now delete any messages that had been previously inserted to the message table for the old // version of this row (other than any 'update'-level messages): let delete_sql = format!( r#"DELETE FROM "message" WHERE "table" = '{}' AND "row" = {} AND "level" <> 'update'"#, diff --git a/src/validate.rs b/src/validate.rs index 7d3bc4c9..5fbfac8a 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -26,7 +26,7 @@ pub struct ResultRow { pub contents: IndexMap, } -/// TODO: Add a docstring here +/// The sense in which a [QueryAsIf] struct should be interpreted. #[derive(Clone, Debug, PartialEq)] pub enum QueryAsIfKind { Replace, @@ -158,7 +158,7 @@ pub async fn validate_row( } } - // TODO. + // TODO: Possibly propagate `query_as_if` down into this function: let mut violations = validate_tree_foreign_keys( config, pool, @@ -167,7 +167,7 @@ pub async fn validate_row( ) .await?; violations.append( - // TODO. + // TODO: Possibly propagate `query_as_if` down into this function: &mut validate_under( config, pool, @@ -1390,7 +1390,8 @@ fn validate_cell_rules( } } -/// TODO: Add docstring here +/// Generates an SQL fragment representing the "as if" portion of a query that will be used for +/// counterfactual validation. 
fn as_if_to_sql( global_config: &SerdeMap, pool: &AnyPool, From 7cc21adca7d4c37b804f05f59b3ca34bede0c67d Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Thu, 29 Jun 2023 08:59:05 -0400 Subject: [PATCH 11/31] more cleanup --- src/lib.rs | 293 ++++++++++++++++++++++-------------------------- src/validate.rs | 6 - 2 files changed, 134 insertions(+), 165 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f5a75fd5..6bbb03a8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1280,7 +1280,6 @@ pub async fn get_affected_rows( value = value ) }; - // eprintln!("SQL: {}", sql); let query = sqlx_query(&sql); let mut table_rows = IndexMap::new(); @@ -1315,6 +1314,84 @@ pub async fn get_affected_rows( Ok(table_rows) } +pub async fn insert_update_message( + pool: &AnyPool, + table: &str, + column: &str, + row_number: &u32, + cell_value: &str, +) -> Result<(), sqlx::Error> { + // TODO: We should be able to do this in one query instead of two. See the SQL code in + // get_affected_rows() where we solve the problem this two-query approach is meant to solve + // simply by re-aliasing the subquery as _extended. 
+ + // In some cases the current value of the column will have to retrieved from the last + // generated message, so we retrieve that from the database: + let last_msg_val = { + let sql = format!( + r#"SELECT value + FROM "message" + WHERE "row" = {row} + AND "column" = '{column}' + AND "table" = '{table}' + ORDER BY "message_id" DESC + LIMIT 1"#, + column = column, + table = table, + row = row_number, + ); + let query = sqlx_query(&sql); + + let results = query.fetch_all(pool).await?; + if results.is_empty() { + "".to_string() + } else { + let row = &results[0]; + let raw_value = row.try_get_raw("value").unwrap(); + if !raw_value.is_null() { + get_column_value(&row, "value", "text") + } else { + "".to_string() + } + } + }; + + // Construct the SQL for the insert of the 'update' message using last_msg_val: + let casted_column = cast_column_sql_to_text(column, "non-text"); + let is_clause = if pool.any_kind() == AnyKind::Sqlite { + "IS" + } else { + "IS NOT DISTINCT FROM" + }; + + let insert_sql = format!( + r#"INSERT INTO "message" + ("table", "row", "column", "value", "level", "rule", "message") + SELECT + '{table}', "row_number", '{column}', '{value}', 'update', 'rule:update', + 'Value changed from ''' || + CASE + WHEN "{column}" {is_clause} NULL THEN '{last_msg_val}' + ELSE {casted_column} + END || + ''' to ''{value}''' + FROM "{table}_view" + WHERE "row_number" = {row} AND ( + ("{column}" {is_clause} NULL AND '{last_msg_val}' != '{value}') + OR {casted_column} != '{value}' + )"#, + column = column, + last_msg_val = last_msg_val, + is_clause = is_clause, + row = row_number, + table = table, + value = cell_value, + ); + let query = sqlx_query(&insert_sql); + query.execute(pool).await?; + Ok(()) +} + pub async fn get_rows_to_update( global_config: &SerdeMap, pool: &AnyPool, @@ -1450,6 +1527,46 @@ pub async fn get_rows_to_update( Ok((rows_to_update_before, rows_to_update_after)) } +pub async fn process_updates( + global_config: &SerdeMap, + 
compiled_datatype_conditions: &HashMap, + compiled_rule_conditions: &HashMap>>, + pool: &AnyPool, + updates: &IndexMap>, + query_as_if: &QueryAsIf, +) -> Result<(), sqlx::Error> { + for (update_table, rows_to_update) in updates { + for (row_number, row) in rows_to_update { + // Validate each row 'counterfactually': + let vrow = validate_row( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + update_table, + row, + Some(*row_number), + Some(&query_as_if), + ) + .await?; + + // Update the row in the database: + update_row( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + update_table, + &vrow, + row_number, + true, + ) + .await?; + } + } + Ok(()) +} + /// Given global config map, a database connection pool, a table name, a row, and the row number to /// update, update the corresponding row in the database with new values as specified by `row`. #[async_recursion] @@ -1463,11 +1580,6 @@ pub async fn update_row( row_number: &u32, skip_validation: bool, ) -> Result<(), sqlx::Error> { - // eprintln!("***** In update_row(). Got row: {:#?}", row); - - // TODO: If possible use BEGIN and END TRANSACTION here and ROLLBACK in case of an error. - // Maybe we need a wrapper function for this. - // First, send the row through the row validator to determine if any fields are problematic and // to mark them with appropriate messages: let row = if !skip_validation { @@ -1486,6 +1598,11 @@ pub async fn update_row( row.clone() }; + // TODO: If possible use BEGIN and END TRANSACTION here and ROLLBACK in case of an error. + // Maybe we need a wrapper function for this? + // Note also that we might want to run ANALYZE (or the sqlite equivalent) after + // the updates have completed. 
+ // Now prepare the row and messages for the database update: let mut assignments = vec![]; let mut params = vec![]; @@ -1498,75 +1615,7 @@ pub async fn update_row( // Begin by adding an extra 'update' row to the message table indicating that the value of // this column has been updated (if that is the case). - - // TODO: We should be able to do this in one query instead of two. See the SQL code in - // get_affected_rows() where we solve the problem this two-query approach is meant to solve - // simply by re-aliasing the subquery as _extended. - - // In some cases the current value of the column will have to retrieved from the last - // generated message, so we retrieve that from the database: - let last_msg_val = { - let sql = format!( - r#"SELECT value - FROM "message" - WHERE "row" = {row} - AND "column" = '{column}' - AND "table" = '{table}' - ORDER BY "message_id" DESC - LIMIT 1"#, - column = column, - table = table_name, - row = row_number, - ); - let query = sqlx_query(&sql); - - let results = query.fetch_all(pool).await?; - if results.is_empty() { - "".to_string() - } else { - let row = &results[0]; - let raw_value = row.try_get_raw("value").unwrap(); - if !raw_value.is_null() { - get_column_value(&row, "value", "text") - } else { - "".to_string() - } - } - }; - - // Construct the SQL for the insert of the 'update' message using last_msg_val: - let casted_column = cast_column_sql_to_text(column, "non-text"); - let is_clause = if pool.any_kind() == AnyKind::Sqlite { - "IS" - } else { - "IS NOT DISTINCT FROM" - }; - - let insert_sql = format!( - r#"INSERT INTO "message" - ("table", "row", "column", "value", "level", "rule", "message") - SELECT - '{table}', "row_number", '{column}', '{value}', 'update', 'rule:update', - 'Value changed from ''' || - CASE - WHEN "{column}" {is_clause} NULL THEN '{last_msg_val}' - ELSE {casted_column} - END || - ''' to ''{value}''' - FROM "{table}_view" - WHERE "row_number" = {row} AND ( - ("{column}" {is_clause} NULL AND 
'{last_msg_val}' != '{value}') - OR {casted_column} != '{value}' - )"#, - column = column, - last_msg_val = last_msg_val, - is_clause = is_clause, - row = row_number, - table = table_name, - value = cell_value, - ); - let query = sqlx_query(&insert_sql); - query.execute(pool).await?; + insert_update_message(pool, table_name, column, row_number, cell_value).await?; // Generate the assignment statements and messages for each column: let mut cell_for_insert = cell.clone(); @@ -1603,77 +1652,15 @@ pub async fn update_row( } } - // Overview: - // --------- - // We need to call something similar to validate_row() on every affected row in the - // dependent table, with the hitch that we need to validate them with the target row - // of the target table in the database replaced, somehow, with the modified version - // of it represented by `row`. (In the case of delete_row() we would simply have to - // ignore the target row in the database somehow, maybe by excluding its row number - // using a CTE). Those re-validated rows should then be sent to update_row() which will - // try to insert them into the database one by one. In the case of a database error - // this code (i.e., in this block) will be triggered again, and so on, recursively - // until everything succeeds. - // - // Note also that we might want to run ANALYZE (or the sqlite equivalent) after - // the update has completed. 
- - async fn process_updates( - global_config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, - updates: &IndexMap>, - query_as_if: &QueryAsIf, - ) -> Result<(), sqlx::Error> { - // TODO: Factor this code out into a function instead of repeating it twice for - // updates_after and updates_before (see below) - for (update_table, rows_to_update) in updates { - for (row_number, row) in rows_to_update { - eprintln!( - "VALIDATING ROW NUMBER {} OF {}, ROW: {:#?}", - row_number, update_table, row - ); - // Validate each row 'counterfactually' (see above): - let vrow = validate_row( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - update_table, - row, - Some(*row_number), - Some(&query_as_if), - ) - .await?; - eprintln!("VALIDATED ROW: {:#?}", vrow); - // Update the row in the database: - update_row( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - update_table, - &vrow, - row_number, - true, - ) - .await?; - } - } - Ok(()) - } - - // Look through the valve config to see which tables are dependent on this table - // and find the rows that need to be updated. - + // First, look through the valve config to see which tables are dependent on this table + // and find the rows that need to be updated: let (updates_before, updates_after) = get_rows_to_update(global_config, pool, table_name, &row, row_number) .await .map_err(|e| Configuration(e.into()))?; - eprintln!("UPDATES_BEFORE: {:#?}", updates_before); - eprintln!("UPDATES_AFTER: {:#?}", updates_after); + // Used by process_updates() to validate the given row, counterfactually, "as if" the version + // of the row in the database currently were replaced with `row`: let query_as_if = QueryAsIf { kind: QueryAsIfKind::Replace, table: table_name.to_string(), @@ -1693,9 +1680,8 @@ pub async fn update_row( ) .await?; - // Update the target row. 
- - // First, figure out where the row currently is: + // Now update the target row. First, figure out whether the row is currently in the base table + // or the conflict table: let sql = format!( "SELECT 1 FROM \"{}\" WHERE row_number = {}", table_name, row_number @@ -1707,7 +1693,7 @@ pub async fn update_row( current_table.push_str("_conflict"); } - // Next, figure out where the row needs to go: + // Next, figure out where to put the new version of the row: let mut table_to_write = String::from(table_name); for (column, cell) in row.iter() { let valid = cell.get("valid").unwrap(); @@ -1728,7 +1714,6 @@ pub async fn update_row( let messages = cell.get("messages").and_then(|m| m.as_array()).unwrap(); for msg in messages { let level = msg.get("level").and_then(|l| l.as_str()).unwrap(); - eprintln!("LEVEL: {}", level); if level == "error" { table_to_write.push_str("_conflict"); break; @@ -1738,18 +1723,13 @@ pub async fn update_row( } } - // Update or insert, dependeing on whether the table to write to is the same as the table that - // the row is currently in: + // If table_to_write and current_table are the same, update it. Otherwise delete the current + // version of the row from the database and insert the new version to table_to_write: if table_to_write == current_table { let mut update_stmt = format!(r#"UPDATE "{}" SET "#, table_to_write); update_stmt.push_str(&assignments.join(", ")); update_stmt.push_str(&format!(r#" WHERE "row_number" = {}"#, row_number)); let update_stmt = local_sql_syntax(&pool, &update_stmt); - eprintln!( - "Table_to_write is current_table. Running update statement: {} with params: {:?}", - update_stmt, params, - ); - let mut query = sqlx_query(&update_stmt); for param in &params { query = query.bind(param); @@ -1760,13 +1740,8 @@ pub async fn update_row( "DELETE FROM \"{}\" WHERE row_number = {}", current_table, row_number ); - eprintln!( - "Table to write is not the same as current table. 
Running {}", - sql - ); let query = sqlx_query(&sql); query.execute(pool).await?; - eprintln!("INSERTING NEW ROW"); insert_new_row( global_config, compiled_datatype_conditions, @@ -1780,7 +1755,7 @@ pub async fn update_row( .await?; } - // Process the updates that need to be performed after the update of the target row: + // Now process the updates that need to be performed after the update of the target row: process_updates( global_config, compiled_datatype_conditions, diff --git a/src/validate.rs b/src/validate.rs index 5fbfac8a..2091b4c9 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -1163,8 +1163,6 @@ fn validate_cell_nulltype( column_name: &String, cell: &mut ResultCell, ) { - //eprintln!("GETTING COLUMN CONFIG FOR {}.{}", table_name, column_name); - let column = config .get("table") .and_then(|t| t.as_object()) @@ -1531,11 +1529,8 @@ async fn validate_cell_foreign_constraints( as_if_clause, ftable_alias, fcolumn, sql_param ), ); - //eprintln!("FSQL FOR {:?}: {}", fkey, fsql); - //eprintln!("FSQL: {}", fsql); let frows = sqlx_query(&fsql).bind(&cell.value).fetch_all(pool).await?; - //eprintln!("SUCCEEDED!!!"); if frows.is_empty() { cell.valid = false; let mut message = json!({ @@ -1558,7 +1553,6 @@ async fn validate_cell_foreign_constraints( as_if_clause_for_conflict, ftable_alias, fcolumn, sql_param ), ); - //eprintln!("CONFLICT FSQL FOR {:?}: {}", fkey, fsql); let frows = sqlx_query(&fsql) .bind(cell.value.clone()) .fetch_all(pool) From a1d36db1e7adb7622eb86ef38b9f18188f4f5b6a Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Thu, 29 Jun 2023 11:06:46 -0400 Subject: [PATCH 12/31] implement delete_row() --- src/api_test.rs | 6 ++ src/lib.rs | 157 ++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 138 insertions(+), 25 deletions(-) diff --git a/src/api_test.rs b/src/api_test.rs index 483653d1..f5342842 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -164,6 +164,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> 
Result<(), sqlx::Erro "table2", &row.as_object().unwrap(), &1, + false, ) .await?; @@ -200,6 +201,8 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro &pool, "table3", &result_row, + None, + false, ) .await?; @@ -237,6 +240,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro "table6", &result_row, &1, + false, ) .await?; @@ -272,6 +276,8 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro &pool, "table6", &result_row, + None, + false, ) .await?; diff --git a/src/lib.rs b/src/lib.rs index 6bbb03a8..b3a8614d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1396,8 +1396,7 @@ pub async fn get_rows_to_update( global_config: &SerdeMap, pool: &AnyPool, table: &str, - row: &SerdeMap, - row_number: &u32, + query_as_if: &QueryAsIf, ) -> Result< ( IndexMap>, @@ -1485,14 +1484,10 @@ pub async fn get_rows_to_update( let target_column = fdep.get("fcolumn").and_then(|c| c.as_str()).unwrap(); let target_table = fdep.get("ftable").and_then(|c| c.as_str()).unwrap(); - // Fetch the cell corresponding to `column` from `row`, and the value of that cell, - // which is the new value for the row. - let new_value = get_cell_value(row, target_column)?; - // Query the database using `row_number` to get the current value of the column for // the row. 
let current_value = - get_current_value(target_table, target_column, row_number, pool).await?; + get_current_value(target_table, target_column, &query_as_if.row_number, pool).await?; // Query dependent_table.dependent_column for the rows that will be affected by the change // from the current to the new value: @@ -1505,15 +1500,30 @@ pub async fn get_rows_to_update( ) .await?; - let updates_after = get_affected_rows( - dependent_table, - dependent_column, - &new_value, - global_config, - pool, - ) - .await?; - + let updates_after = match &query_as_if.row { + None => { + if query_as_if.kind == QueryAsIfKind::Replace { + eprintln!( + "WARN: No row in query_as_if: {:?} for {:?}", + query_as_if, query_as_if.kind + ); + } + IndexMap::new() + } + Some(row) => { + // Fetch the cell corresponding to `column` from `row`, and the value of that cell, + // which is the new value for the row. + let new_value = get_cell_value(&row, target_column)?; + get_affected_rows( + dependent_table, + dependent_column, + &new_value, + global_config, + pool, + ) + .await? + } + }; rows_to_update_before.insert(dependent_table.to_string(), updates_before); rows_to_update_after.insert(dependent_table.to_string(), updates_after); } @@ -1567,6 +1577,103 @@ pub async fn process_updates( Ok(()) } +#[async_recursion] +pub async fn delete_row( + global_config: &SerdeMap, + compiled_datatype_conditions: &HashMap, + compiled_rule_conditions: &HashMap>>, + pool: &AnyPool, + table_name: &str, + row_number: &u32, +) -> Result<(), sqlx::Error> { + // TODO: If possible use BEGIN and END TRANSACTION here and ROLLBACK in case of an error. + // Maybe we need a wrapper function for this? + // Note also that we might want to run ANALYZE (or the sqlite equivalent) after + // the deletes have completed. 
+ + // First, use the row number to fetch the row from the database: + let sql = format!( + "SELECT * FROM \"{}\" WHERE row_number = {}", + table_name, row_number + ); + let query = sqlx_query(&sql); + let sql_row = query.fetch_one(pool).await?; + // TODO: This isn't the only place we do this. Factor this out into its own function. + let mut row = SerdeMap::new(); + for column in sql_row.columns() { + let cname = column.name(); + if cname != "row_number" { + let raw_value = sql_row + .try_get_raw(format!(r#"{}"#, cname).as_str()) + .unwrap(); + let value; + if !raw_value.is_null() { + value = get_column_value(&sql_row, &cname, "text"); + } else { + value = String::from(""); + } + let cell = json!({ + "value": value, + "valid": true, + "messages": json!([]), + }); + row.insert(cname.to_string(), json!(cell)); + } + } + + // Used to validate the given row, counterfactually, "as if" the row did not exist in the + // database: + let query_as_if = QueryAsIf { + kind: QueryAsIfKind::Ignore, + table: table_name.to_string(), + alias: format!("{}_as_if", table_name), + row_number: *row_number, + row: None, + }; + + // Look through the valve config to see which tables are dependent on this table and find the + // rows that need to be updated. 
Since this is a delete there will only be rows to update + // before and none after the delete: + let (updates_before, _) = get_rows_to_update(global_config, pool, table_name, &query_as_if) + .await + .map_err(|e| Configuration(e.into()))?; + + // Process the updates that need to be performed before the update of the target row: + process_updates( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + &updates_before, + &query_as_if, + ) + .await?; + + // Now delete the row: + let sql1 = format!( + "DELETE FROM \"{}\" WHERE row_number = {}", + table_name, row_number + ); + let sql2 = format!( + "DELETE FROM \"{}_conflict\" WHERE row_number = {}", + table_name, row_number + ); + for sql in vec![sql1, sql2] { + let query = sqlx_query(&sql); + query.execute(pool).await?; + } + + // Now delete all messages associated with the row: + let sql = format!( + r#"DELETE FROM "message" WHERE "table" = '{}' AND "row" = {}"#, + table_name, row_number + ); + let query = sqlx_query(&sql); + query.execute(pool).await?; + + Ok(()) +} + /// Given global config map, a database connection pool, a table name, a row, and the row number to /// update, update the corresponding row in the database with new values as specified by `row`. 
#[async_recursion] @@ -1652,15 +1759,8 @@ pub async fn update_row( } } - // First, look through the valve config to see which tables are dependent on this table - // and find the rows that need to be updated: - let (updates_before, updates_after) = - get_rows_to_update(global_config, pool, table_name, &row, row_number) - .await - .map_err(|e| Configuration(e.into()))?; - - // Used by process_updates() to validate the given row, counterfactually, "as if" the version - // of the row in the database currently were replaced with `row`: + // Used to validate the given row, counterfactually, "as if" the version of the row in the + // database currently were replaced with `row`: let query_as_if = QueryAsIf { kind: QueryAsIfKind::Replace, table: table_name.to_string(), @@ -1669,6 +1769,13 @@ pub async fn update_row( row: Some(row.clone()), }; + // First, look through the valve config to see which tables are dependent on this table + // and find the rows that need to be updated: + let (updates_before, updates_after) = + get_rows_to_update(global_config, pool, table_name, &query_as_if) + .await + .map_err(|e| Configuration(e.into()))?; + // Process the updates that need to be performed before the update of the target row: process_updates( global_config, From 3626d0707a08badfd669b8da22a026ac18392ede Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Thu, 29 Jun 2023 13:33:24 -0400 Subject: [PATCH 13/31] handle intra-table dependencies for unique and primary constraints --- src/lib.rs | 177 ++++++++++++++++++++++++++++++++++++++++++++---- src/validate.rs | 2 + 2 files changed, 165 insertions(+), 14 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b3a8614d..097633be 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1197,6 +1197,7 @@ pub async fn get_affected_rows( table: &str, column: &str, value: &str, + except: Option<&u32>, global_config: &SerdeMap, pool: &AnyPool, ) -> Result, String> { @@ -1272,12 +1273,18 @@ pub async fn get_affected_rows( SELECT {inner_columns} FROM 
"{table}_view" ) t - WHERE "{column}_extended" = '{value}'"#, + WHERE "{column}_extended" = '{value}'{except}"#, outer_columns = outer_columns.join(", "), inner_columns = inner_columns.join(", "), table = table, column = column, - value = value + value = value, + except = match except { + None => "".to_string(), + Some(row_number) => { + format!(" AND row_number != {}", row_number) + } + }, ) }; @@ -1401,6 +1408,7 @@ pub async fn get_rows_to_update( ( IndexMap>, IndexMap>, + IndexMap>, ), String, > { @@ -1495,6 +1503,7 @@ pub async fn get_rows_to_update( dependent_table, dependent_column, &current_value, + None, global_config, pool, ) @@ -1518,6 +1527,7 @@ pub async fn get_rows_to_update( dependent_table, dependent_column, &new_value, + None, global_config, pool, ) @@ -1528,13 +1538,105 @@ pub async fn get_rows_to_update( rows_to_update_after.insert(dependent_table.to_string(), updates_after); } - // TODO: Collect the dependencies for tree constraints similarly to the way we - // collect foreign constraints (see just above). 
+ // Collect the unique/primary intra-table dependencies: + eprintln!("ROWS TO UPDATE BEFORE: {:#?}", rows_to_update_before); + let primaries = global_config + .get("constraints") + .and_then(|c| c.as_object()) + .and_then(|c| c.get("primary")) + .and_then(|t| t.as_object()) + .and_then(|t| t.get(table)) + .and_then(|t| t.as_array()) + .and_then(|t| Some(t.iter())) + .and_then(|t| Some(t.map(|t| t.as_str().unwrap().to_string()))) + .and_then(|t| Some(t.collect::>())) + .unwrap(); + let uniques = global_config + .get("constraints") + .and_then(|c| c.as_object()) + .and_then(|c| c.get("unique")) + .and_then(|t| t.as_object()) + .and_then(|t| t.get(table)) + .and_then(|t| t.as_array()) + .and_then(|t| Some(t.iter())) + .and_then(|t| Some(t.map(|t| t.as_str().unwrap().to_string()))) + .and_then(|t| Some(t.collect::>())) + .unwrap(); + let columns = global_config + .get("table") + .and_then(|t| t.get(table)) + .and_then(|t| t.as_object()) + .and_then(|t| t.get("column")) + .and_then(|t| t.as_object()) + .and_then(|t| Some(t.keys())) + .and_then(|k| Some(k.map(|k| k.to_string()))) + .and_then(|t| Some(t.collect::>())) + .unwrap(); + + let mut rows_to_update_unique = IndexMap::new(); + for column in &columns { + if !uniques.contains(column) && !primaries.contains(column) { + continue; + } + + // Query the database using `row_number` to get the current value of the column for + // the row. 
+ let current_value = get_current_value(table, column, &query_as_if.row_number, pool).await?; + + // Query table.column for the rows that will be affected by the change from the current to + // the new value: + let mut updates = get_affected_rows( + table, + column, + &current_value, + Some(&query_as_if.row_number), + global_config, + pool, + ) + .await?; + + match &query_as_if.row { + None => { + if query_as_if.kind == QueryAsIfKind::Replace { + eprintln!( + "WARN: No row in query_as_if: {:?} for {:?}", + query_as_if, query_as_if.kind + ); + } + } + Some(row) => { + // Fetch the cell corresponding to `column` from `row`, and the value of that cell, + // which is the new value for the row. + let new_value = get_cell_value(&row, column)?; + let further_updates = get_affected_rows( + table, + column, + &new_value, + Some(&query_as_if.row_number), + global_config, + pool, + ) + .await?; + for (key, data) in further_updates { + updates.insert(key, data); + } + } + }; + rows_to_update_unique.insert(table.to_string(), updates); + } + + // TODO: Collect the rule intra-table dependencies. + + // TODO: Collect the tree intra-table dependencies. // TODO: Collect the dependencies for under constraints similarly to the way we // collect foreign constraints (see just above). 
- Ok((rows_to_update_before, rows_to_update_after)) + Ok(( + rows_to_update_before, + rows_to_update_after, + rows_to_update_unique, + )) } pub async fn process_updates( @@ -1544,6 +1646,7 @@ pub async fn process_updates( pool: &AnyPool, updates: &IndexMap>, query_as_if: &QueryAsIf, + do_not_recurse: bool, ) -> Result<(), sqlx::Error> { for (update_table, rows_to_update) in updates { for (row_number, row) in rows_to_update { @@ -1570,6 +1673,7 @@ pub async fn process_updates( &vrow, row_number, true, + do_not_recurse, ) .await?; } @@ -1593,11 +1697,20 @@ pub async fn delete_row( // First, use the row number to fetch the row from the database: let sql = format!( - "SELECT * FROM \"{}\" WHERE row_number = {}", + "SELECT * FROM \"{}_view\" WHERE row_number = {}", table_name, row_number ); let query = sqlx_query(&sql); - let sql_row = query.fetch_one(pool).await?; + let sql_row = query.fetch_one(pool).await.map_err(|e| { + Configuration( + format!( + "Got: '{}' while fetching row number {} from table {}", + e, row_number, table_name + ) + .into(), + ) + })?; + // TODO: This isn't the only place we do this. Factor this out into its own function. let mut row = SerdeMap::new(); for column in sql_row.columns() { @@ -1634,9 +1747,10 @@ pub async fn delete_row( // Look through the valve config to see which tables are dependent on this table and find the // rows that need to be updated. 
Since this is a delete there will only be rows to update // before and none after the delete: - let (updates_before, _) = get_rows_to_update(global_config, pool, table_name, &query_as_if) - .await - .map_err(|e| Configuration(e.into()))?; + let (updates_before, _, updates_unique) = + get_rows_to_update(global_config, pool, table_name, &query_as_if) + .await + .map_err(|e| Configuration(e.into()))?; // Process the updates that need to be performed before the update of the target row: process_updates( @@ -1646,6 +1760,20 @@ pub async fn delete_row( pool, &updates_before, &query_as_if, + false, + ) + .await?; + + // Now process the rows from the same table as the target table that need to be re-validated + // because of unique or primary constraints: + process_updates( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + &updates_unique, + &query_as_if, + true, ) .await?; @@ -1686,6 +1814,7 @@ pub async fn update_row( row: &SerdeMap, row_number: &u32, skip_validation: bool, + do_not_recurse: bool, ) -> Result<(), sqlx::Error> { // First, send the row through the row validator to determine if any fields are problematic and // to mark them with appropriate messages: @@ -1771,10 +1900,15 @@ pub async fn update_row( // First, look through the valve config to see which tables are dependent on this table // and find the rows that need to be updated: - let (updates_before, updates_after) = - get_rows_to_update(global_config, pool, table_name, &query_as_if) - .await - .map_err(|e| Configuration(e.into()))?; + let (updates_before, updates_after, updates_unique) = { + if do_not_recurse { + (IndexMap::new(), IndexMap::new(), IndexMap::new()) + } else { + get_rows_to_update(global_config, pool, table_name, &query_as_if) + .await + .map_err(|e| Configuration(e.into()))? 
+ } + }; // Process the updates that need to be performed before the update of the target row: process_updates( @@ -1784,6 +1918,20 @@ pub async fn update_row( pool, &updates_before, &query_as_if, + false, + ) + .await?; + + // Now process the rows from the same table as the target table that need to be re-validated + // because of unique or primary constraints: + process_updates( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + &updates_unique, + &query_as_if, + true, ) .await?; @@ -1870,6 +2018,7 @@ pub async fn update_row( pool, &updates_after, &query_as_if, + false, ) .await?; diff --git a/src/validate.rs b/src/validate.rs index 2091b4c9..6097e465 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -103,6 +103,7 @@ pub async fn validate_row( let context = result_row.clone(); for (column_name, cell) in result_row.contents.iter_mut() { + // TODO: Pass the query_as_if parameter to validate_cell_rules. validate_cell_rules( config, compiled_rule_conditions, @@ -125,6 +126,7 @@ pub async fn validate_row( // they can result in database errors when, for instance, we compare a numeric with a // non-numeric type. if cell.valid || !contains_dt_violation(&cell.messages) { + // TODO: Pass the query_as_if parameter to validate_cell_trees. 
validate_cell_trees( config, pool, From b9ce8c305667bc61c8c6be0da09ec2db77f95e0c Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Thu, 29 Jun 2023 13:54:13 -0400 Subject: [PATCH 14/31] minor cleanup and refactor --- src/lib.rs | 107 ++++++++++++++++++++++++++--------------------------- 1 file changed, 52 insertions(+), 55 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 097633be..f5a5a2b9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1399,6 +1399,46 @@ pub async fn insert_update_message( Ok(()) } +pub async fn get_db_value( + table: &str, + column: &str, + row_number: &u32, + pool: &AnyPool, +) -> Result { + let (is_clause, cast) = if pool.any_kind() == AnyKind::Sqlite { + ("IS", "") + } else { + ("IS NOT DISTINCT FROM", "::TEXT") + }; + let sql = format!( + r#"SELECT + CASE + WHEN "{column}" {is_clause} NULL THEN ( + SELECT value + FROM "message" + WHERE "row" = "row_number" + AND "column" = '{column}' + AND "table" = '{table}' + ORDER BY "message_id" DESC + LIMIT 1 + ) + ELSE "{column}"{cast} + END AS "{column}" + FROM "{table}_view" WHERE "row_number" = {row_number} + "#, + column = column, + is_clause = is_clause, + table = table, + cast = cast, + row_number = row_number, + ); + + let query = sqlx_query(&sql); + let result_row = query.fetch_one(pool).await.map_err(|e| e.to_string())?; + let value: &str = result_row.try_get(column).unwrap(); + Ok(value.to_string()) +} + pub async fn get_rows_to_update( global_config: &SerdeMap, pool: &AnyPool, @@ -1424,46 +1464,7 @@ pub async fn get_rows_to_update( } } - async fn get_current_value( - table: &str, - column: &str, - row_number: &u32, - pool: &AnyPool, - ) -> Result { - let (is_clause, cast) = if pool.any_kind() == AnyKind::Sqlite { - ("IS", "") - } else { - ("IS NOT DISTINCT FROM", "::TEXT") - }; - let sql = format!( - r#"SELECT - CASE - WHEN "{column}" {is_clause} NULL THEN ( - SELECT value - FROM "message" - WHERE "row" = "row_number" - AND "column" = '{column}' - AND "table" = '{table}' - ORDER BY 
"message_id" DESC - LIMIT 1 - ) - ELSE "{column}"{cast} - END AS "{column}" - FROM "{table}_view" WHERE "row_number" = {row_number} - "#, - column = column, - is_clause = is_clause, - table = table, - cast = cast, - row_number = row_number, - ); - - let query = sqlx_query(&sql); - let result_row = query.fetch_one(pool).await.map_err(|e| e.to_string())?; - let value: &str = result_row.try_get(column).unwrap(); - Ok(value.to_string()) - } - + // Collect foreign key dependencies: let foreign_dependencies = { let mut foreign_dependencies = vec![]; let global_fconstraints = global_config @@ -1495,7 +1496,7 @@ pub async fn get_rows_to_update( // Query the database using `row_number` to get the current value of the column for // the row. let current_value = - get_current_value(target_table, target_column, &query_as_if.row_number, pool).await?; + get_db_value(target_table, target_column, &query_as_if.row_number, pool).await?; // Query dependent_table.dependent_column for the rows that will be affected by the change // from the current to the new value: @@ -1538,8 +1539,8 @@ pub async fn get_rows_to_update( rows_to_update_after.insert(dependent_table.to_string(), updates_after); } - // Collect the unique/primary intra-table dependencies: - eprintln!("ROWS TO UPDATE BEFORE: {:#?}", rows_to_update_before); + // Collect the intra-table dependencies: + // TODO: Consider also the tree intra-table dependencies. let primaries = global_config .get("constraints") .and_then(|c| c.as_object()) @@ -1573,7 +1574,7 @@ pub async fn get_rows_to_update( .and_then(|t| Some(t.collect::>())) .unwrap(); - let mut rows_to_update_unique = IndexMap::new(); + let mut rows_to_update_intra = IndexMap::new(); for column in &columns { if !uniques.contains(column) && !primaries.contains(column) { continue; @@ -1581,7 +1582,7 @@ pub async fn get_rows_to_update( // Query the database using `row_number` to get the current value of the column for // the row. 
- let current_value = get_current_value(table, column, &query_as_if.row_number, pool).await?; + let current_value = get_db_value(table, column, &query_as_if.row_number, pool).await?; // Query table.column for the rows that will be affected by the change from the current to // the new value: @@ -1622,20 +1623,16 @@ pub async fn get_rows_to_update( } } }; - rows_to_update_unique.insert(table.to_string(), updates); + rows_to_update_intra.insert(table.to_string(), updates); } - // TODO: Collect the rule intra-table dependencies. - - // TODO: Collect the tree intra-table dependencies. - // TODO: Collect the dependencies for under constraints similarly to the way we // collect foreign constraints (see just above). Ok(( rows_to_update_before, rows_to_update_after, - rows_to_update_unique, + rows_to_update_intra, )) } @@ -1747,7 +1744,7 @@ pub async fn delete_row( // Look through the valve config to see which tables are dependent on this table and find the // rows that need to be updated. Since this is a delete there will only be rows to update // before and none after the delete: - let (updates_before, _, updates_unique) = + let (updates_before, _, updates_intra) = get_rows_to_update(global_config, pool, table_name, &query_as_if) .await .map_err(|e| Configuration(e.into()))?; @@ -1771,7 +1768,7 @@ pub async fn delete_row( compiled_datatype_conditions, compiled_rule_conditions, pool, - &updates_unique, + &updates_intra, &query_as_if, true, ) @@ -1900,7 +1897,7 @@ pub async fn update_row( // First, look through the valve config to see which tables are dependent on this table // and find the rows that need to be updated: - let (updates_before, updates_after, updates_unique) = { + let (updates_before, updates_after, updates_intra) = { if do_not_recurse { (IndexMap::new(), IndexMap::new(), IndexMap::new()) } else { @@ -1929,7 +1926,7 @@ pub async fn update_row( compiled_datatype_conditions, compiled_rule_conditions, pool, - &updates_unique, + &updates_intra, &query_as_if, 
true, ) From 03bc0f34cd3427c31a8839693f9f8fb71bbb5381 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Thu, 29 Jun 2023 14:38:15 -0400 Subject: [PATCH 15/31] check for dependencies when inserting a new row --- src/api_test.rs | 2 + src/lib.rs | 164 ++++++++++++++++++++++++++++++++++++++---------- src/validate.rs | 114 ++++++++++++++++++--------------- 3 files changed, 197 insertions(+), 83 deletions(-) diff --git a/src/api_test.rs b/src/api_test.rs index f5342842..78fa6592 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -165,6 +165,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro &row.as_object().unwrap(), &1, false, + false, ) .await?; @@ -241,6 +242,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro &result_row, &1, false, + false, ) .await?; diff --git a/src/lib.rs b/src/lib.rs index f5a5a2b9..4c3c005a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1091,7 +1091,6 @@ pub async fn insert_new_row( }; // Now prepare the row and messages for insertion to the database. 
- let new_row_number = match new_row_number { Some(n) => *n, None => { @@ -1154,24 +1153,95 @@ pub async fn insert_new_row( } } - // First add the new row to the table: + // Used to validate the given row, counterfactually, "as if" the version of the row in the + // database currently were replaced with `row`: + let query_as_if = QueryAsIf { + kind: QueryAsIfKind::Add, + table: table_name.to_string(), + alias: format!("{}_as_if", table_name), + row_number: new_row_number, + row: Some(row.clone()), + }; + + // Look through the valve config to see which tables are dependent on this table + // and find the rows that need to be updated: + let (_, updates_after, updates_intra) = + get_rows_to_update(global_config, pool, table_name, &query_as_if) + .await + .map_err(|e| Configuration(e.into()))?; + + // Process the rows from the same table as the target table that need to be re-validated + // because of unique or primary constraints: + process_updates( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + &updates_intra, + &query_as_if, + true, + ) + .await?; + + // Next, figure out where to put the new version of the row: + let mut table_to_write = String::from(table_name); + for (column, cell) in row.iter() { + let valid = cell.get("valid").unwrap(); + if valid == false { + let structure = global_config + .get("table") + .and_then(|t| t.as_object()) + .and_then(|t| t.get(table_name)) + .and_then(|t| t.as_object()) + .and_then(|t| t.get("column")) + .and_then(|c| c.as_object()) + .and_then(|c| c.get(column)) + .and_then(|c| c.as_object()) + .and_then(|c| c.get("structure")) + .and_then(|s| s.as_str()) + .unwrap_or(""); + if vec!["primary", "unique"].contains(&structure) || structure.starts_with("tree(") { + let messages = cell.get("messages").and_then(|m| m.as_array()).unwrap(); + for msg in messages { + let level = msg.get("level").and_then(|l| l.as_str()).unwrap(); + if level == "error" { + table_to_write.push_str("_conflict"); + 
break; + } + } + } + } + } + + // Add the new row to the table: let insert_stmt = local_sql_syntax( &pool, &format!( r#"INSERT INTO "{}" ("row_number", {}) VALUES ({}, {})"#, - table_name, + table_to_write, insert_columns.join(", "), new_row_number, insert_values.join(", "), ), ); - let mut query = sqlx_query(&insert_stmt); for param in &insert_params { query = query.bind(param); } query.execute(pool).await?; + // Now process the updates that need to be performed after the update of the target row: + process_updates( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + &updates_after, + &query_as_if, + false, + ) + .await?; + // Next add any validation messages to the message table: for m in messages { let column = m.get("column").and_then(|c| c.as_str()).unwrap(); @@ -1495,24 +1565,38 @@ pub async fn get_rows_to_update( // Query the database using `row_number` to get the current value of the column for // the row. - let current_value = - get_db_value(target_table, target_column, &query_as_if.row_number, pool).await?; - - // Query dependent_table.dependent_column for the rows that will be affected by the change - // from the current to the new value: - let updates_before = get_affected_rows( - dependent_table, - dependent_column, - &current_value, - None, - global_config, - pool, - ) - .await?; + let updates_before = match query_as_if.kind { + QueryAsIfKind::Add => { + if let None = query_as_if.row { + eprintln!( + "WARN: No row in query_as_if: {:?} for {:?}", + query_as_if, query_as_if.kind + ); + } + IndexMap::new() + } + _ => { + let current_value = + get_db_value(target_table, target_column, &query_as_if.row_number, pool) + .await?; + + // Query dependent_table.dependent_column for the rows that will be affected by the + // change from the current value: + get_affected_rows( + dependent_table, + dependent_column, + &current_value, + None, + global_config, + pool, + ) + .await? 
+ } + }; let updates_after = match &query_as_if.row { None => { - if query_as_if.kind == QueryAsIfKind::Replace { + if query_as_if.kind != QueryAsIfKind::Ignore { eprintln!( "WARN: No row in query_as_if: {:?} for {:?}", query_as_if, query_as_if.kind @@ -1582,23 +1666,37 @@ pub async fn get_rows_to_update( // Query the database using `row_number` to get the current value of the column for // the row. - let current_value = get_db_value(table, column, &query_as_if.row_number, pool).await?; - - // Query table.column for the rows that will be affected by the change from the current to - // the new value: - let mut updates = get_affected_rows( - table, - column, - &current_value, - Some(&query_as_if.row_number), - global_config, - pool, - ) - .await?; + let mut updates = match query_as_if.kind { + QueryAsIfKind::Add => { + if let None = query_as_if.row { + eprintln!( + "WARN: No row in query_as_if: {:?} for {:?}", + query_as_if, query_as_if.kind + ); + } + IndexMap::new() + } + _ => { + let current_value = + get_db_value(table, column, &query_as_if.row_number, pool).await?; + + // Query table.column for the rows that will be affected by the change from the + // current to the new value: + get_affected_rows( + table, + column, + &current_value, + Some(&query_as_if.row_number), + global_config, + pool, + ) + .await? + } + }; match &query_as_if.row { None => { - if query_as_if.kind == QueryAsIfKind::Replace { + if query_as_if.kind != QueryAsIfKind::Ignore { eprintln!( "WARN: No row in query_as_if: {:?} for {:?}", query_as_if, query_as_if.kind diff --git a/src/validate.rs b/src/validate.rs index 6097e465..08b7d407 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -29,8 +29,9 @@ pub struct ResultRow { /// The sense in which a [QueryAsIf] struct should be interpreted. #[derive(Clone, Debug, PartialEq)] pub enum QueryAsIfKind { - Replace, + Add, Ignore, + Replace, } /// Used for counterfactual validation. 
@@ -1407,64 +1408,77 @@ fn as_if_to_sql( } }; - if as_if.kind == QueryAsIfKind::Ignore { - format!( - r#""{table_alias}{suffix}" AS ( - SELECT * FROM "{table_name}{suffix}" WHERE "row_number" <> {row_number} - )"#, - table_alias = as_if.alias, - suffix = suffix, - table_name = as_if.table, - row_number = as_if.row_number, - ) - } else { - let row = as_if.row.as_ref().unwrap(); - let columns = row.keys().cloned().collect::>(); - let values = { - let mut values = vec![]; - for column in &columns { - let value = row - .get(column) - .and_then(|c| c.get("value")) - .and_then(|v| v.as_str()) + match as_if.kind { + QueryAsIfKind::Ignore => { + format!( + r#""{table_alias}{suffix}" AS ( + SELECT * FROM "{table_name}{suffix}" WHERE "row_number" <> {row_number} + )"#, + table_alias = as_if.alias, + suffix = suffix, + table_name = as_if.table, + row_number = as_if.row_number, + ) + } + QueryAsIfKind::Add | QueryAsIfKind::Replace => { + let row = as_if.row.as_ref().unwrap(); + let columns = row.keys().cloned().collect::>(); + let values = { + let mut values = vec![]; + for column in &columns { + let value = row + .get(column) + .and_then(|c| c.get("value")) + .and_then(|v| v.as_str()) + .unwrap(); + + let sql_type = get_sql_type_from_global_config( + &global_config, + &as_if.table, + &column, + pool, + ) .unwrap(); - let sql_type = get_sql_type_from_global_config( - &global_config, - &as_if.table, - &column, - pool, - ) - .unwrap(); - - if sql_type.to_lowercase() == "text" - || sql_type.to_lowercase().starts_with("varchar(") - { - values.push(format!("'{}'", value)); + if sql_type.to_lowercase() == "text" + || sql_type.to_lowercase().starts_with("varchar(") + { + values.push(format!("'{}'", value)); + } else { + values.push(value.to_string()); + } + } + values.join(", ") + }; + let columns = columns + .iter() + .map(|c| format!("\"{}\"", c)) + .collect::>() + .join(", "); + let where_clause = { + if as_if.kind == QueryAsIfKind::Replace { + r#"WHERE "row_number" <> 
{row_number}"# } else { - values.push(value.to_string()); + "" } - } - values.join(", ") - }; - format!( - r#""{table_alias}{suffix}" AS ( + }; + format!( + r#""{table_alias}{suffix}" AS ( SELECT "row_number", {columns} FROM "{table_name}{suffix}" - WHERE "row_number" <> {row_number} + {where_clause} UNION ALL SELECT {row_number}, {values} )"#, - columns = columns - .iter() - .map(|c| format!("\"{}\"", c)) - .collect::>() - .join(", "), - table_alias = as_if.alias, - table_name = as_if.table, - row_number = as_if.row_number, - values = values, - ) + columns = columns, + table_alias = as_if.alias, + suffix = suffix, + table_name = as_if.table, + row_number = as_if.row_number, + values = values, + where_clause = where_clause, + ) + } } }; From 462fcb927ee6f0877a6b95b99be46f208710004c Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Fri, 30 Jun 2023 10:36:08 -0400 Subject: [PATCH 16/31] fix bugs in update_row() and insert_new_row() --- src/lib.rs | 80 ++++++++++++++++++++++++++++--------------------- src/validate.rs | 4 +-- 2 files changed, 48 insertions(+), 36 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 4c3c005a..bb87034c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1066,21 +1066,25 @@ pub async fn insert_new_row( compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, pool: &AnyPool, - table_name: &str, + table_to_write: &str, row: &SerdeMap, new_row_number: Option<&u32>, skip_validation: bool, ) -> Result { + let base_table = match table_to_write.strip_suffix("_conflict") { + None => table_to_write.clone(), + Some(base) => base, + }; + // First, send the row through the row validator to determine if any fields are problematic and // to mark them with appropriate messages: - let row = if !skip_validation { validate_row( global_config, compiled_datatype_conditions, compiled_rule_conditions, pool, - table_name, + base_table, row, None, None, @@ -1097,7 +1101,7 @@ pub async fn insert_new_row( // The new row number to insert is the 
current highest row number + 1. let sql = format!( r#"SELECT MAX("row_number") AS "row_number" FROM "{}_view""#, - table_name + base_table ); let query = sqlx_query(&sql); let result_row = query.fetch_one(pool).await?; @@ -1128,7 +1132,7 @@ pub async fn insert_new_row( cell_for_insert.remove("value"); let sql_type = get_sql_type_from_global_config( &global_config, - &table_name.to_string(), + &base_table, &column, pool, ) @@ -1157,8 +1161,8 @@ pub async fn insert_new_row( // database currently were replaced with `row`: let query_as_if = QueryAsIf { kind: QueryAsIfKind::Add, - table: table_name.to_string(), - alias: format!("{}_as_if", table_name), + table: base_table.to_string(), + alias: format!("{}_as_if", base_table), row_number: new_row_number, row: Some(row.clone()), }; @@ -1166,7 +1170,7 @@ pub async fn insert_new_row( // Look through the valve config to see which tables are dependent on this table // and find the rows that need to be updated: let (_, updates_after, updates_intra) = - get_rows_to_update(global_config, pool, table_name, &query_as_if) + get_rows_to_update(global_config, pool, base_table, &query_as_if) .await .map_err(|e| Configuration(e.into()))?; @@ -1183,35 +1187,43 @@ pub async fn insert_new_row( ) .await?; - // Next, figure out where to put the new version of the row: - let mut table_to_write = String::from(table_name); - for (column, cell) in row.iter() { - let valid = cell.get("valid").unwrap(); - if valid == false { - let structure = global_config - .get("table") - .and_then(|t| t.as_object()) - .and_then(|t| t.get(table_name)) - .and_then(|t| t.as_object()) - .and_then(|t| t.get("column")) - .and_then(|c| c.as_object()) - .and_then(|c| c.get(column)) - .and_then(|c| c.as_object()) - .and_then(|c| c.get("structure")) - .and_then(|s| s.as_str()) - .unwrap_or(""); - if vec!["primary", "unique"].contains(&structure) || structure.starts_with("tree(") { - let messages = cell.get("messages").and_then(|m| m.as_array()).unwrap(); - for msg in 
messages { - let level = msg.get("level").and_then(|l| l.as_str()).unwrap(); - if level == "error" { - table_to_write.push_str("_conflict"); - break; + // If the row is not already being directed to the conflict table, check it to see if it should + // be redirected there: + let table_to_write = { + if table_to_write.ends_with("_conflict") { + table_to_write.to_string() + } else { + let mut table_to_write = String::from(base_table); + for (column, cell) in row.iter() { + let valid = cell.get("valid").unwrap(); + if valid == false { + let structure = global_config + .get("table") + .and_then(|t| t.as_object()) + .and_then(|t| t.get(base_table)) + .and_then(|t| t.as_object()) + .and_then(|t| t.get("column")) + .and_then(|c| c.as_object()) + .and_then(|c| c.get(column)) + .and_then(|c| c.as_object()) + .and_then(|c| c.get("structure")) + .and_then(|s| s.as_str()) + .unwrap_or(""); + if vec!["primary", "unique"].contains(&structure) || structure.starts_with("tree(") { + let messages = cell.get("messages").and_then(|m| m.as_array()).unwrap(); + for msg in messages { + let level = msg.get("level").and_then(|l| l.as_str()).unwrap(); + if level == "error" { + table_to_write.push_str("_conflict"); + break; + } + } } } } + table_to_write } - } + }; // Add the new row to the table: let insert_stmt = local_sql_syntax( @@ -1254,7 +1266,7 @@ pub async fn insert_new_row( r#"INSERT INTO "message" ("table", "row", "column", "value", "level", "rule", "message") VALUES ('{}', {}, '{}', '{}', '{}', '{}', '{}')"#, - table_name, new_row_number, column, value, level, rule, message + base_table, new_row_number, column, value, level, rule, message ); let query = sqlx_query(&message_sql); query.execute(pool).await?; diff --git a/src/validate.rs b/src/validate.rs index 08b7d407..277da0b3 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -1457,9 +1457,9 @@ fn as_if_to_sql( .join(", "); let where_clause = { if as_if.kind == QueryAsIfKind::Replace { - r#"WHERE "row_number" <> {row_number}"# 
+ format!(r#"WHERE "row_number" != {}"#, as_if.row_number) } else { - "" + "".to_string() } }; format!( From 39ef3156bd67af7810b06d7bb91839519dacaba1 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Fri, 30 Jun 2023 11:20:59 -0400 Subject: [PATCH 17/31] fix bug in delete_row() --- src/lib.rs | 67 +++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index bb87034c..e7f8165d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1130,13 +1130,9 @@ pub async fn insert_new_row( let mut cell_for_insert = cell.clone(); if cell_valid { cell_for_insert.remove("value"); - let sql_type = get_sql_type_from_global_config( - &global_config, - &base_table, - &column, - pool, - ) - .unwrap(); + let sql_type = + get_sql_type_from_global_config(&global_config, &base_table, &column, pool) + .unwrap(); insert_values.push(cast_sql_param_from_text(&sql_type)); insert_params.push(String::from(cell_value)); } else { @@ -1209,7 +1205,9 @@ pub async fn insert_new_row( .and_then(|c| c.get("structure")) .and_then(|s| s.as_str()) .unwrap_or(""); - if vec!["primary", "unique"].contains(&structure) || structure.starts_with("tree(") { + if vec!["primary", "unique"].contains(&structure) + || structure.starts_with("tree(") + { let messages = cell.get("messages").and_then(|m| m.as_array()).unwrap(); for msg in messages { let level = msg.get("level").and_then(|l| l.as_str()).unwrap(); @@ -1871,19 +1869,6 @@ pub async fn delete_row( ) .await?; - // Now process the rows from the same table as the target table that need to be re-validated - // because of unique or primary constraints: - process_updates( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &updates_intra, - &query_as_if, - true, - ) - .await?; - // Now delete the row: let sql1 = format!( "DELETE FROM \"{}\" WHERE row_number = {}", @@ -1906,6 +1891,19 @@ pub async fn delete_row( let query = sqlx_query(&sql); 
query.execute(pool).await?; + // Finally process the rows from the same table as the target table that need to be re-validated + // because of unique or primary constraints: + process_updates( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + &updates_intra, + &query_as_if, + true, + ) + .await?; + Ok(()) } @@ -2117,18 +2115,6 @@ pub async fn update_row( .await?; } - // Now process the updates that need to be performed after the update of the target row: - process_updates( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &updates_after, - &query_as_if, - false, - ) - .await?; - // Now delete any messages that had been previously inserted to the message table for the old // version of this row (other than any 'update'-level messages): let delete_sql = format!( @@ -2138,7 +2124,7 @@ pub async fn update_row( let query = sqlx_query(&delete_sql); query.execute(pool).await?; - // Finally add the messages to the message table for the new version of this row: + // Now add the messages to the message table for the new version of this row: for m in messages { let column = m.get("column").and_then(|c| c.as_str()).unwrap(); let value = m.get("value").and_then(|c| c.as_str()).unwrap(); @@ -2156,6 +2142,19 @@ pub async fn update_row( query.execute(pool).await?; } + // Finally process the updates from other tables that need to be performed after the update of + // the target row: + process_updates( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + &updates_after, + &query_as_if, + false, + ) + .await?; + Ok(()) } From 2c1e374f28fa604f837fbc7a13fd3f6fe69afeff Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Fri, 30 Jun 2023 11:59:53 -0400 Subject: [PATCH 18/31] tweaks and re-org --- src/lib.rs | 439 ++++++++++++++++++++++++------------------------ src/validate.rs | 5 +- 2 files changed, 222 insertions(+), 222 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 
e7f8165d..4abfb077 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1058,221 +1058,6 @@ pub async fn valve( Ok(config.to_string()) } -/// Given a global config map, a database connection pool, a table name, and a row, assign a new -/// row number to the row and insert it to the database, then return the new row number. Optionally, -/// if row_number is provided, use that to identify the new row. -pub async fn insert_new_row( - global_config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, - table_to_write: &str, - row: &SerdeMap, - new_row_number: Option<&u32>, - skip_validation: bool, -) -> Result { - let base_table = match table_to_write.strip_suffix("_conflict") { - None => table_to_write.clone(), - Some(base) => base, - }; - - // First, send the row through the row validator to determine if any fields are problematic and - // to mark them with appropriate messages: - let row = if !skip_validation { - validate_row( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - base_table, - row, - None, - None, - ) - .await? - } else { - row.clone() - }; - - // Now prepare the row and messages for insertion to the database. - let new_row_number = match new_row_number { - Some(n) => *n, - None => { - // The new row number to insert is the current highest row number + 1. 
- let sql = format!( - r#"SELECT MAX("row_number") AS "row_number" FROM "{}_view""#, - base_table - ); - let query = sqlx_query(&sql); - let result_row = query.fetch_one(pool).await?; - let result = result_row.try_get_raw("row_number").unwrap(); - let new_row_number: i64; - if result.is_null() { - new_row_number = 1; - } else { - new_row_number = result_row.get("row_number"); - } - let new_row_number = new_row_number as u32 + 1; - new_row_number - } - }; - - let mut insert_columns = vec![]; - let mut insert_values = vec![]; - let mut insert_params = vec![]; - let mut messages = vec![]; - let sorted_datatypes = get_sorted_datatypes(global_config); - for (column, cell) in row.iter() { - insert_columns.append(&mut vec![format!(r#""{}""#, column)]); - let cell = cell.as_object().unwrap(); - let cell_valid = cell.get("valid").and_then(|v| v.as_bool()).unwrap(); - let cell_value = cell.get("value").and_then(|v| v.as_str()).unwrap(); - let mut cell_for_insert = cell.clone(); - if cell_valid { - cell_for_insert.remove("value"); - let sql_type = - get_sql_type_from_global_config(&global_config, &base_table, &column, pool) - .unwrap(); - insert_values.push(cast_sql_param_from_text(&sql_type)); - insert_params.push(String::from(cell_value)); - } else { - insert_values.push(String::from("NULL")); - let cell_messages = sort_messages( - &sorted_datatypes, - cell.get("messages").and_then(|m| m.as_array()).unwrap(), - ); - for cell_message in cell_messages { - messages.push(json!({ - "column": column, - "value": cell_value, - "level": cell_message.get("level").and_then(|s| s.as_str()).unwrap(), - "rule": cell_message.get("rule").and_then(|s| s.as_str()).unwrap(), - "message": cell_message.get("message").and_then(|s| s.as_str()).unwrap(), - })); - } - } - } - - // Used to validate the given row, counterfactually, "as if" the version of the row in the - // database currently were replaced with `row`: - let query_as_if = QueryAsIf { - kind: QueryAsIfKind::Add, - table: 
base_table.to_string(), - alias: format!("{}_as_if", base_table), - row_number: new_row_number, - row: Some(row.clone()), - }; - - // Look through the valve config to see which tables are dependent on this table - // and find the rows that need to be updated: - let (_, updates_after, updates_intra) = - get_rows_to_update(global_config, pool, base_table, &query_as_if) - .await - .map_err(|e| Configuration(e.into()))?; - - // Process the rows from the same table as the target table that need to be re-validated - // because of unique or primary constraints: - process_updates( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &updates_intra, - &query_as_if, - true, - ) - .await?; - - // If the row is not already being directed to the conflict table, check it to see if it should - // be redirected there: - let table_to_write = { - if table_to_write.ends_with("_conflict") { - table_to_write.to_string() - } else { - let mut table_to_write = String::from(base_table); - for (column, cell) in row.iter() { - let valid = cell.get("valid").unwrap(); - if valid == false { - let structure = global_config - .get("table") - .and_then(|t| t.as_object()) - .and_then(|t| t.get(base_table)) - .and_then(|t| t.as_object()) - .and_then(|t| t.get("column")) - .and_then(|c| c.as_object()) - .and_then(|c| c.get(column)) - .and_then(|c| c.as_object()) - .and_then(|c| c.get("structure")) - .and_then(|s| s.as_str()) - .unwrap_or(""); - if vec!["primary", "unique"].contains(&structure) - || structure.starts_with("tree(") - { - let messages = cell.get("messages").and_then(|m| m.as_array()).unwrap(); - for msg in messages { - let level = msg.get("level").and_then(|l| l.as_str()).unwrap(); - if level == "error" { - table_to_write.push_str("_conflict"); - break; - } - } - } - } - } - table_to_write - } - }; - - // Add the new row to the table: - let insert_stmt = local_sql_syntax( - &pool, - &format!( - r#"INSERT INTO "{}" ("row_number", {}) VALUES ({}, {})"#, - 
table_to_write, - insert_columns.join(", "), - new_row_number, - insert_values.join(", "), - ), - ); - let mut query = sqlx_query(&insert_stmt); - for param in &insert_params { - query = query.bind(param); - } - query.execute(pool).await?; - - // Now process the updates that need to be performed after the update of the target row: - process_updates( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &updates_after, - &query_as_if, - false, - ) - .await?; - - // Next add any validation messages to the message table: - for m in messages { - let column = m.get("column").and_then(|c| c.as_str()).unwrap(); - let value = m.get("value").and_then(|c| c.as_str()).unwrap(); - let level = m.get("level").and_then(|c| c.as_str()).unwrap(); - let rule = m.get("rule").and_then(|c| c.as_str()).unwrap(); - let message = m.get("message").and_then(|c| c.as_str()).unwrap(); - let message = message.replace("'", "''"); - let message_sql = format!( - r#"INSERT INTO "message" - ("table", "row", "column", "value", "level", "rule", "message") - VALUES ('{}', {}, '{}', '{}', '{}', '{}', '{}')"#, - base_table, new_row_number, column, value, level, rule, message - ); - let query = sqlx_query(&message_sql); - query.execute(pool).await?; - } - - Ok(new_row_number) -} - pub async fn get_affected_rows( table: &str, column: &str, @@ -1606,7 +1391,7 @@ pub async fn get_rows_to_update( let updates_after = match &query_as_if.row { None => { - if query_as_if.kind != QueryAsIfKind::Ignore { + if query_as_if.kind != QueryAsIfKind::Remove { eprintln!( "WARN: No row in query_as_if: {:?} for {:?}", query_as_if, query_as_if.kind @@ -1706,7 +1491,7 @@ pub async fn get_rows_to_update( match &query_as_if.row { None => { - if query_as_if.kind != QueryAsIfKind::Ignore { + if query_as_if.kind != QueryAsIfKind::Remove { eprintln!( "WARN: No row in query_as_if: {:?} for {:?}", query_as_if, query_as_if.kind @@ -1786,6 +1571,222 @@ pub async fn process_updates( Ok(()) } +/// Given a 
global config map, a database connection pool, a table name, and a row, assign a new +/// row number to the row and insert it to the database, then return the new row number. Optionally, +/// if row_number is provided, use that to identify the new row. +#[async_recursion] +pub async fn insert_new_row( + global_config: &SerdeMap, + compiled_datatype_conditions: &HashMap, + compiled_rule_conditions: &HashMap>>, + pool: &AnyPool, + table_to_write: &str, + row: &SerdeMap, + new_row_number: Option, + skip_validation: bool, +) -> Result { + let base_table = match table_to_write.strip_suffix("_conflict") { + None => table_to_write.clone(), + Some(base) => base, + }; + + // First, send the row through the row validator to determine if any fields are problematic and + // to mark them with appropriate messages: + let row = if !skip_validation { + validate_row( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + base_table, + row, + None, + None, + ) + .await? + } else { + row.clone() + }; + + // Now prepare the row and messages for insertion to the database. + let new_row_number = match new_row_number { + Some(n) => n, + None => { + // The new row number to insert is the current highest row number + 1. 
+ let sql = format!( + r#"SELECT MAX("row_number") AS "row_number" FROM "{}_view""#, + base_table + ); + let query = sqlx_query(&sql); + let result_row = query.fetch_one(pool).await?; + let result = result_row.try_get_raw("row_number").unwrap(); + let new_row_number: i64; + if result.is_null() { + new_row_number = 1; + } else { + new_row_number = result_row.get("row_number"); + } + let new_row_number = new_row_number as u32 + 1; + new_row_number + } + }; + + let mut insert_columns = vec![]; + let mut insert_values = vec![]; + let mut insert_params = vec![]; + let mut messages = vec![]; + let sorted_datatypes = get_sorted_datatypes(global_config); + for (column, cell) in row.iter() { + insert_columns.append(&mut vec![format!(r#""{}""#, column)]); + let cell = cell.as_object().unwrap(); + let cell_valid = cell.get("valid").and_then(|v| v.as_bool()).unwrap(); + let cell_value = cell.get("value").and_then(|v| v.as_str()).unwrap(); + let mut cell_for_insert = cell.clone(); + if cell_valid { + cell_for_insert.remove("value"); + let sql_type = + get_sql_type_from_global_config(&global_config, &base_table, &column, pool) + .unwrap(); + insert_values.push(cast_sql_param_from_text(&sql_type)); + insert_params.push(String::from(cell_value)); + } else { + insert_values.push(String::from("NULL")); + let cell_messages = sort_messages( + &sorted_datatypes, + cell.get("messages").and_then(|m| m.as_array()).unwrap(), + ); + for cell_message in cell_messages { + messages.push(json!({ + "column": column, + "value": cell_value, + "level": cell_message.get("level").and_then(|s| s.as_str()).unwrap(), + "rule": cell_message.get("rule").and_then(|s| s.as_str()).unwrap(), + "message": cell_message.get("message").and_then(|s| s.as_str()).unwrap(), + })); + } + } + } + + // Used to validate the given row, counterfactually, "as if" the version of the row in the + // database currently were replaced with `row`: + let query_as_if = QueryAsIf { + kind: QueryAsIfKind::Add, + table: 
base_table.to_string(), + alias: format!("{}_as_if", base_table), + row_number: new_row_number, + row: Some(row.clone()), + }; + + // Look through the valve config to see which tables are dependent on this table + // and find the rows that need to be updated: + let (_, updates_after, updates_intra) = + get_rows_to_update(global_config, pool, base_table, &query_as_if) + .await + .map_err(|e| Configuration(e.into()))?; + + // Process the rows from the same table as the target table that need to be re-validated + // because of unique or primary constraints: + process_updates( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + &updates_intra, + &query_as_if, + true, + ) + .await?; + + // If the row is not already being directed to the conflict table, check it to see if it should + // be redirected there: + let table_to_write = { + if table_to_write.ends_with("_conflict") { + table_to_write.to_string() + } else { + let mut table_to_write = String::from(base_table); + for (column, cell) in row.iter() { + let valid = cell.get("valid").unwrap(); + if valid == false { + let structure = global_config + .get("table") + .and_then(|t| t.as_object()) + .and_then(|t| t.get(base_table)) + .and_then(|t| t.as_object()) + .and_then(|t| t.get("column")) + .and_then(|c| c.as_object()) + .and_then(|c| c.get(column)) + .and_then(|c| c.as_object()) + .and_then(|c| c.get("structure")) + .and_then(|s| s.as_str()) + .unwrap_or(""); + if vec!["primary", "unique"].contains(&structure) + || structure.starts_with("tree(") + { + let messages = cell.get("messages").and_then(|m| m.as_array()).unwrap(); + for msg in messages { + let level = msg.get("level").and_then(|l| l.as_str()).unwrap(); + if level == "error" { + table_to_write.push_str("_conflict"); + break; + } + } + } + } + } + table_to_write + } + }; + + // Add the new row to the table: + let insert_stmt = local_sql_syntax( + &pool, + &format!( + r#"INSERT INTO "{}" ("row_number", {}) VALUES ({}, {})"#, + 
table_to_write, + insert_columns.join(", "), + new_row_number, + insert_values.join(", "), + ), + ); + let mut query = sqlx_query(&insert_stmt); + for param in &insert_params { + query = query.bind(param); + } + query.execute(pool).await?; + + // Now process the updates that need to be performed after the update of the target row: + process_updates( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + &updates_after, + &query_as_if, + false, + ) + .await?; + + // Next add any validation messages to the message table: + for m in messages { + let column = m.get("column").and_then(|c| c.as_str()).unwrap(); + let value = m.get("value").and_then(|c| c.as_str()).unwrap(); + let level = m.get("level").and_then(|c| c.as_str()).unwrap(); + let rule = m.get("rule").and_then(|c| c.as_str()).unwrap(); + let message = m.get("message").and_then(|c| c.as_str()).unwrap(); + let message = message.replace("'", "''"); + let message_sql = format!( + r#"INSERT INTO "message" + ("table", "row", "column", "value", "level", "rule", "message") + VALUES ('{}', {}, '{}', '{}', '{}', '{}', '{}')"#, + base_table, new_row_number, column, value, level, rule, message + ); + let query = sqlx_query(&message_sql); + query.execute(pool).await?; + } + + Ok(new_row_number) +} + #[async_recursion] pub async fn delete_row( global_config: &SerdeMap, @@ -1842,7 +1843,7 @@ pub async fn delete_row( // Used to validate the given row, counterfactually, "as if" the row did not exist in the // database: let query_as_if = QueryAsIf { - kind: QueryAsIfKind::Ignore, + kind: QueryAsIfKind::Remove, table: table_name.to_string(), alias: format!("{}_as_if", table_name), row_number: *row_number, @@ -2109,7 +2110,7 @@ pub async fn update_row( pool, &table_to_write, &row, - Some(row_number), + Some(*row_number), true, ) .await?; diff --git a/src/validate.rs b/src/validate.rs index 277da0b3..f2c96e64 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -30,7 +30,7 @@ pub struct ResultRow 
{ #[derive(Clone, Debug, PartialEq)] pub enum QueryAsIfKind { Add, - Ignore, + Remove, Replace, } @@ -104,7 +104,6 @@ pub async fn validate_row( let context = result_row.clone(); for (column_name, cell) in result_row.contents.iter_mut() { - // TODO: Pass the query_as_if parameter to validate_cell_rules. validate_cell_rules( config, compiled_rule_conditions, @@ -1409,7 +1408,7 @@ fn as_if_to_sql( }; match as_if.kind { - QueryAsIfKind::Ignore => { + QueryAsIfKind::Remove => { format!( r#""{table_alias}{suffix}" AS ( SELECT * FROM "{table_name}{suffix}" WHERE "row_number" <> {row_number} From 827ec4abed44ca783368f52ae4a879b72a5cd6e1 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Fri, 30 Jun 2023 13:06:18 -0400 Subject: [PATCH 19/31] don't check intra dependencies when adding a new row --- src/lib.rs | 44 +++++++++++++++----------------------------- 1 file changed, 15 insertions(+), 29 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 4abfb077..605fdf77 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1679,23 +1679,9 @@ pub async fn insert_new_row( // Look through the valve config to see which tables are dependent on this table // and find the rows that need to be updated: - let (_, updates_after, updates_intra) = - get_rows_to_update(global_config, pool, base_table, &query_as_if) - .await - .map_err(|e| Configuration(e.into()))?; - - // Process the rows from the same table as the target table that need to be re-validated - // because of unique or primary constraints: - process_updates( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &updates_intra, - &query_as_if, - true, - ) - .await?; + let (_, updates_after, _) = get_rows_to_update(global_config, pool, base_table, &query_as_if) + .await + .map_err(|e| Configuration(e.into()))?; // If the row is not already being directed to the conflict table, check it to see if it should // be redirected there: @@ -1754,18 +1740,6 @@ pub async fn insert_new_row( } 
query.execute(pool).await?; - // Now process the updates that need to be performed after the update of the target row: - process_updates( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &updates_after, - &query_as_if, - false, - ) - .await?; - // Next add any validation messages to the message table: for m in messages { let column = m.get("column").and_then(|c| c.as_str()).unwrap(); @@ -1784,6 +1758,18 @@ pub async fn insert_new_row( query.execute(pool).await?; } + // Now process the updates that need to be performed after the update of the target row: + process_updates( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + &updates_after, + &query_as_if, + false, + ) + .await?; + Ok(new_row_number) } From d814ddecc5e60ea941c4d89d179720f40915b143 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Fri, 30 Jun 2023 13:38:26 -0400 Subject: [PATCH 20/31] fix intra-table dependency handling during updates --- src/lib.rs | 60 ++++++++++++++++-------------------------------------- 1 file changed, 17 insertions(+), 43 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 605fdf77..939e704b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1460,8 +1460,10 @@ pub async fn get_rows_to_update( } // Query the database using `row_number` to get the current value of the column for - // the row. - let mut updates = match query_as_if.kind { + // the row. We only look for rows to update that match the current value of the column. + // Rows matching the column's new value don't also need to be updated. Those will result + // in a validation error for the new/modified row but that is fine. + let updates = match query_as_if.kind { QueryAsIfKind::Add => { if let None = query_as_if.row { eprintln!( @@ -1488,34 +1490,6 @@ pub async fn get_rows_to_update( .await? 
} }; - - match &query_as_if.row { - None => { - if query_as_if.kind != QueryAsIfKind::Remove { - eprintln!( - "WARN: No row in query_as_if: {:?} for {:?}", - query_as_if, query_as_if.kind - ); - } - } - Some(row) => { - // Fetch the cell corresponding to `column` from `row`, and the value of that cell, - // which is the new value for the row. - let new_value = get_cell_value(&row, column)?; - let further_updates = get_affected_rows( - table, - column, - &new_value, - Some(&query_as_if.row_number), - global_config, - pool, - ) - .await?; - for (key, data) in further_updates { - updates.insert(key, data); - } - } - }; rows_to_update_intra.insert(table.to_string(), updates); } @@ -2014,19 +1988,6 @@ pub async fn update_row( ) .await?; - // Now process the rows from the same table as the target table that need to be re-validated - // because of unique or primary constraints: - process_updates( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &updates_intra, - &query_as_if, - true, - ) - .await?; - // Now update the target row. 
First, figure out whether the row is currently in the base table // or the conflict table: let sql = format!( @@ -2129,6 +2090,19 @@ pub async fn update_row( query.execute(pool).await?; } + // Now process the rows from the same table as the target table that need to be re-validated + // because of unique or primary constraints: + process_updates( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + &updates_intra, + &query_as_if, + true, + ) + .await?; + // Finally process the updates from other tables that need to be performed after the update of // the target row: process_updates( From a1e7ab767a77264f921be96d1fc54d0467df89b7 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Fri, 30 Jun 2023 16:39:08 -0400 Subject: [PATCH 21/31] various bug fixes --- src/lib.rs | 144 ++++++++++++++++++++++++++++-------------------- src/validate.rs | 29 +++++++++- 2 files changed, 110 insertions(+), 63 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 939e704b..d3f8d7ca 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1270,10 +1270,10 @@ pub async fn get_db_value( row_number: &u32, pool: &AnyPool, ) -> Result { - let (is_clause, cast) = if pool.any_kind() == AnyKind::Sqlite { - ("IS", "") + let is_clause = if pool.any_kind() == AnyKind::Sqlite { + "IS" } else { - ("IS NOT DISTINCT FROM", "::TEXT") + "IS NOT DISTINCT FROM" }; let sql = format!( r#"SELECT @@ -1287,15 +1287,19 @@ pub async fn get_db_value( ORDER BY "message_id" DESC LIMIT 1 ) - ELSE "{column}"{cast} + ELSE {casted_column} END AS "{column}" FROM "{table}_view" WHERE "row_number" = {row_number} "#, column = column, is_clause = is_clause, table = table, - cast = cast, row_number = row_number, + casted_column = if pool.any_kind() == AnyKind::Sqlite { + cast_column_sql_to_text(column, "non-text") + } else { + format!("\"{}\"::TEXT", column) + }, ); let query = sqlx_query(&sql); @@ -1536,7 +1540,7 @@ pub async fn process_updates( update_table, &vrow, row_number, - true, + false, 
do_not_recurse, ) .await?; @@ -1753,9 +1757,15 @@ pub async fn delete_row( compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, pool: &AnyPool, - table_name: &str, + table_to_write: &str, row_number: &u32, + simulated_update: bool, ) -> Result<(), sqlx::Error> { + let base_table = match table_to_write.strip_suffix("_conflict") { + None => table_to_write.clone(), + Some(base) => base, + }; + // TODO: If possible use BEGIN and END TRANSACTION here and ROLLBACK in case of an error. // Maybe we need a wrapper function for this? // Note also that we might want to run ANALYZE (or the sqlite equivalent) after @@ -1764,14 +1774,14 @@ pub async fn delete_row( // First, use the row number to fetch the row from the database: let sql = format!( "SELECT * FROM \"{}_view\" WHERE row_number = {}", - table_name, row_number + base_table, row_number ); let query = sqlx_query(&sql); let sql_row = query.fetch_one(pool).await.map_err(|e| { Configuration( format!( "Got: '{}' while fetching row number {} from table {}", - e, row_number, table_name + e, row_number, base_table ) .into(), ) @@ -1781,13 +1791,16 @@ pub async fn delete_row( let mut row = SerdeMap::new(); for column in sql_row.columns() { let cname = column.name(); - if cname != "row_number" { + if !vec!["row_number", "message"].contains(&cname) { let raw_value = sql_row .try_get_raw(format!(r#"{}"#, cname).as_str()) .unwrap(); let value; if !raw_value.is_null() { - value = get_column_value(&sql_row, &cname, "text"); + let sql_type = + get_sql_type_from_global_config(global_config, &base_table, &cname, pool) + .unwrap(); + value = get_column_value(&sql_row, &cname, &sql_type); } else { value = String::from(""); } @@ -1804,8 +1817,8 @@ pub async fn delete_row( // database: let query_as_if = QueryAsIf { kind: QueryAsIfKind::Remove, - table: table_name.to_string(), - alias: format!("{}_as_if", table_name), + table: base_table.to_string(), + alias: format!("{}_as_if", base_table), row_number: *row_number, 
row: None, }; @@ -1814,7 +1827,7 @@ pub async fn delete_row( // rows that need to be updated. Since this is a delete there will only be rows to update // before and none after the delete: let (updates_before, _, updates_intra) = - get_rows_to_update(global_config, pool, table_name, &query_as_if) + get_rows_to_update(global_config, pool, base_table, &query_as_if) .await .map_err(|e| Configuration(e.into()))?; @@ -1833,11 +1846,11 @@ pub async fn delete_row( // Now delete the row: let sql1 = format!( "DELETE FROM \"{}\" WHERE row_number = {}", - table_name, row_number + base_table, row_number, ); let sql2 = format!( "DELETE FROM \"{}_conflict\" WHERE row_number = {}", - table_name, row_number + base_table, row_number ); for sql in vec![sql1, sql2] { let query = sqlx_query(&sql); @@ -1845,9 +1858,16 @@ pub async fn delete_row( } // Now delete all messages associated with the row: + let simulated_update_clause = { + if simulated_update { + r#"AND "level" <> 'update'"# + } else { + "" + } + }; let sql = format!( - r#"DELETE FROM "message" WHERE "table" = '{}' AND "row" = {}"#, - table_name, row_number + r#"DELETE FROM "message" WHERE "table" = '{}' AND "row" = {} {}"#, + base_table, row_number, simulated_update_clause ); let query = sqlx_query(&sql); query.execute(pool).await?; @@ -1882,7 +1902,41 @@ pub async fn update_row( skip_validation: bool, do_not_recurse: bool, ) -> Result<(), sqlx::Error> { - // First, send the row through the row validator to determine if any fields are problematic and + // Used to validate the given row, counterfactually, "as if" the version of the row in the + // database currently were replaced with `row`: + let query_as_if = QueryAsIf { + kind: QueryAsIfKind::Replace, + table: table_name.to_string(), + alias: format!("{}_as_if", table_name), + row_number: *row_number, + row: Some(row.clone()), + }; + + // First, look through the valve config to see which tables are dependent on this table + // and find the rows that need to be updated: + 
let (updates_before, updates_after, updates_intra) = { + if do_not_recurse { + (IndexMap::new(), IndexMap::new(), IndexMap::new()) + } else { + get_rows_to_update(global_config, pool, table_name, &query_as_if) + .await + .map_err(|e| Configuration(e.into()))? + } + }; + + // Process the updates that need to be performed before the update of the target row: + process_updates( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + &updates_before, + &query_as_if, + false, + ) + .await?; + + // Send the row through the row validator to determine if any fields are problematic and // to mark them with appropriate messages: let row = if !skip_validation { validate_row( @@ -1954,40 +2008,6 @@ pub async fn update_row( } } - // Used to validate the given row, counterfactually, "as if" the version of the row in the - // database currently were replaced with `row`: - let query_as_if = QueryAsIf { - kind: QueryAsIfKind::Replace, - table: table_name.to_string(), - alias: format!("{}_as_if", table_name), - row_number: *row_number, - row: Some(row.clone()), - }; - - // First, look through the valve config to see which tables are dependent on this table - // and find the rows that need to be updated: - let (updates_before, updates_after, updates_intra) = { - if do_not_recurse { - (IndexMap::new(), IndexMap::new(), IndexMap::new()) - } else { - get_rows_to_update(global_config, pool, table_name, &query_as_if) - .await - .map_err(|e| Configuration(e.into()))? - } - }; - - // Process the updates that need to be performed before the update of the target row: - process_updates( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &updates_before, - &query_as_if, - false, - ) - .await?; - // Now update the target row. 
First, figure out whether the row is currently in the base table // or the conflict table: let sql = format!( @@ -2044,12 +2064,16 @@ pub async fn update_row( } query.execute(pool).await?; } else { - let sql = format!( - "DELETE FROM \"{}\" WHERE row_number = {}", - current_table, row_number - ); - let query = sqlx_query(&sql); - query.execute(pool).await?; + delete_row( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + &current_table, + row_number, + true, + ) + .await?; insert_new_row( global_config, compiled_datatype_conditions, @@ -2058,7 +2082,7 @@ pub async fn update_row( &table_to_write, &row, Some(*row_number), - true, + false, ) .await?; } diff --git a/src/validate.rs b/src/validate.rs index f2c96e64..201cf82f 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -1425,12 +1425,35 @@ fn as_if_to_sql( let values = { let mut values = vec![]; for column in &columns { - let value = row + let valid = row .get(column) - .and_then(|c| c.get("value")) - .and_then(|v| v.as_str()) + .and_then(|c| c.get("valid")) + .and_then(|v| v.as_bool()) .unwrap(); + let value = { + if valid == true { + let value = match row.get(column).and_then(|c| c.get("value")) { + Some(SerdeValue::String(s)) => Ok(format!("{}", s)), + Some(SerdeValue::Number(n)) => Ok(format!("{}", n)), + Some(SerdeValue::Bool(b)) => Ok(format!("{}", b)), + _ => Err(format!( + "Value missing or of unknown type in column {} of row to \ + update: {:?}", + column, row + )), + } + .unwrap(); + if value == "" { + "NULL".to_string() + } else { + value + } + } else { + "NULL".to_string() + } + }; + let sql_type = get_sql_type_from_global_config( &global_config, &as_if.table, From cdc698537971f7902f9a5ddc5c4ba26d975c1342 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Thu, 13 Jul 2023 13:04:30 -0400 Subject: [PATCH 22/31] add unit tests for updates/inserts/deletes with dependencies --- Makefile | 2 +- src/api_test.rs | 72 ++++++++++++++++++++--- test/expected/messages.tsv | 5 ++ 
test/expected/messages_a1.tsv | 5 ++ test/expected/messages_after_api_test.tsv | 12 ++++ test/src/column.tsv | 17 ++++++ test/src/ontology/table10.tsv | 9 +++ test/src/ontology/table11.tsv | 6 ++ test/src/ontology/table8.tsv | 3 + test/src/ontology/table9.tsv | 10 ++++ test/src/table.tsv | 4 ++ 11 files changed, 136 insertions(+), 9 deletions(-) create mode 100644 test/src/ontology/table10.tsv create mode 100644 test/src/ontology/table11.tsv create mode 100644 test/src/ontology/table8.tsv create mode 100644 test/src/ontology/table9.tsv diff --git a/Makefile b/Makefile index 3c832e7b..8d9e350b 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ test/output: test: sqlite_test pg_test api_test random_test -tables_to_test = column datatype rule table table1 table2 table3 table4 table5 table6 table7 +tables_to_test = column datatype rule table table1 table2 table3 table4 table5 table6 table7 table8 table9 table10 table11 sqlite_test: build/valve.db test/src/table.tsv | test/output @echo "Testing valve on sqlite ..." diff --git a/src/api_test.rs b/src/api_test.rs index 78fa6592..9208f1e4 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -1,5 +1,5 @@ use ontodev_valve::{ - get_compiled_datatype_conditions, get_compiled_rule_conditions, + delete_row, get_compiled_datatype_conditions, get_compiled_rule_conditions, get_parsed_structure_conditions, insert_new_row, update_row, validate::{get_matching_values, validate_row}, valve, @@ -56,6 +56,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro let compiled_rule_conditions = get_compiled_rule_conditions(&config, compiled_datatype_conditions.clone(), &parser); + // Test the get_matching_values() function: let matching_values = get_matching_values( &config, &compiled_datatype_conditions, @@ -99,9 +100,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro ]) ); - // NOTE: No validation of the validate/insert/update functions is done below. 
You must use an - // external script to fetch the data from the database and run a diff against a known good - // sample. + // We test that validate_row() is idempotent by running it multiple times on the same row: let row = json!({ "child": {"messages": [], "valid": true, "value": "b"}, "parent": {"messages": [], "valid": true, "value": "f"}, @@ -116,7 +115,6 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro }, }); - // We test that validate_row() is idempotent by running it multiple times on the same row: let result_row_1 = validate_row( &config, &compiled_datatype_conditions, @@ -155,7 +153,11 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro .await?; assert_eq!(result_row, result_row_2); - // Now update the database with the validated row: + // Test update, delete, and insert. NOTE that there are no calls to assert() below. You must use + // an external script to fetch the data from the database and run a diff against a known good + // sample. 
+ + // Update the row we constructed and validated above in the database: update_row( &config, &compiled_datatype_conditions, @@ -195,6 +197,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro None, ) .await?; + let _new_row_num = insert_new_row( &config, &compiled_datatype_conditions, @@ -207,7 +210,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro ) .await?; - // Validate and update: + // Validate and update an existing row: let row = json!({ "child": {"messages": [], "valid": true, "value": 2}, "parent": {"messages": [], "valid": true, "value": 6}, @@ -233,6 +236,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro None, ) .await?; + update_row( &config, &compiled_datatype_conditions, @@ -246,7 +250,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro ) .await?; - // Validate and insert + // Validate and insert a new row: let row = json!({ "child": {"messages": [], "valid": true, "value": 2}, "parent": {"messages": [], "valid": true, "value": 6}, @@ -260,6 +264,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro "value": 2, }, }); + let result_row = validate_row( &config, &compiled_datatype_conditions, @@ -271,6 +276,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro None, ) .await?; + let _new_row_num = insert_new_row( &config, &compiled_datatype_conditions, @@ -283,5 +289,55 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro ) .await?; + // Test cases for updates/inserts/deletes with dependencies. 
+ let row = json!({ + "foreign_column": {"messages": [], "valid": true, "value": "w"}, + "other_foreign_column": {"messages": [], "valid": true, "value": "z"}, + "numeric_foreign_column": {"messages": [], "valid": true, "value": ""}, + }); + update_row( + &config, + &compiled_datatype_conditions, + &compiled_rule_conditions, + &pool, + "table10", + &row.as_object().unwrap(), + &1, + false, + false, + ) + .await?; + + let row = json!({ + "child": {"messages": [], "valid": true, "value": "b"}, + "parent": {"messages": [], "valid": true, "value": "c"}, + "xyzzy": {"messages": [], "valid": true, "value": "d"}, + "foo": {"messages": [], "valid": true, "value": "d"}, + "bar": {"messages": [], "valid": true, "value": "f"}, + }); + update_row( + &config, + &compiled_datatype_conditions, + &compiled_rule_conditions, + &pool, + "table11", + &row.as_object().unwrap(), + &2, + false, + false, + ) + .await?; + + delete_row( + &config, + &compiled_datatype_conditions, + &compiled_rule_conditions, + &pool, + "table11", + &4, + false, + ) + .await?; + Ok(()) } diff --git a/test/expected/messages.tsv b/test/expected/messages.tsv index 13f7d413..353d0906 100644 --- a/test/expected/messages.tsv +++ b/test/expected/messages.tsv @@ -4,6 +4,9 @@ table1 5 base error key:unique Values of base must be unique http://purl.obolibr table1 5 prefix error key:primary Values of prefix must be unique VO table1 10 base error key:unique Values of base must be unique http://www.w3.org/1999/02/22-rdf-syntax-ns# table1 10 prefix error key:primary Values of prefix must be unique rdf +table11 3 foo error key:primary Values of foo must be unique d +table11 4 foo error key:primary Values of foo must be unique e +table11 5 foo error key:primary Values of foo must be unique e table2 1 foo error rule:foo-2 bar cannot be null if foo is not null 5 table2 1 foo error rule:foo-4 bar must be 'y' or 'z' if foo = 5 5 table2 2 foo error rule:foo-1 bar must be null whenever foo is null @@ -46,3 +49,5 @@ table6 5 xyzzy 
error under:not-under Value '8' of column xyzzy is not under '4' table6 7 xyzzy error under:not-in-tree Value '26' of column xyzzy is not in table6.child 26 table6 9 child error key:foreign Value '9' of column child exists only in table4_conflict.numeric_foreign_column 9 table7 3 planetfall error datatype:integer planetfall should be a positive or negative integer e +table8 2 prefix error key:foreign Value 'w' of column prefix is not in table10.foreign_column w +table9 9 child error key:foreign Value 'i' of column child is not in table10.other_foreign_column i diff --git a/test/expected/messages_a1.tsv b/test/expected/messages_a1.tsv index 1f3d8b44..04153339 100644 --- a/test/expected/messages_a1.tsv +++ b/test/expected/messages_a1.tsv @@ -4,6 +4,9 @@ table1 B5 error key:unique Values of base must be unique http://purl.obolibrary. table1 A5 error key:primary Values of prefix must be unique VO table1 B10 error key:unique Values of base must be unique http://www.w3.org/1999/02/22-rdf-syntax-ns# table1 A10 error key:primary Values of prefix must be unique rdf +table11 D3 error key:primary Values of foo must be unique d +table11 D4 error key:primary Values of foo must be unique e +table11 D5 error key:primary Values of foo must be unique e table2 D1 error rule:foo-2 bar cannot be null if foo is not null 5 table2 D1 error rule:foo-4 bar must be 'y' or 'z' if foo = 5 5 table2 D2 error rule:foo-1 bar must be null whenever foo is null @@ -46,3 +49,5 @@ table6 C5 error under:not-under Value '8' of column xyzzy is not under '4' 8 table6 C7 error under:not-in-tree Value '26' of column xyzzy is not in table6.child 26 table6 A9 error key:foreign Value '9' of column child exists only in table4_conflict.numeric_foreign_column 9 table7 B3 error datatype:integer planetfall should be a positive or negative integer e +table8 A2 error key:foreign Value 'w' of column prefix is not in table10.foreign_column w +table9 A9 error key:foreign Value 'i' of column child is not in 
table10.other_foreign_column i diff --git a/test/expected/messages_after_api_test.tsv b/test/expected/messages_after_api_test.tsv index 3da25719..ee5fe672 100644 --- a/test/expected/messages_after_api_test.tsv +++ b/test/expected/messages_after_api_test.tsv @@ -4,6 +4,14 @@ table1 5 base error key:unique Values of base must be unique http://purl.obolibr table1 5 prefix error key:primary Values of prefix must be unique VO table1 10 base error key:unique Values of base must be unique http://www.w3.org/1999/02/22-rdf-syntax-ns# table1 10 prefix error key:primary Values of prefix must be unique rdf +table10 1 foreign_column update rule:update Value changed from 'a' to 'w' w +table10 1 numeric_foreign_column error datatype:integer numeric_foreign_column should be a positive or negative integer +table10 1 numeric_foreign_column error datatype:trimmed_line numeric_foreign_column should be a line of text that does not begin or end with whitespace +table10 1 numeric_foreign_column update rule:update Value changed from '1' to '' +table10 1 other_foreign_column update rule:update Value changed from 'a' to 'z' z +table11 2 foo error key:primary Values of foo must be unique d +table11 2 foo update rule:update Value changed from 'e' to 'd' d +table11 3 foo error key:primary Values of foo must be unique d table2 1 bar error custom:unrelated An unrelated error B table2 1 bar update rule:update Value changed from '' to 'B' B table2 1 child update rule:update Value changed from 'a' to 'b' b @@ -64,3 +72,7 @@ table6 10 bar error custom:unrelated An unrelated error 2 table6 10 child error tree:child-unique Values of child must be unique 2 table6 10 xyzzy error under:not-in-tree Value '23' of column xyzzy is not in table6.child 23 table7 3 planetfall error datatype:integer planetfall should be a positive or negative integer e +table8 1 prefix error key:foreign Value 'a' of column prefix is not in table10.foreign_column a +table8 2 prefix error key:foreign Value 'w' of column prefix 
exists only in table10_conflict.foreign_column w +table9 1 child error key:foreign Value 'a' of column child is not in table10.other_foreign_column a +table9 9 child error key:foreign Value 'i' of column child is not in table10.other_foreign_column i diff --git a/test/src/column.tsv b/test/src/column.tsv index a891c726..07f38290 100644 --- a/test/src/column.tsv +++ b/test/src/column.tsv @@ -48,3 +48,20 @@ table6 foo empty text table6 bar empty integer table7 zork text primary table7 planetfall integer +table8 prefix text from(table10.foreign_column) +table8 base text +table8 ontology_IRI text +table8 version_IRI text +table9 child trimmed_line from(table10.other_foreign_column) +table9 parent empty trimmed_line +table9 xyzzy empty trimmed_line +table9 foo empty integer +table9 bar empty text +table10 foreign_column text unique +table10 other_foreign_column text unique +table10 numeric_foreign_column integer primary +table11 child text +table11 parent text +table11 xyzzy text +table11 foo text primary +table11 bar text diff --git a/test/src/ontology/table10.tsv b/test/src/ontology/table10.tsv new file mode 100644 index 00000000..77710a3c --- /dev/null +++ b/test/src/ontology/table10.tsv @@ -0,0 +1,9 @@ +foreign_column other_foreign_column numeric_foreign_column +a a 1 +b b 2 +c c 3 +d d 4 +e e 5 +f f 6 +g g 7 +h h 8 diff --git a/test/src/ontology/table11.tsv b/test/src/ontology/table11.tsv new file mode 100644 index 00000000..924eb43b --- /dev/null +++ b/test/src/ontology/table11.tsv @@ -0,0 +1,6 @@ +child parent xyzzy foo bar +a b c d e +b c d e f +g f e d c +f g x e z +d h y e w diff --git a/test/src/ontology/table8.tsv b/test/src/ontology/table8.tsv new file mode 100644 index 00000000..a39386b0 --- /dev/null +++ b/test/src/ontology/table8.tsv @@ -0,0 +1,3 @@ +prefix base ontology_IRI version_IRI +a a a d +w w w z diff --git a/test/src/ontology/table9.tsv b/test/src/ontology/table9.tsv new file mode 100644 index 00000000..8b5dfa84 --- /dev/null +++ 
b/test/src/ontology/table9.tsv @@ -0,0 +1,10 @@ +child parent xyzzy foo bar +a a d 5 +b w e y +c a f 5 y +d w g 5 w +e a h +f w a +g a z +h w +i a diff --git a/test/src/table.tsv b/test/src/table.tsv index 161e9c99..23057907 100644 --- a/test/src/table.tsv +++ b/test/src/table.tsv @@ -10,3 +10,7 @@ table4 test/src/ontology/table4.tsv The fourth data table table5 test/src/ontology/table5.tsv The fifth data table table6 test/src/ontology/table6.tsv The sixth data table (like table2 but all numeric) table7 test/src/ontology/table7.tsv The seventh data table +table8 test/src/ontology/table8.tsv The eightth data table +table9 test/src/ontology/table9.tsv The ninth data table +table10 test/src/ontology/table10.tsv The tenth data table +table11 test/src/ontology/table11.tsv The eleventh data table From 1e6923d71ace6e21b66ff1bb93172538a92f846b Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Thu, 13 Jul 2023 14:28:33 -0400 Subject: [PATCH 23/31] add test case for insert with dependencies --- src/api_test.rs | 18 ++++++++++++++++++ test/expected/messages_after_api_test.tsv | 1 - 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/api_test.rs b/src/api_test.rs index 9208f1e4..c99e7461 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -339,5 +339,23 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro ) .await?; + let row = json!({ + "foreign_column": {"messages": [], "valid": true, "value": "i"}, + "other_foreign_column": {"messages": [], "valid": true, "value": "i"}, + "numeric_foreign_column": {"messages": [], "valid": true, "value": "9"}, + }); + + let _new_row_num = insert_new_row( + &config, + &compiled_datatype_conditions, + &compiled_rule_conditions, + &pool, + "table10", + &row.as_object().unwrap(), + None, + false, + ) + .await?; + Ok(()) } diff --git a/test/expected/messages_after_api_test.tsv b/test/expected/messages_after_api_test.tsv index ee5fe672..609203ac 100644 --- 
a/test/expected/messages_after_api_test.tsv +++ b/test/expected/messages_after_api_test.tsv @@ -75,4 +75,3 @@ table7 3 planetfall error datatype:integer planetfall should be a positive or ne table8 1 prefix error key:foreign Value 'a' of column prefix is not in table10.foreign_column a table8 2 prefix error key:foreign Value 'w' of column prefix exists only in table10_conflict.foreign_column w table9 1 child error key:foreign Value 'a' of column child is not in table10.other_foreign_column a -table9 9 child error key:foreign Value 'i' of column child is not in table10.other_foreign_column i From 2a03b2bee6c4d3a7562858220f4b7e74075bb508 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Thu, 13 Jul 2023 15:27:32 -0400 Subject: [PATCH 24/31] wrap inserts, updates, and deletes within db transactions --- src/lib.rs | 63 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 14 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d3f8d7ca..466c889f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1532,7 +1532,7 @@ pub async fn process_updates( .await?; // Update the row in the database: - update_row( + update_row_without_transaction( global_config, compiled_datatype_conditions, compiled_rule_conditions, @@ -1701,6 +1701,9 @@ pub async fn insert_new_row( } }; + // Begin a transaction in the database: + let transaction = pool.begin().await?; + // Add the new row to the table: let insert_stmt = local_sql_syntax( &pool, @@ -1748,6 +1751,9 @@ pub async fn insert_new_row( ) .await?; + // Commit the database transaction: + transaction.commit().await?; + Ok(new_row_number) } @@ -1766,11 +1772,6 @@ pub async fn delete_row( Some(base) => base, }; - // TODO: If possible use BEGIN and END TRANSACTION here and ROLLBACK in case of an error. - // Maybe we need a wrapper function for this? - // Note also that we might want to run ANALYZE (or the sqlite equivalent) after - // the deletes have completed. 
- // First, use the row number to fetch the row from the database: let sql = format!( "SELECT * FROM \"{}_view\" WHERE row_number = {}", @@ -1831,6 +1832,9 @@ pub async fn delete_row( .await .map_err(|e| Configuration(e.into()))?; + // Begin a database transaction: + let transaction = pool.begin().await?; + // Process the updates that need to be performed before the update of the target row: process_updates( global_config, @@ -1885,13 +1889,49 @@ pub async fn delete_row( ) .await?; + // Commit the database transaction: + transaction.commit().await?; + + Ok(()) +} + +/// A wrapper around [update_row_without_transaction()], which wraps that function call inside a +/// database transaction. +#[async_recursion] +pub async fn update_row( + global_config: &SerdeMap, + compiled_datatype_conditions: &HashMap, + compiled_rule_conditions: &HashMap>>, + pool: &AnyPool, + table_name: &str, + row: &SerdeMap, + row_number: &u32, + skip_validation: bool, + do_not_recurse: bool, +) -> Result<(), sqlx::Error> { + let transaction = pool.begin().await?; + update_row_without_transaction( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + table_name, + row, + row_number, + skip_validation, + do_not_recurse, + ) + .await?; + transaction.commit().await?; Ok(()) } /// Given global config map, a database connection pool, a table name, a row, and the row number to /// update, update the corresponding row in the database with new values as specified by `row`. +/// **Warning:** This function updates the database without using database transactions and can +/// result in database corruption if you are not careful. See [update_row()] for a safer version. 
#[async_recursion] -pub async fn update_row( +pub async fn update_row_without_transaction( global_config: &SerdeMap, compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, @@ -1936,8 +1976,8 @@ pub async fn update_row( ) .await?; - // Send the row through the row validator to determine if any fields are problematic and - // to mark them with appropriate messages: + // Send the row through the row validator to determine if any fields are problematic in light of + // the previous updates and mark them with appropriate messages if necessary: let row = if !skip_validation { validate_row( global_config, @@ -1954,11 +1994,6 @@ pub async fn update_row( row.clone() }; - // TODO: If possible use BEGIN and END TRANSACTION here and ROLLBACK in case of an error. - // Maybe we need a wrapper function for this? - // Note also that we might want to run ANALYZE (or the sqlite equivalent) after - // the updates have completed. - // Now prepare the row and messages for the database update: let mut assignments = vec![]; let mut params = vec![]; From d416fd4e0aba99af145a51b73c2be46120d603a2 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Thu, 13 Jul 2023 15:56:46 -0400 Subject: [PATCH 25/31] Revert "wrap inserts, updates, and deletes within db transactions" This reverts commit 2a03b2bee6c4d3a7562858220f4b7e74075bb508. 
--- src/lib.rs | 63 ++++++++++++------------------------------------------ 1 file changed, 14 insertions(+), 49 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 466c889f..d3f8d7ca 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1532,7 +1532,7 @@ pub async fn process_updates( .await?; // Update the row in the database: - update_row_without_transaction( + update_row( global_config, compiled_datatype_conditions, compiled_rule_conditions, @@ -1701,9 +1701,6 @@ pub async fn insert_new_row( } }; - // Begin a transaction in the database: - let transaction = pool.begin().await?; - // Add the new row to the table: let insert_stmt = local_sql_syntax( &pool, @@ -1751,9 +1748,6 @@ pub async fn insert_new_row( ) .await?; - // Commit the database transaction: - transaction.commit().await?; - Ok(new_row_number) } @@ -1772,6 +1766,11 @@ pub async fn delete_row( Some(base) => base, }; + // TODO: If possible use BEGIN and END TRANSACTION here and ROLLBACK in case of an error. + // Maybe we need a wrapper function for this? + // Note also that we might want to run ANALYZE (or the sqlite equivalent) after + // the deletes have completed. + // First, use the row number to fetch the row from the database: let sql = format!( "SELECT * FROM \"{}_view\" WHERE row_number = {}", @@ -1832,9 +1831,6 @@ pub async fn delete_row( .await .map_err(|e| Configuration(e.into()))?; - // Begin a database transaction: - let transaction = pool.begin().await?; - // Process the updates that need to be performed before the update of the target row: process_updates( global_config, @@ -1889,49 +1885,13 @@ pub async fn delete_row( ) .await?; - // Commit the database transaction: - transaction.commit().await?; - - Ok(()) -} - -/// A wrapper around [update_row_without_transaction()], which wraps that function call inside a -/// database transaction. 
-#[async_recursion] -pub async fn update_row( - global_config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, - table_name: &str, - row: &SerdeMap, - row_number: &u32, - skip_validation: bool, - do_not_recurse: bool, -) -> Result<(), sqlx::Error> { - let transaction = pool.begin().await?; - update_row_without_transaction( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - table_name, - row, - row_number, - skip_validation, - do_not_recurse, - ) - .await?; - transaction.commit().await?; Ok(()) } /// Given global config map, a database connection pool, a table name, a row, and the row number to /// update, update the corresponding row in the database with new values as specified by `row`. -/// **Warning:** This function updates the database without using database transactions and can -/// result in database corruption if you are not careful. See [update_row()] for a safer version. #[async_recursion] -pub async fn update_row_without_transaction( +pub async fn update_row( global_config: &SerdeMap, compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, @@ -1976,8 +1936,8 @@ pub async fn update_row_without_transaction( ) .await?; - // Send the row through the row validator to determine if any fields are problematic in light of - // the previous updates and mark them with appropriate messages if necessary: + // Send the row through the row validator to determine if any fields are problematic and + // to mark them with appropriate messages: let row = if !skip_validation { validate_row( global_config, @@ -1994,6 +1954,11 @@ pub async fn update_row_without_transaction( row.clone() }; + // TODO: If possible use BEGIN and END TRANSACTION here and ROLLBACK in case of an error. + // Maybe we need a wrapper function for this? + // Note also that we might want to run ANALYZE (or the sqlite equivalent) after + // the updates have completed. 
+ // Now prepare the row and messages for the database update: let mut assignments = vec![]; let mut params = vec![]; From d0dfc1ac9d322a0f3b2bac7ff5f0aaaa449c9a72 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Fri, 14 Jul 2023 10:44:27 -0400 Subject: [PATCH 26/31] update expected output for API tests --- test/expected/table10.tsv | 10 ++++++++++ test/expected/table11.tsv | 5 +++++ test/insert_update.sh | 2 +- 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 test/expected/table10.tsv create mode 100644 test/expected/table11.tsv diff --git a/test/expected/table10.tsv b/test/expected/table10.tsv new file mode 100644 index 00000000..d17d6e5f --- /dev/null +++ b/test/expected/table10.tsv @@ -0,0 +1,10 @@ +foreign_column other_foreign_column numeric_foreign_column +w z +b b 2 +c c 3 +d d 4 +e e 5 +f f 6 +g g 7 +h h 8 +i i 9 diff --git a/test/expected/table11.tsv b/test/expected/table11.tsv new file mode 100644 index 00000000..a327a39e --- /dev/null +++ b/test/expected/table11.tsv @@ -0,0 +1,5 @@ +child parent xyzzy foo bar +a b c d e +b c d d f +g f e d c +d h y e w diff --git a/test/insert_update.sh b/test/insert_update.sh index 2deaa969..698c5c06 100755 --- a/test/insert_update.sh +++ b/test/insert_update.sh @@ -19,7 +19,7 @@ output_dir=$pwd/output expected_dir=$pwd/expected ret_value=0 -for table_path in table3.tsv table2.tsv table6.tsv +for table_path in table2.tsv table3.tsv table6.tsv table10.tsv table11.tsv do table_path=${table_path#test/} table_path=$pwd/output/$table_path From 15fcfa91958051325db35ea35cbcbb27ee8fc63a Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sat, 15 Jul 2023 15:27:14 -0400 Subject: [PATCH 27/31] use transactions in insert, update, delete --- src/api_test.rs | 32 +++++++++++ src/lib.rs | 106 ++++++++++++++++++++++------------- src/validate.rs | 144 +++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 224 insertions(+), 58 deletions(-) diff --git a/src/api_test.rs b/src/api_test.rs index 
c99e7461..f9a476f0 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -120,6 +120,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro &compiled_datatype_conditions, &compiled_rule_conditions, &pool, + None, "table2", row.as_object().unwrap(), Some(1), @@ -132,6 +133,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro &compiled_datatype_conditions, &compiled_rule_conditions, &pool, + None, "table2", &result_row_1, Some(1), @@ -145,6 +147,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro &compiled_datatype_conditions, &compiled_rule_conditions, &pool, + None, "table2", &result_row_2, Some(1), @@ -158,11 +161,13 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro // sample. // Update the row we constructed and validated above in the database: + let mut transaction = pool.begin().await?; update_row( &config, &compiled_datatype_conditions, &compiled_rule_conditions, &pool, + &mut transaction, "table2", &row.as_object().unwrap(), &1, @@ -170,6 +175,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro false, ) .await?; + transaction.commit().await?; // Validate and insert a new row: let row = json!({ @@ -191,6 +197,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro &compiled_datatype_conditions, &compiled_rule_conditions, &pool, + None, "table3", row.as_object().unwrap(), None, @@ -198,17 +205,20 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro ) .await?; + let mut transaction = pool.begin().await?; let _new_row_num = insert_new_row( &config, &compiled_datatype_conditions, &compiled_rule_conditions, &pool, + &mut transaction, "table3", &result_row, None, false, ) .await?; + transaction.commit().await?; // Validate and update an existing row: let row = json!({ @@ -230,6 +240,7 @@ pub async fn run_api_tests(table: &str, database: &str) 
-> Result<(), sqlx::Erro &compiled_datatype_conditions, &compiled_rule_conditions, &pool, + None, "table6", row.as_object().unwrap(), Some(1), @@ -237,11 +248,13 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro ) .await?; + let mut transaction = pool.begin().await?; update_row( &config, &compiled_datatype_conditions, &compiled_rule_conditions, &pool, + &mut transaction, "table6", &result_row, &1, @@ -249,6 +262,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro false, ) .await?; + transaction.commit().await?; // Validate and insert a new row: let row = json!({ @@ -270,6 +284,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro &compiled_datatype_conditions, &compiled_rule_conditions, &pool, + None, "table6", row.as_object().unwrap(), None, @@ -277,17 +292,20 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro ) .await?; + let mut transaction = pool.begin().await?; let _new_row_num = insert_new_row( &config, &compiled_datatype_conditions, &compiled_rule_conditions, &pool, + &mut transaction, "table6", &result_row, None, false, ) .await?; + transaction.commit().await?; // Test cases for updates/inserts/deletes with dependencies. 
let row = json!({ @@ -295,11 +313,14 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro "other_foreign_column": {"messages": [], "valid": true, "value": "z"}, "numeric_foreign_column": {"messages": [], "valid": true, "value": ""}, }); + + let mut transaction = pool.begin().await?; update_row( &config, &compiled_datatype_conditions, &compiled_rule_conditions, &pool, + &mut transaction, "table10", &row.as_object().unwrap(), &1, @@ -307,6 +328,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro false, ) .await?; + transaction.commit().await?; let row = json!({ "child": {"messages": [], "valid": true, "value": "b"}, @@ -315,11 +337,14 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro "foo": {"messages": [], "valid": true, "value": "d"}, "bar": {"messages": [], "valid": true, "value": "f"}, }); + + let mut transaction = pool.begin().await?; update_row( &config, &compiled_datatype_conditions, &compiled_rule_conditions, &pool, + &mut transaction, "table11", &row.as_object().unwrap(), &2, @@ -327,17 +352,21 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro false, ) .await?; + transaction.commit().await?; + let mut transaction = pool.begin().await?; delete_row( &config, &compiled_datatype_conditions, &compiled_rule_conditions, &pool, + &mut transaction, "table11", &4, false, ) .await?; + transaction.commit().await?; let row = json!({ "foreign_column": {"messages": [], "valid": true, "value": "i"}, @@ -345,17 +374,20 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro "numeric_foreign_column": {"messages": [], "valid": true, "value": "9"}, }); + let mut transaction = pool.begin().await?; let _new_row_num = insert_new_row( &config, &compiled_datatype_conditions, &compiled_rule_conditions, &pool, + &mut transaction, "table10", &row.as_object().unwrap(), None, false, ) .await?; + transaction.commit().await?; 
Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index d3f8d7ca..9aa2397f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,9 +45,9 @@ use regex::Regex; use serde_json::{json, Value as SerdeValue}; use sqlx::{ any::{AnyConnectOptions, AnyKind, AnyPool, AnyPoolOptions, AnyRow}, - query as sqlx_query, Column, + query as sqlx_query, Acquire, Column, Error::Configuration, - Row, ValueRef, + Row, Transaction, ValueRef, }; use std::{ collections::{BTreeMap, HashMap}, @@ -1065,6 +1065,7 @@ pub async fn get_affected_rows( except: Option<&u32>, global_config: &SerdeMap, pool: &AnyPool, + tx: &mut Transaction<'_, sqlx::Any>, ) -> Result, String> { let sql = { let is_clause = if pool.any_kind() == AnyKind::Sqlite { @@ -1155,7 +1156,11 @@ pub async fn get_affected_rows( let query = sqlx_query(&sql); let mut table_rows = IndexMap::new(); - for row in query.fetch_all(pool).await.map_err(|e| e.to_string())? { + for row in query + .fetch_all(tx.acquire().await.map_err(|e| e.to_string())?) + .await + .map_err(|e| e.to_string())? 
+ { let mut table_row = SerdeMap::new(); let mut row_number: Option = None; for column in row.columns() { @@ -1188,6 +1193,7 @@ pub async fn get_affected_rows( pub async fn insert_update_message( pool: &AnyPool, + tx: &mut Transaction<'_, sqlx::Any>, table: &str, column: &str, row_number: &u32, @@ -1214,7 +1220,7 @@ pub async fn insert_update_message( ); let query = sqlx_query(&sql); - let results = query.fetch_all(pool).await?; + let results = query.fetch_all(tx.acquire().await?).await?; if results.is_empty() { "".to_string() } else { @@ -1260,7 +1266,7 @@ pub async fn insert_update_message( value = cell_value, ); let query = sqlx_query(&insert_sql); - query.execute(pool).await?; + query.execute(tx.acquire().await?).await?; Ok(()) } @@ -1269,6 +1275,7 @@ pub async fn get_db_value( column: &str, row_number: &u32, pool: &AnyPool, + tx: &mut Transaction<'_, sqlx::Any>, ) -> Result { let is_clause = if pool.any_kind() == AnyKind::Sqlite { "IS" @@ -1303,7 +1310,10 @@ pub async fn get_db_value( ); let query = sqlx_query(&sql); - let result_row = query.fetch_one(pool).await.map_err(|e| e.to_string())?; + let result_row = query + .fetch_one(tx.acquire().await.map_err(|e| e.to_string())?) 
+ .await + .map_err(|e| e.to_string())?; let value: &str = result_row.try_get(column).unwrap(); Ok(value.to_string()) } @@ -1311,6 +1321,7 @@ pub async fn get_db_value( pub async fn get_rows_to_update( global_config: &SerdeMap, pool: &AnyPool, + tx: &mut Transaction<'_, sqlx::Any>, table: &str, query_as_if: &QueryAsIf, ) -> Result< @@ -1375,9 +1386,14 @@ pub async fn get_rows_to_update( IndexMap::new() } _ => { - let current_value = - get_db_value(target_table, target_column, &query_as_if.row_number, pool) - .await?; + let current_value = get_db_value( + target_table, + target_column, + &query_as_if.row_number, + pool, + tx, + ) + .await?; // Query dependent_table.dependent_column for the rows that will be affected by the // change from the current value: @@ -1388,6 +1404,7 @@ pub async fn get_rows_to_update( None, global_config, pool, + tx, ) .await? } @@ -1414,6 +1431,7 @@ pub async fn get_rows_to_update( None, global_config, pool, + tx, ) .await? } @@ -1479,7 +1497,7 @@ pub async fn get_rows_to_update( } _ => { let current_value = - get_db_value(table, column, &query_as_if.row_number, pool).await?; + get_db_value(table, column, &query_as_if.row_number, pool, tx).await?; // Query table.column for the rows that will be affected by the change from the // current to the new value: @@ -1490,6 +1508,7 @@ pub async fn get_rows_to_update( Some(&query_as_if.row_number), global_config, pool, + tx, ) .await? 
} @@ -1512,6 +1531,7 @@ pub async fn process_updates( compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, pool: &AnyPool, + tx: &mut Transaction<'_, sqlx::Any>, updates: &IndexMap>, query_as_if: &QueryAsIf, do_not_recurse: bool, @@ -1524,6 +1544,7 @@ pub async fn process_updates( compiled_datatype_conditions, compiled_rule_conditions, pool, + Some(tx), update_table, row, Some(*row_number), @@ -1537,6 +1558,7 @@ pub async fn process_updates( compiled_datatype_conditions, compiled_rule_conditions, pool, + tx, update_table, &vrow, row_number, @@ -1551,13 +1573,15 @@ pub async fn process_updates( /// Given a global config map, a database connection pool, a table name, and a row, assign a new /// row number to the row and insert it to the database, then return the new row number. Optionally, -/// if row_number is provided, use that to identify the new row. +/// if row_number is provided, use that to identify the new row. Optionally, if a transaction is +/// given, use that instead of the pool for database access. 
#[async_recursion] pub async fn insert_new_row( global_config: &SerdeMap, compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, pool: &AnyPool, + tx: &mut Transaction, table_to_write: &str, row: &SerdeMap, new_row_number: Option, @@ -1576,6 +1600,7 @@ pub async fn insert_new_row( compiled_datatype_conditions, compiled_rule_conditions, pool, + Some(tx), base_table, row, None, @@ -1596,7 +1621,7 @@ pub async fn insert_new_row( base_table ); let query = sqlx_query(&sql); - let result_row = query.fetch_one(pool).await?; + let result_row = query.fetch_one(tx.acquire().await?).await?; let result = result_row.try_get_raw("row_number").unwrap(); let new_row_number: i64; if result.is_null() { @@ -1657,9 +1682,10 @@ pub async fn insert_new_row( // Look through the valve config to see which tables are dependent on this table // and find the rows that need to be updated: - let (_, updates_after, _) = get_rows_to_update(global_config, pool, base_table, &query_as_if) - .await - .map_err(|e| Configuration(e.into()))?; + let (_, updates_after, _) = + get_rows_to_update(global_config, pool, tx, base_table, &query_as_if) + .await + .map_err(|e| Configuration(e.into()))?; // If the row is not already being directed to the conflict table, check it to see if it should // be redirected there: @@ -1716,7 +1742,7 @@ pub async fn insert_new_row( for param in &insert_params { query = query.bind(param); } - query.execute(pool).await?; + query.execute(tx.acquire().await?).await?; // Next add any validation messages to the message table: for m in messages { @@ -1733,7 +1759,7 @@ pub async fn insert_new_row( base_table, new_row_number, column, value, level, rule, message ); let query = sqlx_query(&message_sql); - query.execute(pool).await?; + query.execute(tx.acquire().await?).await?; } // Now process the updates that need to be performed after the update of the target row: @@ -1742,6 +1768,7 @@ pub async fn insert_new_row( compiled_datatype_conditions, 
compiled_rule_conditions, pool, + tx, &updates_after, &query_as_if, false, @@ -1757,6 +1784,7 @@ pub async fn delete_row( compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, pool: &AnyPool, + tx: &mut Transaction, table_to_write: &str, row_number: &u32, simulated_update: bool, @@ -1766,18 +1794,13 @@ pub async fn delete_row( Some(base) => base, }; - // TODO: If possible use BEGIN and END TRANSACTION here and ROLLBACK in case of an error. - // Maybe we need a wrapper function for this? - // Note also that we might want to run ANALYZE (or the sqlite equivalent) after - // the deletes have completed. - // First, use the row number to fetch the row from the database: let sql = format!( "SELECT * FROM \"{}_view\" WHERE row_number = {}", base_table, row_number ); let query = sqlx_query(&sql); - let sql_row = query.fetch_one(pool).await.map_err(|e| { + let sql_row = query.fetch_one(tx.acquire().await?).await.map_err(|e| { Configuration( format!( "Got: '{}' while fetching row number {} from table {}", @@ -1827,7 +1850,7 @@ pub async fn delete_row( // rows that need to be updated. 
Since this is a delete there will only be rows to update // before and none after the delete: let (updates_before, _, updates_intra) = - get_rows_to_update(global_config, pool, base_table, &query_as_if) + get_rows_to_update(global_config, pool, tx, base_table, &query_as_if) .await .map_err(|e| Configuration(e.into()))?; @@ -1837,6 +1860,7 @@ pub async fn delete_row( compiled_datatype_conditions, compiled_rule_conditions, pool, + tx, &updates_before, &query_as_if, false, @@ -1854,7 +1878,7 @@ pub async fn delete_row( ); for sql in vec![sql1, sql2] { let query = sqlx_query(&sql); - query.execute(pool).await?; + query.execute(tx.acquire().await?).await?; } // Now delete all messages associated with the row: @@ -1870,7 +1894,7 @@ pub async fn delete_row( base_table, row_number, simulated_update_clause ); let query = sqlx_query(&sql); - query.execute(pool).await?; + query.execute(tx.acquire().await?).await?; // Finally process the rows from the same table as the target table that need to be re-validated // because of unique or primary constraints: @@ -1879,6 +1903,7 @@ pub async fn delete_row( compiled_datatype_conditions, compiled_rule_conditions, pool, + tx, &updates_intra, &query_as_if, true, @@ -1890,12 +1915,14 @@ pub async fn delete_row( /// Given global config map, a database connection pool, a table name, a row, and the row number to /// update, update the corresponding row in the database with new values as specified by `row`. +/// Optionally, if a transaction is given, use that instead of the pool for database access. 
#[async_recursion] pub async fn update_row( global_config: &SerdeMap, compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, pool: &AnyPool, + tx: &mut Transaction, table_name: &str, row: &SerdeMap, row_number: &u32, @@ -1918,7 +1945,7 @@ pub async fn update_row( if do_not_recurse { (IndexMap::new(), IndexMap::new(), IndexMap::new()) } else { - get_rows_to_update(global_config, pool, table_name, &query_as_if) + get_rows_to_update(global_config, pool, tx, table_name, &query_as_if) .await .map_err(|e| Configuration(e.into()))? } @@ -1930,6 +1957,7 @@ pub async fn update_row( compiled_datatype_conditions, compiled_rule_conditions, pool, + tx, &updates_before, &query_as_if, false, @@ -1944,6 +1972,7 @@ pub async fn update_row( compiled_datatype_conditions, compiled_rule_conditions, pool, + Some(tx), table_name, row, Some(*row_number), @@ -1954,11 +1983,6 @@ pub async fn update_row( row.clone() }; - // TODO: If possible use BEGIN and END TRANSACTION here and ROLLBACK in case of an error. - // Maybe we need a wrapper function for this? - // Note also that we might want to run ANALYZE (or the sqlite equivalent) after - // the updates have completed. - // Now prepare the row and messages for the database update: let mut assignments = vec![]; let mut params = vec![]; @@ -1971,7 +1995,7 @@ pub async fn update_row( // Begin by adding an extra 'update' row to the message table indicating that the value of // this column has been updated (if that is the case). 
- insert_update_message(pool, table_name, column, row_number, cell_value).await?; + insert_update_message(pool, tx, table_name, column, row_number, cell_value).await?; // Generate the assignment statements and messages for each column: let mut cell_for_insert = cell.clone(); @@ -2015,7 +2039,7 @@ pub async fn update_row( table_name, row_number ); let query = sqlx_query(&sql); - let rows = query.fetch_all(pool).await?; + let rows = query.fetch_all(tx.acquire().await?).await?; let mut current_table = String::from(table_name); if rows.len() == 0 { current_table.push_str("_conflict"); @@ -2062,13 +2086,14 @@ pub async fn update_row( for param in ¶ms { query = query.bind(param); } - query.execute(pool).await?; + query.execute(tx.acquire().await?).await?; } else { delete_row( global_config, compiled_datatype_conditions, compiled_rule_conditions, pool, + tx, ¤t_table, row_number, true, @@ -2079,6 +2104,7 @@ pub async fn update_row( compiled_datatype_conditions, compiled_rule_conditions, pool, + tx, &table_to_write, &row, Some(*row_number), @@ -2094,7 +2120,7 @@ pub async fn update_row( table_name, row_number ); let query = sqlx_query(&delete_sql); - query.execute(pool).await?; + query.execute(tx.acquire().await?).await?; // Now add the messages to the message table for the new version of this row: for m in messages { @@ -2111,7 +2137,7 @@ pub async fn update_row( table_name, row_number, column, value, level, rule, message ); let query = sqlx_query(&insert_sql); - query.execute(pool).await?; + query.execute(tx.acquire().await?).await?; } // Now process the rows from the same table as the target table that need to be re-validated @@ -2121,6 +2147,7 @@ pub async fn update_row( compiled_datatype_conditions, compiled_rule_conditions, pool, + tx, &updates_intra, &query_as_if, true, @@ -2134,6 +2161,7 @@ pub async fn update_row( compiled_datatype_conditions, compiled_rule_conditions, pool, + tx, &updates_after, &query_as_if, false, @@ -3715,8 +3743,8 @@ async fn load_db( // We 
also need to wait before validating a table's "under" constraints. Although the tree // associated with such a constraint need not be defined on the same table, it can be. let mut recs_to_update = - validate_tree_foreign_keys(config, pool, &table_name, None).await?; - recs_to_update.append(&mut validate_under(config, pool, &table_name, None).await?); + validate_tree_foreign_keys(config, pool, None, &table_name, None).await?; + recs_to_update.append(&mut validate_under(config, pool, None, &table_name, None).await?); for record in recs_to_update { let row_number = record.get("row_number").unwrap(); diff --git a/src/validate.rs b/src/validate.rs index 201cf82f..78fc8066 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -1,7 +1,7 @@ use enquote::unquote; use indexmap::IndexMap; use serde_json::{json, Value as SerdeValue}; -use sqlx::{any::AnyPool, query as sqlx_query, Row, ValueRef}; +use sqlx::{any::AnyPool, query as sqlx_query, Acquire, Row, Transaction, ValueRef}; use std::collections::HashMap; use crate::{ @@ -49,17 +49,28 @@ pub struct QueryAsIf { /// Given a config map, maps of compiled datatype and rule conditions, a database connection /// pool, a table name, a row to validate, and a row number in case the row already exists, /// perform both intra- and inter-row validation and return the validated row. Note that this -/// function is idempotent. +/// function is idempotent. Optionally, if a transaction is given, use that instead of the pool +/// for database access. pub async fn validate_row( config: &SerdeMap, compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, pool: &AnyPool, + tx: Option<&mut Transaction<'_, sqlx::Any>>, table_name: &str, row: &SerdeMap, row_number: Option, query_as_if: Option<&QueryAsIf>, ) -> Result { + // Fallback to a default transaction if it is not given. Since we do not commit before it falls + // out of scope the transaction will be rolled back at the end of this function. 
And since this + // function is read-only the rollback is inconsequential. + let default_tx = &mut pool.begin().await?; + let tx = match tx { + Some(tx) => tx, + None => default_tx, + }; + // Initialize the result row with the values from the given row: let mut result_row = ResultRow { row_number: row_number, @@ -130,6 +141,7 @@ pub async fn validate_row( validate_cell_trees( config, pool, + Some(tx), &table_name.to_string(), column_name, cell, @@ -140,6 +152,7 @@ pub async fn validate_row( validate_cell_foreign_constraints( config, pool, + Some(tx), &table_name.to_string(), column_name, cell, @@ -149,6 +162,7 @@ pub async fn validate_row( validate_cell_unique_constraints( config, pool, + Some(tx), &table_name.to_string(), column_name, cell, @@ -164,6 +178,7 @@ pub async fn validate_row( let mut violations = validate_tree_foreign_keys( config, pool, + Some(tx), &table_name.to_string(), Some(&context.clone()), ) @@ -173,6 +188,7 @@ pub async fn validate_row( &mut validate_under( config, pool, + Some(tx), &table_name.to_string(), Some(&context.clone()), ) @@ -392,10 +408,12 @@ pub async fn get_matching_values( } /// Given a config map, a db connection pool, a table name, and an optional extra row, validate -/// any associated under constraints for the current column. +/// any associated under constraints for the current column. Optionally, if a transaction is +/// given, use that instead of the pool for database access. pub async fn validate_under( config: &SerdeMap, pool: &AnyPool, + mut tx: Option<&mut Transaction<'_, sqlx::Any>>, table_name: &String, extra_row: Option<&ResultRow>, ) -> Result, sqlx::Error> { @@ -529,7 +547,15 @@ pub async fn validate_under( for param in ¶ms { query = query.bind(param); } - let rows = query.fetch_all(pool).await?; + let rows = { + if let None = tx { + query.fetch_all(pool).await? + } else { + query + .fetch_all(tx.as_mut().unwrap().acquire().await?) + .await? 
+ } + }; for row in rows { let raw_row_number = row.try_get_raw("row_number").unwrap(); @@ -562,7 +588,15 @@ pub async fn validate_under( message_query = message_query.bind(&table_name); message_query = message_query.bind(&row_number); message_query = message_query.bind(column); - let message_rows = message_query.fetch_all(pool).await?; + let message_rows = { + if let None = tx { + message_query.fetch_all(pool).await? + } else { + message_query + .fetch_all(tx.as_mut().unwrap().acquire().await?) + .await? + } + }; // If there are no rows in the message table then the cell is legitimately empty and // we can skip this row: if message_rows.is_empty() { @@ -624,10 +658,11 @@ pub async fn validate_under( /// Given a config map, a db connection pool, and a table name, validate whether there is a /// 'foreign key' violation for any of the table's trees; i.e., for a given tree: tree(child) which /// has a given parent column, validate that all of the values in the parent column are in the child -/// column. +/// column. Optionally, if a transaction is given, use that instead of the pool for database access. pub async fn validate_tree_foreign_keys( config: &SerdeMap, pool: &AnyPool, + mut tx: Option<&mut Transaction<'_, sqlx::Any>>, table_name: &String, extra_row: Option<&ResultRow>, ) -> Result, sqlx::Error> { @@ -688,7 +723,15 @@ pub async fn validate_tree_foreign_keys( for param in ¶ms { query = query.bind(param); } - let rows = query.fetch_all(pool).await?; + let rows = { + if let None = tx { + query.fetch_all(pool).await? + } else { + query + .fetch_all(tx.as_mut().unwrap().acquire().await?) + .await? 
+ } + }; for row in rows { let raw_row_number = row.try_get_raw("row_number").unwrap(); let row_number: i64; @@ -723,7 +766,15 @@ pub async fn validate_tree_foreign_keys( message_query = message_query.bind(&table_name); message_query = message_query.bind(&row_number); message_query = message_query.bind(parent_col); - let message_rows = message_query.fetch_all(pool).await?; + let message_rows = { + if let None = tx { + message_query.fetch_all(pool).await? + } else { + message_query + .fetch_all(tx.as_mut().unwrap().acquire().await?) + .await? + } + }; // If there are no rows in the message table then the cell is legitimately empty and // we can skip this row: if message_rows.is_empty() { @@ -762,7 +813,15 @@ pub async fn validate_tree_foreign_keys( ), ); let query = sqlx_query(&sql).bind(parent_val.to_string()); - let rows = query.fetch_all(pool).await?; + let rows = { + if let None = tx { + query.fetch_all(pool).await? + } else { + query + .fetch_all(tx.as_mut().unwrap().acquire().await?) + .await? + } + }; if rows.len() > 0 { continue; } @@ -817,6 +876,7 @@ pub async fn validate_rows_trees( validate_cell_trees( config, pool, + None, table_name, &column_name, cell, @@ -873,6 +933,7 @@ pub async fn validate_rows_constraints( validate_cell_foreign_constraints( config, pool, + None, table_name, &column_name, cell, @@ -883,6 +944,7 @@ pub async fn validate_rows_constraints( validate_cell_unique_constraints( config, pool, + None, table_name, &column_name, cell, @@ -1509,10 +1571,12 @@ fn as_if_to_sql( /// Given a config map, a db connection pool, a table name, a column name, and a cell to validate, /// check the cell value against any foreign keys that have been defined for the column. If there is -/// a violation, indicate it with an error message attached to the cell. +/// a violation, indicate it with an error message attached to the cell. Optionally, if a +/// transaction is given, use that instead of the pool for database access. 
async fn validate_cell_foreign_constraints( config: &SerdeMap, pool: &AnyPool, + mut tx: Option<&mut Transaction<'_, sqlx::Any>>, table_name: &String, column_name: &String, cell: &mut ResultCell, @@ -1568,7 +1632,16 @@ async fn validate_cell_foreign_constraints( ), ); - let frows = sqlx_query(&fsql).bind(&cell.value).fetch_all(pool).await?; + let frows = { + if let None = tx { + sqlx_query(&fsql).bind(&cell.value).fetch_all(pool).await? + } else { + sqlx_query(&fsql) + .bind(&cell.value) + .fetch_all(tx.as_mut().unwrap().acquire().await?) + .await? + } + }; if frows.is_empty() { cell.valid = false; let mut message = json!({ @@ -1591,10 +1664,19 @@ async fn validate_cell_foreign_constraints( as_if_clause_for_conflict, ftable_alias, fcolumn, sql_param ), ); - let frows = sqlx_query(&fsql) - .bind(cell.value.clone()) - .fetch_all(pool) - .await?; + let frows = { + if let None = tx { + sqlx_query(&fsql) + .bind(cell.value.clone()) + .fetch_all(pool) + .await? + } else { + sqlx_query(&fsql) + .bind(cell.value.clone()) + .fetch_all(tx.as_mut().unwrap().acquire().await?) + .await? + } + }; if frows.is_empty() { message.as_object_mut().and_then(|m| { @@ -1627,10 +1709,12 @@ async fn validate_cell_foreign_constraints( /// Given a config map, a db connection pool, a table name, a column name, a cell to validate, /// the row, `context`, to which the cell belongs, and a list of previously validated rows, /// validate that none of the "tree" constraints on the column are violated, and indicate any -/// violations by attaching error messages to the cell. +/// violations by attaching error messages to the cell. Optionally, if a transaction is +/// given, use that instead of the pool for database access. 
async fn validate_cell_trees( config: &SerdeMap, pool: &AnyPool, + mut tx: Option<&mut Transaction<'_, sqlx::Any>>, table_name: &String, column_name: &String, cell: &mut ResultCell, @@ -1740,7 +1824,16 @@ async fn validate_cell_trees( for param in ¶ms { query = query.bind(param); } - let rows = query.fetch_all(pool).await?; + + let rows = { + if let None = tx { + query.fetch_all(pool).await? + } else { + query + .fetch_all(tx.as_mut().unwrap().acquire().await?) + .await? + } + }; // If there is a row in the tree whose parent is the to-be-inserted child, then inserting // the new row would result in a cycle. @@ -1797,10 +1890,12 @@ async fn validate_cell_trees( /// check the cell value against any unique-type keys that have been defined for the column. /// If there is a violation, indicate it with an error message attached to the cell. If /// `row_number` is set to None, then no row corresponding to the given cell is assumed to exist -/// in the table. +/// in the table. Optionally, if a transaction is given, use that instead of the pool for database +/// access. async fn validate_cell_unique_constraints( config: &SerdeMap, pool: &AnyPool, + mut tx: Option<&mut Transaction<'_, sqlx::Any>>, table_name: &String, column_name: &String, cell: &mut ResultCell, @@ -1896,7 +1991,18 @@ async fn validate_cell_unique_constraints( .collect::>() .is_empty(); - if contained_in_prev_results || !query.fetch_all(pool).await?.is_empty() { + if contained_in_prev_results + || !{ + if let None = tx { + query.fetch_all(pool).await?.is_empty() + } else { + query + .fetch_all(tx.as_mut().unwrap().acquire().await?) + .await? 
+ .is_empty() + } + } + { cell.valid = false; if is_primary || is_unique { let error_message; From a95dd1cdcea551aa29cf593ecf4ba263ce6e1b33 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 16 Jul 2023 12:37:22 -0400 Subject: [PATCH 28/31] add wrappers around update/insert/delete functions --- src/api_test.rs | 24 ----------- src/lib.rs | 110 +++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 100 insertions(+), 34 deletions(-) diff --git a/src/api_test.rs b/src/api_test.rs index f9a476f0..ae2d105c 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -161,13 +161,11 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro // sample. // Update the row we constructed and validated above in the database: - let mut transaction = pool.begin().await?; update_row( &config, &compiled_datatype_conditions, &compiled_rule_conditions, &pool, - &mut transaction, "table2", &row.as_object().unwrap(), &1, @@ -175,7 +173,6 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro false, ) .await?; - transaction.commit().await?; // Validate and insert a new row: let row = json!({ @@ -205,20 +202,17 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro ) .await?; - let mut transaction = pool.begin().await?; let _new_row_num = insert_new_row( &config, &compiled_datatype_conditions, &compiled_rule_conditions, &pool, - &mut transaction, "table3", &result_row, None, false, ) .await?; - transaction.commit().await?; // Validate and update an existing row: let row = json!({ @@ -248,13 +242,11 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro ) .await?; - let mut transaction = pool.begin().await?; update_row( &config, &compiled_datatype_conditions, &compiled_rule_conditions, &pool, - &mut transaction, "table6", &result_row, &1, @@ -262,7 +254,6 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro false, ) .await?; - 
transaction.commit().await?; // Validate and insert a new row: let row = json!({ @@ -292,20 +283,17 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro ) .await?; - let mut transaction = pool.begin().await?; let _new_row_num = insert_new_row( &config, &compiled_datatype_conditions, &compiled_rule_conditions, &pool, - &mut transaction, "table6", &result_row, None, false, ) .await?; - transaction.commit().await?; // Test cases for updates/inserts/deletes with dependencies. let row = json!({ @@ -314,13 +302,11 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro "numeric_foreign_column": {"messages": [], "valid": true, "value": ""}, }); - let mut transaction = pool.begin().await?; update_row( &config, &compiled_datatype_conditions, &compiled_rule_conditions, &pool, - &mut transaction, "table10", &row.as_object().unwrap(), &1, @@ -328,7 +314,6 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro false, ) .await?; - transaction.commit().await?; let row = json!({ "child": {"messages": [], "valid": true, "value": "b"}, @@ -338,13 +323,11 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro "bar": {"messages": [], "valid": true, "value": "f"}, }); - let mut transaction = pool.begin().await?; update_row( &config, &compiled_datatype_conditions, &compiled_rule_conditions, &pool, - &mut transaction, "table11", &row.as_object().unwrap(), &2, @@ -352,21 +335,17 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro false, ) .await?; - transaction.commit().await?; - let mut transaction = pool.begin().await?; delete_row( &config, &compiled_datatype_conditions, &compiled_rule_conditions, &pool, - &mut transaction, "table11", &4, false, ) .await?; - transaction.commit().await?; let row = json!({ "foreign_column": {"messages": [], "valid": true, "value": "i"}, @@ -374,20 +353,17 @@ pub async fn run_api_tests(table: &str, database: 
&str) -> Result<(), sqlx::Erro "numeric_foreign_column": {"messages": [], "valid": true, "value": "9"}, }); - let mut transaction = pool.begin().await?; let _new_row_num = insert_new_row( &config, &compiled_datatype_conditions, &compiled_rule_conditions, &pool, - &mut transaction, "table10", &row.as_object().unwrap(), None, false, ) .await?; - transaction.commit().await?; Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index 9aa2397f..3be3efdc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1553,7 +1553,7 @@ pub async fn process_updates( .await?; // Update the row in the database: - update_row( + update_row_tx( global_config, compiled_datatype_conditions, compiled_rule_conditions, @@ -1571,12 +1571,42 @@ pub async fn process_updates( Ok(()) } -/// Given a global config map, a database connection pool, a table name, and a row, assign a new -/// row number to the row and insert it to the database, then return the new row number. Optionally, -/// if row_number is provided, use that to identify the new row. Optionally, if a transaction is -/// given, use that instead of the pool for database access. +/// A wrapper around [insert_new_row_tx()] in which the database transaction is implicitly created +/// and then committed once the given new row has been inserted. 
#[async_recursion] pub async fn insert_new_row( + global_config: &SerdeMap, + compiled_datatype_conditions: &HashMap, + compiled_rule_conditions: &HashMap>>, + pool: &AnyPool, + table_to_write: &str, + row: &SerdeMap, + new_row_number: Option, + skip_validation: bool, +) -> Result { + let mut tx = pool.begin().await?; + let rn = insert_new_row_tx( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + &mut tx, + table_to_write, + row, + new_row_number, + skip_validation, + ) + .await?; + tx.commit().await?; + Ok(rn) +} + +/// Given a global config map, a database connection pool, a database transaction, a table name, +/// and a row, assign a new row number to the row and insert it to the database using the given +/// transaction, then return the new row number. Optionally, if row_number is provided, use that +/// to identify the new row. +#[async_recursion] +pub async fn insert_new_row_tx( global_config: &SerdeMap, compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, @@ -1778,8 +1808,36 @@ pub async fn insert_new_row( Ok(new_row_number) } +/// A wrapper around [delete_row_tx()] in which the database transaction is implicitly created +/// and then committed once the given row has been deleted. 
#[async_recursion] pub async fn delete_row( + global_config: &SerdeMap, + compiled_datatype_conditions: &HashMap, + compiled_rule_conditions: &HashMap>>, + pool: &AnyPool, + table_to_write: &str, + row_number: &u32, + simulated_update: bool, +) -> Result<(), sqlx::Error> { + let mut tx = pool.begin().await?; + delete_row_tx( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + &mut tx, + table_to_write, + row_number, + simulated_update, + ) + .await?; + tx.commit().await?; + Ok(()) +} + +#[async_recursion] +pub async fn delete_row_tx( global_config: &SerdeMap, compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, @@ -1913,11 +1971,43 @@ pub async fn delete_row( Ok(()) } -/// Given global config map, a database connection pool, a table name, a row, and the row number to -/// update, update the corresponding row in the database with new values as specified by `row`. -/// Optionally, if a transaction is given, use that instead of the pool for database access. +/// A wrapper around [update_row_tx()] in which the database transaction is implicitly created +/// and then committed once the given row has been updated.. 
#[async_recursion] pub async fn update_row( + global_config: &SerdeMap, + compiled_datatype_conditions: &HashMap, + compiled_rule_conditions: &HashMap>>, + pool: &AnyPool, + table_name: &str, + row: &SerdeMap, + row_number: &u32, + skip_validation: bool, + do_not_recurse: bool, +) -> Result<(), sqlx::Error> { + let mut tx = pool.begin().await?; + update_row_tx( + global_config, + compiled_datatype_conditions, + compiled_rule_conditions, + pool, + &mut tx, + table_name, + row, + row_number, + skip_validation, + do_not_recurse, + ) + .await?; + tx.commit().await?; + Ok(()) +} + +/// Given global config map, a database connection pool, a database transaction, a table name, a +/// row, and the row number to update, update the corresponding row in the database, using the given +/// transaction, with new values as specified by `row`. +#[async_recursion] +pub async fn update_row_tx( global_config: &SerdeMap, compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, @@ -2088,7 +2178,7 @@ pub async fn update_row( } query.execute(tx.acquire().await?).await?; } else { - delete_row( + delete_row_tx( global_config, compiled_datatype_conditions, compiled_rule_conditions, @@ -2099,7 +2189,7 @@ pub async fn update_row( true, ) .await?; - insert_new_row( + insert_new_row_tx( global_config, compiled_datatype_conditions, compiled_rule_conditions, From ee5e24d044bbf90d9e08ccea0a1c398034c3b355 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Sun, 16 Jul 2023 13:34:03 -0400 Subject: [PATCH 29/31] remove unnecessary unwraps and panics in API functions --- src/lib.rs | 185 +++++++++++++++++++++++++++++++++--------------- src/validate.rs | 64 ++++++++++++----- 2 files changed, 173 insertions(+), 76 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 3be3efdc..048ca5ab 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1652,7 +1652,7 @@ pub async fn insert_new_row_tx( ); let query = sqlx_query(&sql); let result_row = query.fetch_one(tx.acquire().await?).await?; - let 
result = result_row.try_get_raw("row_number").unwrap(); + let result = result_row.try_get_raw("row_number")?; let new_row_number: i64; if result.is_null() { new_row_number = 1; @@ -1671,30 +1671,59 @@ pub async fn insert_new_row_tx( let sorted_datatypes = get_sorted_datatypes(global_config); for (column, cell) in row.iter() { insert_columns.append(&mut vec![format!(r#""{}""#, column)]); - let cell = cell.as_object().unwrap(); - let cell_valid = cell.get("valid").and_then(|v| v.as_bool()).unwrap(); - let cell_value = cell.get("value").and_then(|v| v.as_str()).unwrap(); + let cell = cell.as_object().ok_or(Configuration( + format!("Cell {:?} is not an object", cell).into(), + ))?; + let cell_valid = cell + .get("valid") + .and_then(|v| v.as_bool()) + .ok_or(Configuration( + format!("No flag named 'valid' in {:?}", cell).into(), + ))?; + let cell_value = cell + .get("value") + .and_then(|v| v.as_str()) + .ok_or(Configuration( + format!("No str named 'value' in {:?}", cell).into(), + ))?; let mut cell_for_insert = cell.clone(); if cell_valid { cell_for_insert.remove("value"); let sql_type = - get_sql_type_from_global_config(&global_config, &base_table, &column, pool) - .unwrap(); + get_sql_type_from_global_config(&global_config, &base_table, &column, pool).ok_or( + Configuration( + format!("Unable to determine SQL type for {}.{}", base_table, column) + .into(), + ), + )?; insert_values.push(cast_sql_param_from_text(&sql_type)); insert_params.push(String::from(cell_value)); } else { insert_values.push(String::from("NULL")); let cell_messages = sort_messages( &sorted_datatypes, - cell.get("messages").and_then(|m| m.as_array()).unwrap(), + cell.get("messages") + .and_then(|m| m.as_array()) + .ok_or(Configuration( + format!("No array named 'messages' in {:?}", cell).into(), + ))?, ); for cell_message in cell_messages { messages.push(json!({ "column": column, "value": cell_value, - "level": cell_message.get("level").and_then(|s| s.as_str()).unwrap(), - "rule": 
cell_message.get("rule").and_then(|s| s.as_str()).unwrap(), - "message": cell_message.get("message").and_then(|s| s.as_str()).unwrap(), + "level": cell_message.get("level").and_then(|s| s.as_str()) + .ok_or( + Configuration(format!("No 'level' in {:?}", cell_message).into()) + )?, + "rule": cell_message.get("rule").and_then(|s| s.as_str()) + .ok_or( + Configuration(format!("No 'rule' in {:?}", cell_message).into()) + )?, + "message": cell_message.get("message").and_then(|s| s.as_str()) + .ok_or( + Configuration(format!("No 'message' in {:?}", cell_message).into()) + )?, })); } } @@ -1719,43 +1748,50 @@ pub async fn insert_new_row_tx( // If the row is not already being directed to the conflict table, check it to see if it should // be redirected there: - let table_to_write = { - if table_to_write.ends_with("_conflict") { - table_to_write.to_string() - } else { - let mut table_to_write = String::from(base_table); - for (column, cell) in row.iter() { - let valid = cell.get("valid").unwrap(); - if valid == false { - let structure = global_config - .get("table") - .and_then(|t| t.as_object()) - .and_then(|t| t.get(base_table)) - .and_then(|t| t.as_object()) - .and_then(|t| t.get("column")) - .and_then(|c| c.as_object()) - .and_then(|c| c.get(column)) - .and_then(|c| c.as_object()) - .and_then(|c| c.get("structure")) - .and_then(|s| s.as_str()) - .unwrap_or(""); - if vec!["primary", "unique"].contains(&structure) - || structure.starts_with("tree(") - { - let messages = cell.get("messages").and_then(|m| m.as_array()).unwrap(); - for msg in messages { - let level = msg.get("level").and_then(|l| l.as_str()).unwrap(); - if level == "error" { - table_to_write.push_str("_conflict"); - break; + let table_to_write = + { + if table_to_write.ends_with("_conflict") { + table_to_write.to_string() + } else { + let mut table_to_write = String::from(base_table); + for (column, cell) in row.iter() { + let valid = cell.get("valid").ok_or(Configuration( + format!("No flag named 'valid' in 
{:?}", cell).into(), + ))?; + if valid == false { + let structure = global_config + .get("table") + .and_then(|t| t.as_object()) + .and_then(|t| t.get(base_table)) + .and_then(|t| t.as_object()) + .and_then(|t| t.get("column")) + .and_then(|c| c.as_object()) + .and_then(|c| c.get(column)) + .and_then(|c| c.as_object()) + .and_then(|c| c.get("structure")) + .and_then(|s| s.as_str()) + .unwrap_or(""); + if vec!["primary", "unique"].contains(&structure) + || structure.starts_with("tree(") + { + let messages = cell.get("messages").and_then(|m| m.as_array()).ok_or( + Configuration(format!("No 'messages' in {:?}", cell).into()), + )?; + for msg in messages { + let level = msg.get("level").and_then(|l| l.as_str()).ok_or( + Configuration(format!("No 'level' in {:?}", cell).into()), + )?; + if level == "error" { + table_to_write.push_str("_conflict"); + break; + } } } } } + table_to_write } - table_to_write - } - }; + }; // Add the new row to the table: let insert_stmt = local_sql_syntax( @@ -1873,14 +1909,15 @@ pub async fn delete_row_tx( for column in sql_row.columns() { let cname = column.name(); if !vec!["row_number", "message"].contains(&cname) { - let raw_value = sql_row - .try_get_raw(format!(r#"{}"#, cname).as_str()) - .unwrap(); + let raw_value = sql_row.try_get_raw(format!(r#"{}"#, cname).as_str())?; let value; if !raw_value.is_null() { let sql_type = get_sql_type_from_global_config(global_config, &base_table, &cname, pool) - .unwrap(); + .ok_or(Configuration( + format!("Unable to determine SQL type for {}.{}", base_table, cname) + .into(), + ))?; value = get_column_value(&sql_row, &cname, &sql_type); } else { value = String::from(""); @@ -2079,9 +2116,21 @@ pub async fn update_row_tx( let mut messages = vec![]; let sorted_datatypes = get_sorted_datatypes(global_config); for (column, cell) in row.iter() { - let cell = cell.as_object().unwrap(); - let cell_valid = cell.get("valid").and_then(|v| v.as_bool()).unwrap(); - let cell_value = 
cell.get("value").and_then(|v| v.as_str()).unwrap(); + let cell = cell.as_object().ok_or(Configuration( + format!("Cell {:?} is not an object", cell).into(), + ))?; + let cell_valid = cell + .get("valid") + .and_then(|v| v.as_bool()) + .ok_or(Configuration( + format!("No flag named 'valid' in {:?}", cell).into(), + ))?; + let cell_value = cell + .get("value") + .and_then(|v| v.as_str()) + .ok_or(Configuration( + format!("No str named 'value' in {:?}", cell).into(), + ))?; // Begin by adding an extra 'update' row to the message table indicating that the value of // this column has been updated (if that is the case). @@ -2097,7 +2146,9 @@ pub async fn update_row_tx( &column, pool, ) - .unwrap(); + .ok_or(Configuration( + format!("Unable to determine SQL type for {}.{}", table_name, column).into(), + ))?; assignments.push(format!( r#""{}" = {}"#, column, @@ -2108,15 +2159,25 @@ pub async fn update_row_tx( assignments.push(format!(r#""{}" = NULL"#, column)); let cell_messages = sort_messages( &sorted_datatypes, - cell.get("messages").and_then(|m| m.as_array()).unwrap(), + cell.get("messages") + .and_then(|m| m.as_array()) + .ok_or(Configuration( + format!("No array named 'messages' in {:?}", cell).into(), + ))?, ); for cell_message in cell_messages { messages.push(json!({ "column": String::from(column), "value": String::from(cell_value), - "level": cell_message.get("level").and_then(|s| s.as_str()).unwrap(), - "rule": cell_message.get("rule").and_then(|s| s.as_str()).unwrap(), - "message": cell_message.get("message").and_then(|s| s.as_str()).unwrap(), + "level": cell_message.get("level").and_then(|s| s.as_str()).ok_or( + Configuration(format!("No 'level' in {:?}", cell_message).into()) + )?, + "rule": cell_message.get("rule").and_then(|s| s.as_str()).ok_or( + Configuration(format!("No 'rule' in {:?}", cell_message).into()) + )?, + "message": cell_message.get("message").and_then(|s| s.as_str()).ok_or( + Configuration(format!("No 'message' in {:?}", cell_message).into()) 
+ )?, })); } } @@ -2138,7 +2199,9 @@ pub async fn update_row_tx( // Next, figure out where to put the new version of the row: let mut table_to_write = String::from(table_name); for (column, cell) in row.iter() { - let valid = cell.get("valid").unwrap(); + let valid = cell.get("valid").ok_or(Configuration( + format!("No flag named 'valid' in {:?}", cell).into(), + ))?; if valid == false { let structure = global_config .get("table") @@ -2153,9 +2216,17 @@ pub async fn update_row_tx( .and_then(|s| s.as_str()) .unwrap_or(""); if vec!["primary", "unique"].contains(&structure) || structure.starts_with("tree(") { - let messages = cell.get("messages").and_then(|m| m.as_array()).unwrap(); + let messages = + cell.get("messages") + .and_then(|m| m.as_array()) + .ok_or(Configuration( + format!("No array named 'messages' in {:?}", cell).into(), + ))?; for msg in messages { - let level = msg.get("level").and_then(|l| l.as_str()).unwrap(); + let level = msg + .get("level") + .and_then(|l| l.as_str()) + .ok_or(Configuration(format!("No 'level' in {:?}", msg).into()))?; if level == "error" { table_to_write.push_str("_conflict"); break; diff --git a/src/validate.rs b/src/validate.rs index 78fc8066..182af89e 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -1,7 +1,9 @@ use enquote::unquote; use indexmap::IndexMap; use serde_json::{json, Value as SerdeValue}; -use sqlx::{any::AnyPool, query as sqlx_query, Acquire, Row, Transaction, ValueRef}; +use sqlx::{ + any::AnyPool, query as sqlx_query, Acquire, Error::Configuration, Row, Transaction, ValueRef, +}; use std::collections::HashMap; use crate::{ @@ -78,25 +80,49 @@ pub async fn validate_row( }; for (column, cell) in row.iter() { + let nulltype = match cell.get("nulltype") { + None => None, + Some(SerdeValue::String(s)) => Some(s.to_string()), + _ => { + return Err(Configuration( + format!("Nulltype is not a string in cell: {:?}", cell).into(), + )) + } + }; + let value = match cell.get("value") { + Some(SerdeValue::String(s)) => 
s.to_string(), + Some(SerdeValue::Number(n)) => format!("{}", n), + _ => { + return Err(Configuration( + format!( + "Field 'value' of: {:#?} is neither a number nor a string.", + cell + ) + .into(), + )) + } + }; + let valid = match cell.get("valid").and_then(|v| v.as_bool()) { + Some(b) => b, + None => { + return Err(Configuration( + format!("No bool named 'valid' in cell: {:?}", cell).into(), + )) + } + }; + let messages = match cell.get("messages").and_then(|m| m.as_array()) { + Some(a) => a.to_vec(), + None => { + return Err(Configuration( + format!("No array named 'messages' in cell: {:?}", cell).into(), + )) + } + }; let result_cell = ResultCell { - nulltype: cell - .get("nulltype") - .and_then(|n| Some(n.as_str().unwrap())) - .and_then(|n| Some(n.to_string())), - value: match cell.get("value") { - Some(SerdeValue::String(s)) => s.to_string(), - Some(SerdeValue::Number(n)) => format!("{}", n), - _ => panic!( - "Field 'value' of: {:#?} is neither a number nor a string.", - cell - ), - }, - valid: cell.get("valid").and_then(|v| v.as_bool()).unwrap(), - messages: cell - .get("messages") - .and_then(|m| m.as_array()) - .unwrap() - .to_vec(), + nulltype: nulltype, + value: value, + valid: valid, + messages: messages, }; result_row.contents.insert(column.to_string(), result_cell); } From 479bc998c99f661b954521ea6638f439f7eda65d Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 17 Jul 2023 15:00:35 -0400 Subject: [PATCH 30/31] minor cleanup --- src/lib.rs | 228 +++++++++++++++++++++++++----------------------- src/validate.rs | 39 ++++----- 2 files changed, 138 insertions(+), 129 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 048ca5ab..86c0e581 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -46,7 +46,7 @@ use serde_json::{json, Value as SerdeValue}; use sqlx::{ any::{AnyConnectOptions, AnyKind, AnyPool, AnyPoolOptions, AnyRow}, query as sqlx_query, Acquire, Column, - Error::Configuration, + Error::Configuration as SqlxCErr, Row, Transaction, ValueRef, 
}; use std::{ @@ -1058,6 +1058,10 @@ pub async fn valve( Ok(config.to_string()) } +/// Given a global config map, a database connection pool, a database transaction, a table name, a +/// column name, and a value for that column: get the rows, other than the one indicated by +/// `except`, that would need to be revalidated if the given value were to replace the actual +/// value of the column in that row. pub async fn get_affected_rows( table: &str, column: &str, @@ -1090,17 +1094,17 @@ pub async fn get_affected_rows( .map(|c| { format!( r#"CASE - WHEN "{column}" {is_clause} NULL THEN ( - SELECT value - FROM "message" - WHERE "row" = "row_number" - AND "column" = '{column}' - AND "table" = '{table}' - ORDER BY "message_id" DESC - LIMIT 1 - ) - ELSE {casted_column} - END AS "{column}_extended""#, + WHEN "{column}" {is_clause} NULL THEN ( + SELECT value + FROM "message" + WHERE "row" = "row_number" + AND "column" = '{column}' + AND "table" = '{table}' + ORDER BY "message_id" DESC + LIMIT 1 + ) + ELSE {casted_column} + END AS "{column}_extended""#, casted_column = if pool.any_kind() == AnyKind::Sqlite { cast_column_sql_to_text(c, "non-text") } else { @@ -1191,6 +1195,9 @@ pub async fn get_affected_rows( Ok(table_rows) } +/// Given a database connection pool, a database transaction, a table name, a column name, a row +/// number, and a cell value for the column, insert an update message to the message table +/// indicating that the actual value of the column has been changed to cell_value. pub async fn insert_update_message( pool: &AnyPool, tx: &mut Transaction<'_, sqlx::Any>, @@ -1270,6 +1277,8 @@ pub async fn insert_update_message( Ok(()) } +/// Given a database connection pool, a database transaction, a table name, a column name, and a row +/// number, get the current value of the given column in the database. 
pub async fn get_db_value( table: &str, column: &str, @@ -1318,6 +1327,10 @@ pub async fn get_db_value( Ok(value.to_string()) } +/// Given a global config map, a database connection pool, a database transaction, a table name, +/// and a [QueryAsIf] struct representing a custom modification to the query of the table, get +/// the rows that will potentially be affected by the database change to the row indicated in +/// query_as_if. pub async fn get_rows_to_update( global_config: &SerdeMap, pool: &AnyPool, @@ -1526,6 +1539,10 @@ pub async fn get_rows_to_update( )) } +/// Given a global config map, maps of datatype and rule conditions, a database connection pool, +/// a database transaction, a number of updates to process, a [QueryAsIf] struct indicating how +/// we should modify 'in thought' the current state of the database, and a flag indicating whether +/// we should allow recursive updates, validate and then update each row indicated in `updates`. pub async fn process_updates( global_config: &SerdeMap, compiled_datatype_conditions: &HashMap, @@ -1601,10 +1618,11 @@ pub async fn insert_new_row( Ok(rn) } -/// Given a global config map, a database connection pool, a database transaction, a table name, -/// and a row, assign a new row number to the row and insert it to the database using the given -/// transaction, then return the new row number. Optionally, if row_number is provided, use that -/// to identify the new row. +/// Given a global config map, compiled datatype and rule conditions, a database connection pool, a +/// database transaction, a table name, and a row, assign a new row number to the row and insert it +/// to the database using the given transaction, then return the new row number. Optionally, if +/// row_number is provided, use that to identify the new row. If skip_validation is set to true, +/// omit the implicit call to [validate_row()]. 
#[async_recursion] pub async fn insert_new_row_tx( global_config: &SerdeMap, @@ -1617,12 +1635,13 @@ pub async fn insert_new_row_tx( new_row_number: Option, skip_validation: bool, ) -> Result { + // Extract the base table name in case table_to_write has a _conflict suffix: let base_table = match table_to_write.strip_suffix("_conflict") { None => table_to_write.clone(), Some(base) => base, }; - // First, send the row through the row validator to determine if any fields are problematic and + // Send the row through the row validator to determine if any fields are problematic and // to mark them with appropriate messages: let row = if !skip_validation { validate_row( @@ -1671,27 +1690,21 @@ pub async fn insert_new_row_tx( let sorted_datatypes = get_sorted_datatypes(global_config); for (column, cell) in row.iter() { insert_columns.append(&mut vec![format!(r#""{}""#, column)]); - let cell = cell.as_object().ok_or(Configuration( - format!("Cell {:?} is not an object", cell).into(), + let cell = cell + .as_object() + .ok_or(SqlxCErr(format!("Cell {:?} is not an object", cell).into()))?; + let cell_valid = cell.get("valid").and_then(|v| v.as_bool()).ok_or(SqlxCErr( + format!("No bool named 'valid' in {:?}", cell).into(), + ))?; + let cell_value = cell.get("value").and_then(|v| v.as_str()).ok_or(SqlxCErr( + format!("No string named 'value' in {:?}", cell).into(), ))?; - let cell_valid = cell - .get("valid") - .and_then(|v| v.as_bool()) - .ok_or(Configuration( - format!("No flag named 'valid' in {:?}", cell).into(), - ))?; - let cell_value = cell - .get("value") - .and_then(|v| v.as_str()) - .ok_or(Configuration( - format!("No str named 'value' in {:?}", cell).into(), - ))?; let mut cell_for_insert = cell.clone(); if cell_valid { cell_for_insert.remove("value"); let sql_type = get_sql_type_from_global_config(&global_config, &base_table, &column, pool).ok_or( - Configuration( + SqlxCErr( format!("Unable to determine SQL type for {}.{}", base_table, column) .into(), ), @@ 
-1704,7 +1717,7 @@ pub async fn insert_new_row_tx( &sorted_datatypes, cell.get("messages") .and_then(|m| m.as_array()) - .ok_or(Configuration( + .ok_or(SqlxCErr( format!("No array named 'messages' in {:?}", cell).into(), ))?, ); @@ -1714,15 +1727,15 @@ pub async fn insert_new_row_tx( "value": cell_value, "level": cell_message.get("level").and_then(|s| s.as_str()) .ok_or( - Configuration(format!("No 'level' in {:?}", cell_message).into()) + SqlxCErr(format!("No 'level' in {:?}", cell_message).into()) )?, "rule": cell_message.get("rule").and_then(|s| s.as_str()) .ok_or( - Configuration(format!("No 'rule' in {:?}", cell_message).into()) + SqlxCErr(format!("No 'rule' in {:?}", cell_message).into()) )?, "message": cell_message.get("message").and_then(|s| s.as_str()) .ok_or( - Configuration(format!("No 'message' in {:?}", cell_message).into()) + SqlxCErr(format!("No 'message' in {:?}", cell_message).into()) )?, })); } @@ -1744,54 +1757,55 @@ pub async fn insert_new_row_tx( let (_, updates_after, _) = get_rows_to_update(global_config, pool, tx, base_table, &query_as_if) .await - .map_err(|e| Configuration(e.into()))?; + .map_err(|e| SqlxCErr(e.into()))?; // If the row is not already being directed to the conflict table, check it to see if it should // be redirected there: - let table_to_write = - { - if table_to_write.ends_with("_conflict") { - table_to_write.to_string() - } else { - let mut table_to_write = String::from(base_table); - for (column, cell) in row.iter() { - let valid = cell.get("valid").ok_or(Configuration( - format!("No flag named 'valid' in {:?}", cell).into(), - ))?; - if valid == false { - let structure = global_config - .get("table") - .and_then(|t| t.as_object()) - .and_then(|t| t.get(base_table)) - .and_then(|t| t.as_object()) - .and_then(|t| t.get("column")) - .and_then(|c| c.as_object()) - .and_then(|c| c.get(column)) - .and_then(|c| c.as_object()) - .and_then(|c| c.get("structure")) - .and_then(|s| s.as_str()) - .unwrap_or(""); - if 
vec!["primary", "unique"].contains(&structure) - || structure.starts_with("tree(") - { - let messages = cell.get("messages").and_then(|m| m.as_array()).ok_or( - Configuration(format!("No 'messages' in {:?}", cell).into()), - )?; - for msg in messages { - let level = msg.get("level").and_then(|l| l.as_str()).ok_or( - Configuration(format!("No 'level' in {:?}", cell).into()), - )?; - if level == "error" { - table_to_write.push_str("_conflict"); - break; - } + let table_to_write = { + if table_to_write.ends_with("_conflict") { + table_to_write.to_string() + } else { + let mut table_to_write = String::from(base_table); + for (column, cell) in row.iter() { + let valid = cell.get("valid").ok_or(SqlxCErr( + format!("No flag named 'valid' in {:?}", cell).into(), + ))?; + if valid == false { + let structure = global_config + .get("table") + .and_then(|t| t.as_object()) + .and_then(|t| t.get(base_table)) + .and_then(|t| t.as_object()) + .and_then(|t| t.get("column")) + .and_then(|c| c.as_object()) + .and_then(|c| c.get(column)) + .and_then(|c| c.as_object()) + .and_then(|c| c.get("structure")) + .and_then(|s| s.as_str()) + .unwrap_or_else(|| ""); + if vec!["primary", "unique"].contains(&structure) + || structure.starts_with("tree(") + { + let messages = cell + .get("messages") + .and_then(|m| m.as_array()) + .ok_or(SqlxCErr(format!("No 'messages' in {:?}", cell).into()))?; + for msg in messages { + let level = msg + .get("level") + .and_then(|l| l.as_str()) + .ok_or(SqlxCErr(format!("No 'level' in {:?}", cell).into()))?; + if level == "error" { + table_to_write.push_str("_conflict"); + break; } } } } - table_to_write } - }; + table_to_write + } + }; // Add the new row to the table: let insert_stmt = local_sql_syntax( @@ -1872,6 +1886,10 @@ pub async fn delete_row( Ok(()) } +/// Given a global config map, maps of datatype and rule conditions, a database connection pool, a +/// database transaction, a table name, a row number, and a flag indicating whether this delete +/// is 
one part of what is effectively an update (i.e., a delete followed by an insert, as opposed +/// to a straight delete), delete the given row from the database. #[async_recursion] pub async fn delete_row_tx( global_config: &SerdeMap, @@ -1895,7 +1913,7 @@ pub async fn delete_row_tx( ); let query = sqlx_query(&sql); let sql_row = query.fetch_one(tx.acquire().await?).await.map_err(|e| { - Configuration( + SqlxCErr( format!( "Got: '{}' while fetching row number {} from table {}", e, row_number, base_table @@ -1914,7 +1932,7 @@ pub async fn delete_row_tx( if !raw_value.is_null() { let sql_type = get_sql_type_from_global_config(global_config, &base_table, &cname, pool) - .ok_or(Configuration( + .ok_or(SqlxCErr( format!("Unable to determine SQL type for {}.{}", base_table, cname) .into(), ))?; @@ -1947,7 +1965,7 @@ pub async fn delete_row_tx( let (updates_before, _, updates_intra) = get_rows_to_update(global_config, pool, tx, base_table, &query_as_if) .await - .map_err(|e| Configuration(e.into()))?; + .map_err(|e| SqlxCErr(e.into()))?; // Process the updates that need to be performed before the update of the target row: process_updates( @@ -2074,7 +2092,7 @@ pub async fn update_row_tx( } else { get_rows_to_update(global_config, pool, tx, table_name, &query_as_if) .await - .map_err(|e| Configuration(e.into()))? + .map_err(|e| SqlxCErr(e.into()))? 
} }; @@ -2116,21 +2134,15 @@ pub async fn update_row_tx( let mut messages = vec![]; let sorted_datatypes = get_sorted_datatypes(global_config); for (column, cell) in row.iter() { - let cell = cell.as_object().ok_or(Configuration( - format!("Cell {:?} is not an object", cell).into(), + let cell = cell + .as_object() + .ok_or(SqlxCErr(format!("Cell {:?} is not an object", cell).into()))?; + let cell_valid = cell.get("valid").and_then(|v| v.as_bool()).ok_or(SqlxCErr( + format!("No flag named 'valid' in {:?}", cell).into(), + ))?; + let cell_value = cell.get("value").and_then(|v| v.as_str()).ok_or(SqlxCErr( + format!("No str named 'value' in {:?}", cell).into(), ))?; - let cell_valid = cell - .get("valid") - .and_then(|v| v.as_bool()) - .ok_or(Configuration( - format!("No flag named 'valid' in {:?}", cell).into(), - ))?; - let cell_value = cell - .get("value") - .and_then(|v| v.as_str()) - .ok_or(Configuration( - format!("No str named 'value' in {:?}", cell).into(), - ))?; // Begin by adding an extra 'update' row to the message table indicating that the value of // this column has been updated (if that is the case). 
@@ -2146,7 +2158,7 @@ pub async fn update_row_tx( &column, pool, ) - .ok_or(Configuration( + .ok_or(SqlxCErr( format!("Unable to determine SQL type for {}.{}", table_name, column).into(), ))?; assignments.push(format!( @@ -2161,7 +2173,7 @@ pub async fn update_row_tx( &sorted_datatypes, cell.get("messages") .and_then(|m| m.as_array()) - .ok_or(Configuration( + .ok_or(SqlxCErr( format!("No array named 'messages' in {:?}", cell).into(), ))?, ); @@ -2170,13 +2182,13 @@ pub async fn update_row_tx( "column": String::from(column), "value": String::from(cell_value), "level": cell_message.get("level").and_then(|s| s.as_str()).ok_or( - Configuration(format!("No 'level' in {:?}", cell_message).into()) + SqlxCErr(format!("No 'level' in {:?}", cell_message).into()) )?, "rule": cell_message.get("rule").and_then(|s| s.as_str()).ok_or( - Configuration(format!("No 'rule' in {:?}", cell_message).into()) + SqlxCErr(format!("No 'rule' in {:?}", cell_message).into()) )?, "message": cell_message.get("message").and_then(|s| s.as_str()).ok_or( - Configuration(format!("No 'message' in {:?}", cell_message).into()) + SqlxCErr(format!("No 'message' in {:?}", cell_message).into()) )?, })); } @@ -2199,7 +2211,7 @@ pub async fn update_row_tx( // Next, figure out where to put the new version of the row: let mut table_to_write = String::from(table_name); for (column, cell) in row.iter() { - let valid = cell.get("valid").ok_or(Configuration( + let valid = cell.get("valid").ok_or(SqlxCErr( format!("No flag named 'valid' in {:?}", cell).into(), ))?; if valid == false { @@ -2214,19 +2226,19 @@ pub async fn update_row_tx( .and_then(|c| c.as_object()) .and_then(|c| c.get("structure")) .and_then(|s| s.as_str()) - .unwrap_or(""); + .unwrap_or_else(|| ""); if vec!["primary", "unique"].contains(&structure) || structure.starts_with("tree(") { - let messages = - cell.get("messages") - .and_then(|m| m.as_array()) - .ok_or(Configuration( - format!("No array named 'messages' in {:?}", cell).into(), - ))?; + let 
messages = cell + .get("messages") + .and_then(|m| m.as_array()) + .ok_or(SqlxCErr( + format!("No array named 'messages' in {:?}", cell).into(), + ))?; for msg in messages { let level = msg .get("level") .and_then(|l| l.as_str()) - .ok_or(Configuration(format!("No 'level' in {:?}", msg).into()))?; + .ok_or(SqlxCErr(format!("No 'level' in {:?}", msg).into()))?; if level == "error" { table_to_write.push_str("_conflict"); break; diff --git a/src/validate.rs b/src/validate.rs index 182af89e..f4519f58 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -2,7 +2,8 @@ use enquote::unquote; use indexmap::IndexMap; use serde_json::{json, Value as SerdeValue}; use sqlx::{ - any::AnyPool, query as sqlx_query, Acquire, Error::Configuration, Row, Transaction, ValueRef, + any::AnyPool, query as sqlx_query, Acquire, Error::Configuration as SqlxCErr, Row, Transaction, + ValueRef, }; use std::collections::HashMap; @@ -50,9 +51,9 @@ pub struct QueryAsIf { /// Given a config map, maps of compiled datatype and rule conditions, a database connection /// pool, a table name, a row to validate, and a row number in case the row already exists, -/// perform both intra- and inter-row validation and return the validated row. Note that this -/// function is idempotent. Optionally, if a transaction is given, use that instead of the pool -/// for database access. +/// perform both intra- and inter-row validation and return the validated row. Optionally, if a +/// transaction is given, use that instead of the pool for database access. Note that this +/// function is idempotent. pub async fn validate_row( config: &SerdeMap, compiled_datatype_conditions: &HashMap, @@ -66,7 +67,7 @@ pub async fn validate_row( ) -> Result { // Fallback to a default transaction if it is not given. Since we do not commit before it falls // out of scope the transaction will be rolled back at the end of this function. And since this - // function is read-only the rollback is inconsequential. 
+ // function is read-only the rollback is trivial and therefore inconsequential. let default_tx = &mut pool.begin().await?; let tx = match tx { Some(tx) => tx, @@ -84,8 +85,8 @@ pub async fn validate_row( None => None, Some(SerdeValue::String(s)) => Some(s.to_string()), _ => { - return Err(Configuration( - format!("Nulltype is not a string in cell: {:?}", cell).into(), + return Err(SqlxCErr( + format!("No string 'nulltype' in cell: {:?}.", cell).into(), )) } }; @@ -93,28 +94,24 @@ pub async fn validate_row( Some(SerdeValue::String(s)) => s.to_string(), Some(SerdeValue::Number(n)) => format!("{}", n), _ => { - return Err(Configuration( - format!( - "Field 'value' of: {:#?} is neither a number nor a string.", - cell - ) - .into(), + return Err(SqlxCErr( + format!("No string/number 'value' in cell: {:#?}.", cell).into(), )) } }; let valid = match cell.get("valid").and_then(|v| v.as_bool()) { Some(b) => b, None => { - return Err(Configuration( - format!("No bool named 'valid' in cell: {:?}", cell).into(), + return Err(SqlxCErr( + format!("No bool 'valid' in cell: {:?}.", cell).into(), )) } }; let messages = match cell.get("messages").and_then(|m| m.as_array()) { Some(a) => a.to_vec(), None => { - return Err(Configuration( - format!("No array named 'messages' in cell: {:?}", cell).into(), + return Err(SqlxCErr( + format!("No array 'messages' in cell: {:?}.", cell).into(), )) } }; @@ -281,7 +278,7 @@ pub async fn get_matching_values( for arg in args { if let Expression::Label(arg) = *arg { // Remove the enclosing quotes from the values being returned: - let label = unquote(&arg).unwrap_or(arg); + let label = unquote(&arg).unwrap_or_else(|_| arg); if let Some(s) = matching_string { if label.contains(s) { values.push(label); @@ -309,7 +306,7 @@ pub async fn get_matching_values( .and_then(|c| c.as_object()) .and_then(|c| c.get("structure")) .and_then(|d| d.as_str()) - .unwrap_or(""), + .unwrap_or_else(|| ""), ); let sql_type = @@ -1132,7 +1129,7 @@ fn 
contains_dt_violation(messages: &Vec) -> bool { for m in messages { if m.get("rule") .and_then(|r| r.as_str()) - .unwrap() + .unwrap_or_else(|| "") .starts_with("datatype:") { contains_dt_violation = true; @@ -1197,7 +1194,7 @@ fn with_tree_sql( pool: &AnyPool, ) -> (String, Vec) { let empty_string = String::new(); - let extra_clause = extra_clause.unwrap_or(&empty_string); + let extra_clause = extra_clause.unwrap_or_else(|| &empty_string); let child_col = tree.get("child").and_then(|c| c.as_str()).unwrap(); let parent_col = tree.get("parent").and_then(|c| c.as_str()).unwrap(); From a2e7aca387f7118656d7b5ec51a1f73c66397054 Mon Sep 17 00:00:00 2001 From: Michael Cuffaro Date: Mon, 24 Jul 2023 11:37:14 -0400 Subject: [PATCH 31/31] remove unnecessary code from delete_row() --- src/lib.rs | 43 ------------------------------------------- 1 file changed, 43 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 86c0e581..4f8ca68f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1906,49 +1906,6 @@ pub async fn delete_row_tx( Some(base) => base, }; - // First, use the row number to fetch the row from the database: - let sql = format!( - "SELECT * FROM \"{}_view\" WHERE row_number = {}", - base_table, row_number - ); - let query = sqlx_query(&sql); - let sql_row = query.fetch_one(tx.acquire().await?).await.map_err(|e| { - SqlxCErr( - format!( - "Got: '{}' while fetching row number {} from table {}", - e, row_number, base_table - ) - .into(), - ) - })?; - - // TODO: This isn't the only place we do this. Factor this out into its own function. 
- let mut row = SerdeMap::new(); - for column in sql_row.columns() { - let cname = column.name(); - if !vec!["row_number", "message"].contains(&cname) { - let raw_value = sql_row.try_get_raw(format!(r#"{}"#, cname).as_str())?; - let value; - if !raw_value.is_null() { - let sql_type = - get_sql_type_from_global_config(global_config, &base_table, &cname, pool) - .ok_or(SqlxCErr( - format!("Unable to determine SQL type for {}.{}", base_table, cname) - .into(), - ))?; - value = get_column_value(&sql_row, &cname, &sql_type); - } else { - value = String::from(""); - } - let cell = json!({ - "value": value, - "valid": true, - "messages": json!([]), - }); - row.insert(cname.to_string(), json!(cell)); - } - } - // Used to validate the given row, counterfactually, "as if" the row did not exist in the // database: let query_as_if = QueryAsIf {