diff --git a/Makefile b/Makefile index 5832e279..b7d671b1 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ MAKEFLAGS += --warn-undefined-variables build: mkdir build -.PHONY: doc readme valve_debug valve_release test sqlite_test pg_test api_test sqlite_api_test \ +.PHONY: clean doc readme valve_debug valve_release test sqlite_test pg_test api_test sqlite_api_test \ pg_qpi_test random_test_data random_test sqlite_random_test pg_random_test guess_test_data \ perf_test_data sqlite_perf_test pg_perf_test perf_test @@ -36,26 +36,25 @@ valve_debug: cargo build ln -s target/debug/ontodev_valve valve -build/valve.db: test/src/table.tsv clean valve | build - ./valve $< $@ +build/valve.db: valve test/src/table.tsv | build + ./$^ $@ test/output: mkdir -p test/output -test: sqlite_test pg_test api_test random_test +test: clean_test_db sqlite_test pg_test api_test random_test -tables_to_test = column datatype rule table table1 table2 table3 table4 table5 table6 table7 table8 \ - table9 table10 table11 +tables_to_test := $(shell cut -f 1 test/src/table.tsv) sqlite_test: build/valve.db test/src/table.tsv | test/output @echo "Testing valve on sqlite ..." test/round_trip.sh $^ - scripts/export.py messages $< $| $(tables_to_test) + scripts/export_messages.py $< $| $(tables_to_test) diff --strip-trailing-cr -q test/expected/messages.tsv test/output/messages.tsv - scripts/export.py messages --a1 $< $| $(tables_to_test) + scripts/export_messages.py --a1 $< $| $(tables_to_test) diff --strip-trailing-cr -q test/expected/messages_a1.tsv test/output/messages.tsv # The "pk" test is run on table7 only since it is the only table whose primary keys are all valid: - scripts/export.py messages --pk $< $| table7 + scripts/export_messages.py --pk $< $| table7 diff --strip-trailing-cr -q test/expected/messages_pk.tsv test/output/messages.tsv @echo "Test succeeded!" @@ -63,12 +62,12 @@ pg_test: valve test/src/table.tsv | test/output @echo "Testing valve on postgresql ..." ./$^ postgresql:///valve_postgres test/round_trip.sh postgresql:///valve_postgres $(word 2,$^) - scripts/export.py messages postgresql:///valve_postgres $| $(tables_to_test) + scripts/export_messages.py postgresql:///valve_postgres $| $(tables_to_test) diff --strip-trailing-cr -q test/expected/messages.tsv test/output/messages.tsv - scripts/export.py messages --a1 postgresql:///valve_postgres $| $(tables_to_test) + scripts/export_messages.py --a1 postgresql:///valve_postgres $| $(tables_to_test) diff --strip-trailing-cr -q test/expected/messages_a1.tsv test/output/messages.tsv # The "pk" test is run on table7 only since it is the only table whose primary keys are all valid: - scripts/export.py messages --pk postgresql:///valve_postgres $| table7 + scripts/export_messages.py --pk postgresql:///valve_postgres $| table7 diff --strip-trailing-cr -q test/expected/messages_pk.tsv test/output/messages.tsv @echo "Test succeeded!" @@ -77,22 +76,28 @@ api_test: sqlite_api_test pg_api_test sqlite_api_test: valve test/src/table.tsv build/valve.db test/insert_update.sh | test/output @echo "Testing API functions on sqlite ..." 
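+	# A note on the GNU Make automatic variables used in these recipes: $< is the
+	# first prerequisite (here, the valve binary), $^ is the full prerequisite
+	# list, $(word N,$^) selects the Nth prerequisite, and $| is the list of
+	# order-only prerequisites (here, test/output). In this rule, for example,
+	# $(word 2,$^) expands to test/src/table.tsv and $(word 3,$^) to build/valve.db.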
./$< --api_test $(word 2,$^) $(word 3,$^) - $(word 4,$^) $(word 3,$^) - scripts/export.py messages $(word 3,$^) $| $(tables_to_test) + $(word 4,$^) $(word 3,$^) $(word 2,$^) + scripts/export_messages.py $(word 3,$^) $| $(tables_to_test) diff --strip-trailing-cr -q test/expected/messages_after_api_test.tsv test/output/messages.tsv echo "select \"history_id\", \"table\", \"row\", \"from\", \"to\", \"summary\", \"user\", \"undone_by\" from history where history_id < 15 order by history_id" | sqlite3 -header -tabs build/valve.db > test/output/history.tsv diff --strip-trailing-cr -q test/expected/history.tsv test/output/history.tsv + # We drop all of the db tables because the schema for the next test (random test) is different + # from the schema used for this test. + ./$< --drop_all $(word 2,$^) $(word 3,$^) @echo "Test succeeded!" pg_api_test: valve test/src/table.tsv test/insert_update.sh | test/output @echo "Testing API functions on postgresql ..." ./$< $(word 2,$^) postgresql:///valve_postgres ./$< --api_test $(word 2,$^) postgresql:///valve_postgres - $(word 3,$^) postgresql:///valve_postgres - scripts/export.py messages postgresql:///valve_postgres $| $(tables_to_test) + $(word 3,$^) postgresql:///valve_postgres $(word 2,$^) + scripts/export_messages.py postgresql:///valve_postgres $| $(tables_to_test) diff --strip-trailing-cr -q test/expected/messages_after_api_test.tsv test/output/messages.tsv psql postgresql:///valve_postgres -c "COPY (select \"history_id\", \"table\", \"row\", \"from\", \"to\", \"summary\", \"user\", \"undone_by\" from history where history_id < 15 order by history_id) TO STDOUT WITH NULL AS ''" > test/output/history.tsv tail -n +2 test/expected/history.tsv | diff --strip-trailing-cr -q test/output/history.tsv - + # We drop all of the db tables because the schema for the next test (random test) is different + # from the schema used for this test. + ./$< --drop_all $(word 2,$^) postgresql:///valve_postgres @echo "Test succeeded!" sqlite_random_db = build/valve_random.db @@ -106,13 +111,13 @@ $(random_test_dir)/ontology: random_test_data: test/generate_random_test_data.py valve valve test/random_test_data/table.tsv | $(random_test_dir)/ontology ./$< $$(date +"%s") 100 5 $(word 3,$^) $| -sqlite_random_test: valve clean random_test_data | build test/output +sqlite_random_test: valve random_test_data | build test/output @echo "Testing with random data on sqlite ..." ./$< $(random_test_dir)/table.tsv $(sqlite_random_db) test/round_trip.sh $(sqlite_random_db) $(random_test_dir)/table.tsv @echo "Test succeeded!" -pg_random_test: valve clean random_test_data | build test/output +pg_random_test: valve random_test_data | build test/output @echo "Testing with random data on postgresql ..." 
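+	# Note: a URL of the form postgresql:///valve_postgres (no host or port)
+	# connects to the database named valve_postgres on the local server over
+	# the default socket, as the current user.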
./$< $(random_test_dir)/table.tsv postgresql:///valve_postgres test/round_trip.sh postgresql:///valve_postgres $(random_test_dir)/table.tsv @@ -155,17 +160,20 @@ $(perf_test_db): valve perf_test_data $(perf_test_dir)/*.tsv | build $(perf_test time -p ./$< --verbose $(perf_test_dir)/table.tsv $@ sqlite_perf_test: build/valve_perf.db | test/output - time -p scripts/export.py messages $< $| $(tables_to_test) + time -p scripts/export_messages.py $< $| $(tables_to_test) pg_perf_test: valve $(perf_test_dir)/ontology | test/output time -p ./$< --verbose $(perf_test_dir)/table.tsv postgresql:///valve_postgres - time -p scripts/export.py messages postgresql:///valve_postgres $| $(tables_to_test) + time -p scripts/export_messages.py postgresql:///valve_postgres $| $(tables_to_test) perf_test: sqlite_perf_test pg_perf_test clean: rm -Rf build/valve.db* build/valve_random.db* test/output $(random_test_dir)/ontology valve +clean_test_db: + rm -Rf build/valve.db + clean_guess_db: rm -Rf build/valve_guess.db diff --git a/README.md b/README.md index 02113b60..183c8a1d 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,9 @@ valve --help ``` to see command line options. +### API +See [Valve] + ### Python bindings See [valve.py](https://github.com/ontodev/valve.py) diff --git a/scripts/export.py b/scripts/export_messages.py similarity index 75% rename from scripts/export.py rename to scripts/export_messages.py index bc61c259..380d8497 100755 --- a/scripts/export.py +++ b/scripts/export_messages.py @@ -115,52 +115,6 @@ def get_column_order_and_info_for_sqlite(cursor, table): } -def export_data(cursor, is_sqlite, args): - """ - Given a database cursor, a flag indicating whether this is a sqlite or postgres db, and a - dictionary containing: an output directory, "output", and a list of tables, "tables": export all - of the given database tables to .tsv files in the output directory. 
- """ - output_dir = os.path.normpath(args["output_dir"]) - tables = args["tables"] - - for table in tables: - try: - if is_sqlite: - columns_info = get_column_order_and_info_for_sqlite(cursor, table) - else: - columns_info = get_column_order_and_info_for_postgres(cursor, table) - unsorted_columns = columns_info["unsorted_columns"] - - select = [f'"{column}"' for column in unsorted_columns] - select = ", ".join(select) - - # Fetch the rows from the table and write them to a corresponding TSV file in the - # output directory: - cursor.execute(f'SELECT {select} FROM "{table}_text_view" ORDER BY "row_number"') - colnames = [d[0] for d in cursor.description] - rows = map(lambda r: dict(zip(colnames, r)), cursor) - fieldnames = [c for c in colnames if c != "row_number"] - with open(f"{output_dir}/{table}.tsv", "w", newline="\n") as csvfile: - writer = csv.DictWriter( - csvfile, - fieldnames=fieldnames, - delimiter="\t", - doublequote=False, - strict=True, - lineterminator="\n", - quoting=csv.QUOTE_NONE, - escapechar=None, - quotechar=None, - ) - writer.writeheader() - for row in rows: - del row["row_number"] - writer.writerow(row) - except sqlite3.OperationalError as e: - print(f"ERROR while exporting {table}: {e}", file=sys.stderr) - - def export_messages(cursor, is_sqlite, args): """ Given a database cursor, a flag indicating whether this is a sqlite or postgres db, and a @@ -289,42 +243,24 @@ def col_to_a1(column, columns): if __name__ == "__main__": - prog_parser = ArgumentParser(description="Database table export utility") - sub_parsers = prog_parser.add_subparsers(help="Possible sub-commands") - - sub1 = sub_parsers.add_parser( - "data", - description="Export table data", - help="Export table data. For command-line options, run: `%(prog)s data --help`", + parser = ArgumentParser(description="Export Valve messages") + pgroup = parser.add_mutually_exclusive_group() + pgroup.add_argument("--a1", action="store_true", help="Output error messages in A1 format") + pgroup.add_argument("--pk", action="store_true", help="Identify rows using primary keys") + + parser.add_argument( + "db", + help="""Either a database connection URL or a path to a SQLite database file. In the + case of a URL, you must use one of the following schemes: potgresql:// + (for postgreSQL), sqlite:// or file: (for SQLite). + """, ) - - sub1.set_defaults(func=export_data) - - sub2 = sub_parsers.add_parser( - "messages", - description="Export error messages", - help="Export error messages. For command-line options, run: `%(prog)s messages --help`", + parser.add_argument("output_dir", help="The name of the directory in which to save TSV files") + parser.add_argument( + "tables", metavar="table", nargs="+", help="The name of a table to export to TSV" ) - sub2_group = sub2.add_mutually_exclusive_group() - sub2_group.add_argument("--a1", action="store_true", help="Output error messages in A1 format") - sub2_group.add_argument("--pk", action="store_true", help="Identify rows using primary keys") - sub2.set_defaults(func=export_messages) - - for sub in [sub1, sub2]: - sub.add_argument( - "db", - help="""Either a database connection URL or a path to a SQLite database file. In the - case of a URL, you must use one of the following schemes: potgresql:// - (for postgreSQL), sqlite:// or file: (for SQLite). 
- """, - ) - sub.add_argument("output_dir", help="The name of the directory in which to save TSV files") - sub.add_argument( - "tables", metavar="table", nargs="+", help="The name of a table to export to TSV" - ) - args = prog_parser.parse_args() - func = args.func + args = parser.parse_args() args = vars(args) if not os.path.isdir(args["output_dir"]): @@ -336,7 +272,7 @@ def col_to_a1(column, columns): if db.startswith("postgresql://"): with psycopg2.connect(db) as conn: cursor = conn.cursor() - func(cursor, False, args) + export_messages(cursor, False, args) else: m = re.search(r"(^(file:|sqlite://))?(.+?)(\?.+)?$", db) if m: @@ -348,7 +284,7 @@ def col_to_a1(column, columns): db = f"file:{path}{params}" with sqlite3.connect(db, uri=True) as conn: cursor = conn.cursor() - func(cursor, True, args) + export_messages(cursor, True, args) else: print(f"Could not parse database specification: {db}", file=sys.stderr) sys.exit(1) diff --git a/scripts/guess.py b/scripts/guess.py index 0f9ab864..b5eda161 100755 --- a/scripts/guess.py +++ b/scripts/guess.py @@ -155,14 +155,14 @@ def get_higher_datatypes(datatype_hierarchies, universals, depth): def get_sql_type(config, datatype): """Given the config map and the name of a datatype, climb the datatype tree (as required), - and return the first 'SQLite type' found.""" + and return the first 'SQL type' found.""" if "datatype" not in config: print("Missing datatypes in config") sys.exit(1) if datatype not in config["datatype"]: return None - if config["datatype"][datatype].get("SQLite type"): - return config["datatype"][datatype]["SQLite type"] + if config["datatype"][datatype].get("SQL type"): + return config["datatype"][datatype]["SQL type"] return get_sql_type(config, config["datatype"][datatype].get("parent")) @@ -260,9 +260,9 @@ def is_match(datatype): # If the datatype has no associated condition then it matches anything: if not datatype.get("condition"): return True - # If the SQLite type is NULL this datatype is ruled out: - sqlite_type = datatype.get("SQLite type") - if sqlite_type and sqlite_type.casefold() == "null": + # If the SQL type is NULL this datatype is ruled out: + sql_type = datatype.get("SQL type") + if sql_type and sql_type.casefold() == "null": return False condition = get_compiled_condition(datatype["condition"], config["parser"]) diff --git a/src/api_test.rs b/src/api_test.rs index 11f0f3ea..e551a1da 100644 --- a/src/api_test.rs +++ b/src/api_test.rs @@ -1,41 +1,17 @@ use ontodev_valve::{ - delete_row, get_compiled_datatype_conditions, get_compiled_rule_conditions, - get_parsed_structure_conditions, get_record_to_redo, get_record_to_undo, insert_new_row, redo, - undo, update_row, - validate::{get_matching_values, validate_row}, - valve, - valve_grammar::StartParser, - ColumnRule, CompiledCondition, ParsedStructure, SerdeMap, ValveCommand, + valve::{Valve, ValveError}, + SerdeMap, }; use rand::distributions::{Alphanumeric, DistString, Distribution, Uniform}; use rand::{random, thread_rng}; -use serde_json::{json, Value as SerdeValue}; -use sqlx::{ - any::{AnyConnectOptions, AnyKind, AnyPool, AnyPoolOptions}, - query as sqlx_query, - Error::Configuration as SqlxCErr, - Row, ValueRef, -}; -use std::{collections::HashMap, str::FromStr}; - -async fn test_matching( - config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - parsed_structure_conditions: &HashMap, - pool: &AnyPool, -) -> Result<(), sqlx::Error> { +use serde_json::json; +use sqlx::{any::AnyPool, query as sqlx_query, Row, ValueRef}; + +async fn test_matching(valve: 
&Valve) -> Result<(), ValveError> { eprint!("Running test_matching() ... "); + // Test the get_matching_values() function: - let matching_values = get_matching_values( - &config, - &compiled_datatype_conditions, - &parsed_structure_conditions, - &pool, - "table2", - "child", - None, - ) - .await?; + let matching_values = valve.get_matching_values("table2", "child", None).await?; assert_eq!( matching_values, json!([ @@ -52,16 +28,9 @@ async fn test_matching( ]) ); - let matching_values = get_matching_values( - &config, - &compiled_datatype_conditions, - &parsed_structure_conditions, - &pool, - "table6", - "child", - Some("7"), - ) - .await?; + let matching_values = valve + .get_matching_values("table6", "child", Some("7")) + .await?; assert_eq!( matching_values, json!([ @@ -73,13 +42,9 @@ async fn test_matching( Ok(()) } -async fn test_idempotent_validate_and_update( - config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, -) -> Result<(), sqlx::Error> { +async fn test_idempotent_validate_and_update(valve: &Valve) -> Result<(), ValveError> { eprint!("Running test_idempotent_validate_and_update() ... "); + // We test that validate_row() is idempotent by running it multiple times on the same row: let row = json!({ "child": {"messages": [], "valid": true, "value": "b"}, @@ -95,71 +60,28 @@ async fn test_idempotent_validate_and_update( }, }); - let result_row_1 = validate_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - None, - "table2", - row.as_object().unwrap(), - Some(1), - None, - ) - .await?; - - let result_row_2 = validate_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - None, - "table2", - &result_row_1, - Some(1), - None, - ) - .await?; + let result_row_1 = valve + .validate_row("table2", row.as_object().unwrap(), None) + .await?; + + let result_row_2 = valve.validate_row("table2", &result_row_1, None).await?; assert_eq!(result_row_1, result_row_2); - let result_row = validate_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - None, - "table2", - &result_row_2, - Some(1), - None, - ) - .await?; + let result_row = valve.validate_row("table2", &result_row_2, None).await?; assert_eq!(result_row, result_row_2); // Update the row we constructed and validated above in the database: - update_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table2", - &row.as_object().unwrap(), - &1, - "VALVE", - ) - .await?; + valve + .update_row("table2", &1, &row.as_object().unwrap()) + .await?; eprintln!("done."); Ok(()) } -async fn test_validate_and_insert_1( - config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, -) -> Result<(), sqlx::Error> { +async fn test_validate_and_insert_1(valve: &Valve) -> Result<(), ValveError> { eprint!("Running test_validate_and_insert_1() ... 
"); + // Validate and insert a new row: let row = json!({ "id": {"messages": [], "valid": true, "value": "BFO:0000027"}, @@ -175,42 +97,19 @@ async fn test_validate_and_insert_1( "type": {"messages": [], "valid": true, "value": "owl:Class"}, }); - let result_row = validate_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - None, - "table3", - row.as_object().unwrap(), - None, - None, - ) - .await?; - - let _new_row_num = insert_new_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table3", - &result_row, - None, - "VALVE", - ) - .await?; + let result_row = valve + .validate_row("table3", row.as_object().unwrap(), None) + .await?; + + let (_new_row_num, _new_row) = valve.insert_row("table3", &result_row).await?; eprintln!("done."); Ok(()) } -async fn test_validate_and_update( - config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, -) -> Result<(), sqlx::Error> { +async fn test_validate_and_update(valve: &Valve) -> Result<(), ValveError> { eprint!("Running test_validate_and_update() ... "); + // Validate and update an existing row: let row = json!({ "child": {"messages": [], "valid": true, "value": 2}, @@ -226,42 +125,19 @@ async fn test_validate_and_update( }, }); - let result_row = validate_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - None, - "table6", - row.as_object().unwrap(), - Some(1), - None, - ) - .await?; - - update_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table6", - &result_row, - &1, - "VALVE", - ) - .await?; + let result_row = valve + .validate_row("table6", row.as_object().unwrap(), None) + .await?; + + valve.update_row("table6", &1, &result_row).await?; eprintln!("done."); Ok(()) } -async fn test_validate_and_insert_2( - config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, -) -> Result<(), sqlx::Error> { +async fn test_validate_and_insert_2(valve: &Valve) -> Result<(), ValveError> { eprint!("Running test_validate_and_insert_2() ... "); + // Validate and insert a new row: let row = json!({ "child": {"messages": [], "valid": true, "value": 2}, @@ -277,42 +153,19 @@ async fn test_validate_and_insert_2( }, }); - let result_row = validate_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - None, - "table6", - row.as_object().unwrap(), - None, - None, - ) - .await?; - - let _new_row_num = insert_new_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table6", - &result_row, - None, - "VALVE", - ) - .await?; + let result_row = valve + .validate_row("table6", row.as_object().unwrap(), None) + .await?; + + let (_new_row_num, _new_row) = valve.insert_row("table6", &result_row).await?; eprintln!("done."); Ok(()) } -async fn test_dependencies( - config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, -) -> Result<(), sqlx::Error> { +async fn test_dependencies(valve: &Valve) -> Result<(), ValveError> { eprint!("Running test_dependencies() ... "); + // Test cases for updates/inserts/deletes with dependencies. 
let row = json!({ "foreign_column": {"messages": [], "valid": true, "value": "w"}, @@ -320,17 +173,9 @@ async fn test_dependencies( "numeric_foreign_column": {"messages": [], "valid": true, "value": ""}, }); - update_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table10", - &row.as_object().unwrap(), - &1, - "VALVE", - ) - .await?; + valve + .update_row("table10", &1, &row.as_object().unwrap()) + .await?; let row = json!({ "child": {"messages": [], "valid": true, "value": "b"}, @@ -340,28 +185,11 @@ async fn test_dependencies( "bar": {"messages": [], "valid": true, "value": "f"}, }); - update_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table11", - &row.as_object().unwrap(), - &2, - "VALVE", - ) - .await?; - - delete_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table11", - &4, - "VALVE", - ) - .await?; + valve + .update_row("table11", &2, &row.as_object().unwrap()) + .await?; + + valve.delete_row("table11", &4).await?; let row = json!({ "foreign_column": {"messages": [], "valid": true, "value": "i"}, @@ -369,24 +197,16 @@ async fn test_dependencies( "numeric_foreign_column": {"messages": [], "valid": true, "value": "9"}, }); - let _new_row_num = insert_new_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table10", - &row.as_object().unwrap(), - None, - "VALVE", - ) - .await?; + let (_new_row_num, _new_row) = valve + .insert_row("table10", &row.as_object().unwrap()) + .await?; eprintln!("done."); Ok(()) } #[derive(Clone, Debug, PartialEq, Eq)] -pub enum DbOperation { +enum DbOperation { Insert, Delete, Update, @@ -394,7 +214,7 @@ pub enum DbOperation { Redo, } -async fn generate_operation_sequence(pool: &AnyPool) -> Result, sqlx::Error> { +async fn generate_operation_sequence(pool: &AnyPool) -> Result, ValveError> { /* Algorithm: ---------- @@ -491,15 +311,11 @@ async fn generate_operation_sequence(pool: &AnyPool) -> Result, Ok(operations) } -async fn test_randomized_api_test_with_undo_redo( - config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, -) -> Result<(), sqlx::Error> { +async fn test_randomized_api_test_with_undo_redo(valve: &Valve) -> Result<(), ValveError> { // Randomly generate a number of insert/update/delete operations, possibly followed by undos // and/or redos. eprint!("Running test_randomized_api_test_with_undo_redo() ... 
"); + fn generate_value() -> String { let mut value = Alphanumeric.sample_string(&mut rand::thread_rng(), 10); while random::() && random::() { @@ -533,86 +349,43 @@ async fn test_randomized_api_test_with_undo_redo( row } - let operations_list = generate_operation_sequence(pool).await?; + let operations_list = generate_operation_sequence(&valve.pool).await?; for operation in operations_list { match operation { DbOperation::Delete => { let query = sqlx_query("SELECT MAX(row_number) AS row_number FROM table1_view"); - let sql_row = query.fetch_one(pool).await?; + let sql_row = query.fetch_one(&valve.pool).await?; let raw_row_number = sql_row.try_get_raw("row_number")?; if raw_row_number.is_null() { - return Err(SqlxCErr("No rows in table1_view".into())); + return Err(ValveError::DataError("No rows in table1_view".into())); } else { let row_number: i64 = sql_row.get("row_number"); let row_number = row_number as u32; - delete_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table1", - &row_number, - "VALVE", - ) - .await?; + valve.delete_row("table1", &row_number).await?; } } DbOperation::Update => { let query = sqlx_query("SELECT MAX(row_number) AS row_number FROM table1_view"); - let sql_row = query.fetch_one(pool).await?; + let sql_row = query.fetch_one(&valve.pool).await?; let raw_row_number = sql_row.try_get_raw("row_number")?; if raw_row_number.is_null() { - return Err(SqlxCErr("No rows in table1_view".into())); + return Err(ValveError::DataError("No rows in table1_view".into())); } else { let row_number: i64 = sql_row.get("row_number"); let row_number = row_number as u32; let row = generate_row(); - update_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table1", - &row, - &row_number, - "VALVE", - ) - .await?; + valve.update_row("table1", &row_number, &row).await?; } } DbOperation::Insert => { let row = generate_row(); - let _rn = insert_new_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table1", - &row, - None, - "VALVE", - ) - .await?; + let (_rn, _r) = valve.insert_row("table1", &row).await?; } DbOperation::Undo => { - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; + valve.undo().await?; } DbOperation::Redo => { - redo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; + valve.redo().await?; } }; } @@ -621,43 +394,9 @@ async fn test_randomized_api_test_with_undo_redo( Ok(()) } -async fn verify_undo_redo( - pool: &AnyPool, - undo_should_exist: bool, - redo_should_exist: bool, -) -> Result<(), sqlx::Error> { - let rec_to_undo = get_record_to_undo(pool).await?; - if undo_should_exist { - if let None = rec_to_undo { - assert!(false, "Expected a record to undo."); - } - } else { - if let Some(_) = rec_to_undo { - assert!(false, "Did not expect a record to undo."); - } - } - - let rec_to_redo = get_record_to_redo(pool).await?; - if redo_should_exist { - if let None = rec_to_redo { - assert!(false, "Expected a record to redo."); - } - } else { - if let Some(_) = rec_to_redo { - assert!(false, "Did not expect a record to redo."); - } - } - - Ok(()) -} - -async fn test_undo_redo( - config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, -) -> Result<(), sqlx::Error> { +async fn test_undo_redo(valve: &Valve) -> Result<(), ValveError> { eprint!("Running test_undo_redo() ... 
"); + // Undo/redo tests let row_1 = json!({ "foreign_column": {"messages": [], "valid": true, "value": "j"}, @@ -670,367 +409,81 @@ async fn test_undo_redo( "numeric_foreign_column": {"messages": [], "valid": true, "value": "11"}, }); - // Our initial undo/redo state: - verify_undo_redo(pool, false, false).await?; - // Undo/redo test 1: - let _rn = insert_new_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table10", - &row_1.as_object().unwrap(), - None, - "VALVE", - ) - .await?; - - verify_undo_redo(pool, true, false).await?; - - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; - - verify_undo_redo(pool, false, true).await?; - - redo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; - - verify_undo_redo(pool, true, false).await?; - - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; - - verify_undo_redo(pool, false, true).await?; + let (_rn, _r) = valve + .insert_row("table10", &row_1.as_object().unwrap()) + .await?; + + valve.undo().await?; + + valve.redo().await?; + + valve.undo().await?; // Undo/redo test 2: - update_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table10", - &row_2.as_object().unwrap(), - &8, - "VALVE", - ) - .await?; - - verify_undo_redo(pool, true, false).await?; - - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; - - verify_undo_redo(pool, false, true).await?; - - redo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; - - verify_undo_redo(pool, true, false).await?; - - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; - - verify_undo_redo(pool, false, true).await?; + valve + .update_row("table10", &8, &row_2.as_object().unwrap()) + .await?; + + valve.undo().await?; + + valve.redo().await?; + + valve.undo().await?; // Undo/redo test 3: - delete_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table10", - &8, - "VALVE", - ) - .await?; - - verify_undo_redo(pool, true, false).await?; - - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; - - verify_undo_redo(pool, false, true).await?; - - redo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; - - verify_undo_redo(pool, true, false).await?; - - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; - - verify_undo_redo(pool, false, true).await?; + valve.delete_row("table10", &8).await?; + + valve.undo().await?; + + valve.redo().await?; + + valve.undo().await?; // Undo/redo test 4: - let rn = insert_new_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table10", - &row_1.as_object().unwrap(), - None, - "VALVE", - ) - .await?; - - verify_undo_redo(pool, true, false).await?; - - update_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table10", - &row_2.as_object().unwrap(), - &rn, - "VALVE", - ) - .await?; - - verify_undo_redo(pool, true, false).await?; + let (rn, _row) = valve + .insert_row("table10", &row_1.as_object().unwrap()) + .await?; - // Undo update: - undo( - 
&config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; + valve + .update_row("table10", &rn, &row_2.as_object().unwrap()) + .await?; - verify_undo_redo(pool, true, true).await?; + // Undo update: + valve.undo().await?; // Redo update: - redo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; - - verify_undo_redo(pool, true, false).await?; - - delete_row( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "table10", - &rn, - "VALVE", - ) - .await?; - - verify_undo_redo(pool, true, false).await?; + valve.redo().await?; - // Undo delete: - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; + valve.delete_row("table10", &rn).await?; - verify_undo_redo(pool, true, true).await?; + // Undo delete: + valve.undo().await?; // Undo update: - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; - - verify_undo_redo(pool, true, true).await?; + valve.undo().await?; // Undo insert: - undo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - "VALVE", - ) - .await?; - - verify_undo_redo(pool, false, true).await?; + valve.undo().await?; eprintln!("done."); Ok(()) } -pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Error> { - let config = valve( - table, - database, - &ValveCommand::Config, - false, - false, - "table", - ) - .await?; - let config: SerdeValue = serde_json::from_str(config.as_str()).unwrap(); - let config = config.as_object().unwrap(); - - // To connect to a postgresql database listening to a unix domain socket: - // ---------------------------------------------------------------------- - // let connection_options = - // AnyConnectOptions::from_str("postgres:///testdb?host=/var/run/postgresql")?; - // - // To query the connection type at runtime via the pool: - // ----------------------------------------------------- - // let db_type = pool.any_kind(); - - let connection_options; - if database.starts_with("postgresql://") { - connection_options = AnyConnectOptions::from_str(database)?; - } else { - let connection_string; - if !database.starts_with("sqlite://") { - connection_string = format!("sqlite://{}?mode=rwc", database); - } else { - connection_string = database.to_string(); - } - connection_options = AnyConnectOptions::from_str(connection_string.as_str()).unwrap(); - } - - let pool = AnyPoolOptions::new() - .max_connections(5) - .connect_with(connection_options) - .await?; - if pool.any_kind() == AnyKind::Sqlite { - sqlx_query("PRAGMA foreign_keys = ON") - .execute(&pool) - .await?; - } - - let parser = StartParser::new(); - let compiled_datatype_conditions = get_compiled_datatype_conditions(&config, &parser); - let parsed_structure_conditions = get_parsed_structure_conditions(&config, &parser); - let compiled_rule_conditions = - get_compiled_rule_conditions(&config, compiled_datatype_conditions.clone(), &parser); - +pub async fn run_api_tests(table: &str, database: &str) -> Result<(), ValveError> { + let valve = Valve::build(table, database, false, false).await?; // NOTE that you must use an external script to fetch the data from the database and run a diff // against a known good sample to verify that these tests yield the expected results: - test_undo_redo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - ) - .await?; - 
test_matching( - &config, - &compiled_datatype_conditions, - &parsed_structure_conditions, - &pool, - ) - .await?; - test_idempotent_validate_and_update( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - ) - .await?; - test_validate_and_insert_1( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - ) - .await?; - test_validate_and_update( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - ) - .await?; - test_validate_and_insert_2( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - ) - .await?; - test_dependencies( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - ) - .await?; - test_randomized_api_test_with_undo_redo( - &config, - &compiled_datatype_conditions, - &compiled_rule_conditions, - &pool, - ) - .await?; + test_matching(&valve).await?; + test_idempotent_validate_and_update(&valve).await?; + test_validate_and_insert_1(&valve).await?; + test_validate_and_update(&valve).await?; + test_validate_and_insert_2(&valve).await?; + test_dependencies(&valve).await?; + test_undo_redo(&valve).await?; + test_randomized_api_test_with_undo_redo(&valve).await?; + + // TODO: Add some tests for the new API functions like save. Ok(()) } diff --git a/src/lib.rs b/src/lib.rs index 355cfc4e..6e0a8622 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,6 +12,9 @@ //! ``` //! to see command line options. //! +//! ## API +//! See [Valve] +//! //! ## Python bindings //! See [valve.py](https://github.com/ontodev/valve.py) @@ -20,17 +23,24 @@ extern crate lalrpop_util; pub mod ast; pub mod validate; +pub mod valve; lalrpop_mod!(pub valve_grammar); -use crate::validate::{ - validate_row, validate_rows_constraints, validate_rows_intra, validate_rows_trees, - validate_tree_foreign_keys, validate_under, QueryAsIf, QueryAsIfKind, ResultRow, +use crate::{ + ast::Expression, + validate::{ + validate_row_tx, validate_rows_constraints, validate_rows_intra, validate_rows_trees, + QueryAsIf, QueryAsIfKind, ResultRow, + }, + valve::ValveError, + valve::ValveRow, + valve_grammar::StartParser, }; -use crate::{ast::Expression, valve_grammar::StartParser}; use async_recursion::async_recursion; use chrono::Utc; use crossbeam; +use csv::{ReaderBuilder, StringRecord, StringRecordsIter}; use futures::executor::block_on; use indexmap::IndexMap; use indoc::indoc; @@ -45,9 +55,7 @@ use regex::Regex; use serde_json::{json, Value as SerdeValue}; use sqlx::{ any::{AnyConnectOptions, AnyKind, AnyPool, AnyPoolOptions, AnyRow}, - query as sqlx_query, Acquire, Column, - Error::Configuration as SqlxCErr, - Row, Transaction, ValueRef, + query as sqlx_query, Acquire, Column, Row, Transaction, ValueRef, }; use std::{ collections::{BTreeMap, HashMap}, @@ -72,16 +80,43 @@ static MULTI_THREADED: bool = true; static SQL_PARAM: &str = "VALVEPARAM"; lazy_static! { - static ref PG_SQL_TYPES: Vec<&'static str> = - vec!["text", "varchar", "numeric", "integer", "real"]; - static ref SL_SQL_TYPES: Vec<&'static str> = vec!["text", "numeric", "integer", "real"]; + static ref SQL_TYPES: Vec<&'static str> = vec!["text", "varchar", "numeric", "integer", "real"]; } -/// An alias for [serde_json::Map](..//serde_json/struct.Map.html). +/// Alias for [serde_json::Map](..//serde_json/struct.Map.html). 
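+/// (i.e., a JSON object mapping String keys to SerdeValue values).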
// Note: serde_json::Map is
// [backed by a BTreeMap by default](https://docs.serde.rs/serde_json/map/index.html)
pub type SerdeMap = serde_json::Map<String, SerdeValue>;

+// TODO: Possibly replace these with the tracing library (see nanobot.rs).
+/// Write a debugging message to STDERR.
+#[macro_export]
+macro_rules! debug {
+    () => (eprintln!());
+    ($($arg:tt)*) => (eprintln!("{} - DEBUG {}", Utc::now(), format_args!($($arg)*)));
+}
+
+/// Write an information message to STDERR.
+#[macro_export]
+macro_rules! info {
+    () => (eprintln!());
+    ($($arg:tt)*) => (eprintln!("{} - INFO {}", Utc::now(), format_args!($($arg)*)));
+}
+
+/// Write a warning message to STDERR.
+#[macro_export]
+macro_rules! warn {
+    () => (eprintln!());
+    ($($arg:tt)*) => (eprintln!("{} - WARN {}", Utc::now(), format_args!($($arg)*)));
+}
+
+/// Write an error message to STDERR.
+#[macro_export]
+macro_rules! error {
+    () => (eprintln!());
+    ($($arg:tt)*) => (eprintln!("{} - ERROR {}", Utc::now(), format_args!($($arg)*)));
+}
+
 /// Represents a structure such as those found in the `structure` column of the `column` table in
 /// both its parsed format (i.e., as an [Expression](ast/enum.Expression.html)) as well as in its
 /// original format (i.e., as a plain String).
@@ -145,13 +180,47 @@ impl std::fmt::Debug for ColumnRule {
     }
 }

+/// Given a string representing the location of a database, return a database connection pool.
+pub async fn get_pool_from_connection_string(database: &str) -> Result<AnyPool, ValveError> {
+    let connection_options;
+    if database.starts_with("postgresql://") {
+        connection_options = AnyConnectOptions::from_str(database)?;
+    } else {
+        let connection_string;
+        if !database.starts_with("sqlite://") {
+            connection_string = format!("sqlite://{}?mode=rwc", database);
+        } else {
+            connection_string = database.to_string();
+        }
+        connection_options = AnyConnectOptions::from_str(connection_string.as_str())?;
+    }
+
+    let pool = AnyPoolOptions::new()
+        // TODO: Make max_connections configurable.
+        .max_connections(5)
+        .connect_with(connection_options)
+        .await?;
+    Ok(pool)
+}
+
 /// Given the path to a configuration table (either a table.tsv file or a database containing a
 /// table named "table"), load and check the 'table', 'column', and 'datatype' tables, and return
-/// SerdeMaps corresponding to specials, tables, datatypes, and rules.
+/// SerdeMaps corresponding to specials, tables, datatypes, rules, and constraints, along with a
+/// vector containing the names of the tables in the database in sorted order and two maps
+/// describing the incoming and outgoing dependencies of each table.
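+///
+/// A sketch of the intended call pattern (a hypothetical async caller; the
+/// parser and pool are built the same way Valve::build() builds them):
+///
+/// ```no_run
+/// let parser = StartParser::new();
+/// let pool = get_pool_from_connection_string("build/valve.db").await?;
+/// let (specials, tables, datatypes, rules, constraints, sorted_tables, deps_in, deps_out) =
+///     read_config_files("test/src/table.tsv", &parser, &pool);
+/// ```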
pub fn read_config_files(
     path: &str,
-    config_table: &str,
-) -> (SerdeMap, SerdeMap, SerdeMap, SerdeMap) {
+    parser: &StartParser,
+    pool: &AnyPool,
+) -> (
+    SerdeMap,
+    SerdeMap,
+    SerdeMap,
+    SerdeMap,
+    SerdeMap,
+    Vec<String>,
+    HashMap<String, Vec<String>>,
+    HashMap<String, Vec<String>>,
+) {
     let special_table_types = json!({
         "table": {"required": true},
         "column": {"required": true},
@@ -174,7 +243,7 @@ pub fn read_config_files(
         if path.to_lowercase().ends_with(".tsv") {
             read_tsv_into_vector(path)
         } else {
-            read_db_table_into_vector(path, config_table)
+            read_db_table_into_vector(path, "table")
         }
     };
@@ -292,13 +361,7 @@ pub fn read_config_files(
     let mut datatypes_config = SerdeMap::new();
     let rows = get_special_config("datatype", &specials_config, &tables_config, path);
     for mut row in rows {
-        for column in vec![
-            "datatype",
-            "parent",
-            "condition",
-            "SQLite type",
-            "PostgreSQL type",
-        ] {
+        for column in vec!["datatype", "parent", "condition", "SQL type"] {
             if !row.contains_key(column) || row.get(column) == None {
                 panic!("Missing required column '{}' reading '{}'", column, path);
             }
@@ -310,7 +373,7 @@ pub fn read_config_files(
         }
     }

-        for column in vec!["parent", "condition", "SQLite type", "PostgreSQL type"] {
+        for column in vec!["parent", "condition", "SQL type"] {
             if row.get(column).and_then(|c| c.as_str()).unwrap() == "" {
                 row.remove(&column.to_string());
             }
@@ -423,6 +486,124 @@ pub fn read_config_files(
         }
     }

+    // Initialize the constraints config:
+    let mut constraints_config = SerdeMap::new();
+    constraints_config.insert(String::from("foreign"), SerdeValue::Object(SerdeMap::new()));
+    constraints_config.insert(String::from("unique"), SerdeValue::Object(SerdeMap::new()));
+    constraints_config.insert(String::from("primary"), SerdeValue::Object(SerdeMap::new()));
+    constraints_config.insert(String::from("tree"), SerdeValue::Object(SerdeMap::new()));
+    constraints_config.insert(String::from("under"), SerdeValue::Object(SerdeMap::new()));
+
+    for table_name in tables_config.keys().cloned().collect::<Vec<_>>() {
+        let optional_path = tables_config
+            .get(&table_name)
+            .and_then(|r| r.get("path"))
+            .and_then(|p| p.as_str());
+
+        let mut path = None;
+        match optional_path {
+            None => {
+                // If an entry of the tables_config has no path then it is an internal table which
+                // need not be configured explicitly. Currently the only examples are the message
+                // and history tables.
+                if table_name != "message" && table_name != "history" {
+                    panic!("No path defined for table {}", table_name);
+                }
+                continue;
+            }
+            Some(p) if !Path::new(p).is_file() => {
+                warn!("File does not exist {}", p);
+            }
+            Some(p) if Path::new(p).canonicalize().is_err() => {
+                warn!("File path could not be made canonical {}", p);
+            }
+            Some(p) => path = Some(p.to_string()),
+        };
+
+        let defined_columns: Vec<String> = tables_config
+            .get(&table_name)
+            .and_then(|r| r.get("column"))
+            .and_then(|v| v.as_object())
+            .and_then(|o| Some(o.keys()))
+            .and_then(|k| Some(k.cloned()))
+            .and_then(|k| Some(k.collect()))
+            .unwrap();
+
+        // We use column_order to explicitly indicate the order in which the columns should appear
+        // in the table, for later reference. The default is to preserve the order from the actual
+        // table file. If that does not exist, we use the ordering in defined_columns.
+        let mut column_order = vec![];
+        if let Some(path) = path {
+            // Get the actual columns from the data itself. Note that we set has_headers to
+            // false (even though the files have header rows) in order to explicitly read the
+            // header row.
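+            // (With has_headers set to false, the csv reader treats the header
+            // row as an ordinary first record, so the first call to iter.next()
+            // below yields the actual column names, which are then checked
+            // against the configured columns.)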
+ let mut rdr = ReaderBuilder::new() + .has_headers(false) + .delimiter(b'\t') + .from_reader(File::open(path.clone()).unwrap_or_else(|err| { + panic!("Unable to open '{}': {}", path.clone(), err); + })); + let mut iter = rdr.records(); + if let Some(result) = iter.next() { + let actual_columns = result + .unwrap() + .iter() + .map(|c| c.to_string()) + .collect::>(); + // Make sure that the actual columns found in the table file, and the columns + // defined in the column config, exactly match in terms of their content: + for column_name in &actual_columns { + column_order.push(json!(column_name)); + if !defined_columns.contains(&column_name.to_string()) { + panic!( + "Column '{}.{}' not in column config", + table_name, column_name + ); + } + } + for column_name in &defined_columns { + if !actual_columns.contains(&column_name.to_string()) { + panic!( + "Defined column '{}.{}' not found in table", + table_name, column_name + ); + } + } + } else { + panic!("'{}' is empty", path); + } + } + + if column_order.is_empty() { + column_order = defined_columns.iter().map(|c| json!(c)).collect::>(); + } + tables_config + .get_mut(&table_name) + .and_then(|t| t.as_object_mut()) + .and_then(|o| { + o.insert( + String::from("column_order"), + SerdeValue::Array(column_order), + ) + }); + + // Populate the constraints config: + let table_constraints = get_table_constraints( + &mut tables_config, + &mut datatypes_config, + parser, + &table_name, + &pool, + ); + for constraint_type in vec!["foreign", "unique", "primary", "tree", "under"] { + let table_constraints = table_constraints.get(constraint_type).unwrap().clone(); + constraints_config + .get_mut(constraint_type) + .and_then(|o| o.as_object_mut()) + .and_then(|o| o.insert(table_name.to_string(), table_constraints)); + } + } + // Manually add the messsage table config: tables_config.insert( "message".to_string(), @@ -505,7 +686,10 @@ pub fn read_config_files( "row", "from", "to", + "summary", "user", + "undone_by", + "timestamp", ], "column": { "table": { @@ -557,16 +741,43 @@ pub fn read_config_files( "datatype": "line", "structure": "", }, + "timestamp": { + "table": "history", + "column": "timestamp", + "description": "The time of the change, or of the undo.", + "datatype": "line", + "structure": "", + }, + } }), ); + // Sort the tables (aside from the message and history tables) according to their foreign key + // dependencies so that tables are always loaded after the tables they depend on. + let (sorted_tables, table_dependencies_in, table_dependencies_out) = verify_table_deps_and_sort( + &tables_config + .keys() + .cloned() + // We are filtering out history and message here because the fact that all of the table + // views depend on them is not reflected in the constraints configuration. They will be + // taken account of within verify_table_deps_and_sort() and manually added to the sorted + // table list that is returned. + .filter(|m| m != "history" && m != "message") + .collect(), + &constraints_config, + ); + // Finally, return all the configs: ( specials_config, tables_config, datatypes_config, rules_config, + constraints_config, + sorted_tables, + table_dependencies_in, + table_dependencies_out, ) } @@ -731,11 +942,9 @@ pub fn get_parsed_structure_conditions( /// contained in the message and history tables. The SQL generated is in the form of a tuple of /// Strings, with the first string being a SQL statement for dropping the view, and the second /// string being a SQL statement for creating it. 
-fn get_sql_for_standard_view(table: &str, pool: &AnyPool) -> (String, String) { - let mut drop_view_sql = format!(r#"DROP VIEW IF EXISTS "{}_view""#, table); +pub fn get_sql_for_standard_view(table: &str, pool: &AnyPool) -> String { let message_t; if pool.any_kind() == AnyKind::Postgres { - drop_view_sql.push_str(" CASCADE"); message_t = format!( indoc! {r#" ( @@ -775,7 +984,6 @@ fn get_sql_for_standard_view(table: &str, pool: &AnyPool) -> (String, String) { t = table, ); } - drop_view_sql.push_str(";"); let history_t; if pool.any_kind() == AnyKind::Postgres { @@ -834,7 +1042,7 @@ fn get_sql_for_standard_view(table: &str, pool: &AnyPool) -> (String, String) { history_t = history_t, ); - (drop_view_sql, create_view_sql) + create_view_sql } /// Given the tables configuration map, the name of a table and a database connection pool, @@ -845,11 +1053,7 @@ fn get_sql_for_standard_view(table: &str, pool: &AnyPool) -> (String, String) { /// errors. Like the function for generating a standard view, the SQL generated by this function is /// returned in the form of a tuple of Strings, with the first string being a SQL statement /// for dropping the view, and the second string being a SQL statement for creating it. -fn get_sql_for_text_view( - tables_config: &mut SerdeMap, - table: &str, - pool: &AnyPool, -) -> (String, String) { +pub fn get_sql_for_text_view(tables_config: &SerdeMap, table: &str, pool: &AnyPool) -> String { let is_clause = if pool.any_kind() == AnyKind::Sqlite { "IS" } else { @@ -869,11 +1073,6 @@ fn get_sql_for_text_view( // Add a second "text view" such that the datatypes of all values are TEXT and appear // directly in their corresponsing columns (rather than as NULLs) even when they have // SQL datatype errors. - let mut drop_view_sql = format!(r#"DROP VIEW IF EXISTS "{}_text_view""#, table); - if pool.any_kind() == AnyKind::Postgres { - drop_view_sql.push_str(" CASCADE"); - } - let mut inner_columns = real_columns .iter() .map(|c| { @@ -883,469 +1082,62 @@ fn get_sql_for_text_view( SELECT value FROM "message" WHERE "row" = "row_number" - AND "column" = '{column}' - AND "table" = '{table}' - ORDER BY "message_id" DESC - LIMIT 1 - ) - ELSE {casted_column} - END AS "{column}""#, - casted_column = if pool.any_kind() == AnyKind::Sqlite { - cast_column_sql_to_text(c, "non-text") - } else { - format!("\"{}\"::TEXT", c) - }, - column = c, - table = table, - ) - }) - .collect::>(); - - let mut outer_columns = real_columns - .iter() - .map(|c| format!("t.\"{}\"", c)) - .collect::>(); - - let inner_columns = { - let mut v = vec![ - "row_number".to_string(), - "message".to_string(), - "history".to_string(), - ]; - v.append(&mut inner_columns); - v - }; - - let outer_columns = { - let mut v = vec![ - "t.row_number".to_string(), - "t.message".to_string(), - "t.history".to_string(), - ]; - v.append(&mut outer_columns); - v - }; - - let create_view_sql = format!( - r#"CREATE VIEW "{table}_text_view" AS - SELECT {outer_columns} - FROM ( - SELECT {inner_columns} - FROM "{table}_view" - ) t"#, - outer_columns = outer_columns.join(", "), - inner_columns = inner_columns.join(", "), - table = table, - ); - - (drop_view_sql, create_view_sql) -} - -/// Given config maps for tables and datatypes, a database connection pool, and a StartParser, -/// read in the TSV files corresponding to the tables defined in the tables config, and use that -/// information to fill in constraints information into a new config map that is then returned along -/// with a list of the tables in the database sorted according 
to their mutual dependencies. If -/// the flag `verbose` is set to true, emit SQL to create the database schema to STDOUT. -/// If `command` is set to [ValveCommand::Create], execute the SQL statements to create the -/// database using the given connection pool. If it is set to [ValveCommand::Load], execute the SQL -/// to load it as well. -pub async fn configure_db( - tables_config: &mut SerdeMap, - datatypes_config: &mut SerdeMap, - pool: &AnyPool, - parser: &StartParser, - verbose: bool, - command: &ValveCommand, -) -> Result<(Vec, SerdeMap), sqlx::Error> { - // This is the SerdeMap that we will be returning: - let mut constraints_config = SerdeMap::new(); - constraints_config.insert(String::from("foreign"), SerdeValue::Object(SerdeMap::new())); - constraints_config.insert(String::from("unique"), SerdeValue::Object(SerdeMap::new())); - constraints_config.insert(String::from("primary"), SerdeValue::Object(SerdeMap::new())); - constraints_config.insert(String::from("tree"), SerdeValue::Object(SerdeMap::new())); - constraints_config.insert(String::from("under"), SerdeValue::Object(SerdeMap::new())); - - // Begin by reading in the TSV files corresponding to the tables defined in tables_config, and - // use that information to create the associated database tables, while saving constraint - // information to constrains_config. - let mut setup_statements = HashMap::new(); - for table_name in tables_config.keys().cloned().collect::>() { - let optional_path = tables_config - .get(&table_name) - .and_then(|r| r.get("path")) - .and_then(|p| p.as_str()); - - let mut path = None; - match optional_path { - None => { - // If an entry of the tables_config has no path then it is an internal table which - // need not be configured explicitly. Currently the only examples are the message - // and history tables. - if table_name != "message" && table_name != "history" { - panic!("No path defined for table {}", table_name); - } - continue; - } - Some(p) if !Path::new(p).is_file() => { - eprintln!("WARN: File does not exist {}", p); - } - Some(p) if Path::new(p).canonicalize().is_err() => { - eprintln!("WARN: File path could not be made canonical {}", p); - } - Some(p) => path = Some(p.to_string()), - }; - - let defined_columns: Vec = tables_config - .get(&table_name) - .and_then(|r| r.get("column")) - .and_then(|v| v.as_object()) - .and_then(|o| Some(o.keys())) - .and_then(|k| Some(k.cloned())) - .and_then(|k| Some(k.collect())) - .unwrap(); - - // We use column_order to explicitly indicate the order in which the columns should appear - // in the table, for later reference. The default is to preserve the order from the actual - // table file. If that does not exist, we use the ordering in defined_columns. - let mut column_order = vec![]; - if let Some(path) = path { - // Get the actual columns from the data itself. Note that we set has_headers to - // false(even though the files have header rows) in order to explicitly read the - // header row. 
- let mut rdr = csv::ReaderBuilder::new() - .has_headers(false) - .delimiter(b'\t') - .from_reader(File::open(path.clone()).unwrap_or_else(|err| { - panic!("Unable to open '{}': {}", path.clone(), err); - })); - let mut iter = rdr.records(); - if let Some(result) = iter.next() { - let actual_columns = result - .unwrap() - .iter() - .map(|c| c.to_string()) - .collect::>(); - // Make sure that the actual columns found in the table file, and the columns - // defined in the column config, exactly match in terms of their content: - for column_name in &actual_columns { - column_order.push(json!(column_name)); - if !defined_columns.contains(&column_name.to_string()) { - panic!( - "Column '{}.{}' not in column config", - table_name, column_name - ); - } - } - for column_name in &defined_columns { - if !actual_columns.contains(&column_name.to_string()) { - panic!( - "Defined column '{}.{}' not found in table", - table_name, column_name - ); - } - } - } else { - panic!("'{}' is empty", path); - } - } - - if column_order.is_empty() { - column_order = defined_columns.iter().map(|c| json!(c)).collect::>(); - } - tables_config - .get_mut(&table_name) - .and_then(|t| t.as_object_mut()) - .and_then(|o| { - o.insert( - String::from("column_order"), - SerdeValue::Array(column_order), - ) - }); - - // Create the table and its corresponding conflict table: - let mut table_statements = vec![]; - for table in vec![table_name.to_string(), format!("{}_conflict", table_name)] { - let (mut statements, table_constraints) = - create_table_statement(tables_config, datatypes_config, parser, &table, &pool); - table_statements.append(&mut statements); - if !table.ends_with("_conflict") { - for constraint_type in vec!["foreign", "unique", "primary", "tree", "under"] { - let table_constraints = table_constraints.get(constraint_type).unwrap().clone(); - constraints_config - .get_mut(constraint_type) - .and_then(|o| o.as_object_mut()) - .and_then(|o| o.insert(table_name.to_string(), table_constraints)); - } - } - } - - let (drop_view_sql, create_view_sql) = get_sql_for_standard_view(&table_name, pool); - let (drop_text_view_sql, create_text_view_sql) = - get_sql_for_text_view(tables_config, &table_name, pool); - table_statements.push(drop_text_view_sql); - table_statements.push(drop_view_sql); - table_statements.push(create_view_sql); - table_statements.push(create_text_view_sql); - - setup_statements.insert(table_name.to_string(), table_statements); - } - - // Sort the tables according to their foreign key dependencies so that tables are always loaded - // after the tables they depend on. Ignore the internal message and history tables: - let sorted_tables = verify_table_deps_and_sort( - &setup_statements.keys().cloned().collect(), - &constraints_config, - ); - - if *command != ValveCommand::Config || verbose { - // Generate DDL for the history table: - let mut history_statements = vec![]; - history_statements.push({ - let mut sql = r#"DROP TABLE IF EXISTS "history""#.to_string(); - if pool.any_kind() == AnyKind::Postgres { - sql.push_str(" CASCADE"); - } - sql.push_str(";"); - sql - }); - history_statements.push(format!( - indoc! 
{r#" - CREATE TABLE "history" ( - {row_number} - "table" TEXT, - "row" BIGINT, - "from" TEXT, - "to" TEXT, - "summary" TEXT, - "user" TEXT, - "undone_by" TEXT, - {timestamp} - ); - "#}, - row_number = { - if pool.any_kind() == AnyKind::Sqlite { - "\"history_id\" INTEGER PRIMARY KEY," - } else { - "\"history_id\" SERIAL PRIMARY KEY," - } - }, - timestamp = { - if pool.any_kind() == AnyKind::Sqlite { - "\"timestamp\" TIMESTAMP DEFAULT(STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW'))" - } else { - "\"timestamp\" TIMESTAMP DEFAULT CURRENT_TIMESTAMP" - } - }, - )); - history_statements - .push(r#"CREATE INDEX "history_tr_idx" ON "history"("table", "row");"#.to_string()); - setup_statements.insert("history".to_string(), history_statements); - - // Generate DDL for the message table: - let mut message_statements = vec![]; - message_statements.push({ - let mut sql = r#"DROP TABLE IF EXISTS "message""#.to_string(); - if pool.any_kind() == AnyKind::Postgres { - sql.push_str(" CASCADE"); - } - sql.push_str(";"); - sql - }); - message_statements.push(format!( - indoc! {r#" - CREATE TABLE "message" ( - {} - "table" TEXT, - "row" BIGINT, - "column" TEXT, - "value" TEXT, - "level" TEXT, - "rule" TEXT, - "message" TEXT - ); - "#}, - { - if pool.any_kind() == AnyKind::Sqlite { - "\"message_id\" INTEGER PRIMARY KEY," - } else { - "\"message_id\" SERIAL PRIMARY KEY," - } - }, - )); - message_statements.push( - r#"CREATE INDEX "message_trc_idx" ON "message"("table", "row", "column");"#.to_string(), - ); - setup_statements.insert("message".to_string(), message_statements); - - // Add the message and history tables to the beginning of the list of tables to create - // (the message table in particular needs to be at the beginning since the table views all - // reference it). - let mut tables_to_create = vec!["message".to_string(), "history".to_string()]; - tables_to_create.append(&mut sorted_tables.clone()); - for table in &tables_to_create { - let table_statements = setup_statements.get(table).unwrap(); - if *command != ValveCommand::Config { - for stmt in table_statements { - sqlx_query(stmt) - .execute(pool) - .await - .expect(format!("The SQL statement: {} returned an error", stmt).as_str()); - } - } - if verbose { - let output = String::from(table_statements.join("\n")); - println!("{}\n", output); - } - } - } - - return Ok((sorted_tables, constraints_config)); -} - -/// Various VALVE commands, used with [valve()](valve). -#[derive(Debug, PartialEq, Eq)] -pub enum ValveCommand { - /// Configure but do not create or load. - Config, - /// Configure and create but do not load. - Create, - /// Configure, create, and load. - Load, -} - -/// Given a path to a configuration table (either a table.tsv file or a database containing a -/// table named "table"), and a directory in which to find/create a database: configure the -/// database using the configuration which can be looked up using the table table, and -/// optionally create and/or load it according to the value of `command` (see [ValveCommand]). -/// If the `verbose` flag is set to true, output status messages while loading. If `config_table` -/// is given and `table_table` indicates a database, query the table called `config_table` for the -/// table table information. Returns the configuration map as a String. If `initial_load` is set to -/// true, then (SQLite only) the database settings will be tuned for initial loading. 
Note that -/// these settings are unsafe and should be used for initial loading only, as data integrity will -/// not be guaranteed in the case of an interrupted transaction. -pub async fn valve( - table_table: &str, - database: &str, - command: &ValveCommand, - verbose: bool, - initial_load: bool, - config_table: &str, -) -> Result { - let parser = StartParser::new(); - - let (specials_config, mut tables_config, mut datatypes_config, rules_config) = - read_config_files(&table_table.to_string(), config_table); - - // To connect to a postgresql database listening to a unix domain socket: - // ---------------------------------------------------------------------- - // let connection_options = - // AnyConnectOptions::from_str("postgres:///testdb?host=/var/run/postgresql")?; - // - // To query the connection type at runtime via the pool: - // ----------------------------------------------------- - // let db_type = pool.any_kind(); - - let connection_options; - if database.starts_with("postgresql://") { - connection_options = AnyConnectOptions::from_str(database)?; - } else { - let connection_string; - if !database.starts_with("sqlite://") { - connection_string = format!("sqlite://{}?mode=rwc", database); - } else { - connection_string = database.to_string(); - } - connection_options = AnyConnectOptions::from_str(connection_string.as_str()).unwrap(); - } - - let pool = AnyPoolOptions::new() - .max_connections(5) - .connect_with(connection_options) - .await?; - if *command == ValveCommand::Load && pool.any_kind() == AnyKind::Sqlite { - sqlx_query("PRAGMA foreign_keys = ON") - .execute(&pool) - .await?; - if initial_load { - // These pragmas are unsafe but they are used during initial loading since data - // integrity is not a priority in this case. - sqlx_query("PRAGMA journal_mode = OFF") - .execute(&pool) - .await?; - sqlx_query("PRAGMA synchronous = 0").execute(&pool).await?; - sqlx_query("PRAGMA cache_size = 1000000") - .execute(&pool) - .await?; - sqlx_query("PRAGMA temp_store = MEMORY") - .execute(&pool) - .await?; - } - } + AND "column" = '{column}' + AND "table" = '{table}' + ORDER BY "message_id" DESC + LIMIT 1 + ) + ELSE {casted_column} + END AS "{column}""#, + casted_column = if pool.any_kind() == AnyKind::Sqlite { + cast_column_sql_to_text(c, "non-text") + } else { + format!("\"{}\"::TEXT", c) + }, + column = c, + table = table, + ) + }) + .collect::>(); - let (sorted_table_list, constraints_config) = configure_db( - &mut tables_config, - &mut datatypes_config, - &pool, - &parser, - verbose, - command, - ) - .await?; + let mut outer_columns = real_columns + .iter() + .map(|c| format!("t.\"{}\"", c)) + .collect::>(); - let mut config = SerdeMap::new(); - config.insert( - String::from("special"), - SerdeValue::Object(specials_config.clone()), - ); - config.insert( - String::from("table"), - SerdeValue::Object(tables_config.clone()), - ); - config.insert( - String::from("datatype"), - SerdeValue::Object(datatypes_config.clone()), - ); - config.insert( - String::from("rule"), - SerdeValue::Object(rules_config.clone()), - ); - config.insert( - String::from("constraints"), - SerdeValue::Object(constraints_config.clone()), - ); - let mut sorted_table_serdevalue_list: Vec = vec![]; - for table in &sorted_table_list { - sorted_table_serdevalue_list.push(SerdeValue::String(table.to_string())); - } - config.insert( - String::from("sorted_table_list"), - SerdeValue::Array(sorted_table_serdevalue_list), - ); + let inner_columns = { + let mut v = vec![ + "row_number".to_string(), + 
"message".to_string(), + "history".to_string(), + ]; + v.append(&mut inner_columns); + v + }; - let compiled_datatype_conditions = get_compiled_datatype_conditions(&config, &parser); - let compiled_rule_conditions = - get_compiled_rule_conditions(&config, compiled_datatype_conditions.clone(), &parser); + let outer_columns = { + let mut v = vec![ + "t.row_number".to_string(), + "t.message".to_string(), + "t.history".to_string(), + ]; + v.append(&mut outer_columns); + v + }; - if *command == ValveCommand::Load { - if verbose { - eprintln!( - "{} - Processing {} tables.", - Utc::now(), - sorted_table_list.len() - ); - } - load_db( - &config, - &pool, - &compiled_datatype_conditions, - &compiled_rule_conditions, - verbose, - ) - .await?; - } + let create_view_sql = format!( + r#"CREATE VIEW "{table}_text_view" AS + SELECT {outer_columns} + FROM ( + SELECT {inner_columns} + FROM "{table}_view" + ) t"#, + outer_columns = outer_columns.join(", "), + inner_columns = inner_columns.join(", "), + table = table, + ); - let config = SerdeValue::Object(config); - Ok(config.to_string()) + create_view_sql } /// Given a table name, a column name, and a database pool, construct an SQL string to extract the @@ -1444,7 +1236,7 @@ pub async fn get_affected_rows( global_config: &SerdeMap, pool: &AnyPool, tx: &mut Transaction<'_, sqlx::Any>, -) -> Result, String> { +) -> Result, ValveError> { // Since the consequence of an update could involve currently invalid rows // (in the conflict table) becoming valid or vice versa, we need to check rows for // which the value of the column is the same as `value` @@ -1465,12 +1257,8 @@ pub async fn get_affected_rows( let query = sqlx_query(&sql); let mut table_rows = IndexMap::new(); - for row in query - .fetch_all(tx.acquire().await.map_err(|e| e.to_string())?) - .await - .map_err(|e| e.to_string())? - { - let mut table_row = SerdeMap::new(); + for row in query.fetch_all(tx.acquire().await?).await? { + let mut table_row = ValveRow::new(); let mut row_number: Option = None; for column in row.columns() { let cname = column.name(); @@ -1492,7 +1280,8 @@ pub async fn get_affected_rows( table_row.insert(cname.to_string(), json!(cell)); } } - let row_number = row_number.ok_or("Row: has no row number".to_string())?; + let row_number = + row_number.ok_or(ValveError::DataError("Row: has no row number".to_string()))?; table_rows.insert(row_number, table_row); } @@ -1508,7 +1297,7 @@ pub async fn get_row_from_db( tx: &mut Transaction<'_, sqlx::Any>, table: &str, row_number: &u32, -) -> Result { +) -> Result { let sql = format!( "{} WHERE row_number = {}", query_with_message_values(table, global_config, pool), @@ -1517,7 +1306,7 @@ pub async fn get_row_from_db( let query = sqlx_query(&sql); let rows = query.fetch_all(tx.acquire().await?).await?; if rows.len() == 0 { - return Err(SqlxCErr( + return Err(ValveError::DataError( format!( "In get_row_from_db(). 
No rows found for row_number: {}", row_number @@ -1534,14 +1323,18 @@ pub async fn get_row_from_db( } else { let messages: &str = sql_row.get("message"); match serde_json::from_str::(messages) { - Err(e) => return Err(SqlxCErr(e.into())), + Err(e) => return Err(ValveError::SerdeJsonError(e.into())), Ok(SerdeValue::Array(m)) => m, - _ => return Err(SqlxCErr(format!("{} is not an array.", messages).into())), + _ => { + return Err(ValveError::DataError( + format!("{} is not an array.", messages).into(), + )) + } } } }; - let mut row = SerdeMap::new(); + let mut row = ValveRow::new(); for column in sql_row.columns() { let cname = column.name(); if !vec!["row_number", "message"].contains(&cname) { @@ -1582,7 +1375,7 @@ pub async fn get_db_value( row_number: &u32, pool: &AnyPool, tx: &mut Transaction<'_, sqlx::Any>, -) -> Result { +) -> Result { let is_clause = if pool.any_kind() == AnyKind::Sqlite { "IS" } else { @@ -1616,14 +1409,14 @@ pub async fn get_db_value( ); let query = sqlx_query(&sql); - let rows = query - .fetch_all(tx.acquire().await.map_err(|e| e.to_string())?) - .await - .map_err(|e| e.to_string())?; + let rows = query.fetch_all(tx.acquire().await?).await?; if rows.len() == 0 { - return Err(format!( - "In get_db_value(). No rows found for row_number: {}", - row_number + return Err(ValveError::DataError( + format!( + "In get_db_value(). No rows found for row_number: {}", + row_number + ) + .into(), )); } let result_row = &rows[0]; @@ -1647,16 +1440,19 @@ pub async fn get_rows_to_update( IndexMap>, IndexMap>, ), - String, + ValveError, > { - fn get_cell_value(row: &SerdeMap, column: &str) -> Result { + fn get_cell_value(row: &ValveRow, column: &str) -> Result { match row.get(column).and_then(|cell| cell.get("value")) { Some(SerdeValue::String(s)) => Ok(format!("{}", s)), Some(SerdeValue::Number(n)) => Ok(format!("{}", n)), Some(SerdeValue::Bool(b)) => Ok(format!("{}", b)), - _ => Err(format!( - "Value missing or of unknown type in column {} of row to update: {:?}", - column, row + _ => Err(ValveError::DataError( + format!( + "Value missing or of unknown type in column {} of row to update: {:?}", + column, row + ) + .into(), )), } } @@ -1695,8 +1491,8 @@ pub async fn get_rows_to_update( let updates_before = match query_as_if.kind { QueryAsIfKind::Add => { if let None = query_as_if.row { - eprintln!( - "WARN: No row in query_as_if: {:?} for {:?}", + warn!( + "No row in query_as_if: {:?} for {:?}", query_as_if, query_as_if.kind ); } @@ -1730,8 +1526,8 @@ pub async fn get_rows_to_update( let updates_after = match &query_as_if.row { None => { if query_as_if.kind != QueryAsIfKind::Remove { - eprintln!( - "WARN: No row in query_as_if: {:?} for {:?}", + warn!( + "No row in query_as_if: {:?} for {:?}", query_as_if, query_as_if.kind ); } @@ -1805,8 +1601,8 @@ pub async fn get_rows_to_update( let updates = match query_as_if.kind { QueryAsIfKind::Add => { if let None = query_as_if.row { - eprintln!( - "WARN: No row in query_as_if: {:?} for {:?}", + warn!( + "No row in query_as_if: {:?} for {:?}", query_as_if, query_as_if.kind ); } @@ -1856,11 +1652,11 @@ pub async fn process_updates( updates: &IndexMap>, query_as_if: &QueryAsIf, do_not_recurse: bool, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { for (update_table, rows_to_update) in updates { for (row_number, row) in rows_to_update { // Validate each row 'counterfactually': - let vrow = validate_row( + let vrow = validate_row_tx( global_config, compiled_datatype_conditions, compiled_rule_conditions, @@ -1900,18 +1696,18 @@ pub 
async fn record_row_change( tx: &mut Transaction<'_, sqlx::Any>, table: &str, row_number: &u32, - from: Option<&SerdeMap>, - to: Option<&SerdeMap>, + from: Option<&ValveRow>, + to: Option<&ValveRow>, user: &str, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { if let (None, None) = (from, to) { - return Err(SqlxCErr( + return Err(ValveError::InputError( "Arguments 'from' and 'to' to function record_row_change() cannot both be None".into(), )); } - fn to_text(smap: Option<&SerdeMap>, quoted: bool) -> String { - match smap { + fn to_text(row: Option<&ValveRow>, quoted: bool) -> String { + match row { None => "NULL".to_string(), Some(r) => { let inner = format!("{}", json!(r)).replace("'", "''"); @@ -1932,7 +1728,7 @@ pub async fn record_row_change( } } - fn summarize(from: Option<&SerdeMap>, to: Option<&SerdeMap>) -> Result { + fn summarize(from: Option<&ValveRow>, to: Option<&ValveRow>) -> Result { // Constructs a summary of the form: // { // "column":"bar", @@ -1955,7 +1751,9 @@ pub async fn record_row_change( SerdeValue::Bool(b) => Some(format!("{}", b)), _ => None, }) - .ok_or(format!("No value in {}", cell))?; + .ok_or(ValveError::DataError( + format!("No value in {}", cell).into(), + ))?; let new_value = to .get(column) .and_then(|v| v.get("value")) @@ -1965,7 +1763,9 @@ pub async fn record_row_change( SerdeValue::Bool(b) => Some(format!("{}", b)), _ => None, }) - .ok_or(format!("No value for column: {} in {:?}", column, to))?; + .ok_or(ValveError::DataError( + format!("No value for column: {} in {:?}", column, to).into(), + ))?; if new_value != old_value { let mut column_summary = SerdeMap::new(); column_summary.insert("column".to_string(), json!(column)); @@ -1989,7 +1789,7 @@ pub async fn record_row_change( } } - let summary = summarize(from, to).map_err(|e| SqlxCErr(e.into()))?; + let summary = summarize(from, to)?; let (from, to) = (to_text(from, true), to_text(to, true)); let sql = format!( r#"INSERT INTO "history" ("table", "row", "from", "to", "summary", "user") @@ -2003,18 +1803,18 @@ pub async fn record_row_change( } /// Given a row and a column name, extract the contents of the row as a JSON object and return it. -fn get_json_from_row(row: &AnyRow, column: &str) -> Option { +pub fn get_json_from_row(row: &AnyRow, column: &str) -> Option { let raw_value = row.try_get_raw(column).unwrap(); if !raw_value.is_null() { let value: &str = row.get(column); match serde_json::from_str::(value) { Err(e) => { - eprintln!("WARN: {}", e); + warn!("{}", e); None } Ok(SerdeValue::Object(value)) => Some(value), _ => { - eprintln!("WARN: {} is not an object.", value); + warn!("{} is not an object.", value); None } } @@ -2028,13 +1828,13 @@ fn get_json_from_row(row: &AnyRow, column: &str) -> Option { /// (otherwise). When setting the record to undone, user is used for the 'undone_by' field of the /// history table, otherwise undone_by is set to NULL and the user is indicated as the one /// responsible for the change (instead of whoever made the change originally). -async fn switch_undone_state( +pub async fn switch_undone_state( user: &str, history_id: u16, undone_state: bool, tx: &mut Transaction<'_, sqlx::Any>, pool: &AnyPool, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { // Set the history record to undone: let timestamp = { if pool.any_kind() == AnyKind::Sqlite { @@ -2060,259 +1860,9 @@ async fn switch_undone_state( Ok(()) } -/// Given a database pool fetch the last row inserted to the history table that has not been undone. 
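
// What summarize() above computes, reduced to a standalone sketch: walk the old
// and new versions of a row cell by cell and record every column whose "value"
// differs. Plain serde_json maps stand in for ValveRow here, and the summary
// fields shown are illustrative only; the real entries carry additional fields.

use serde_json::{json, Map, Value};

fn summarize_changes(from: &Map<String, Value>, to: &Map<String, Value>) -> Vec<Value> {
    let mut summary = vec![];
    for (column, old_cell) in from {
        // Each cell is an object of the form {"value": ..., "valid": ..., "messages": [...]}:
        let old_value = old_cell.get("value");
        let new_value = to.get(column).and_then(|cell| cell.get("value"));
        if old_value != new_value {
            summary.push(json!({
                "column": column,
                "old_value": old_value,
                "value": new_value,
            }));
        }
    }
    summary
}
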
-pub async fn get_record_to_undo(pool: &AnyPool) -> Result, sqlx::Error> { - // Look in the history table, get the row with the greatest ID, get the row number, - // from, and to, and determine whether the last operation was a delete, insert, or update. - let is_clause = if pool.any_kind() == AnyKind::Sqlite { - "IS" - } else { - "IS NOT DISTINCT FROM" - }; - let sql = format!( - r#"SELECT * FROM "history" - WHERE "undone_by" {} NULL - ORDER BY "history_id" DESC LIMIT 1"#, - is_clause - ); - let query = sqlx_query(&sql); - let result_row = query.fetch_optional(pool).await?; - Ok(result_row) -} - -/// Given a database pool fetch the row in the history table that has been most recently marked as -/// undone. -pub async fn get_record_to_redo(pool: &AnyPool) -> Result, sqlx::Error> { - // Look in the history table, get the row with the greatest ID, get the row number, - // from, and to, and determine whether the last operation was a delete, insert, or update. - let is_not_clause = if pool.any_kind() == AnyKind::Sqlite { - "IS NOT" - } else { - "IS DISTINCT FROM" - }; - let is_clause = if pool.any_kind() == AnyKind::Sqlite { - "IS" - } else { - "IS NOT DISTINCT FROM" - }; - let sql = format!( - r#"SELECT * FROM "history" h1 - WHERE "undone_by" {is_not} NULL - AND NOT EXISTS ( - SELECT 1 FROM "history" h2 - WHERE h2.history_id > h1.history_id - AND "undone_by" {is} NULL - ) - ORDER BY "timestamp" DESC LIMIT 1"#, - is_not = is_not_clause, - is = is_clause - ); - let query = sqlx_query(&sql); - let result_row = query.fetch_optional(pool).await?; - Ok(result_row) -} - -/// Given a global configuration map, maps of compiled datatype and ruled conditions, a database -/// connection pool, and the user who initiated the undo, find the last recorded change to the -/// database and undo it, indicating in the history table that undo_user is responsible. -#[async_recursion] -pub async fn undo( - global_config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, - undo_user: &str, -) -> Result<(), sqlx::Error> { - let last_change = match get_record_to_undo(pool).await? 
{ - None => { - eprintln!("WARN: Nothing to undo."); - return Ok(()); - } - Some(r) => r, - }; - let history_id: i32 = last_change.get("history_id"); - let history_id = history_id as u16; - let table: &str = last_change.get("table"); - let row_number: i64 = last_change.get("row"); - let row_number = row_number as u32; - let from = get_json_from_row(&last_change, "from"); - let to = get_json_from_row(&last_change, "to"); - - match (from, to) { - (None, None) => { - return Err(SqlxCErr( - "Cannot redo unknown operation from None to None".into(), - )) - } - (None, Some(_)) => { - // Undo an insert: - let mut tx = pool.begin().await?; - - delete_row_tx( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &mut tx, - table, - &row_number, - ) - .await?; - - switch_undone_state(undo_user, history_id, true, &mut tx, pool).await?; - tx.commit().await?; - } - (Some(from), None) => { - // Undo a delete: - let mut tx = pool.begin().await?; - - insert_new_row_tx( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &mut tx, - table, - &from, - Some(row_number), - false, - ) - .await?; - - switch_undone_state(undo_user, history_id, true, &mut tx, pool).await?; - tx.commit().await?; - } - (Some(from), Some(_)) => { - // Undo an an update: - let mut tx = pool.begin().await?; - - update_row_tx( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &mut tx, - table, - &from, - &row_number, - false, - false, - ) - .await?; - - switch_undone_state(undo_user, history_id, true, &mut tx, pool).await?; - tx.commit().await?; - } - } - Ok(()) -} - -/// Given a global configuration map, maps of compiled datatype and ruled conditions, a database -/// connection pool, and the user who initiated the redo, find the last recorded change to the -/// database that was undone and redo it, indicating in the history table that redo_user is -/// responsible for the redo. -#[async_recursion] -pub async fn redo( - global_config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, - redo_user: &str, -) -> Result<(), sqlx::Error> { - let last_undo = match get_record_to_redo(pool).await? 
{ - None => { - eprintln!("WARN: Nothing to redo."); - return Ok(()); - } - Some(last_undo) => { - let undone_by = last_undo.try_get_raw("undone_by")?; - if undone_by.is_null() { - eprintln!("WARN: Nothing to redo."); - return Ok(()); - } - last_undo - } - }; - let history_id: i32 = last_undo.get("history_id"); - let history_id = history_id as u16; - let table: &str = last_undo.get("table"); - let row_number: i64 = last_undo.get("row"); - let row_number = row_number as u32; - let from = get_json_from_row(&last_undo, "from"); - let to = get_json_from_row(&last_undo, "to"); - - match (from, to) { - (None, None) => { - return Err(SqlxCErr( - "Cannot redo unknown operation from None to None".into(), - )) - } - (None, Some(to)) => { - // Redo an insert: - let mut tx = pool.begin().await?; - - insert_new_row_tx( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &mut tx, - table, - &to, - Some(row_number), - false, - ) - .await?; - - switch_undone_state(redo_user, history_id, false, &mut tx, pool).await?; - tx.commit().await?; - } - (Some(_), None) => { - // Redo a delete: - let mut tx = pool.begin().await?; - - delete_row_tx( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &mut tx, - table, - &row_number, - ) - .await?; - - switch_undone_state(redo_user, history_id, false, &mut tx, pool).await?; - tx.commit().await?; - } - (Some(_), Some(to)) => { - // Redo an an update: - let mut tx = pool.begin().await?; - - update_row_tx( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &mut tx, - table, - &to, - &row_number, - false, - false, - ) - .await?; - - switch_undone_state(redo_user, history_id, false, &mut tx, pool).await?; - tx.commit().await?; - } - } - Ok(()) -} - /// Given a global config map and a table name, return a list of the columns from the table /// that may potentially result in database conflicts. -fn get_conflict_columns(global_config: &SerdeMap, table_name: &str) -> Vec { +pub fn get_conflict_columns(global_config: &SerdeMap, table_name: &str) -> Vec { let mut conflict_columns = vec![]; let primaries = global_config .get("constraints") @@ -2392,7 +1942,7 @@ fn get_conflict_columns(global_config: &SerdeMap, table_name: &str) -> Vec bool { +pub fn is_sql_type_error(sql_type: &str, value: &str) -> bool { let sql_type = sql_type.to_lowercase(); if sql_type == "numeric" { // f64 @@ -2420,58 +1970,10 @@ fn is_sql_type_error(sql_type: &str, value: &str) -> bool { } } -/// A wrapper around [insert_new_row_tx()] in which the following steps are also performed: -/// - A database transaction is created and then committed once the given new row has been inserted. -/// - The row is validated before insertion and the update to the database is recorded to the -/// history table indicating that the given user is responsible for the change. 
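
// The idea behind is_sql_type_error() above, inverted into a small standalone
// check: a value is a "SQL type error" exactly when it fails to parse as the
// Rust analogue of the column's SQL type. Only a few representative types are
// shown; the real function handles more.

fn parses_as_sql_type(sql_type: &str, value: &str) -> bool {
    match sql_type.to_lowercase().as_str() {
        "numeric" | "real" => value.parse::<f64>().is_ok(), // f64, as in the code above
        "integer" => value.parse::<i32>().is_ok(),
        "bigint" => value.parse::<i64>().is_ok(),
        _ => true, // TEXT-like types accept any string
    }
}

// e.g. parses_as_sql_type("numeric", "abc") is false; in insert_new_row_tx()
// below that means NULL is inserted for the column instead of attempting a
// failing cast.
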
-#[async_recursion] -pub async fn insert_new_row( - global_config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, - table: &str, - row: &SerdeMap, - new_row_number: Option, - user: &str, -) -> Result { - let mut tx = pool.begin().await?; - - let row = validate_row( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - Some(&mut tx), - table, - row, - new_row_number, - None, - ) - .await?; - - let rn = insert_new_row_tx( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &mut tx, - table, - &row, - new_row_number, - true, - ) - .await?; - - record_row_change(&mut tx, table, &rn, None, Some(&row), user).await?; - tx.commit().await?; - Ok(rn) -} - /// Given a global config map, compiled datatype and rule conditions, a database connection pool, a /// database transaction, a table name, and a row, assign the given new row number to the row and /// insert it to the database using the given transaction, then return the new row number. -/// If skip_validation is set to true, omit the implicit call to [validate_row()]. +/// If skip_validation is set to true, omit the implicit call to [validate_row_tx()]. #[async_recursion] pub async fn insert_new_row_tx( global_config: &SerdeMap, @@ -2480,14 +1982,14 @@ pub async fn insert_new_row_tx( pool: &AnyPool, tx: &mut Transaction, table: &str, - row: &SerdeMap, + row: &ValveRow, new_row_number: Option, skip_validation: bool, -) -> Result { +) -> Result { // Send the row through the row validator to determine if any fields are problematic and // to mark them with appropriate messages: let row = if !skip_validation { - validate_row( + validate_row_tx( global_config, compiled_datatype_conditions, compiled_rule_conditions, @@ -2547,21 +2049,26 @@ pub async fn insert_new_row_tx( for (column, cell) in row.iter() { insert_columns.append(&mut vec![format!(r#""{}""#, column)]); - let cell = cell - .as_object() - .ok_or(SqlxCErr(format!("Cell {:?} is not an object", cell).into()))?; - let valid = cell.get("valid").and_then(|v| v.as_bool()).ok_or(SqlxCErr( - format!("No bool named 'valid' in {:?}", cell).into(), + let cell = cell.as_object().ok_or(ValveError::InputError( + format!("Cell {:?} is not an object", cell).into(), ))?; - let value = cell.get("value").and_then(|v| v.as_str()).ok_or(SqlxCErr( - format!("No string named 'value' in {:?}", cell).into(), - ))?; - let nulltype = cell.get("nulltype").and_then(|n| n.as_str()); + let valid = cell + .get("valid") + .and_then(|v| v.as_bool()) + .ok_or(ValveError::InputError( + format!("No bool named 'valid' in {:?}", cell).into(), + ))?; + let value = cell + .get("value") + .and_then(|v| v.as_str()) + .ok_or(ValveError::InputError( + format!("No string named 'value' in {:?}", cell).into(), + ))?; let messages = sort_messages( &sorted_datatypes, cell.get("messages") .and_then(|m| m.as_array()) - .ok_or(SqlxCErr( + .ok_or(ValveError::InputError( format!("No array named 'messages' in {:?}", cell).into(), ))?, ); @@ -2572,36 +2079,30 @@ pub async fn insert_new_row_tx( "value": value, "level": message.get("level").and_then(|s| s.as_str()) .ok_or( - SqlxCErr(format!("No 'level' in {:?}", message).into()) + ValveError::InputError(format!("No 'level' in {:?}", message).into()) )?, "rule": message.get("rule").and_then(|s| s.as_str()) .ok_or( - SqlxCErr(format!("No 'rule' in {:?}", message).into()) + ValveError::InputError(format!("No 'rule' in {:?}", message).into()) )?, "message": 
message.get("message").and_then(|s| s.as_str()) .ok_or( - SqlxCErr(format!("No 'message' in {:?}", message).into()) + ValveError::InputError(format!("No 'message' in {:?}", message).into()) )?, })); } - match nulltype { - None => { - let sql_type = get_sql_type_from_global_config(global_config, table, column, pool) - .ok_or(SqlxCErr( - format!("Could not get SQL type for {}.{}", table, column).into(), - ))?; - if is_sql_type_error(&sql_type, value) { - insert_values.push(String::from("NULL")); - } else { - insert_values.push(cast_sql_param_from_text(&sql_type)); - insert_params.push(String::from(value)); - } - } - _ => { - insert_values.push(String::from("NULL")); - } - }; + let sql_type = get_sql_type_from_global_config(global_config, table, column, pool).ok_or( + ValveError::ConfigError( + format!("Could not get SQL type for {}.{}", table, column).into(), + ), + )?; + if is_sql_type_error(&sql_type, value) { + insert_values.push(String::from("NULL")); + } else { + insert_values.push(cast_sql_param_from_text(&sql_type)); + insert_params.push(String::from(value)); + } if !use_conflict_table && !valid && conflict_columns.contains(&json!(column)) { use_conflict_table = true; @@ -2620,9 +2121,8 @@ pub async fn insert_new_row_tx( // Look through the valve config to see which tables are dependent on this table // and find the rows that need to be updated: - let (_, updates_after, _) = get_rows_to_update(global_config, pool, tx, table, &query_as_if) - .await - .map_err(|e| SqlxCErr(e.into()))?; + let (_, updates_after, _) = + get_rows_to_update(global_config, pool, tx, table, &query_as_if).await?; // Check it to see if the row should be redirected to the conflict table: let table_to_write = { @@ -2665,56 +2165,23 @@ pub async fn insert_new_row_tx( table, new_row_number, column, value, level, rule, message ); let query = sqlx_query(&message_sql); - query.execute(tx.acquire().await?).await?; - } - - // Now process the updates that need to be performed after the update of the target row: - process_updates( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - tx, - &updates_after, - &query_as_if, - false, - ) - .await?; - - Ok(new_row_number) -} - -/// A wrapper around [delete_row_tx()] in which the database transaction is implicitly created -/// and then committed once the given row has been deleted, and the change to the database is -/// recorded in the history table indicating that the given user is responsible for the change. 
-#[async_recursion] -pub async fn delete_row( - global_config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, - table: &str, - row_number: &u32, - user: &str, -) -> Result<(), sqlx::Error> { - let mut tx = pool.begin().await?; - - let row = get_row_from_db(global_config, pool, &mut tx, &table, row_number).await?; - record_row_change(&mut tx, &table, row_number, Some(&row), None, user).await?; + query.execute(tx.acquire().await?).await?; + } - delete_row_tx( + // Now process the updates that need to be performed after the update of the target row: + process_updates( global_config, compiled_datatype_conditions, compiled_rule_conditions, pool, - &mut tx, - table, - row_number, + tx, + &updates_after, + &query_as_if, + false, ) .await?; - tx.commit().await?; - Ok(()) + Ok(new_row_number) } /// Given a global config map, maps of datatype and rule conditions, a database connection pool, a @@ -2728,7 +2195,7 @@ pub async fn delete_row_tx( tx: &mut Transaction, table: &str, row_number: &u32, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { // Used to validate the given row, counterfactually, "as if" the row did not exist in the // database: let query_as_if = QueryAsIf { @@ -2743,9 +2210,7 @@ pub async fn delete_row_tx( // rows that need to be updated. Since this is a delete there will only be rows to update // before and none after the delete: let (updates_before, _, updates_intra) = - get_rows_to_update(global_config, pool, tx, table, &query_as_if) - .await - .map_err(|e| SqlxCErr(e.into()))?; + get_rows_to_update(global_config, pool, tx, table, &query_as_if).await?; // Process the updates that need to be performed before the update of the target row: process_updates( @@ -2798,73 +2263,10 @@ pub async fn delete_row_tx( Ok(()) } -/// A wrapper around [update_row_tx()] in which the database transaction is implicitly created -/// and then committed once the given row has been updated, the given row is validated before -/// the update, and the update is recorded to the history table indicating that the given user -/// is responsible for the change. -#[async_recursion] -pub async fn update_row( - global_config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - pool: &AnyPool, - table_name: &str, - row: &SerdeMap, - row_number: &u32, - user: &str, -) -> Result<(), sqlx::Error> { - let mut tx = pool.begin().await?; - - // Get the old version of the row from the database so that we can later record it to the - // history table: - let old_row = get_row_from_db(global_config, pool, &mut tx, table_name, row_number).await?; - - let row = validate_row( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - Some(&mut tx), - table_name, - row, - Some(*row_number), - None, - ) - .await?; - - update_row_tx( - global_config, - compiled_datatype_conditions, - compiled_rule_conditions, - pool, - &mut tx, - table_name, - &row, - row_number, - true, - false, - ) - .await?; - - // Record the row update in the history table: - record_row_change( - &mut tx, - table_name, - row_number, - Some(&old_row), - Some(&row), - user, - ) - .await?; - - tx.commit().await?; - Ok(()) -} - /// Given global config map, maps of compiled datatype and rule conditions, a database connection /// pool, a database transaction, a table name, a row, and the row number to update, update the /// corresponding row in the database. 
If skip_validation is set, skip the implicit call to
-/// [validate_row()]. If do_not_recurse, is set, do not look for rows which could be affected by
+/// [validate_row_tx()]. If do_not_recurse is set, do not look for rows which could be affected by
 /// this update.
 #[async_recursion]
 pub async fn update_row_tx(
@@ -2874,11 +2276,11 @@ pub async fn update_row_tx(
     pool: &AnyPool,
     tx: &mut Transaction<sqlx::Any>,
     table: &str,
-    row: &SerdeMap,
+    row: &ValveRow,
     row_number: &u32,
     skip_validation: bool,
     do_not_recurse: bool,
-) -> Result<(), sqlx::Error> {
+) -> Result<(), ValveError> {
     // First, look through the valve config to see which tables are dependent on this table and find
     // the rows that need to be updated. The variable query_as_if is used to validate the given row,
     // counterfactually, "as if" the version of the row in the database currently were replaced with
@@ -2894,9 +2296,7 @@ pub async fn update_row_tx(
         if do_not_recurse {
             (IndexMap::new(), IndexMap::new(), IndexMap::new())
         } else {
-            get_rows_to_update(global_config, pool, tx, table, &query_as_if)
-                .await
-                .map_err(|e| SqlxCErr(e.into()))?
+            get_rows_to_update(global_config, pool, tx, table, &query_as_if).await?
         }
     };
@@ -2916,7 +2316,7 @@ pub async fn update_row_tx(
     // Send the row through the row validator to determine if any fields are problematic and
     // to mark them with appropriate messages:
     let row = if !skip_validation {
-        validate_row(
+        validate_row_tx(
             global_config,
             compiled_datatype_conditions,
             compiled_rule_conditions,
@@ -2987,12 +2387,12 @@ pub async fn update_row_tx(
     Ok(())
 }
 
-/// Given a path, read a TSV file and return a vector of rows represented as SerdeMaps.
+/// Given a path, read a TSV file and return a vector of rows represented as ValveRows.
 /// Note: Use this function to read "small" TSVs only. In particular, use this for the special
 /// configuration tables.
-fn read_tsv_into_vector(path: &str) -> Vec<SerdeMap> {
+pub fn read_tsv_into_vector(path: &str) -> Vec<ValveRow> {
     let mut rdr =
-        csv::ReaderBuilder::new()
+        ReaderBuilder::new()
             .delimiter(b'\t')
             .from_reader(File::open(path).unwrap_or_else(|err| {
                 panic!("Unable to open '{}': {}", path, err);
             }));
@@ -3001,7 +2401,7 @@ fn read_tsv_into_vector(path: &str) -> Vec<SerdeMap> {
     let rows: Vec<_> = rdr
         .deserialize()
         .map(|result| {
-            let row: SerdeMap = result.expect(format!("Error reading: {}", path).as_str());
+            let row: ValveRow = result.expect(format!("Error reading: {}", path).as_str());
             row
         })
         .collect();
@@ -3017,8 +2417,8 @@ fn read_tsv_into_vector(path: &str) -> Vec<SerdeMap> {
         let val = val.as_str().unwrap();
         let trimmed_val = val.trim();
         if trimmed_val != val {
-            eprintln!(
-                "Error: Value '{}' of column '{}' in row {} of table '{}' {}",
+            error!(
+                "Value '{}' of column '{}' in row {} of table '{}' {}",
                 val, col, i, path, "has leading and/or trailing whitespace."
            );
            process::exit(1);
@@ -3029,9 +2429,9 @@ fn read_tsv_into_vector(path: &str) -> Vec<SerdeMap> {
     rows
 }
 
-/// Given a database at the specified location, query the "table" table and return a vector of rows
-/// represented as SerdeMaps.
-fn read_db_table_into_vector(database: &str, config_table: &str) -> Vec<SerdeMap> {
+/// Given a database at the specified location, query the given table and return a vector of rows
+/// represented as ValveRows.
+pub fn read_db_table_into_vector(database: &str, config_table: &str) -> Vec { let connection_options; if database.starts_with("postgresql://") { connection_options = AnyConnectOptions::from_str(database).unwrap(); @@ -3056,7 +2456,7 @@ fn read_db_table_into_vector(database: &str, config_table: &str) -> Vec Vec, parser: &StartParser, compiled_datatype_conditions: &HashMap, @@ -3221,20 +2621,12 @@ fn compile_condition( /// Given the config map, the name of a datatype, and a database connection pool used to determine /// the database type, climb the datatype tree (as required), and return the first 'SQL type' found. -fn get_sql_type(dt_config: &SerdeMap, datatype: &String, pool: &AnyPool) -> Option { +pub fn get_sql_type(dt_config: &SerdeMap, datatype: &String, pool: &AnyPool) -> Option { if !dt_config.contains_key(datatype) { - return None; + return Some("TEXT".to_string()); } - let sql_type_column = { - if pool.any_kind() == AnyKind::Sqlite { - "SQLite type" - } else { - "PostgreSQL type" - } - }; - - if let Some(sql_type) = dt_config.get(datatype).and_then(|d| d.get(sql_type_column)) { + if let Some(sql_type) = dt_config.get(datatype).and_then(|d| d.get("SQL type")) { return Some(sql_type.as_str().and_then(|s| Some(s.to_string())).unwrap()); } @@ -3273,7 +2665,7 @@ pub fn get_sql_type_from_global_config( /// Given a SQL type, return the appropriate CAST(...) statement for casting the SQL_PARAM /// from a TEXT column. -fn cast_sql_param_from_text(sql_type: &str) -> String { +pub fn cast_sql_param_from_text(sql_type: &str) -> String { let s = sql_type.to_lowercase(); if s == "numeric" { format!("CAST(NULLIF({}, '') AS NUMERIC)", SQL_PARAM) @@ -3288,7 +2680,7 @@ fn cast_sql_param_from_text(sql_type: &str) -> String { /// Given a SQL type, return the appropriate CAST(...) statement for casting the SQL_PARAM /// to a TEXT column. -fn cast_column_sql_to_text(column: &str, sql_type: &str) -> String { +pub fn cast_column_sql_to_text(column: &str, sql_type: &str) -> String { if sql_type.to_lowercase() == "text" { format!(r#""{}""#, column) } else { @@ -3319,7 +2711,7 @@ pub fn get_column_value(row: &AnyRow, column: &str, sql_type: &str) -> String { /// SQL_PARAM, and given a database pool, if the pool is of type Sqlite, then change the syntax used /// for unbound parameters to Sqlite syntax, which uses "?", otherwise use Postgres syntax, which /// uses numbered parameters, i.e., $1, $2, ... -fn local_sql_syntax(pool: &AnyPool, sql: &String) -> String { +pub fn local_sql_syntax(pool: &AnyPool, sql: &String) -> String { // Do not replace instances of SQL_PARAM if they are within quotation marks. let rx = Regex::new(&format!( r#"('[^'\\]*(?:\\.[^'\\]*)*'|"[^"\\]*(?:\\.[^"\\]*)*")|\b{}\b"#, @@ -3354,7 +2746,14 @@ fn local_sql_syntax(pool: &AnyPool, sql: &String) -> String { /// under dependencies, returns the list of tables sorted according to their foreign key /// dependencies, such that if table_a depends on table_b, then table_b comes before table_a in the /// list that is returned. 
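
// The dependency sort defined just below rests on petgraph: tables are nodes, an
// edge runs from each table to the tables that depend on it, and a topological
// sort yields an order in which every table comes after the tables it depends
// on. Incoming and outgoing neighbors provide the new dependencies-in and
// dependencies-out maps. A standalone miniature with made-up table names:

use petgraph::algo::toposort;
use petgraph::graphmap::DiGraphMap;
use petgraph::Direction;

fn dependency_demo() {
    let mut g = DiGraphMap::<&str, ()>::new();
    g.add_edge("datatype", "column", ()); // column depends on datatype
    g.add_edge("table", "column", ()); // column depends on table
    match toposort(&g, None) {
        // e.g. ["datatype", "table", "column"] or ["table", "datatype", "column"]:
        Ok(sorted) => println!("load order: {:?}", sorted),
        Err(cycle) => println!("cycle involving {:?}", cycle.node_id()),
    }
    // Everything that must be loaded before "column":
    let deps: Vec<_> = g.neighbors_directed("column", Direction::Incoming).collect();
    println!("column depends on: {:?}", deps);
}
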
-fn verify_table_deps_and_sort(table_list: &Vec<String>, constraints: &SerdeMap) -> Vec<String> {
+pub fn verify_table_deps_and_sort(
+    table_list: &Vec<String>,
+    constraints: &SerdeMap,
+) -> (
+    Vec<String>,
+    HashMap<String, Vec<String>>,
+    HashMap<String, Vec<String>>,
+) {
     fn get_cycles(g: &DiGraphMap<&str, ()>) -> Result<Vec<String>, Vec<Vec<String>>> {
         let mut cycles = vec![];
         match toposort(&g, None) {
@@ -3387,6 +2786,7 @@ fn verify_table_deps_and_sort(table_list: &Vec<String>, constraints: &SerdeMap)
         }
     }
 
+    // Check for intra-table cycles:
     let trees = constraints.get("tree").and_then(|t| t.as_object()).unwrap();
     for table_name in table_list {
         let mut dependency_graph = DiGraphMap::<&str, ()>::new();
@@ -3431,6 +2831,7 @@ fn verify_table_deps_and_sort(table_list: &Vec<String>, constraints: &SerdeMap)
         };
     }
 
+    // Check for inter-table cycles:
     let foreign_keys = constraints
         .get("foreign")
         .and_then(|f| f.as_object())
@@ -3481,7 +2882,26 @@ fn verify_table_deps_and_sort(table_list: &Vec<String>, constraints: &SerdeMap)
 
     match get_cycles(&dependency_graph) {
         Ok(sorted_table_list) => {
-            return sorted_table_list;
+            let mut table_dependencies_in = HashMap::new();
+            for node in dependency_graph.nodes() {
+                let neighbors = dependency_graph
+                    .neighbors_directed(node, petgraph::Direction::Incoming)
+                    .map(|n| n.to_string())
+                    .collect::<Vec<_>>();
+                table_dependencies_in.insert(node.to_string(), neighbors);
+            }
+            let mut table_dependencies_out = HashMap::new();
+            for node in dependency_graph.nodes() {
+                let neighbors = dependency_graph
+                    .neighbors_directed(node, petgraph::Direction::Outgoing)
+                    .map(|n| n.to_string())
+                    .collect::<Vec<_>>();
+                table_dependencies_out.insert(node.to_string(), neighbors);
+            }
+            let mut sorted_table_list = sorted_table_list.clone();
+            let mut with_specials = vec!["message".to_string(), "history".to_string()];
+            with_specials.append(&mut sorted_table_list);
+            return (with_specials, table_dependencies_in, table_dependencies_out);
         }
         Err(cycles) => {
             let mut message = String::new();
@@ -3537,36 +2957,25 @@ fn verify_table_deps_and_sort(table_list: &Vec<String>, constraints: &SerdeMap)
     };
 }
 
-/// Given the config maps for tables and datatypes, and a table name, generate a SQL schema string,
-/// including each column C and its matching C_meta column, then return the schema string as well as
-/// a list of the table's constraints.
-fn create_table_statement(
-    tables_config: &mut SerdeMap,
-    datatypes_config: &mut SerdeMap,
+/// Given a table configuration map and a datatype configuration map, a parser, a table name, and a
+/// database connection pool, return a configuration map representing all of the table constraints.
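
// Concretely, the map returned by get_table_constraints() (introduced next) has
// this shape. Column names here are invented; tree, under, and foreign entries
// are objects whose fields are filled in by the structure-parsing code below and
// are left empty in this sketch:

use serde_json::json;

fn constraints_shape_demo() {
    let table_constraints = json!({
        "foreign": [],
        "unique": ["name"],
        "primary": ["id"],
        "tree": [],
        "under": [],
    });
    // get_table_ddl() later consults these lists when emitting column DDL:
    let primaries = table_constraints
        .get("primary")
        .and_then(|v| v.as_array())
        .unwrap();
    assert!(primaries.contains(&json!("id")));
}
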
+pub fn get_table_constraints( + tables_config: &SerdeMap, + datatypes_config: &SerdeMap, parser: &StartParser, - table_name: &String, + table_name: &str, pool: &AnyPool, -) -> (Vec, SerdeValue) { - let mut drop_table_sql = format!(r#"DROP TABLE IF EXISTS "{}""#, table_name); - if pool.any_kind() == AnyKind::Postgres { - drop_table_sql.push_str(" CASCADE"); - } - drop_table_sql.push_str(";"); - let mut statements = vec![drop_table_sql]; - let mut create_lines = vec![ - format!(r#"CREATE TABLE "{}" ("#, table_name), - String::from(r#" "row_number" BIGINT,"#), - ]; - - let normal_table_name; - if let Some(s) = table_name.strip_suffix("_conflict") { - normal_table_name = String::from(s); - } else { - normal_table_name = table_name.to_string(); - } +) -> SerdeValue { + let mut table_constraints = json!({ + "foreign": [], + "unique": [], + "primary": [], + "tree": [], + "under": [], + }); let column_names = tables_config - .get(&normal_table_name) + .get(table_name) .and_then(|t| t.get("column_order")) .and_then(|c| c.as_array()) .unwrap() @@ -3575,20 +2984,12 @@ fn create_table_statement( .collect::>(); let columns = tables_config - .get(normal_table_name.as_str()) + .get(table_name) .and_then(|c| c.as_object()) .and_then(|o| o.get("column")) .and_then(|c| c.as_object()) .unwrap(); - let mut table_constraints = json!({ - "foreign": [], - "unique": [], - "primary": [], - "tree": [], - "under": [], - }); - let mut colvals: Vec = vec![]; for column_name in &column_names { let column = columns @@ -3598,64 +2999,22 @@ fn create_table_statement( colvals.push(column.clone()); } - let c = colvals.len(); - let mut r = 0; for row in colvals { - r += 1; - let sql_type = get_sql_type( - datatypes_config, - &row.get("datatype") - .and_then(|d| d.as_str()) - .and_then(|s| Some(s.to_string())) - .unwrap(), - pool, - ); - - if let None = sql_type { - panic!("Missing SQL type for {}", row.get("datatype").unwrap()); - } - let sql_type = sql_type.unwrap(); - - let short_sql_type = { - if sql_type.to_lowercase().as_str().starts_with("varchar(") { - "VARCHAR" - } else { - &sql_type - } - }; - - if pool.any_kind() == AnyKind::Postgres { - if !PG_SQL_TYPES.contains(&short_sql_type.to_lowercase().as_str()) { - panic!( - "Unrecognized PostgreSQL SQL type '{}' for datatype: '{}'. \ - Accepted SQL types for PostgreSQL are: {}", - sql_type, - row.get("datatype").and_then(|d| d.as_str()).unwrap(), - PG_SQL_TYPES.join(", ") - ); - } - } else { - if !SL_SQL_TYPES.contains(&short_sql_type.to_lowercase().as_str()) { - panic!( - "Unrecognized SQLite SQL type '{}' for datatype '{}'. 
\
-                    Accepted SQL datatypes for SQLite are: {}",
-                    sql_type,
-                    row.get("datatype").and_then(|d| d.as_str()).unwrap(),
-                    SL_SQL_TYPES.join(", ")
-                );
-            }
-        }
-
+        let datatype = row
+            .get("datatype")
+            .and_then(|d| d.as_str())
+            .and_then(|s| Some(s.to_string()))
+            .unwrap();
+        let sql_type = get_sql_type(datatypes_config, &datatype, pool)
+            .expect(&format!("Unable to determine SQL type for {}", datatype));
         let column_name = row.get("column").and_then(|s| s.as_str()).unwrap();
-        let mut line = format!(r#" "{}" {}"#, column_name, sql_type);
         let structure = row.get("structure").and_then(|s| s.as_str());
         if let Some(structure) = structure {
-            if structure != "" && !table_name.ends_with("_conflict") {
+            if structure != "" {
                 let parsed_structure = parser.parse(structure).unwrap();
                 for expression in parsed_structure {
                     match *expression {
                         Expression::Label(value) if value == "primary" => {
-                            line.push_str(" PRIMARY KEY");
                             let primary_keys = table_constraints
                                 .get_mut("primary")
                                 .and_then(|v| v.as_array_mut())
@@ -3663,7 +3022,6 @@ fn create_table_statement(
                             primary_keys.push(SerdeValue::String(column_name.to_string()));
                         }
                         Expression::Label(value) if value == "unique" => {
-                            line.push_str(" UNIQUE");
                             let unique_constraints = table_constraints
                                 .get_mut("unique")
                                 .and_then(|v| v.as_array_mut())
@@ -3707,7 +3065,7 @@ fn create_table_statement(
                             .and_then(|d| d.as_str());
                             if let None = child_datatype {
                                 panic!(
-                                    "Could not determine SQL datatype for {} of tree({})",
+                                    "Could not determine datatype for {} of tree({})",
                                     child, child
                                 );
                             }
@@ -3778,20 +3136,136 @@ fn create_table_statement(
                 }
             }
         }
+    }
+
+    return table_constraints;
+}
+
+/// Given a table configuration map and a datatype configuration map, a parser, a table name, and a
+/// database connection pool, return a list of DDL statements that can be used to create the
+/// database tables.
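
// For orientation, the kind of statement the function below assembles for a
// small table whose "id" column is a primary key and whose "name" column is
// unique (table and column names invented):

fn example_ddl() -> String {
    [
        r#"CREATE TABLE "table1" ("#,
        r#"  "row_number" BIGINT,"#,
        r#"  "id" TEXT PRIMARY KEY,"#,
        r#"  "name" TEXT UNIQUE"#,
        r#");"#,
    ]
    .join("\n")
}
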
+pub fn get_table_ddl( + tables_config: &SerdeMap, + datatypes_config: &SerdeMap, + parser: &StartParser, + table_name: &String, + pool: &AnyPool, +) -> Vec { + let mut statements = vec![]; + let mut create_lines = vec![ + format!(r#"CREATE TABLE "{}" ("#, table_name), + String::from(r#" "row_number" BIGINT,"#), + ]; + + let colvals = { + let normal_table_name; + if let Some(s) = table_name.strip_suffix("_conflict") { + normal_table_name = String::from(s); + } else { + normal_table_name = table_name.to_string(); + } + let column_order = tables_config + .get(&normal_table_name) + .and_then(|t| t.get("column_order")) + .and_then(|c| c.as_array()) + .unwrap() + .iter() + .map(|v| v.as_str().unwrap().to_string()) + .collect::>(); + let columns = tables_config + .get(&normal_table_name) + .and_then(|c| c.as_object()) + .and_then(|o| o.get("column")) + .and_then(|c| c.as_object()) + .unwrap(); + + column_order + .iter() + .map(|column_name| { + columns + .get(column_name) + .and_then(|c| c.as_object()) + .unwrap() + }) + .collect::>() + }; + + let table_constraints = { + // Conflict tables have no database constraints: + if table_name.ends_with("_conflict") { + json!({"foreign": [], "unique": [], "primary": [], "tree": [], "under": [],}) + } else { + get_table_constraints(tables_config, datatypes_config, parser, &table_name, &pool) + } + }; + + let c = colvals.len(); + let mut r = 0; + for row in colvals { + r += 1; + let sql_type = get_sql_type( + datatypes_config, + &row.get("datatype") + .and_then(|d| d.as_str()) + .and_then(|s| Some(s.to_string())) + .unwrap(), + pool, + ) + .unwrap(); + + let short_sql_type = { + if sql_type.to_lowercase().as_str().starts_with("varchar(") { + "VARCHAR" + } else { + &sql_type + } + }; + + if !SQL_TYPES.contains(&short_sql_type.to_lowercase().as_str()) { + panic!( + "Unrecognized SQL type '{}' for datatype: '{}'. 
Accepted SQL types are: {}", + sql_type, + row.get("datatype").and_then(|d| d.as_str()).unwrap(), + SQL_TYPES.join(", ") + ); + } + + let column_name = row.get("column").and_then(|s| s.as_str()).unwrap(); + let mut line = format!(r#" "{}" {}"#, column_name, sql_type); + + // Check if the column is a primary key and indicate this in the DDL if so: + let primary_constraints = table_constraints + .get("primary") + .and_then(|v| v.as_array()) + .unwrap(); + if primary_constraints.contains(&json!(column_name)) { + line.push_str(" PRIMARY KEY"); + } + + // Check if the column has a unique constraint and indicate this in the DDL if so: + let unique_constraints = table_constraints + .get("unique") + .and_then(|v| v.as_array()) + .unwrap(); + if unique_constraints.contains(&json!(column_name)) { + line.push_str(" UNIQUE"); + } + + // If there are foreign constraints add a column to the end of the statement which we will + // finish after this for loop is done: + if !(r >= c && table_constraints .get("foreign") .and_then(|v| v.as_array()) .and_then(|v| Some(v.is_empty())) - .unwrap() + .unwrap()) { - line.push_str(""); - } else { line.push_str(","); } create_lines.push(line); } + // Add the SQL to indicate any foreign constraints: let foreign_keys = table_constraints .get("foreign") .and_then(|v| v.as_array()) @@ -3843,13 +3317,13 @@ fn create_table_statement( table_name, table_name )); - return (statements, table_constraints); + return statements; } /// Given a list of messages and a HashMap, messages_stats, with which to collect counts of /// message types, count the various message types encountered in the list and increment the counts /// in messages_stats accordingly. -fn add_message_counts(messages: &Vec, messages_stats: &mut HashMap) { +pub fn add_message_counts(messages: &Vec, messages_stats: &mut HashMap) { for message in messages { let message = message.as_object().unwrap(); let level = message.get("level").unwrap(); @@ -3863,14 +3337,14 @@ fn add_message_counts(messages: &Vec, messages_stats: &mut HashMap Vec<&str> { +pub fn get_sorted_datatypes(global_config: &SerdeMap) -> Vec<&str> { let mut graph = DiGraphMap::<&str, ()>::new(); let dt_config = global_config .get("datatype") @@ -3920,7 +3394,10 @@ fn get_sorted_datatypes(global_config: &SerdeMap) -> Vec<&str> { /// `sorted_datatypes`, followed by: /// 2. Messages pertaining to violations of one of the rules in the rule table, followed by: /// 3. Messages pertaining to structure violations. -fn sort_messages(sorted_datatypes: &Vec<&str>, cell_messages: &Vec) -> Vec { +pub fn sort_messages( + sorted_datatypes: &Vec<&str>, + cell_messages: &Vec, +) -> Vec { let mut datatype_messages = vec![]; let mut structure_messages = vec![]; let mut rule_messages = vec![]; @@ -3970,7 +3447,7 @@ fn sort_messages(sorted_datatypes: &Vec<&str>, cell_messages: &Vec) /// to bind to that SQL statement. If the verbose flag is set, the number of errors, warnings, /// and information messages generated are added to messages_stats, the contents of which will /// later be written to stderr. -async fn make_inserts( +pub async fn make_inserts( config: &SerdeMap, table_name: &String, rows: &mut Vec, @@ -3987,7 +3464,7 @@ async fn make_inserts( String, Vec, ), - sqlx::Error, + ValveError, > { fn is_conflict_row(row: &ResultRow, conflict_columns: &Vec) -> bool { for (column, cell) in &row.contents { @@ -4183,7 +3660,7 @@ async fn make_inserts( /// and the chunk number corresponding to the rows, do inter-row validation on the rows and insert /// them to the table. 
If the verbose flag is set to true, error/warning/info stats will be /// collected in messages_stats and later written to stderr. -async fn validate_rows_inter_and_insert( +pub async fn insert_chunk( config: &SerdeMap, pool: &AnyPool, table_name: &String, @@ -4191,9 +3668,13 @@ async fn validate_rows_inter_and_insert( chunk_number: usize, messages_stats: &mut HashMap, verbose: bool, -) -> Result<(), sqlx::Error> { - // First, do the tree validation: - validate_rows_trees(config, pool, table_name, rows).await?; + validate: bool, +) -> Result<(), ValveError> { + // First, do the tree validation. TODO: I don't remember why this needs to be done first, but + // it does. Add a comment here explaining why. + if validate { + validate_rows_trees(config, pool, table_name, rows).await?; + } // Try to insert the rows to the db first without validating unique and foreign constraints. // If there are constraint violations this will cause a database error, in which case we then @@ -4260,10 +3741,17 @@ async fn validate_rows_inter_and_insert( ); } } - Err(_) => { - validate_rows_constraints(config, pool, table_name, rows).await?; - let (main_sql, main_params, conflict_sql, conflict_params, message_sql, message_params) = - make_inserts( + Err(e) => { + if validate { + validate_rows_constraints(config, pool, table_name, rows).await?; + let ( + main_sql, + main_params, + conflict_sql, + conflict_params, + message_sql, + message_params, + ) = make_inserts( config, table_name, rows, @@ -4274,26 +3762,29 @@ async fn validate_rows_inter_and_insert( ) .await?; - let main_sql = local_sql_syntax(&pool, &main_sql); - let mut main_query = sqlx_query(&main_sql); - for param in &main_params { - main_query = main_query.bind(param); - } - main_query.execute(pool).await?; + let main_sql = local_sql_syntax(&pool, &main_sql); + let mut main_query = sqlx_query(&main_sql); + for param in &main_params { + main_query = main_query.bind(param); + } + main_query.execute(pool).await?; - let conflict_sql = local_sql_syntax(&pool, &conflict_sql); - let mut conflict_query = sqlx_query(&conflict_sql); - for param in &conflict_params { - conflict_query = conflict_query.bind(param); - } - conflict_query.execute(pool).await?; + let conflict_sql = local_sql_syntax(&pool, &conflict_sql); + let mut conflict_query = sqlx_query(&conflict_sql); + for param in &conflict_params { + conflict_query = conflict_query.bind(param); + } + conflict_query.execute(pool).await?; - let message_sql = local_sql_syntax(&pool, &message_sql); - let mut message_query = sqlx_query(&message_sql); - for param in &message_params { - message_query = message_query.bind(param); + let message_sql = local_sql_syntax(&pool, &message_sql); + let mut message_query = sqlx_query(&message_sql); + for param in &message_params { + message_query = message_query.bind(param); + } + message_query.execute(pool).await?; + } else { + return Err(ValveError::DatabaseError(e)); } - message_query.execute(pool).await?; } }; @@ -4305,29 +3796,34 @@ async fn validate_rows_inter_and_insert( /// and the headers of the rows to be inserted, validate each chunk and insert the validated rows /// to the table. If the verbose flag is set to true, error/warning/info stats will be collected in /// messages_stats and later written to stderr. 
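
// Before the multi-chunk driver that follows, the strategy of insert_chunk()
// above reduced to a synchronous skeleton: try the cheap path first (insert
// without checking unique/foreign constraints), and only fall back to per-row
// constraint validation, followed by rebuilt inserts, when the database reports
// an error. The closures stand in for the sqlx calls and for
// validate_rows_constraints()/make_inserts().

fn insert_chunk_skeleton<I, V>(validate: bool, try_insert: I, validate_rows: V) -> Result<(), String>
where
    I: Fn() -> Result<(), String>,
    V: Fn() -> Result<(), String>,
{
    match try_insert() {
        Ok(()) => Ok(()),
        Err(e) => {
            if validate {
                // Validation marks the offending rows and reroutes them to the
                // conflict table, after which the rebuilt inserts succeed:
                validate_rows()?;
                try_insert()
            } else {
                Err(e)
            }
        }
    }
}
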
-async fn validate_and_insert_chunks( +pub async fn insert_chunks( config: &SerdeMap, pool: &AnyPool, compiled_datatype_conditions: &HashMap, compiled_rule_conditions: &HashMap>>, table_name: &String, - chunks: &IntoChunks>, - headers: &csv::StringRecord, + chunks: &IntoChunks>, + headers: &StringRecord, messages_stats: &mut HashMap, verbose: bool, -) -> Result<(), sqlx::Error> { + validate: bool, +) -> Result<(), ValveError> { if !MULTI_THREADED { for (chunk_number, chunk) in chunks.into_iter().enumerate() { let mut rows: Vec<_> = chunk.collect(); - let mut intra_validated_rows = validate_rows_intra( - config, - compiled_datatype_conditions, - compiled_rule_conditions, - table_name, - headers, - &mut rows, - ); - validate_rows_inter_and_insert( + let mut intra_validated_rows = { + let only_nulltype = !validate; + validate_rows_intra( + config, + compiled_datatype_conditions, + compiled_rule_conditions, + table_name, + headers, + &mut rows, + only_nulltype, + ) + }; + insert_chunk( config, pool, table_name, @@ -4335,6 +3831,7 @@ async fn validate_and_insert_chunks( chunk_number, messages_stats, verbose, + validate, ) .await?; } @@ -4359,6 +3856,7 @@ async fn validate_and_insert_chunks( for chunk in batch.into_iter() { let mut rows: Vec<_> = chunk.collect(); workers.push(scope.spawn(move |_| { + let only_nulltype = !validate; validate_rows_intra( config, compiled_datatype_conditions, @@ -4366,6 +3864,7 @@ async fn validate_and_insert_chunks( table_name, headers, &mut rows, + only_nulltype, ) })); } @@ -4379,7 +3878,7 @@ async fn validate_and_insert_chunks( .expect("A child thread panicked"); for (chunk_number, mut intra_validated_rows) in results { - validate_rows_inter_and_insert( + insert_chunk( config, pool, table_name, @@ -4387,6 +3886,7 @@ async fn validate_and_insert_chunks( chunk_number, messages_stats, verbose, + validate, ) .await?; } @@ -4395,171 +3895,3 @@ async fn validate_and_insert_chunks( Ok(()) } } - -/// Given a configuration map, a database connection pool, a parser, HashMaps representing -/// compiled datatype and rule conditions, and a HashMap representing parsed structure conditions, -/// read in the data TSV files corresponding to each configured table, then validate and load all of -/// the corresponding data rows. If the verbose flag is set to true, output progress messages to -/// stderr during load. -async fn load_db( - config: &SerdeMap, - pool: &AnyPool, - compiled_datatype_conditions: &HashMap, - compiled_rule_conditions: &HashMap>>, - verbose: bool, -) -> Result<(), sqlx::Error> { - let mut table_list = vec![]; - for table in config - .get("sorted_table_list") - .and_then(|l| l.as_array()) - .unwrap() - { - table_list.push(table.as_str().and_then(|s| Some(s.to_string())).unwrap()); - } - let table_list = table_list; // Change the table_list to read only after populating it. 
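
// How the MULTI_THREADED branch of insert_chunks() above fans the work out, in
// miniature: rows are cut into fixed-size chunks with itertools, each chunk is
// processed on its own thread inside a crossbeam scope (intra-row validation in
// the real code), and the results are joined back in chunk order before the
// necessarily serial database inserts.

use itertools::Itertools;

fn parallel_chunk_demo() {
    let rows: Vec<u32> = (0..10).collect();
    let chunks = rows.into_iter().chunks(4);
    let mut batches: Vec<Vec<u32>> = vec![];
    for chunk in &chunks {
        batches.push(chunk.collect());
    }
    let results = crossbeam::scope(|scope| {
        let workers: Vec<_> = batches
            .iter()
            .map(|batch| scope.spawn(move |_| batch.iter().sum::<u32>()))
            .collect();
        workers
            .into_iter()
            .map(|w| w.join().expect("A child thread panicked"))
            .collect::<Vec<u32>>()
    })
    .unwrap();
    assert_eq!(results, vec![6, 22, 17]);
}
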
- let num_tables = table_list.len(); - let mut total_errors = 0; - let mut total_warnings = 0; - let mut total_infos = 0; - let mut table_num = 1; - for table_name in table_list { - if verbose { - eprintln!( - "{} - Loading table {}/{}: {}", - Utc::now(), - table_num, - num_tables, - table_name - ); - } - table_num += 1; - let path = String::from( - config - .get("table") - .and_then(|t| t.as_object()) - .and_then(|o| o.get(&table_name)) - .and_then(|n| n.get("path")) - .and_then(|p| p.as_str()) - .unwrap(), - ); - let mut rdr = csv::ReaderBuilder::new() - .has_headers(false) - .delimiter(b'\t') - .from_reader(File::open(path.clone()).unwrap_or_else(|err| { - panic!("Unable to open '{}': {}", path.clone(), err); - })); - - // Extract the headers, which we will need later: - let mut records = rdr.records(); - let headers; - if let Some(result) = records.next() { - headers = result.unwrap(); - } else { - panic!("'{}' is empty", path); - } - - for header in headers.iter() { - if header.trim().is_empty() { - panic!( - "One or more of the header fields is empty for table '{}'", - table_name - ); - } - } - - // HashMap used to report info about the number of error/warning/info messages for this - // table when the verbose flag is set to true: - let mut messages_stats = HashMap::new(); - messages_stats.insert("error".to_string(), 0); - messages_stats.insert("warning".to_string(), 0); - messages_stats.insert("info".to_string(), 0); - - // Split the data into chunks of size CHUNK_SIZE before passing them to the validation - // logic: - let chunks = records.chunks(CHUNK_SIZE); - validate_and_insert_chunks( - config, - pool, - compiled_datatype_conditions, - compiled_rule_conditions, - &table_name, - &chunks, - &headers, - &mut messages_stats, - verbose, - ) - .await?; - - // We need to wait until all of the rows for a table have been loaded before validating the - // "foreign" constraints on a table's trees, since this checks if the values of one column - // (the tree's parent) are all contained in another column (the tree's child): - // We also need to wait before validating a table's "under" constraints. Although the tree - // associated with such a constraint need not be defined on the same table, it can be. 
- let mut recs_to_update = - validate_tree_foreign_keys(config, pool, None, &table_name, None).await?; - recs_to_update.append(&mut validate_under(config, pool, None, &table_name, None).await?); - - for record in recs_to_update { - let row_number = record.get("row_number").unwrap(); - let column_name = record.get("column").and_then(|s| s.as_str()).unwrap(); - let value = record.get("value").and_then(|s| s.as_str()).unwrap(); - let level = record.get("level").and_then(|s| s.as_str()).unwrap(); - let rule = record.get("rule").and_then(|s| s.as_str()).unwrap(); - let message = record.get("message").and_then(|s| s.as_str()).unwrap(); - - let sql = local_sql_syntax( - &pool, - &format!( - r#"INSERT INTO "message" - ("table", "row", "column", "value", "level", "rule", "message") - VALUES ({}, {}, {}, {}, {}, {}, {})"#, - SQL_PARAM, row_number, SQL_PARAM, SQL_PARAM, SQL_PARAM, SQL_PARAM, SQL_PARAM - ), - ); - let mut query = sqlx_query(&sql); - query = query.bind(&table_name); - query = query.bind(&column_name); - query = query.bind(&value); - query = query.bind(&level); - query = query.bind(&rule); - query = query.bind(&message); - query.execute(pool).await?; - - if verbose { - // Add the generated message to messages_stats: - let messages = vec![json!({ - "message": message, - "level": level, - })]; - add_message_counts(&messages, &mut messages_stats); - } - } - - if verbose { - // Output a report on the messages generated to stderr: - let errors = messages_stats.get("error").unwrap(); - let warnings = messages_stats.get("warning").unwrap(); - let infos = messages_stats.get("info").unwrap(); - let status_message = format!( - "{} errors, {} warnings, and {} information messages generated for {}", - errors, warnings, infos, table_name - ); - eprintln!("{} - {}", Utc::now(), status_message); - total_errors += errors; - total_warnings += warnings; - total_infos += infos; - } - } - - if verbose { - eprintln!( - "{} - Loading complete with {} errors, {} warnings, and {} information messages", - Utc::now(), - total_errors, - total_warnings, - total_infos - ); - } - - Ok(()) -} diff --git a/src/main.rs b/src/main.rs index 7e61aba4..413b42f6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,52 +1,84 @@ mod api_test; use crate::api_test::run_api_tests; - use argparse::{ArgumentParser, Store, StoreTrue}; - -use ontodev_valve::{ - get_compiled_datatype_conditions, get_compiled_rule_conditions, - get_parsed_structure_conditions, valve, valve_grammar::StartParser, ValveCommand, -}; +use ontodev_valve::{valve::Valve, valve::ValveError}; use serde_json::{from_str, Value as SerdeValue}; use std::{env, process}; -fn cli_args_valid(source: &str, destination: &str, dump_config: bool) -> bool { - source != "" && (dump_config || destination != "") -} - #[async_std::main] -async fn main() -> Result<(), sqlx::Error> { +async fn main() -> Result<(), ValveError> { + // Command line parameters and their default values. See below for descriptions. Note that some + // of these are mutually exclusive. This is accounted for below. + // TODO: Use a more powerful command-line parser library that can automatically take care of + // things like mutually exclusive options, since argparse doesn't seem to be able to do it. 
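The TODO above asks for a CLI parser that can express mutual exclusion declaratively rather than by counting flags by hand (as main() does further below). A minimal sketch of how that could look with the `clap` crate; this is a hypothetical alternative, not a dependency introduced by this PR, and only three of the flags are shown:

```rust
use clap::{Arg, ArgAction, ArgGroup, Command};

// Hypothetical sketch only: valve currently uses argparse, not clap.
fn cli() -> Command {
    Command::new("valve")
        .arg(Arg::new("dump_config").long("dump_config").action(ArgAction::SetTrue))
        .arg(Arg::new("dump_schema").long("dump_schema").action(ArgAction::SetTrue))
        .arg(Arg::new("drop_all").long("drop_all").action(ArgAction::SetTrue))
        // Arguments placed in the same ArgGroup are mutually exclusive by default,
        // so an invocation like `valve --dump_config --drop_all` is rejected:
        .group(ArgGroup::new("command").args(["dump_config", "dump_schema", "drop_all"]))
}
```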
+ let mut verbose = false; let mut api_test = false; let mut dump_config = false; + let mut dump_schema = false; + let mut table_order = false; + let mut show_deps_in = false; + let mut show_deps_out = false; + let mut drop_all = false; let mut create_only = false; - let mut config_table = String::new(); - let mut verbose = false; let mut initial_load = false; + let mut save = String::new(); + let mut save_all = false; + let mut save_dir = String::new(); let mut source = String::new(); let mut destination = String::new(); + // TODO: Add a "dry_run" parameter. { // this block limits scope of borrows by ap.refer() method let mut ap = ArgumentParser::new(); - ap.set_description( - r#"A lightweight validation engine written in rust. If neither - --api_test nor --dump_config is specified, the configuration referred - to by SOURCE will be read and a new database will be created and loaded - with the indicated data."#, + ap.set_description(r#"Valve is a lightweight validation engine written in rust."#); + ap.refer(&mut verbose).add_option( + &["--verbose"], + StoreTrue, + r#"Write informative messages about what Valve is doing to stderr."#, ); ap.refer(&mut api_test).add_option( &["--api_test"], StoreTrue, - r#"Read the configuration referred to by SOURCE and test the functions that - are callable externally on the existing, pre-loaded database indicated by - DESTINATION."#, + r#"Read the configuration referred to by SOURCE and run a set of predefined tests on the + existing, pre-loaded database indicated by DESTINATION."#, ); ap.refer(&mut dump_config).add_option( &["--dump_config"], StoreTrue, - r#"Read the configuration referred to by SOURCE and send it to stdout as a - JSON-formatted string."#, + r#"Read the configuration referred to by SOURCE and print it as a JSON-formatted + string."#, + ); + ap.refer(&mut dump_schema).add_option( + &["--dump_schema"], + StoreTrue, + r#"Read the configuration referred to by SOURCE and print the SQL that will be used to + create the database to stdout."#, + ); + ap.refer(&mut table_order).add_option( + &["--table_order"], + StoreTrue, + r#"Read the configuration referred to by SOURCE and print the order in which the + configured tables will be created, as determined by their dependency relations."#, + ); + ap.refer(&mut show_deps_in).add_option( + &["--show_deps_in"], + StoreTrue, + r#"Read the configuration referred to by SOURCE and print the incoming dependencies + for each configured table."#, + ); + ap.refer(&mut show_deps_out).add_option( + &["--show_deps_out"], + StoreTrue, + r#"Read the configuration referred to by SOURCE and print the outgoing dependencies + for each configured table."#, + ); + ap.refer(&mut drop_all).add_option( + &["--drop_all"], + StoreTrue, + r#"Read the configuration referred to by SOURCE and drop all of the configured tables + in the given database."#, ); ap.refer(&mut create_only).add_option( &["--create_only"], @@ -54,18 +86,6 @@ async fn main() -> Result<(), sqlx::Error> { r#"Read the configuration referred to by SOURCE, and create a corresponding database in DESTINATION but do not load it."#, ); - ap.refer(&mut config_table).add_option( - &["--config_table"], - Store, - r#"When reading configuration from a database, the name to use to refer to the main - configuration table (defaults to "table")"#, - ); - ap.refer(&mut verbose).add_option( - &["--verbose"], - StoreTrue, - r#"Write the SQL used to create the database to stdout after configuring it, and then - while loading the database, write progress messages to stderr.
- ); ap.refer(&mut initial_load).add_option( &["--initial_load"], StoreTrue, @@ -74,20 +94,39 @@ async fn main() -> Result<(), sqlx::Error> { only, as data integrity will not be guaranteed in the case of an interrupted transaction."#, ); + ap.refer(&mut save).add_option( + &["--save"], + Store, + r#"Read the configuration referred to by SOURCE and save the configured data tables + from the given list as TSV files to their configured paths (as specified in the table + configuration). Optionally, specify --save_dir to save the files at an alternative + location."#, + ); + ap.refer(&mut save_all).add_option( + &["--save_all"], + StoreTrue, + r#"Read the configuration referred to by SOURCE and save all of the configured data tables + as TSV files to their configured paths (as specified in the table configuration). + Optionally, specify --save_dir to save the files at an alternative location."#, + ); + ap.refer(&mut save_dir).add_option( + &["--save_dir"], + Store, + r#"Ignored if neither --save nor --save_all has been specified. Saves the tables to the + given path instead of to their configured paths."#, + ); ap.refer(&mut source).add_argument( "SOURCE", Store, - r#"(Required.) The location of the valve configuration entrypoint. Can be - one of (A) A URL of the form `postgresql://...` or `sqlite://...` indicating a - database connection where the valve configuration can be read from a table named - "table"; (B) The filename (including path) of the table file (usually called - table.tsv)."#, + r#"The location of the valve configuration entrypoint. Can be one of (A) A URL of the + form `postgresql://...` or `sqlite://...` indicating a database connection where + the valve configuration can be read from a table named "table"; (B) The filename + (including path) of the table file (usually called table.tsv)."#, ); ap.refer(&mut destination).add_argument( "DESTINATION", Store, - r#"(Required unless the --dump_config option has been specified.) Can be - one of (A) A URL of the form `postgresql://...` or `sqlite://...` + r#"Can be one of (A) A URL of the form `postgresql://...` or `sqlite://...` (B) The filename (including path) of a sqlite database."#, ); @@ -96,75 +135,117 @@ async fn main() -> Result<(), sqlx::Error> { let args: Vec<String> = env::args().collect(); let program_name = &args[0]; - if !cli_args_valid(&source, &destination, dump_config) { - if source == "" { - eprintln!("Parameter SOURCE is required."); - } else if destination == "" { - eprintln!("Parameter DESTINATION is required."); - } - eprintln!("To see command-line usage, run {} --help", program_name); + let advice = format!("Run `{} --help` for command line usage.", program_name); + + let mutually_exclusive_options = vec![ + api_test, + dump_config, + dump_schema, + table_order, + show_deps_in, + show_deps_out, + drop_all, + create_only, + save != "" || save_all, + ]; + + if mutually_exclusive_options + .iter() + .filter(|&i| *i == true) + .count() + > 1 + { + eprintln!( + "More than one mutually exclusive option specified. {}", + advice + ); process::exit(1); } - if config_table.trim() == "" { - config_table = "table".to_string(); + let destination_optional = + dump_config || dump_schema || table_order || show_deps_in || show_deps_out; + + if source == "" { + eprintln!("Parameter SOURCE is required. {}", advice); + process::exit(1); + } else if !destination_optional && destination == "" { + eprintln!("Parameter DESTINATION is required. 
{}", advice); + process::exit(1); } if api_test { run_api_tests(&source, &destination).await?; + } else if save_all || save != "" { + let valve = Valve::build(&source, &destination, verbose, initial_load).await?; + let save_dir = { + if save_dir == "" { + None + } else { + Some(save_dir.clone()) + } + }; + if save_all { + valve.save_all_tables(&save_dir).unwrap(); + } else { + let tables = save.split(',').collect::>(); + valve.save_tables(&tables, &save_dir).unwrap(); + } } else if dump_config { - let config = valve( - &source, - &String::from(":memory:"), - &ValveCommand::Config, - false, - false, - &config_table, - ) - .await?; - let mut config: SerdeValue = serde_json::from_str(config.as_str()).unwrap(); - let config = config.as_object_mut().unwrap(); - let parser = StartParser::new(); - - let datatype_conditions = get_compiled_datatype_conditions(&config, &parser); - let structure_conditions = get_parsed_structure_conditions(&config, &parser); - let rule_conditions = - get_compiled_rule_conditions(&config, datatype_conditions.clone(), &parser); - - let datatype_conditions = format!("{:?}", datatype_conditions).replace(r"\", r"\\"); + let valve = Valve::build(&source, &destination, verbose, initial_load).await?; + let mut config = valve.config.clone(); + let datatype_conditions = + format!("{:?}", valve.compiled_datatype_conditions).replace(r"\", r"\\"); let datatype_conditions: SerdeValue = from_str(&datatype_conditions).unwrap(); config.insert(String::from("datatype_conditions"), datatype_conditions); - let structure_conditions = format!("{:?}", structure_conditions).replace(r"\", r"\\"); + let structure_conditions = + format!("{:?}", valve.parsed_structure_conditions).replace(r"\", r"\\"); let structure_conditions: SerdeValue = from_str(&structure_conditions).unwrap(); config.insert(String::from("structure_conditions"), structure_conditions); - let rule_conditions = format!("{:?}", rule_conditions).replace(r"\", r"\\"); + let rule_conditions = format!("{:?}", valve.compiled_rule_conditions).replace(r"\", r"\\"); let rule_conditions: SerdeValue = from_str(&rule_conditions).unwrap(); config.insert(String::from("rule_conditions"), rule_conditions); - let config = serde_json::to_string(config).unwrap(); + let config = serde_json::to_string(&config).unwrap(); println!("{}", config); + } else if dump_schema { + let valve = Valve::build(&source, &destination, verbose, initial_load).await?; + valve.dump_schema().await?; + } else if table_order { + let valve = Valve::build(&source, &destination, verbose, initial_load).await?; + let sorted_table_list = valve.get_sorted_table_list(false); + println!("{}", sorted_table_list.join(", ")); + } else if show_deps_in || show_deps_out { + let valve = Valve::build(&source, &destination, verbose, initial_load).await?; + let dependencies = valve.collect_dependencies(show_deps_in); + for (table, deps) in dependencies.iter() { + let deps = { + let deps = deps.iter().map(|s| format!("'{}'", s)).collect::>(); + if deps.is_empty() { + "None".to_string() + } else { + deps.join(", ") + } + }; + let preamble = { + if show_deps_in { + format!("Tables that depend on '{}'", table) + } else { + format!("Table '{}' depends on", table) + } + }; + println!("{}: {}", preamble, deps); + } + } else if drop_all { + let valve = Valve::build(&source, &destination, verbose, initial_load).await?; + valve.drop_all_tables().await?; } else if create_only { - valve( - &source, - &destination, - &ValveCommand::Create, - verbose, - false, - &config_table, - ) - .await?; + let valve = 
Valve::build(&source, &destination, verbose, initial_load).await?; + valve.create_all_tables().await?; } else { - valve( - &source, - &destination, - &ValveCommand::Load, - verbose, - initial_load, - &config_table, - ) - .await?; + let valve = Valve::build(&source, &destination, verbose, initial_load).await?; + valve.load_all_tables(true).await?; } Ok(()) diff --git a/src/validate.rs b/src/validate.rs index 326b9eca..26c70f53 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -1,18 +1,15 @@ -use enquote::unquote; +use crate::{ + cast_sql_param_from_text, error, get_column_value, get_sql_type_from_global_config, + is_sql_type_error, local_sql_syntax, + valve::{ValveError, ValveRow}, + ColumnRule, CompiledCondition, SerdeMap, +}; +use chrono::Utc; use indexmap::IndexMap; use serde_json::{json, Value as SerdeValue}; -use sqlx::{ - any::AnyPool, query as sqlx_query, Acquire, Error::Configuration as SqlxCErr, Row, Transaction, - ValueRef, -}; +use sqlx::{any::AnyPool, query as sqlx_query, Acquire, Row, Transaction, ValueRef}; use std::collections::HashMap; -use crate::{ - ast::Expression, cast_column_sql_to_text, cast_sql_param_from_text, get_column_value, - get_sql_type_from_global_config, is_sql_type_error, local_sql_syntax, ColumnRule, - CompiledCondition, ParsedStructure, SerdeMap, SQL_PARAM, -}; - /// Represents a particular cell in a particular row of data with validation results. #[derive(Clone, Debug)] pub struct ResultCell { @@ -46,26 +43,26 @@ pub struct QueryAsIf { // named 'foo' so we need to use an alias: pub alias: String, pub row_number: u32, - pub row: Option<SerdeMap>, + pub row: Option<ValveRow>, } /// Given a config map, maps of compiled datatype and rule conditions, a database connection -/// pool, a table name, a row to validate and a row number in the case where the row already exists, -/// perform both intra- and inter-row validation and return the validated row. Optionally, if a -/// transaction is given, use that instead of the pool for database access. Optionally, if -/// query_as_if is given, validate the row counterfactually according to that parameter. -/// Note that this function is idempotent. -pub async fn validate_row( +/// pool, a table name, a row to validate and a row number in the case where the row already +/// exists, perform both intra- and inter-row validation and return the validated row. +/// Optionally, if a transaction is given, use that instead of the pool for database access. +/// Optionally, if query_as_if is given, validate the row counterfactually according to that +/// parameter. Note that this function is idempotent. +pub async fn validate_row_tx( config: &SerdeMap, compiled_datatype_conditions: &HashMap<String, CompiledCondition>, compiled_rule_conditions: &HashMap<String, HashMap<String, Vec<ColumnRule>>>, pool: &AnyPool, tx: Option<&mut Transaction<'_, sqlx::Any>>, table_name: &str, - row: &SerdeMap, + row: &ValveRow, row_number: Option<u32>, query_as_if: Option<&QueryAsIf>, -) -> Result<SerdeMap, sqlx::Error> { +) -> Result<ValveRow, ValveError> { // Fallback to a default transaction if it is not given. Since we do not commit before it falls // out of scope the transaction will be rolled back at the end of this function. And since this // function is read-only the rollback is trivial and therefore inconsequential.
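The "fallback to a default transaction" comment above relies on sqlx's rollback-on-drop semantics: a transaction that goes out of scope without being committed is rolled back automatically. A minimal sketch of the idiom, assuming only a sqlx `AnyPool`; the function name here is illustrative and not part of this diff:

```rust
use sqlx::any::AnyPool;

// Illustrative only: read-only work done inside a transaction that is never
// committed leaves no trace in the database, because dropping the transaction
// at the end of the function triggers an implicit rollback.
async fn read_only_probe(pool: &AnyPool) -> Result<(), sqlx::Error> {
    let mut tx = pool.begin().await?;
    sqlx::query("SELECT 1").execute(&mut tx).await?;
    // No tx.commit() here: `tx` is dropped and the transaction rolls back.
    Ok(())
}
```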
@@ -86,7 +83,7 @@ pub async fn validate_row( None => None, Some(SerdeValue::String(s)) => Some(s.to_string()), _ => { - return Err(SqlxCErr( + return Err(ValveError::DataError( format!("No string 'nulltype' in cell: {:?}.", cell).into(), )) } @@ -95,7 +92,7 @@ pub async fn validate_row( Some(SerdeValue::String(s)) => s.to_string(), Some(SerdeValue::Number(n)) => format!("{}", n), _ => { - return Err(SqlxCErr( + return Err(ValveError::DataError( format!("No string/number 'value' in cell: {:#?}.", cell).into(), )) } @@ -103,7 +100,7 @@ pub async fn validate_row( let valid = match cell.get("valid").and_then(|v| v.as_bool()) { Some(b) => b, None => { - return Err(SqlxCErr( + return Err(ValveError::DataError( format!("No bool 'valid' in cell: {:?}.", cell).into(), )) } @@ -111,7 +108,7 @@ pub async fn validate_row( let messages = match cell.get("messages").and_then(|m| m.as_array()) { Some(a) => a.to_vec(), None => { - return Err(SqlxCErr( + return Err(ValveError::DataError( format!("No array 'messages' in cell: {:?}.", cell).into(), )) } @@ -244,195 +241,6 @@ pub async fn validate_row( Ok(result_row) } -/// Given a config map, a map of compiled datatype conditions, a database connection pool, a table -/// name, a column name, and (optionally) a string to match, return a JSON array of possible valid -/// values for the given column which contain the matching string as a substring (or all of them if -/// no matching string is given). The JSON array returned is formatted for Typeahead, i.e., it takes -/// the form: `[{"id": id, "label": label, "order": order}, ...]`. -pub async fn get_matching_values( - config: &SerdeMap, - compiled_datatype_conditions: &HashMap, - parsed_structure_conditions: &HashMap, - pool: &AnyPool, - table_name: &str, - column_name: &str, - matching_string: Option<&str>, -) -> Result { - let dt_name = config - .get("table") - .and_then(|t| t.as_object()) - .and_then(|t| t.get(table_name)) - .and_then(|t| t.as_object()) - .and_then(|t| t.get("column")) - .and_then(|c| c.as_object()) - .and_then(|c| c.get(column_name)) - .and_then(|c| c.as_object()) - .and_then(|c| c.get("datatype")) - .and_then(|d| d.as_str()) - .unwrap(); - - let dt_condition = compiled_datatype_conditions - .get(dt_name) - .and_then(|d| Some(d.parsed.clone())); - - let mut values = vec![]; - match dt_condition { - Some(Expression::Function(name, args)) if name == "in" => { - for arg in args { - if let Expression::Label(arg) = *arg { - // Remove the enclosing quotes from the values being returned: - let label = unquote(&arg).unwrap_or_else(|_| arg); - if let Some(s) = matching_string { - if label.contains(s) { - values.push(label); - } - } - } - } - } - _ => { - // If the datatype for the column does not correspond to an `in(...)` function, then we - // check the column's structure constraints. If they include a - // `from(foreign_table.foreign_column)` condition, then the values are taken from the - // foreign column. Otherwise if the structure includes an - // `under(tree_table.tree_column, value)` condition, then get the values from the tree - // column that are under `value`. 
- let structure = parsed_structure_conditions.get( - config - .get("table") - .and_then(|t| t.as_object()) - .and_then(|t| t.get(table_name)) - .and_then(|t| t.as_object()) - .and_then(|t| t.get("column")) - .and_then(|c| c.as_object()) - .and_then(|c| c.get(column_name)) - .and_then(|c| c.as_object()) - .and_then(|c| c.get("structure")) - .and_then(|d| d.as_str()) - .unwrap_or_else(|| ""), - ); - - let sql_type = - get_sql_type_from_global_config(&config, table_name, &column_name, pool).unwrap(); - - match structure { - Some(ParsedStructure { original, parsed }) => { - let matching_string = { - match matching_string { - None => "%".to_string(), - Some(s) => format!("%{}%", s), - } - }; - - match parsed { - Expression::Function(name, args) if name == "from" => { - let foreign_key = &args[0]; - if let Expression::Field(ftable, fcolumn) = &**foreign_key { - let fcolumn_text = cast_column_sql_to_text(&fcolumn, &sql_type); - let sql = local_sql_syntax( - &pool, - &format!( - r#"SELECT "{}" FROM "{}" WHERE {} LIKE {}"#, - fcolumn, ftable, fcolumn_text, SQL_PARAM - ), - ); - let rows = sqlx_query(&sql) - .bind(&matching_string) - .fetch_all(pool) - .await?; - for row in rows.iter() { - values.push(get_column_value(&row, &fcolumn, &sql_type)); - } - } - } - Expression::Function(name, args) if name == "under" || name == "tree" => { - let mut tree_col = "not set"; - let mut under_val = Some("not set".to_string()); - if name == "under" { - if let Expression::Field(_, column) = &**&args[0] { - tree_col = column; - } - if let Expression::Label(label) = &**&args[1] { - under_val = Some(label.to_string()); - } - } else { - let tree_key = &args[0]; - if let Expression::Label(label) = &**tree_key { - tree_col = label; - under_val = None; - } - } - - let tree = config - .get("constraints") - .and_then(|c| c.as_object()) - .and_then(|c| c.get("tree")) - .and_then(|t| t.as_object()) - .and_then(|t| t.get(table_name)) - .and_then(|t| t.as_array()) - .and_then(|t| { - t.iter().find(|o| o.get("child").unwrap() == tree_col) - }) - .expect( - format!("No tree: '{}.{}' found", table_name, tree_col) - .as_str(), - ) - .as_object() - .unwrap(); - let child_column = tree.get("child").and_then(|c| c.as_str()).unwrap(); - - let (tree_sql, mut params) = with_tree_sql( - &config, - tree, - &table_name.to_string(), - &table_name.to_string(), - under_val.as_ref(), - None, - pool, - ); - let child_column_text = - cast_column_sql_to_text(&child_column, &sql_type); - let sql = local_sql_syntax( - &pool, - &format!( - r#"{} SELECT "{}" FROM "tree" WHERE {} LIKE {}"#, - tree_sql, child_column, child_column_text, SQL_PARAM - ), - ); - params.push(matching_string); - - let mut query = sqlx_query(&sql); - for param in ¶ms { - query = query.bind(param); - } - - let rows = query.fetch_all(pool).await?; - for row in rows.iter() { - values.push(get_column_value(&row, &child_column, &sql_type)); - } - } - _ => panic!("Unrecognised structure: {}", original), - }; - } - None => (), - }; - } - }; - - let mut typeahead_values = vec![]; - for (i, v) in values.iter().enumerate() { - // enumerate() begins at 0 but we need to begin at 1: - let i = i + 1; - typeahead_values.push(json!({ - "id": v, - "label": v, - "order": i, - })); - } - - Ok(json!(typeahead_values)) -} - /// Given a config map, a db connection pool, a table name, and an optional extra row, validate /// any associated under constraints for the current column. Optionally, if a transaction is /// given, use that instead of the pool for database access. 
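The hunks that follow change return types wholesale from `sqlx::Error` to `ValveError` without touching any function bodies. This works because src/valve.rs (added later in this diff) defines `From` conversions on `ValveError`, so the `?` operator converts the underlying errors automatically. A condensed sketch of the pattern; the real enum has more variants:

```rust
#[derive(Debug)]
pub enum ValveError {
    DatabaseError(sqlx::Error),
}

impl From<sqlx::Error> for ValveError {
    fn from(e: sqlx::Error) -> Self {
        Self::DatabaseError(e)
    }
}

// Inside a function returning Result<_, ValveError>, a failing sqlx call can
// still be propagated with `?`, which calls From::from on the error value:
async fn example(pool: &sqlx::AnyPool) -> Result<(), ValveError> {
    sqlx::query("SELECT 1").execute(pool).await?;
    Ok(())
}
```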
@@ -442,7 +250,7 @@ pub async fn validate_under( mut tx: Option<&mut Transaction<'_, sqlx::Any>>, table_name: &String, extra_row: Option<&ResultRow>, -) -> Result<Vec<SerdeValue>, sqlx::Error> { +) -> Result<Vec<SerdeValue>, ValveError> { let mut results = vec![]; let ukeys = config .get("constraints") @@ -634,7 +442,7 @@ pub async fn validate_tree_foreign_keys( mut tx: Option<&mut Transaction<'_, sqlx::Any>>, table_name: &String, extra_row: Option<&ResultRow>, -) -> Result<Vec<SerdeValue>, sqlx::Error> { +) -> Result<Vec<SerdeValue>, ValveError> { let tkeys = config .get("constraints") .and_then(|c| c.as_object()) @@ -737,7 +545,7 @@ pub async fn validate_rows_trees( pool: &AnyPool, table_name: &String, rows: &mut Vec<ResultRow>, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { let column_names = config .get("table") .and_then(|t| t.get(table_name)) @@ -797,7 +605,7 @@ pub async fn validate_rows_constraints( pool: &AnyPool, table_name: &String, rows: &mut Vec<ResultRow>, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { let column_names = config .get("table") .and_then(|t| t.get(table_name)) @@ -860,8 +668,8 @@ pub async fn validate_rows_constraints( } /// Given a config map, compiled datatype and rule conditions, a table name, the headers for the -/// table, and a number of rows to validate, validate all of the rows and return the validated -/// versions. +/// table, and a number of rows to validate, run intra-row validation on all of the rows and +/// return the validated versions. pub fn validate_rows_intra( config: &SerdeMap, compiled_datatype_conditions: &HashMap<String, CompiledCondition>, compiled_rule_conditions: &HashMap<String, HashMap<String, Vec<ColumnRule>>>, table_name: &String, headers: &csv::StringRecord, rows: &Vec<Result<csv::StringRecord, csv::Error>>, + only_nulltype: bool, ) -> Vec<ResultRow> { let mut result_rows = vec![]; for row in rows { match row { - Err(err) => eprintln!("Error while processing row for '{}': {}", table_name, err), + Err(err) => error!( + "While processing row for '{}', got error '{}'", + table_name, err + ), Ok(row) => { let mut result_row = ResultRow { row_number: None, @@ -913,26 +725,28 @@ pub fn validate_rows_intra( ); } - for column_name in &column_names { - let context = result_row.clone(); - let cell = result_row.contents.get_mut(column_name).unwrap(); - validate_cell_rules( - config, - compiled_rule_conditions, - table_name, - &column_name, - &context, - cell, - ); - - if cell.nulltype == None { - validate_cell_datatype( + if !only_nulltype { + for column_name in &column_names { + let context = result_row.clone(); + let cell = result_row.contents.get_mut(column_name).unwrap(); + validate_cell_rules( config, - compiled_datatype_conditions, + compiled_rule_conditions, table_name, &column_name, + &context, cell, ); + + if cell.nulltype == None { + validate_cell_datatype( + config, + compiled_datatype_conditions, + table_name, + &column_name, + cell, + ); + } } } result_rows.push(result_row); @@ -944,10 +758,10 @@ pub fn validate_rows_intra( result_rows } -/// Given a row represented as a SerdeMap, remove any duplicate messages from the row's cells, so +/// Given a row represented as a ValveRow, remove any duplicate messages from the row's cells, so /// that no cell has messages with the same level, rule, and message text.
-fn remove_duplicate_messages(row: &SerdeMap) -> Result { - let mut deduped_row = SerdeMap::new(); +pub fn remove_duplicate_messages(row: &ValveRow) -> Result { + let mut deduped_row = ValveRow::new(); for (column_name, cell) in row.iter() { let mut messages = cell .get("messages") @@ -981,12 +795,12 @@ fn remove_duplicate_messages(row: &SerdeMap) -> Result { Ok(deduped_row) } -/// Given a result row, convert it to a SerdeMap and return it. +/// Given a result row, convert it to a ValveRow and return it. /// Note that if the incoming result row has an associated row_number, this is ignored. -fn result_row_to_config_map(incoming: &ResultRow) -> SerdeMap { - let mut outgoing = SerdeMap::new(); +pub fn result_row_to_config_map(incoming: &ResultRow) -> ValveRow { + let mut outgoing = ValveRow::new(); for (column, cell) in incoming.contents.iter() { - let mut cell_map = SerdeMap::new(); + let mut cell_map = ValveRow::new(); if let Some(nulltype) = &cell.nulltype { cell_map.insert( "nulltype".to_string(), @@ -1009,7 +823,7 @@ fn result_row_to_config_map(incoming: &ResultRow) -> SerdeMap { /// Generate a SQL Select clause that is a union of: (a) the literal values of the given extra row, /// and (b) a Select statement over `table_name` of all the fields in the extra row. -fn select_with_extra_row( +pub fn select_with_extra_row( config: &SerdeMap, extra_row: &ResultRow, table: &str, @@ -1057,7 +871,7 @@ fn select_with_extra_row( /// Given a map representing a tree constraint, a table name, a root from which to generate a /// sub-tree of the tree, and an extra SQL clause, generate the SQL for a WITH clause representing /// the sub-tree. -fn with_tree_sql( +pub fn with_tree_sql( config: &SerdeMap, tree: &SerdeMap, table_name: &str, @@ -1111,7 +925,7 @@ fn with_tree_sql( /// validate, validate the cell's nulltype condition. If the cell's value is one of the allowable /// nulltype values for this column, then fill in the cell's nulltype value before returning the /// cell. -fn validate_cell_nulltype( +pub fn validate_cell_nulltype( config: &SerdeMap, compiled_datatype_conditions: &HashMap, table_name: &String, @@ -1138,7 +952,7 @@ fn validate_cell_nulltype( /// Given a config map, compiled datatype conditions, a table name, a column name, and a cell to /// validate, validate the cell's datatype and return the validated cell. -fn validate_cell_datatype( +pub fn validate_cell_datatype( config: &SerdeMap, compiled_datatype_conditions: &HashMap, table_name: &String, @@ -1254,7 +1068,7 @@ fn validate_cell_datatype( /// Given a config map, compiled rule conditions, a table name, a column name, the row context, /// and the cell to validate, look in the rule table (if it exists) and validate the cell according /// to any applicable rules. -fn validate_cell_rules( +pub fn validate_cell_rules( config: &SerdeMap, compiled_rules: &HashMap>>, table_name: &String, @@ -1345,7 +1159,7 @@ fn validate_cell_rules( /// Generates an SQL fragment representing the "as if" portion of a query that will be used for /// counterfactual validation. -fn as_if_to_sql( +pub fn as_if_to_sql( global_config: &SerdeMap, pool: &AnyPool, as_if: &QueryAsIf, @@ -1464,7 +1278,7 @@ fn as_if_to_sql( /// check the cell value against any foreign keys that have been defined for the column. If there is /// a violation, indicate it with an error message attached to the cell. Optionally, if a /// transaction is given, use that instead of the pool for database access. 
-async fn validate_cell_foreign_constraints( +pub async fn validate_cell_foreign_constraints( config: &SerdeMap, pool: &AnyPool, mut tx: Option<&mut Transaction<'_, sqlx::Any>>, @@ -1472,7 +1286,7 @@ async fn validate_cell_foreign_constraints( column_name: &String, cell: &mut ResultCell, query_as_if: Option<&QueryAsIf>, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { let fkeys = config .get("constraints") .and_then(|c| c.as_object()) @@ -1602,7 +1416,7 @@ async fn validate_cell_foreign_constraints( /// validate that none of the "tree" constraints on the column are violated, and indicate any /// violations by attaching error messages to the cell. Optionally, if a transaction is /// given, use that instead of the pool for database access. -async fn validate_cell_trees( +pub async fn validate_cell_trees( config: &SerdeMap, pool: &AnyPool, mut tx: Option<&mut Transaction<'_, sqlx::Any>>, @@ -1611,7 +1425,7 @@ async fn validate_cell_trees( cell: &mut ResultCell, context: &ResultRow, prev_results: &Vec, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { // If the current column is the parent column of a tree, validate that adding the current value // will not result in a cycle between this and the parent column: let tkeys = config @@ -1784,7 +1598,7 @@ async fn validate_cell_trees( /// `row_number` is set to None, then no row corresponding to the given cell is assumed to exist /// in the table. Optionally, if a transaction is given, use that instead of the pool for database /// access. -async fn validate_cell_unique_constraints( +pub async fn validate_cell_unique_constraints( config: &SerdeMap, pool: &AnyPool, mut tx: Option<&mut Transaction<'_, sqlx::Any>>, @@ -1793,7 +1607,7 @@ async fn validate_cell_unique_constraints( cell: &mut ResultCell, prev_results: &Vec, row_number: Option, -) -> Result<(), sqlx::Error> { +) -> Result<(), ValveError> { // If the column has a primary or unique key constraint, or if it is the child associated with // a tree, then if the value of the cell is a duplicate either of one of the previously // validated rows in the batch, or a duplicate of a validated row that has already been inserted diff --git a/src/valve.rs b/src/valve.rs new file mode 100644 index 00000000..9866390a --- /dev/null +++ b/src/valve.rs @@ -0,0 +1,1828 @@ +use crate::{ + add_message_counts, + ast::Expression, + cast_column_sql_to_text, delete_row_tx, get_column_value, get_compiled_datatype_conditions, + get_compiled_rule_conditions, get_json_from_row, get_parsed_structure_conditions, + get_pool_from_connection_string, get_row_from_db, get_sql_for_standard_view, + get_sql_for_text_view, get_sql_type, get_sql_type_from_global_config, get_table_ddl, info, + insert_chunks, insert_new_row_tx, local_sql_syntax, read_config_files, record_row_change, + switch_undone_state, update_row_tx, + validate::{validate_row_tx, validate_tree_foreign_keys, validate_under, with_tree_sql}, + valve_grammar::StartParser, + verify_table_deps_and_sort, warn, ColumnRule, CompiledCondition, ParsedStructure, SerdeMap, + CHUNK_SIZE, SQL_PARAM, +}; +use chrono::Utc; +use csv::{QuoteStyle, ReaderBuilder, WriterBuilder}; +use enquote::unquote; +use futures::{executor::block_on, TryStreamExt}; +use indexmap::IndexMap; +use indoc::indoc; +use itertools::Itertools; +use regex::Regex; +use serde_json::{json, Value as SerdeValue}; +use sqlx::{ + any::{AnyKind, AnyPool, AnyRow}, + query as sqlx_query, Row, ValueRef, +}; +use std::{collections::HashMap, fs::File, path::Path}; + +/// Alias for 
[serde_json::Map](..//serde_json/struct.Map.html). +// Note: serde_json::Map is +// [backed by a BTreeMap by default](https://docs.serde.rs/serde_json/map/index.html) +pub type ValveRow = serde_json::Map<String, SerdeValue>; + +/// Main entrypoint for the Valve API. +#[derive(Clone, Debug)] +pub struct Valve { + /// The valve configuration map. + pub config: SerdeMap, + /// Pre-compiled datatype conditions. + pub compiled_datatype_conditions: HashMap<String, CompiledCondition>, + /// Pre-compiled rule conditions. + pub compiled_rule_conditions: HashMap<String, HashMap<String, Vec<ColumnRule>>>, + /// Parsed structure conditions. + pub parsed_structure_conditions: HashMap<String, ParsedStructure>, + /// Lists of tables that depend on a given table, indexed by table. + pub table_dependencies_in: HashMap<String, Vec<String>>, + /// Lists of tables that a given table depends on, indexed by table. + pub table_dependencies_out: HashMap<String, Vec<String>>, + /// The database connection pool. + pub pool: AnyPool, + /// The user associated with this valve instance. + pub user: String, + /// Produce more logging output. + pub verbose: bool, +} + +#[derive(Debug)] +pub enum ValveError { + /// An error in the Valve configuration: + ConfigError(String), + /// An error that occurred while reading or writing to a CSV/TSV: + CsvError(csv::Error), + /// An error involving the data: + DataError(String), + /// An error generated by the underlying database: + DatabaseError(sqlx::Error), + /// An error in the inputs to a function: + InputError(String), + /// An error that occurred while reading/writing to stdio: + IOError(std::io::Error), + /// An error that occurred while serialising or deserialising to/from JSON: + SerdeJsonError(serde_json::Error), +} + +impl From<csv::Error> for ValveError { + fn from(e: csv::Error) -> Self { + Self::CsvError(e) + } +} + +impl From<sqlx::Error> for ValveError { + fn from(e: sqlx::Error) -> Self { + Self::DatabaseError(e) + } +} + +impl From<serde_json::Error> for ValveError { + fn from(e: serde_json::Error) -> Self { + Self::SerdeJsonError(e) + } +} + +impl From<std::io::Error> for ValveError { + fn from(e: std::io::Error) -> Self { + Self::IOError(e) + } +} + +impl Valve { + /// Given a path to a table table, a path to a database, a flag for verbose output, and a flag + /// indicating whether the database should be configured for initial loading: Set up a database + /// connection, configure VALVE, and return a new Valve struct. + pub async fn build( + table_path: &str, + database: &str, + verbose: bool, + initial_load: bool, + ) -> Result<Self, ValveError> { + let pool = get_pool_from_connection_string(database).await?; + if pool.any_kind() == AnyKind::Sqlite { + sqlx_query("PRAGMA foreign_keys = ON") + .execute(&pool) + .await?; + if initial_load { + // These pragmas are unsafe but they are used during initial loading since data + // integrity is not a priority in this case.
+ sqlx_query("PRAGMA journal_mode = OFF") + .execute(&pool) + .await?; + sqlx_query("PRAGMA synchronous = 0").execute(&pool).await?; + sqlx_query("PRAGMA cache_size = 1000000") + .execute(&pool) + .await?; + sqlx_query("PRAGMA temp_store = MEMORY") + .execute(&pool) + .await?; + } + } + + let parser = StartParser::new(); + let ( + specials_config, + tables_config, + datatypes_config, + rules_config, + constraints_config, + sorted_table_list, + table_dependencies_in, + table_dependencies_out, + ) = read_config_files(table_path, &parser, &pool); + + let mut config = SerdeMap::new(); + config.insert( + String::from("special"), + SerdeValue::Object(specials_config.clone()), + ); + config.insert( + String::from("table"), + SerdeValue::Object(tables_config.clone()), + ); + config.insert( + String::from("datatype"), + SerdeValue::Object(datatypes_config.clone()), + ); + config.insert( + String::from("rule"), + SerdeValue::Object(rules_config.clone()), + ); + config.insert( + String::from("constraints"), + SerdeValue::Object(constraints_config.clone()), + ); + let mut sorted_table_serdevalue_list: Vec = vec![]; + for table in &sorted_table_list { + sorted_table_serdevalue_list.push(SerdeValue::String(table.to_string())); + } + config.insert( + String::from("sorted_table_list"), + SerdeValue::Array(sorted_table_serdevalue_list), + ); + + let compiled_datatype_conditions = get_compiled_datatype_conditions(&config, &parser); + let compiled_rule_conditions = + get_compiled_rule_conditions(&config, compiled_datatype_conditions.clone(), &parser); + let parsed_structure_conditions = get_parsed_structure_conditions(&config, &parser); + + Ok(Self { + config: config, + compiled_datatype_conditions: compiled_datatype_conditions, + compiled_rule_conditions: compiled_rule_conditions, + parsed_structure_conditions: parsed_structure_conditions, + table_dependencies_in: table_dependencies_in, + table_dependencies_out: table_dependencies_out, + pool: pool, + user: String::from("VALVE"), + verbose: verbose, + }) + } + + /// Convenience function to retrieve the path to Valve's "table table", the main entrypoint + /// to Valve's configuration. + pub fn get_path(&self) -> String { + self.config + .get("table") + .and_then(|t| t.as_object()) + .and_then(|t| t.get("table")) + .and_then(|t| t.as_object()) + .and_then(|t| t.get("path")) + .and_then(|p| p.as_str()) + .unwrap() + .to_string() + } + + /// Controls the maximum length of a username. + const USERNAME_MAX_LEN: usize = 20; + + /// Sets the user name, which must be a short, trimmed, string without newlines, for this Valve + /// instance. + pub fn set_user(&mut self, user: &str) -> Result<&mut Self, ValveError> { + if user.len() > Self::USERNAME_MAX_LEN { + return Err(ValveError::ConfigError(format!( + "Username '{}' is longer than {} characters.", + user, + Self::USERNAME_MAX_LEN + ))); + } else { + let user_regex = Regex::new(r#"^\S([^\n]*\S)*$"#).unwrap(); + if !user_regex.is_match(user) { + return Err(ValveError::ConfigError(format!( + "Username '{}' is not a short, trimmed, string without newlines.", + user, + ))); + } + } + self.user = user.to_string(); + Ok(self) + } + + /// Given a SQL string, execute it using the connection pool associated with the Valve instance. + async fn execute_sql(&self, sql: &str) -> Result<(), ValveError> { + sqlx_query(&sql).execute(&self.pool).await?; + Ok(()) + } + + /// Return the list of configured tables in sorted order, or reverse sorted order if the + /// reverse flag is set. 
+ pub fn get_sorted_table_list(&self, reverse: bool) -> Vec<&str> { + let mut sorted_tables = self + .config + .get("sorted_table_list") + .and_then(|l| l.as_array()) + .and_then(|l| Some(l.iter().map(|i| i.as_str().unwrap()))) + .and_then(|l| Some(l.collect::<Vec<_>>())) + .unwrap(); + if reverse { + sorted_tables.reverse(); + } + sorted_tables + } + + /// Given the name of a table, determine whether its current instantiation in the database + /// differs from the way it has been configured. The answer to this question is yes whenever + /// (1) the number of columns or any of their names differs from their configured values, or + /// the order of database columns differs from the configured order; (2) The SQL type of one or + /// more columns does not match the configured SQL type for that column; (3) Some column with a + /// 'unique', 'primary', or 'from(table, column)' in its column configuration fails to be + /// associated, in the database, with a unique constraint, primary key, or foreign key, + /// respectively; or vice versa; (4) The table does not exist in the database. + pub async fn table_has_changed(&self, table: &str) -> Result<bool, ValveError> { + // A closure that, given a parsed structure condition, a table and column name, and an + // unsigned integer representing whether the given column, in the case of a SQLite database, + // is a primary key (in the case of PostgreSQL, the sqlite_pk parameter is ignored): + // determine whether the structure of the column is properly reflected in the db. E.g., a + // `from(table.column)` struct should be associated with a foreign key, `primary` with a + // primary key, `unique` with a unique constraint. + let structure_has_changed = |pstruct: &Expression, + table: &str, + column: &str, + sqlite_pk: &u32| + -> Result<bool, ValveError> { + // A closure to determine whether the given column has the given constraint type, which + // can be one of 'UNIQUE', 'PRIMARY KEY', 'FOREIGN KEY': + let column_has_constraint_type = |constraint_type: &str| -> Result<bool, ValveError> { + if self.pool.any_kind() == AnyKind::Postgres { + let sql = format!( + r#"SELECT 1 + FROM information_schema.table_constraints tco + JOIN information_schema.key_column_usage kcu + ON kcu.constraint_name = tco.constraint_name + AND kcu.constraint_schema = tco.constraint_schema + AND kcu.table_name = '{}' + WHERE tco.constraint_type = '{}' + AND kcu.column_name = '{}'"#, + table, constraint_type, column + ); + let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; + if rows.len() > 1 { + unreachable!(); + } + Ok(rows.len() == 1) + } else { + if constraint_type == "PRIMARY KEY" { + return Ok(*sqlite_pk == 1); + } else if constraint_type == "UNIQUE" { + let sql = format!(r#"PRAGMA INDEX_LIST("{}")"#, table); + for row in block_on(sqlx_query(&sql).fetch_all(&self.pool))? { + let idx_name = row.get::<String, _>("name"); + let unique = row.get::<i16, _>("unique") as u8; + if unique == 1 { + let sql = format!(r#"PRAGMA INDEX_INFO("{}")"#, idx_name); + let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; + if rows.len() == 1 { + let cname = rows[0].get::<String, _>("name"); + if cname == column { + return Ok(true); + } + } + } + } + Ok(false) + } else if constraint_type == "FOREIGN KEY" { + let sql = format!(r#"PRAGMA FOREIGN_KEY_LIST("{}")"#, table); + for row in block_on(sqlx_query(&sql).fetch_all(&self.pool))? 
{ + let cname = row.get::("from"); + if cname == column { + return Ok(true); + } + } + Ok(false) + } else { + return Err(ValveError::InputError( + format!("Unrecognized constraint type: '{}'", constraint_type).into(), + )); + } + } + }; + + // Check if there is a change to whether this column is a primary/unique key: + let is_primary = match pstruct { + Expression::Label(label) if label == "primary" => true, + _ => false, + }; + if is_primary != column_has_constraint_type("PRIMARY KEY")? { + return Ok(true); + } else if !is_primary { + let is_unique = match pstruct { + Expression::Label(label) if label == "unique" => true, + _ => false, + }; + let unique_in_db = column_has_constraint_type("UNIQUE")?; + if is_unique != unique_in_db { + // A child of a tree constraint implies a unique db constraint, so if there is a + // unique constraint in the db that is not configured, that is the explanation, + // and in that case we do not count this as a change to the column. + if !unique_in_db { + return Ok(true); + } else { + let trees = + self.config + .get("constraints") + .and_then(|c| c.as_object()) + .and_then(|o| o.get("tree")) + .and_then(|t| t.as_object()) + .and_then(|o| o.get(table)) + .and_then(|t| t.as_array()) + .and_then(|a| { + Some(a.iter().map(|o| { + o.as_object().and_then(|o| o.get("child")).unwrap() + })) + }) + .unwrap() + .collect::>(); + if !trees.contains(&&SerdeValue::String(column.to_string())) { + return Ok(true); + } + } + } + } + + match pstruct { + Expression::Function(name, args) if name == "from" => { + match &*args[0] { + Expression::Field(cfg_ftable, cfg_fcolumn) => { + if self.pool.any_kind() == AnyKind::Sqlite { + let sql = format!(r#"PRAGMA FOREIGN_KEY_LIST("{}")"#, table); + for row in block_on(sqlx_query(&sql).fetch_all(&self.pool))? { + let from = row.get::("from"); + if from == column { + let db_ftable = row.get::("table"); + let db_fcolumn = row.get::("to"); + if *cfg_ftable != db_ftable || *cfg_fcolumn != db_fcolumn { + return Ok(true); + } + } + } + } else { + let sql = format!( + r#"SELECT + ccu.table_name AS foreign_table_name, + ccu.column_name AS foreign_column_name + FROM information_schema.table_constraints AS tc + JOIN information_schema.key_column_usage AS kcu + ON tc.constraint_name = kcu.constraint_name + AND tc.table_schema = kcu.table_schema + JOIN information_schema.constraint_column_usage AS ccu + ON ccu.constraint_name = tc.constraint_name + WHERE tc.constraint_type = 'FOREIGN KEY' + AND tc.table_name = '{}' + AND kcu.column_name = '{}'"#, + table, column + ); + let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; + if rows.len() == 0 { + // If the table doesn't even exist return true. 
+ return Ok(true); + } else if rows.len() > 1 { + // This seems impossible given how PostgreSQL works: + unreachable!(); + } else { + let row = &rows[0]; + let db_ftable = row.get::("foreign_table_name"); + let db_fcolumn = row.get::("foreign_column_name"); + if *cfg_ftable != db_ftable || *cfg_fcolumn != db_fcolumn { + return Ok(true); + } + } + } + } + _ => { + return Err(ValveError::InputError( + format!("Unrecognized structure: {:?}", pstruct).into(), + )); + } + }; + } + _ => (), + }; + + Ok(false) + }; + + let (columns_config, configured_column_order) = { + let table_config = self + .config + .get("table") + .and_then(|tc| tc.get(table)) + .and_then(|t| t.as_object()) + .unwrap(); + let columns_config = table_config + .get("column") + .and_then(|c| c.as_object()) + .unwrap(); + let configured_column_order = { + let mut configured_column_order = { + if table == "message" { + vec!["message_id".to_string()] + } else if table == "history" { + vec!["history_id".to_string()] + } else { + vec!["row_number".to_string()] + } + }; + configured_column_order.append( + &mut table_config + .get("column_order") + .and_then(|c| c.as_array()) + .and_then(|a| Some(a.iter())) + .and_then(|a| Some(a.map(|c| c.as_str().unwrap().to_string()))) + .and_then(|a| Some(a.collect::>())) + .unwrap(), + ); + configured_column_order + }; + + (columns_config, configured_column_order) + }; + + let db_columns_in_order = { + if self.pool.any_kind() == AnyKind::Sqlite { + let sql = format!( + r#"SELECT 1 FROM sqlite_master WHERE "type" = 'table' AND "name" = '{}'"#, + table + ); + let rows = sqlx_query(&sql).fetch_all(&self.pool).await?; + if rows.len() == 0 { + if self.verbose { + info!( + "The table '{}' will be recreated as it does not exist in the database", + table + ); + } + return Ok(true); + } else if rows.len() == 1 { + // Otherwise send another query to the db to get the column info: + let sql = format!(r#"PRAGMA TABLE_INFO("{}")"#, table); + let rows = block_on(sqlx_query(&sql).fetch_all(&self.pool))?; + rows.iter() + .map(|r| { + ( + r.get::("name"), + r.get::("type"), + r.get::("pk") as u32, + ) + }) + .collect::>() + } else { + unreachable!(); + } + } else { + let sql = format!( + r#"SELECT "column_name", "data_type" + FROM "information_schema"."columns" + WHERE "table_name" = '{}' + ORDER BY "ordinal_position""#, + table, + ); + let rows = sqlx_query(&sql).fetch_all(&self.pool).await?; + if rows.len() == 0 { + if self.verbose { + info!( + "The table '{}' will be recreated as it does not exist in the database", + table + ); + } + return Ok(true); + } + // Otherwise we get the column name: + rows.iter() + .map(|r| { + ( + r.get::("column_name"), + r.get::("data_type"), + // The third entry is just a dummy so that the datatypes in the two + // wings of this if/else block match. 
+ 0, + ) + }) + .collect::<Vec<_>>() + } + }; + + // Check if the order of the configured columns matches the order of the columns in the + // database: + let db_column_order = db_columns_in_order + .iter() + .map(|c| c.0.clone()) + .collect::<Vec<_>>(); + if db_column_order != configured_column_order { + if self.verbose { + info!( + "The table '{}' will be recreated since the database columns: {:?} \ + and/or their order does not match the configured columns: {:?}", + table, db_column_order, configured_column_order + ); + } + return Ok(true); + } + + // Check, for all tables, whether their column configuration matches the contents of the + // database: + for (cname, ctype, pk) in &db_columns_in_order { + // Do not consider these special columns: + if (table == "message" && cname == "message_id") + || (table == "message" && cname == "row") + || (table == "history" && cname == "history_id") + || (table == "history" && cname == "timestamp") + || (table == "history" && cname == "row") + || cname == "row_number" + { + continue; + } + let column_config = columns_config + .get(cname) + .and_then(|c| c.as_object()) + .unwrap(); + let sql_type = + get_sql_type_from_global_config(&self.config, table, &cname, &self.pool).unwrap(); + + // Check the column's SQL type: + if sql_type.to_lowercase() != ctype.to_lowercase() { + let s = sql_type.to_lowercase(); + let c = ctype.to_lowercase(); + // CHARACTER VARYING and VARCHAR are synonyms so we ignore this difference. + if !((s.starts_with("varchar") || s.starts_with("character varying")) + && (c.starts_with("varchar") || c.starts_with("character varying"))) + { + if self.verbose { + info!( + "The table '{}' will be recreated because the SQL type of column '{}', \ + {}, does not match the configured value: {}", + table, + cname, + ctype, + sql_type + ); + } + return Ok(true); + } + } + + // Check the column's structure: + let structure = column_config.get("structure").and_then(|d| d.as_str()); + match structure { + Some(structure) if structure != "" => { + let parsed_structure = self + .parsed_structure_conditions + .get(structure) + .and_then(|p| Some(p.parsed.clone())) + .unwrap(); + if structure_has_changed(&parsed_structure, table, &cname, &pk)? { + if self.verbose { + info!( + "The table '{}' will be recreated because the database \ + constraints for column '{}' do not match the configured \ + structure, '{}'", + table, cname, structure + ); + } + return Ok(true); + } + } + _ => (), + }; + } + + Ok(false) + } + + /// Generates and returns the DDL required to set up the database. + pub async fn get_setup_statements(&self) -> Result<HashMap<String, Vec<String>>, ValveError> { + let tables_config = self + .config + .get("table") + .and_then(|t| t.as_object()) + .unwrap() + .clone(); + let datatypes_config = self + .config + .get("datatype") + .and_then(|d| d.as_object()) + .unwrap() + .clone(); + + let parser = StartParser::new(); + + // Begin by reading in the TSV files corresponding to the tables defined in tables_config, + // and use that information to create the associated database tables, while saving + // constraint information to constraints_config. 
+ let mut setup_statements = HashMap::new(); + for table_name in tables_config.keys().cloned().collect::>() { + // Generate the statements for creating the table and its corresponding conflict table: + let mut table_statements = vec![]; + for table in vec![table_name.to_string(), format!("{}_conflict", table_name)] { + let mut statements = get_table_ddl( + &tables_config, + &datatypes_config, + &parser, + &table, + &self.pool, + ); + table_statements.append(&mut statements); + } + + let create_view_sql = get_sql_for_standard_view(&table_name, &self.pool); + let create_text_view_sql = + get_sql_for_text_view(&tables_config, &table_name, &self.pool); + table_statements.push(create_view_sql); + table_statements.push(create_text_view_sql); + + setup_statements.insert(table_name.to_string(), table_statements); + } + + let text_type = get_sql_type(&datatypes_config, &"text".to_string(), &self.pool).unwrap(); + + // Generate DDL for the history table: + let mut history_statements = vec![]; + history_statements.push(format!( + indoc! {r#" + CREATE TABLE "history" ( + {history_id} + "table" {text_type}, + "row" BIGINT, + "from" {text_type}, + "to" {text_type}, + "summary" {text_type}, + "user" {text_type}, + "undone_by" {text_type}, + {timestamp} + ); + "#}, + history_id = { + if self.pool.any_kind() == AnyKind::Sqlite { + "\"history_id\" INTEGER PRIMARY KEY," + } else { + "\"history_id\" SERIAL PRIMARY KEY," + } + }, + text_type = text_type, + timestamp = { + if self.pool.any_kind() == AnyKind::Sqlite { + "\"timestamp\" TIMESTAMP DEFAULT(STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW'))" + } else { + "\"timestamp\" TIMESTAMP DEFAULT CURRENT_TIMESTAMP" + } + }, + )); + history_statements + .push(r#"CREATE INDEX "history_tr_idx" ON "history"("table", "row");"#.to_string()); + setup_statements.insert("history".to_string(), history_statements); + + // Generate DDL for the message table: + let mut message_statements = vec![]; + message_statements.push(format!( + indoc! {r#" + CREATE TABLE "message" ( + {message_id} + "table" {text_type}, + "row" BIGINT, + "column" {text_type}, + "value" {text_type}, + "level" {text_type}, + "rule" {text_type}, + "message" {text_type} + ); + "#}, + message_id = { + if self.pool.any_kind() == AnyKind::Sqlite { + "\"message_id\" INTEGER PRIMARY KEY," + } else { + "\"message_id\" SERIAL PRIMARY KEY," + } + }, + text_type = text_type, + )); + message_statements.push( + r#"CREATE INDEX "message_trc_idx" ON "message"("table", "row", "column");"#.to_string(), + ); + setup_statements.insert("message".to_string(), message_statements); + + return Ok(setup_statements); + } + + /// Writes the database schema to stdout. + pub async fn dump_schema(&self) -> Result<(), ValveError> { + let setup_statements = self.get_setup_statements().await?; + for table in self.get_sorted_table_list(false) { + let table_statements = setup_statements.get(table).unwrap(); + let output = String::from(table_statements.join("\n")); + println!("{}\n", output); + } + Ok(()) + } + + /// Create all configured database tables and views if they do not already exist as configured. + pub async fn create_all_tables(&self) -> Result<&Self, ValveError> { + let setup_statements = self.get_setup_statements().await?; + let sorted_table_list = self.get_sorted_table_list(false); + for table in &sorted_table_list { + if self.table_has_changed(*table).await? 
{ + self.drop_tables(&vec![table]).await?; + let table_statements = setup_statements.get(*table).unwrap(); + for stmt in table_statements { + self.execute_sql(stmt).await?; + } + } + } + + Ok(self) + } + + /// Checks whether the given table exists in the database. + pub async fn table_exists(&self, table: &str) -> Result { + let sql = { + if self.pool.any_kind() == AnyKind::Sqlite { + format!( + r#"SELECT 1 + FROM "sqlite_master" + WHERE "type" = 'table' AND name = '{}' + LIMIT 1"#, + table + ) + } else { + format!( + r#"SELECT 1 + FROM "information_schema"."tables" + WHERE "table_schema" = 'public' + AND "table_name" = '{}'"#, + table + ) + } + }; + let query = sqlx_query(&sql); + let rows = query.fetch_all(&self.pool).await?; + return Ok(rows.len() > 0); + } + + /// Get all the incoming (tables that depend on it) or outgoing (tables it depends on) + /// dependencies of the given table. + pub fn get_dependencies(&self, table: &str, incoming: bool) -> Vec { + let mut dependent_tables = vec![]; + if table != "message" && table != "history" { + let direct_deps = { + if incoming { + self.table_dependencies_in.get(table).unwrap().to_vec() + } else { + self.table_dependencies_out.get(table).unwrap().to_vec() + } + }; + for direct_dep in direct_deps { + let mut indirect_deps = self.get_dependencies(&direct_dep, incoming); + dependent_tables.append(&mut indirect_deps); + dependent_tables.push(direct_dep); + } + } + dependent_tables + } + + /// Given a list of tables, fill it in with any further tables that are dependent upon tables + /// in the given list. If deletion_order is true, the tables are sorted as required for + /// deleting them all sequentially, otherwise they are ordered in reverse. + pub fn add_dependencies(&self, tables: &Vec<&str>, deletion_order: bool) -> Vec { + let mut with_dups = vec![]; + for table in tables { + let dependent_tables = self.get_dependencies(table, true); + for dep_table in dependent_tables { + with_dups.push(dep_table.to_string()); + } + with_dups.push(table.to_string()); + } + // The algorithm above gives the tables in the order needed for deletion. But we want + // this function to return the creation order by default so we reverse it unless + // the deletion_order flag is set to true. + if !deletion_order { + with_dups.reverse(); + } + + // Remove the duplicates from the returned table list: + let mut tables_in_order = vec![]; + for table in with_dups.iter().unique() { + tables_in_order.push(table.to_string()); + } + tables_in_order + } + + /// Given a subset of the configured tables, return them in sorted dependency order, or in + /// reverse if `reverse` is set to true. + pub fn sort_tables( + &self, + table_subset: &Vec<&str>, + reverse: bool, + ) -> Result, ValveError> { + let full_table_list = self.get_sorted_table_list(false); + if !table_subset + .iter() + .all(|item| full_table_list.contains(item)) + { + return Err(ValveError::InputError(format!( + "[{}] contains tables that are not in the configured table list: [{}]", + table_subset.join(", "), + full_table_list.join(", ") + ))); + } + + let constraints_config = self + .config + .get("constraints") + .and_then(|c| c.as_object()) + .ok_or(ValveError::ConfigError( + "Unable to retrieve configured constraints.".into(), + ))?; + + // Filter out message and history since they are not represented in the constraints config. + // They will be added implicitly to the list returned by verify_table_deps_and_sort. 
+        let filtered_subset = table_subset
+            .iter()
+            .filter(|m| **m != "history" && **m != "message")
+            .map(|s| s.to_string())
+            .collect::<Vec<_>>();
+
+        let (sorted_subset, _, _) =
+            verify_table_deps_and_sort(&filtered_subset, &constraints_config);
+
+        // Since the result of verify_table_deps_and_sort() will include dependencies of the tables
+        // in its input list, we filter those out here:
+        let mut sorted_subset = sorted_subset
+            .iter()
+            .filter(|m| table_subset.contains(&m.as_str()))
+            .map(|s| s.to_string())
+            .collect::<Vec<_>>();
+
+        if reverse {
+            sorted_subset.reverse();
+        }
+        Ok(sorted_subset)
+    }
+
+    /// Returns an IndexMap, indexed by configured table, containing lists of their dependencies.
+    /// If incoming is true, the lists are incoming dependencies, else they are outgoing.
+    pub fn collect_dependencies(&self, incoming: bool) -> IndexMap<String, Vec<String>> {
+        let tables = self.get_sorted_table_list(false);
+        let mut dependencies = IndexMap::new();
+        for table in tables {
+            dependencies.insert(table.to_string(), self.get_dependencies(table, incoming));
+        }
+        dependencies
+    }
+
+    /// Drop all configured tables, in reverse dependency order.
+    pub async fn drop_all_tables(&self) -> Result<&Self, ValveError> {
+        // Drop all of the database tables in the reverse of their sorted order:
+        self.drop_tables(&self.get_sorted_table_list(true)).await?;
+        Ok(self)
+    }
+
+    /// Given a vector of table names, drop those tables, in the given order.
+    pub async fn drop_tables(&self, tables: &Vec<&str>) -> Result<&Self, ValveError> {
+        let drop_list = self.add_dependencies(tables, true);
+        for table in &drop_list {
+            if *table != "message" && *table != "history" {
+                let sql = format!(r#"DROP VIEW IF EXISTS "{}_text_view""#, table);
+                self.execute_sql(&sql).await?;
+                let sql = format!(r#"DROP VIEW IF EXISTS "{}_view""#, table);
+                self.execute_sql(&sql).await?;
+                let sql = format!(r#"DROP TABLE IF EXISTS "{}_conflict""#, table);
+                self.execute_sql(&sql).await?;
+            }
+            let sql = format!(r#"DROP TABLE IF EXISTS "{}""#, table);
+            self.execute_sql(&sql).await?;
+        }
+
+        Ok(self)
+    }
+
+    /// Truncate all configured tables, in reverse dependency order.
+    pub async fn truncate_all_tables(&self) -> Result<&Self, ValveError> {
+        self.truncate_tables(&self.get_sorted_table_list(true))
+            .await?;
+        Ok(self)
+    }
+
+    /// Given a vector of table names, truncate those tables, in the given order.
+    pub async fn truncate_tables(&self, tables: &Vec<&str>) -> Result<&Self, ValveError> {
+        self.create_all_tables().await?;
+        let truncate_list = self.add_dependencies(tables, true);
+
+        // We must use CASCADE in the case of PostgreSQL since we cannot truncate a table, T, that
+        // depends on another table, T', even in the case where we have previously truncated T'.
+        // SQLite does not need this. However SQLite does require that the tables be truncated in
+        // deletion order (which means that it must be checking that T' is empty).
+        let truncate_sql = |table: &str| -> String {
+            if self.pool.any_kind() == AnyKind::Postgres {
+                format!(r#"TRUNCATE TABLE "{}" RESTART IDENTITY CASCADE"#, table)
+            } else {
+                format!(r#"DELETE FROM "{}""#, table)
+            }
+        };
+
+        for table in &truncate_list {
+            let sql = truncate_sql(&table);
+            self.execute_sql(&sql).await?;
+            if *table != "message" && *table != "history" {
+                let sql = truncate_sql(&format!("{}_conflict", table));
+                self.execute_sql(&sql).await?;
+            }
+        }
+
+        Ok(self)
+    }
+
+    /// Load all configured tables in dependency order. If `validate` is false, just try to insert
+    /// all rows, irrespective of whether they are valid or may trigger a db error.
+    pub async fn load_all_tables(&self, validate: bool) -> Result<&Self, ValveError> {
+        let table_list = self.get_sorted_table_list(false);
+        if self.verbose {
+            info!("Processing {} tables.", table_list.len());
+        }
+        self.load_tables(&table_list, validate).await
+    }
+
+    /// Given a vector of table names, load those tables in the given order. If `validate` is
+    /// false, just try to insert all rows, irrespective of whether they are valid or may trigger
+    /// a db error.
+    pub async fn load_tables(
+        &self,
+        table_list: &Vec<&str>,
+        validate: bool,
+    ) -> Result<&Self, ValveError> {
+        let list_for_truncation = self.sort_tables(table_list, true)?;
+        self.truncate_tables(
+            &list_for_truncation
+                .iter()
+                .map(|i| i.as_str())
+                .collect::<Vec<_>>(),
+        )
+        .await?;
+
+        let num_tables = table_list.len();
+        let mut total_errors = 0;
+        let mut total_warnings = 0;
+        let mut total_infos = 0;
+        let mut table_num = 1;
+        for table_name in table_list {
+            if *table_name == "message" || *table_name == "history" {
+                continue;
+            }
+            let table_name = table_name.to_string();
+            let path = String::from(
+                self.config
+                    .get("table")
+                    .and_then(|t| t.as_object())
+                    .and_then(|o| o.get(&table_name))
+                    .and_then(|n| n.get("path"))
+                    .and_then(|p| p.as_str())
+                    .unwrap(),
+            );
+            let mut rdr = {
+                match File::open(path.clone()) {
+                    Err(e) => {
+                        warn!("Unable to open '{}': {}", path.clone(), e);
+                        continue;
+                    }
+                    Ok(table_file) => ReaderBuilder::new()
+                        .has_headers(false)
+                        .delimiter(b'\t')
+                        .from_reader(table_file),
+                }
+            };
+            if self.verbose {
+                info!("Loading table {}/{}: {}", table_num, num_tables, table_name);
+            }
+            table_num += 1;
+
+            // Extract the headers, which we will need later:
+            let mut records = rdr.records();
+            let headers;
+            if let Some(result) = records.next() {
+                headers = result.unwrap();
+            } else {
+                panic!("'{}' is empty", path);
+            }
+
+            for header in headers.iter() {
+                if header.trim().is_empty() {
+                    panic!(
+                        "One or more of the header fields is empty for table '{}'",
+                        table_name
+                    );
+                }
+            }
+
+            // HashMap used to report info about the number of error/warning/info messages for this
+            // table when the verbose flag is set to true:
+            let mut messages_stats = HashMap::new();
+            messages_stats.insert("error".to_string(), 0);
+            messages_stats.insert("warning".to_string(), 0);
+            messages_stats.insert("info".to_string(), 0);
+
+            // Split the data into chunks of size CHUNK_SIZE before passing them to the validation
+            // logic:
+            let chunks = records.chunks(CHUNK_SIZE);
+            insert_chunks(
+                &self.config,
+                &self.pool,
+                &self.compiled_datatype_conditions,
+                &self.compiled_rule_conditions,
+                &table_name,
+                &chunks,
+                &headers,
+                &mut messages_stats,
+                self.verbose,
+                validate,
+            )
+            .await?;
+
+            if validate {
+                // We need to wait until all of the rows for a table have been loaded before
+                // validating the "foreign" constraints on a table's trees, since this checks if the
+                // values of one column (the tree's parent) are all contained in another column (the
+                // tree's child). We also need to wait before validating a table's "under"
+                // constraints. Although the tree associated with such a constraint need not be
+                // defined on the same table, it can be.
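+                // Any violations found below are recorded in the message table: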
+                let mut recs_to_update =
+                    validate_tree_foreign_keys(&self.config, &self.pool, None, &table_name, None)
+                        .await?;
+                recs_to_update.append(
+                    &mut validate_under(&self.config, &self.pool, None, &table_name, None).await?,
+                );
+
+                for record in recs_to_update {
+                    let row_number = record.get("row_number").unwrap();
+                    let column_name = record.get("column").and_then(|s| s.as_str()).unwrap();
+                    let value = record.get("value").and_then(|s| s.as_str()).unwrap();
+                    let level = record.get("level").and_then(|s| s.as_str()).unwrap();
+                    let rule = record.get("rule").and_then(|s| s.as_str()).unwrap();
+                    let message = record.get("message").and_then(|s| s.as_str()).unwrap();
+
+                    let sql = local_sql_syntax(
+                        &self.pool,
+                        &format!(
+                            r#"INSERT INTO "message"
+                               ("table", "row", "column", "value", "level", "rule", "message")
+                               VALUES ({}, {}, {}, {}, {}, {}, {})"#,
+                            SQL_PARAM,
+                            row_number,
+                            SQL_PARAM,
+                            SQL_PARAM,
+                            SQL_PARAM,
+                            SQL_PARAM,
+                            SQL_PARAM
+                        ),
+                    );
+                    let mut query = sqlx_query(&sql);
+                    query = query.bind(&table_name);
+                    query = query.bind(&column_name);
+                    query = query.bind(&value);
+                    query = query.bind(&level);
+                    query = query.bind(&rule);
+                    query = query.bind(&message);
+                    query.execute(&self.pool).await?;
+
+                    if self.verbose {
+                        // Add the generated message to messages_stats:
+                        let messages = vec![json!({
+                            "message": message,
+                            "level": level,
+                        })];
+                        add_message_counts(&messages, &mut messages_stats);
+                    }
+                }
+            }
+
+            if self.verbose {
+                // Output a report on the messages generated to stderr:
+                let errors = messages_stats.get("error").unwrap();
+                let warnings = messages_stats.get("warning").unwrap();
+                let infos = messages_stats.get("info").unwrap();
+                let status_message = format!(
+                    "{} errors, {} warnings, and {} information messages generated for {}",
+                    errors, warnings, infos, table_name
+                );
+                info!("{}", status_message);
+                total_errors += errors;
+                total_warnings += warnings;
+                total_infos += infos;
+            }
+        }
+
+        if self.verbose {
+            info!(
+                "Loading complete with {} errors, {} warnings, and {} information messages",
+                total_errors, total_warnings, total_infos
+            );
+        }
+        Ok(self)
+    }
+
+    /// Save all configured tables to their configured paths, unless save_dir is specified,
+    /// in which case save them there instead.
+    pub fn save_all_tables(&self, save_dir: &Option<String>) -> Result<&Self, ValveError> {
+        let tables = self.get_sorted_table_list(false);
+        self.save_tables(&tables, save_dir)?;
+        Ok(self)
+    }
+
+    /// Given a vector of table names, save those tables to their configured paths, unless
+    /// save_dir is specified, in which case save them there instead.
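+    /// Tables without a configured path, as well as the internal message and history tables,
+    /// are skipped.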
+    pub fn save_tables(
+        &self,
+        tables: &Vec<&str>,
+        save_dir: &Option<String>,
+    ) -> Result<&Self, ValveError> {
+        let table_paths: HashMap<String, String> = self
+            .config
+            .get("table")
+            .unwrap()
+            .as_object()
+            .unwrap()
+            .iter()
+            .filter(|(k, v)| {
+                !["message", "history"].contains(&k.as_str())
+                    && tables.contains(&k.as_str())
+                    && v.get("path").is_some()
+            })
+            .map(|(k, v)| {
+                (
+                    k.clone(),
+                    v.get("path").unwrap().as_str().unwrap().to_string(),
+                )
+            })
+            .collect();
+
+        info!(
+            "Saving tables: {} ...",
+            table_paths
+                .keys()
+                .map(|k| k.to_string())
+                .collect::<Vec<_>>()
+                .join(", ")
+        );
+        for (table, path) in table_paths.iter() {
+            let columns: Vec<&str> = self
+                .config
+                .get("table")
+                .and_then(|v| v.as_object())
+                .and_then(|o| o.get(table))
+                .and_then(|v| v.as_object())
+                .and_then(|o| o.get("column_order"))
+                .and_then(|v| v.as_array())
+                .and_then(|v| Some(v.iter().map(|i| i.as_str().unwrap()).collect()))
+                .unwrap();
+
+            let path = match save_dir {
+                Some(s) => format!(
+                    "{}/{}",
+                    s,
+                    Path::new(path)
+                        .file_name()
+                        .and_then(|n| n.to_str())
+                        .unwrap()
+                ),
+                None => path.to_string(),
+            };
+            self.save_table(table, &columns, &path)?;
+        }
+
+        Ok(self)
+    }
+
+    /// Save the given table with the given columns at the given path as a TSV file.
+    pub fn save_table(
+        &self,
+        table: &str,
+        columns: &Vec<&str>,
+        path: &str,
+    ) -> Result<&Self, ValveError> {
+        // TODO: Do some validation on the path.
+
+        let mut quoted_columns = vec!["\"row_number\"".to_string()];
+        quoted_columns.append(
+            &mut columns
+                .iter()
+                .map(|v| enquote::enquote('"', v))
+                .collect::<Vec<_>>(),
+        );
+        let text_view = format!("\"{}_text_view\"", table);
+        let sql = format!(
+            r#"SELECT {} from {} ORDER BY "row_number""#,
+            quoted_columns.join(", "),
+            text_view
+        );
+
+        let mut writer = WriterBuilder::new()
+            .delimiter(b'\t')
+            .quote_style(QuoteStyle::Never)
+            .from_path(path)?;
+        writer.write_record(columns)?;
+        let mut stream = sqlx_query(&sql).fetch(&self.pool);
+        while let Some(row) = block_on(stream.try_next()).unwrap() {
+            let mut record: Vec<&str> = vec![];
+            for column in columns.iter() {
+                let cell = row.try_get::<&str, &str>(column).ok().unwrap_or_default();
+                record.push(cell);
+            }
+            writer.write_record(record)?;
+        }
+        writer.flush()?;
+
+        Ok(self)
+    }
+
+    /// Given a table name and a row, return the validated row.
+    pub async fn validate_row(
+        &self,
+        table_name: &str,
+        row: &ValveRow,
+        row_number: Option<u32>,
+    ) -> Result<ValveRow, ValveError> {
+        validate_row_tx(
+            &self.config,
+            &self.compiled_datatype_conditions,
+            &self.compiled_rule_conditions,
+            &self.pool,
+            None,
+            table_name,
+            row,
+            row_number,
+            None,
+        )
+        .await
+    }
+
+    /// Given a table name and a row as JSON, add the row to the table in the database, and return
+    /// the validated row, including its new row_number.
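+    /// The insertion is recorded in the history table so that it can later be undone.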
+    pub async fn insert_row(
+        &self,
+        table_name: &str,
+        row: &ValveRow,
+    ) -> Result<(u32, ValveRow), ValveError> {
+        let mut tx = self.pool.begin().await?;
+
+        let row = validate_row_tx(
+            &self.config,
+            &self.compiled_datatype_conditions,
+            &self.compiled_rule_conditions,
+            &self.pool,
+            Some(&mut tx),
+            table_name,
+            row,
+            None,
+            None,
+        )
+        .await?;
+
+        let rn = insert_new_row_tx(
+            &self.config,
+            &self.compiled_datatype_conditions,
+            &self.compiled_rule_conditions,
+            &self.pool,
+            &mut tx,
+            table_name,
+            &row,
+            None,
+            true,
+        )
+        .await?;
+
+        record_row_change(&mut tx, table_name, &rn, None, Some(&row), &self.user).await?;
+        tx.commit().await?;
+        Ok((rn, row))
+    }
+
+    /// Given a table name, a row number, and a row, update the row in the database, and return the
+    /// validated row.
+    pub async fn update_row(
+        &self,
+        table_name: &str,
+        row_number: &u32,
+        row: &ValveRow,
+    ) -> Result<ValveRow, ValveError> {
+        let mut tx = self.pool.begin().await?;
+
+        // Get the old version of the row from the database so that we can later record it to the
+        // history table:
+        let old_row =
+            get_row_from_db(&self.config, &self.pool, &mut tx, table_name, &row_number).await?;
+
+        let row = validate_row_tx(
+            &self.config,
+            &self.compiled_datatype_conditions,
+            &self.compiled_rule_conditions,
+            &self.pool,
+            Some(&mut tx),
+            table_name,
+            row,
+            Some(*row_number),
+            None,
+        )
+        .await?;
+
+        update_row_tx(
+            &self.config,
+            &self.compiled_datatype_conditions,
+            &self.compiled_rule_conditions,
+            &self.pool,
+            &mut tx,
+            table_name,
+            &row,
+            row_number,
+            true,
+            false,
+        )
+        .await?;
+
+        // Record the row update in the history table:
+        record_row_change(
+            &mut tx,
+            table_name,
+            row_number,
+            Some(&old_row),
+            Some(&row),
+            &self.user,
+        )
+        .await?;
+
+        tx.commit().await?;
+        Ok(row)
+    }
+
+    /// Given a table name and a row number, delete that row from the table.
+    pub async fn delete_row(&self, table_name: &str, row_number: &u32) -> Result<(), ValveError> {
+        let mut tx = self.pool.begin().await?;
+
+        let row =
+            get_row_from_db(&self.config, &self.pool, &mut tx, &table_name, row_number).await?;
+
+        record_row_change(
+            &mut tx,
+            &table_name,
+            row_number,
+            Some(&row),
+            None,
+            &self.user,
+        )
+        .await?;
+
+        delete_row_tx(
+            &self.config,
+            &self.compiled_datatype_conditions,
+            &self.compiled_rule_conditions,
+            &self.pool,
+            &mut tx,
+            table_name,
+            row_number,
+        )
+        .await?;
+
+        tx.commit().await?;
+        Ok(())
+    }
+
+    /// Return the next change that can be undone, or None if there isn't any.
+    pub async fn get_record_to_undo(&self) -> Result<Option<AnyRow>, ValveError> {
+        // Look in the history table, get the row with the greatest ID, get the row number,
+        // from, and to, and determine whether the last operation was a delete, insert, or update.
+        let is_clause = if self.pool.any_kind() == AnyKind::Sqlite {
+            "IS"
+        } else {
+            "IS NOT DISTINCT FROM"
+        };
+        let sql = format!(
+            r#"SELECT * FROM "history"
+               WHERE "undone_by" {} NULL
+               ORDER BY "history_id" DESC LIMIT 1"#,
+            is_clause
+        );
+        let query = sqlx_query(&sql);
+        let result_row = query.fetch_optional(&self.pool).await?;
+        Ok(result_row)
+    }
+
+    /// Return the next change that can be redone, or None if there isn't any.
+    pub async fn get_record_to_redo(&self) -> Result<Option<AnyRow>, ValveError> {
+        // Look in the history table, get the row with the greatest ID, get the row number,
+        // from, and to, and determine whether the last operation was a delete, insert, or update.
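+        // SQLite's "IS NOT" and PostgreSQL's "IS DISTINCT FROM" are the NULL-safe forms of "<>":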
+        let is_not_clause = if self.pool.any_kind() == AnyKind::Sqlite {
+            "IS NOT"
+        } else {
+            "IS DISTINCT FROM"
+        };
+        let sql = format!(
+            r#"SELECT * FROM "history"
+               WHERE "undone_by" {} NULL
+               ORDER BY "timestamp" DESC LIMIT 1"#,
+            is_not_clause
+        );
+        let query = sqlx_query(&sql);
+        let result_row = query.fetch_optional(&self.pool).await?;
+        Ok(result_row)
+    }
+
+    /// Undo one change and return the change record or None if there was no change to undo.
+    pub async fn undo(&self) -> Result<Option<ValveRow>, ValveError> {
+        let last_change = match self.get_record_to_undo().await? {
+            None => {
+                warn!("Nothing to undo.");
+                return Ok(None);
+            }
+            Some(r) => r,
+        };
+        let history_id: i32 = last_change.get("history_id");
+        let history_id = history_id as u16;
+        let table: &str = last_change.get("table");
+        let row_number: i64 = last_change.get("row");
+        let row_number = row_number as u32;
+        let from = get_json_from_row(&last_change, "from");
+        let to = get_json_from_row(&last_change, "to");
+
+        match (from, to) {
+            (None, None) => {
+                return Err(ValveError::DataError(
+                    "Cannot undo unknown operation from None to None".into(),
+                ))
+            }
+            (None, Some(_)) => {
+                // Undo an insert:
+                let mut tx = self.pool.begin().await?;
+
+                delete_row_tx(
+                    &self.config,
+                    &self.compiled_datatype_conditions,
+                    &self.compiled_rule_conditions,
+                    &self.pool,
+                    &mut tx,
+                    table,
+                    &row_number,
+                )
+                .await?;
+
+                switch_undone_state(&self.user, history_id, true, &mut tx, &self.pool).await?;
+                tx.commit().await?;
+                Ok(None)
+            }
+            (Some(from), None) => {
+                // Undo a delete:
+                let mut tx = self.pool.begin().await?;
+
+                insert_new_row_tx(
+                    &self.config,
+                    &self.compiled_datatype_conditions,
+                    &self.compiled_rule_conditions,
+                    &self.pool,
+                    &mut tx,
+                    table,
+                    &from,
+                    Some(row_number),
+                    false,
+                )
+                .await?;
+
+                switch_undone_state(&self.user, history_id, true, &mut tx, &self.pool).await?;
+                tx.commit().await?;
+                Ok(Some(from))
+            }
+            (Some(from), Some(_)) => {
+                // Undo an update:
+                let mut tx = self.pool.begin().await?;
+
+                update_row_tx(
+                    &self.config,
+                    &self.compiled_datatype_conditions,
+                    &self.compiled_rule_conditions,
+                    &self.pool,
+                    &mut tx,
+                    table,
+                    &from,
+                    &row_number,
+                    false,
+                    false,
+                )
+                .await?;
+
+                switch_undone_state(&self.user, history_id, true, &mut tx, &self.pool).await?;
+                tx.commit().await?;
+                Ok(Some(from))
+            }
+        }
+    }
+
+    /// Redo one change and return the change record or None if there was no change to redo.
+    pub async fn redo(&self) -> Result<Option<ValveRow>, ValveError> {
+        let last_undo = match self.get_record_to_redo().await? {
+            None => {
+                warn!("Nothing to redo.");
+                return Ok(None);
+            }
+            Some(last_undo) => {
+                let undone_by = last_undo.try_get_raw("undone_by")?;
+                if undone_by.is_null() {
+                    warn!("Nothing to redo.");
+                    return Ok(None);
+                }
+                last_undo
+            }
+        };
+        let history_id: i32 = last_undo.get("history_id");
+        let history_id = history_id as u16;
+        let table: &str = last_undo.get("table");
+        let row_number: i64 = last_undo.get("row");
+        let row_number = row_number as u32;
+        let from = get_json_from_row(&last_undo, "from");
+        let to = get_json_from_row(&last_undo, "to");
+
+        match (from, to) {
+            (None, None) => {
+                return Err(ValveError::DataError(
+                    "Cannot redo unknown operation from None to None".into(),
+                ))
+            }
+            (None, Some(to)) => {
+                // Redo an insert:
+                let mut tx = self.pool.begin().await?;
+
+                insert_new_row_tx(
+                    &self.config,
+                    &self.compiled_datatype_conditions,
+                    &self.compiled_rule_conditions,
+                    &self.pool,
+                    &mut tx,
+                    table,
+                    &to,
+                    Some(row_number),
+                    false,
+                )
+                .await?;
+
+                switch_undone_state(&self.user, history_id, false, &mut tx, &self.pool).await?;
+                tx.commit().await?;
+                Ok(Some(to))
+            }
+            (Some(_), None) => {
+                // Redo a delete:
+                let mut tx = self.pool.begin().await?;
+
+                delete_row_tx(
+                    &self.config,
+                    &self.compiled_datatype_conditions,
+                    &self.compiled_rule_conditions,
+                    &self.pool,
+                    &mut tx,
+                    table,
+                    &row_number,
+                )
+                .await?;
+
+                switch_undone_state(&self.user, history_id, false, &mut tx, &self.pool).await?;
+                tx.commit().await?;
+                Ok(None)
+            }
+            (Some(_), Some(to)) => {
+                // Redo an update:
+                let mut tx = self.pool.begin().await?;
+
+                update_row_tx(
+                    &self.config,
+                    &self.compiled_datatype_conditions,
+                    &self.compiled_rule_conditions,
+                    &self.pool,
+                    &mut tx,
+                    table,
+                    &to,
+                    &row_number,
+                    false,
+                    false,
+                )
+                .await?;
+
+                switch_undone_state(&self.user, history_id, false, &mut tx, &self.pool).await?;
+                tx.commit().await?;
+                Ok(Some(to))
+            }
+        }
+    }
+
+    /// Given a table name, a column name, and (optionally) a string to match, return a JSON array
+    /// of possible valid values for the given column which contain the matching string as a
+    /// substring (or all of them if no matching string is given). The JSON array returned is
+    /// formatted for Typeahead, i.e., it takes the form:
+    /// `[{"id": id, "label": label, "order": order}, ...]`.
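+    /// Candidate values are drawn from the column's in(...) datatype condition if it has one, or
+    /// otherwise from its from(...) or under(...) structure constraint.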
+    pub async fn get_matching_values(
+        &self,
+        table_name: &str,
+        column_name: &str,
+        matching_string: Option<&str>,
+    ) -> Result<SerdeValue, ValveError> {
+        let config = &self.config;
+        let compiled_datatype_conditions = &self.compiled_datatype_conditions;
+        let parsed_structure_conditions = &self.parsed_structure_conditions;
+        let pool = &self.pool;
+        let dt_name = config
+            .get("table")
+            .and_then(|t| t.as_object())
+            .and_then(|t| t.get(table_name))
+            .and_then(|t| t.as_object())
+            .and_then(|t| t.get("column"))
+            .and_then(|c| c.as_object())
+            .and_then(|c| c.get(column_name))
+            .and_then(|c| c.as_object())
+            .and_then(|c| c.get("datatype"))
+            .and_then(|d| d.as_str())
+            .unwrap();
+
+        let dt_condition = compiled_datatype_conditions
+            .get(dt_name)
+            .and_then(|d| Some(d.parsed.clone()));
+
+        let mut values = vec![];
+        match dt_condition {
+            Some(Expression::Function(name, args)) if name == "in" => {
+                for arg in args {
+                    if let Expression::Label(arg) = *arg {
+                        // Remove the enclosing quotes from the values being returned:
+                        let label = unquote(&arg).unwrap_or_else(|_| arg);
+                        if let Some(s) = matching_string {
+                            if label.contains(s) {
+                                values.push(label);
+                            }
+                        }
+                    }
+                }
+            }
+            _ => {
+                // If the datatype for the column does not correspond to an `in(...)` function, then
+                // we check the column's structure constraints. If they include a
+                // `from(foreign_table.foreign_column)` condition, then the values are taken from
+                // the foreign column. Otherwise if the structure includes an
+                // `under(tree_table.tree_column, value)` condition, then get the values from the
+                // tree column that are under `value`.
+                let structure = parsed_structure_conditions.get(
+                    config
+                        .get("table")
+                        .and_then(|t| t.as_object())
+                        .and_then(|t| t.get(table_name))
+                        .and_then(|t| t.as_object())
+                        .and_then(|t| t.get("column"))
+                        .and_then(|c| c.as_object())
+                        .and_then(|c| c.get(column_name))
+                        .and_then(|c| c.as_object())
+                        .and_then(|c| c.get("structure"))
+                        .and_then(|d| d.as_str())
+                        .unwrap_or_else(|| ""),
+                );
+
+                let sql_type =
+                    get_sql_type_from_global_config(&config, table_name, &column_name, &pool)
+                        .unwrap();
+
+                match structure {
+                    Some(ParsedStructure { original, parsed }) => {
+                        let matching_string = {
+                            match matching_string {
+                                None => "%".to_string(),
+                                Some(s) => format!("%{}%", s),
+                            }
+                        };
+
+                        match parsed {
+                            Expression::Function(name, args) if name == "from" => {
+                                let foreign_key = &args[0];
+                                if let Expression::Field(ftable, fcolumn) = &**foreign_key {
+                                    let fcolumn_text = cast_column_sql_to_text(&fcolumn, &sql_type);
+                                    let sql = local_sql_syntax(
+                                        &pool,
+                                        &format!(
+                                            r#"SELECT "{}" FROM "{}" WHERE {} LIKE {}"#,
+                                            fcolumn, ftable, fcolumn_text, SQL_PARAM
+                                        ),
+                                    );
+                                    let rows = sqlx_query(&sql)
+                                        .bind(&matching_string)
+                                        .fetch_all(pool)
+                                        .await?;
+                                    for row in rows.iter() {
+                                        values.push(get_column_value(&row, &fcolumn, &sql_type));
+                                    }
+                                }
+                            }
+                            Expression::Function(name, args)
+                                if name == "under" || name == "tree" =>
+                            {
+                                let mut tree_col = "not set";
+                                let mut under_val = Some("not set".to_string());
+                                if name == "under" {
+                                    if let Expression::Field(_, column) = &**&args[0] {
+                                        tree_col = column;
+                                    }
+                                    if let Expression::Label(label) = &**&args[1] {
+                                        under_val = Some(label.to_string());
+                                    }
+                                } else {
+                                    let tree_key = &args[0];
+                                    if let Expression::Label(label) = &**tree_key {
+                                        tree_col = label;
+                                        under_val = None;
+                                    }
+                                }
+
+                                let tree = config
+                                    .get("constraints")
+                                    .and_then(|c| c.as_object())
+                                    .and_then(|c| c.get("tree"))
+                                    .and_then(|t| t.as_object())
+                                    .and_then(|t| t.get(table_name))
+                                    .and_then(|t| t.as_array())
+                                    .and_then(|t| {
+                                        t.iter().find(|o| o.get("child").unwrap() == tree_col)
+                                    })
+                                    .expect(
+                                        format!("No tree: '{}.{}' found", table_name, tree_col)
+                                            .as_str(),
+                                    )
+                                    .as_object()
+                                    .unwrap();
+                                let child_column =
+                                    tree.get("child").and_then(|c| c.as_str()).unwrap();
+
+                                let (tree_sql, mut params) = with_tree_sql(
+                                    &config,
+                                    tree,
+                                    &table_name.to_string(),
+                                    &table_name.to_string(),
+                                    under_val.as_ref(),
+                                    None,
+                                    &pool,
+                                );
+                                let child_column_text =
+                                    cast_column_sql_to_text(&child_column, &sql_type);
+                                let sql = local_sql_syntax(
+                                    &pool,
+                                    &format!(
+                                        r#"{} SELECT "{}" FROM "tree" WHERE {} LIKE {}"#,
+                                        tree_sql, child_column, child_column_text, SQL_PARAM
+                                    ),
+                                );
+                                params.push(matching_string);
+
+                                let mut query = sqlx_query(&sql);
+                                for param in &params {
+                                    query = query.bind(param);
+                                }
+
+                                let rows = query.fetch_all(pool).await?;
+                                for row in rows.iter() {
+                                    values.push(get_column_value(&row, &child_column, &sql_type));
+                                }
+                            }
+                            _ => panic!("Unrecognised structure: {}", original),
+                        };
+                    }
+                    None => (),
+                };
+            }
+        };
+
+        let mut typeahead_values = vec![];
+        for (i, v) in values.iter().enumerate() {
+            // enumerate() begins at 0 but we need to begin at 1:
+            let i = i + 1;
+            typeahead_values.push(json!({
+                "id": v,
+                "label": v,
+                "order": i,
+            }));
+        }
+
+        Ok(json!(typeahead_values))
+    }
+}
diff --git a/test/expected/history.tsv b/test/expected/history.tsv index 86afe795..23f15d77 100644 --- a/test/expected/history.tsv +++ b/test/expected/history.tsv @@ -1,15 +1,15 @@ history_id table row from to summary user undone_by -1 table10 9 {"foreign_column":{"messages":[],"valid":true,"value":"j"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"10"},"other_foreign_column":{"messages":[],"valid":true,"value":"j"}} VALVE VALVE -2 table10 8 {"foreign_column":{"messages":[],"valid":true,"value":"h"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"8"},"other_foreign_column":{"messages":[],"valid":true,"value":"h"}} {"foreign_column":{"messages":[],"valid":true,"value":"k"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"11"},"other_foreign_column":{"messages":[],"valid":true,"value":"k"}} [{"column":"foreign_column","level":"update","message":"Value changed from 'h' to 'k'","old_value":"h","value":"k"},{"column":"numeric_foreign_column","level":"update","message":"Value changed from 8 to 11","old_value":"8","value":"11"},{"column":"other_foreign_column","level":"update","message":"Value changed from 'h' to 'k'","old_value":"h","value":"k"}] VALVE VALVE -3 table10 8 {"foreign_column":{"messages":[],"valid":true,"value":"h"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"8"},"other_foreign_column":{"messages":[],"valid":true,"value":"h"}} VALVE VALVE -4 table10 10 {"foreign_column":{"messages":[],"valid":true,"value":"j"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"10"},"other_foreign_column":{"messages":[],"valid":true,"value":"j"}} VALVE VALVE -5 table10 10 {"foreign_column":{"messages":[],"valid":true,"value":"j"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"10"},"other_foreign_column":{"messages":[],"valid":true,"value":"j"}} {"foreign_column":{"messages":[],"valid":true,"value":"k"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"11"},"other_foreign_column":{"messages":[],"valid":true,"value":"k"}} [{"column":"foreign_column","level":"update","message":"Value changed from 'j' to
'k'","old_value":"j","value":"k"},{"column":"numeric_foreign_column","level":"update","message":"Value changed from 10 to 11","old_value":"10","value":"11"},{"column":"other_foreign_column","level":"update","message":"Value changed from 'j' to 'k'","old_value":"j","value":"k"}] VALVE VALVE -6 table10 10 {"foreign_column":{"messages":[],"valid":true,"value":"k"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"11"},"other_foreign_column":{"messages":[],"valid":true,"value":"k"}} VALVE VALVE -7 table2 1 {"bar":{"messages":[],"valid":true,"value":""},"child":{"messages":[],"valid":true,"value":"a"},"foo":{"messages":[{"column":"foo","level":"error","message":"bar cannot be null if foo is not null","rule":"rule:foo-2","value":"5"},{"column":"foo","level":"error","message":"bar must be 'y' or 'z' if foo = 5","rule":"rule:foo-4","value":"5"}],"valid":false,"value":"5"},"parent":{"messages":[],"valid":true,"value":"b"},"xyzzy":{"messages":[],"valid":true,"value":"d"}} {"bar":{"messages":[{"level":"error","message":"An unrelated error","rule":"custom:unrelated"}],"valid":false,"value":"B"},"child":{"messages":[{"level":"error","message":"Values of child must be unique","rule":"tree:child-unique"}],"valid":false,"value":"b"},"foo":{"messages":[],"valid":true,"value":"1"},"parent":{"messages":[],"valid":true,"value":"f"},"xyzzy":{"messages":[{"level":"error","message":"Value 'w' of column xyzzy is not in table2.child","rule":"under:not-in-tree"}],"valid":false,"value":"w"}} [{"column":"bar","level":"update","message":"Value changed from '' to 'B'","old_value":"","value":"B"},{"column":"child","level":"update","message":"Value changed from 'a' to 'b'","old_value":"a","value":"b"},{"column":"foo","level":"update","message":"Value changed from 5 to 1","old_value":"5","value":"1"},{"column":"parent","level":"update","message":"Value changed from 'b' to 'f'","old_value":"b","value":"f"},{"column":"xyzzy","level":"update","message":"Value changed from 'd' to 'w'","old_value":"d","value":"w"}] VALVE -8 table3 11 {"id":{"messages":[],"valid":true,"value":"BFO:0000027"},"label":{"messages":[],"valid":true,"value":"bazaar"},"parent":{"messages":[{"level":"error","message":"An unrelated error","rule":"custom:unrelated"},{"level":"error","message":"Value 'barrie' of column parent is not in column label","rule":"tree:foreign"}],"valid":false,"value":"barrie"},"source":{"messages":[{"level":"error","message":"Value 'BFOBBER' of column source is not in table1.prefix","rule":"key:foreign"}],"valid":false,"value":"BFOBBER"},"type":{"messages":[],"valid":true,"value":"owl:Class"}} VALVE -9 table6 1 {"bar":{"messages":[],"valid":true,"value":""},"child":{"messages":[],"valid":true,"value":"1"},"foo":{"messages":[{"column":"foo","level":"error","message":"bar cannot be null if foo is not null","rule":"rule:foo-2","value":"e"},{"column":"foo","level":"error","message":"bar must be 25 or 26 if foo = 'e'","rule":"rule:foo-4","value":"e"}],"valid":false,"value":"e"},"parent":{"messages":[],"valid":true,"value":"2"},"xyzzy":{"messages":[],"valid":true,"value":"4"}} {"bar":{"messages":[{"level":"error","message":"An unrelated error","rule":"custom:unrelated"}],"valid":false,"value":"2"},"child":{"messages":[{"level":"error","message":"Values of child must be unique","rule":"tree:child-unique"}],"valid":false,"value":"2"},"foo":{"messages":[],"valid":true,"value":"a"},"parent":{"messages":[],"valid":true,"value":"6"},"xyzzy":{"messages":[{"level":"error","message":"Value '23' of column xyzzy is not in 
table6.child","rule":"under:not-in-tree"}],"valid":false,"value":"23"}} [{"column":"bar","level":"update","message":"Value changed from '' to 2","old_value":"","value":"2"},{"column":"child","level":"update","message":"Value changed from 1 to 2","old_value":"1","value":"2"},{"column":"foo","level":"update","message":"Value changed from 'e' to 'a'","old_value":"e","value":"a"},{"column":"parent","level":"update","message":"Value changed from 2 to 6","old_value":"2","value":"6"},{"column":"xyzzy","level":"update","message":"Value changed from 4 to 23","old_value":"4","value":"23"}] VALVE -10 table6 10 {"bar":{"messages":[{"level":"error","message":"An unrelated error","rule":"custom:unrelated"}],"valid":false,"value":"2"},"child":{"messages":[{"level":"error","message":"Values of child must be unique","rule":"tree:child-unique"}],"valid":false,"value":"2"},"foo":{"messages":[],"valid":true,"value":"a"},"parent":{"messages":[],"valid":true,"value":"6"},"xyzzy":{"messages":[{"level":"error","message":"Value '23' of column xyzzy is not in table6.child","rule":"under:not-in-tree"}],"valid":false,"value":"23"}} VALVE -11 table10 1 {"foreign_column":{"messages":[],"valid":true,"value":"a"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"1"},"other_foreign_column":{"messages":[],"valid":true,"value":"a"}} {"foreign_column":{"messages":[],"valid":true,"value":"w"},"numeric_foreign_column":{"messages":[{"level":"error","message":"numeric_foreign_column should be a positive or negative integer","rule":"datatype:integer"},{"level":"error","message":"numeric_foreign_column should be a line of text that does not begin or end with whitespace","rule":"datatype:trimmed_line"}],"valid":false,"value":""},"other_foreign_column":{"messages":[],"valid":true,"value":"z"}} [{"column":"foreign_column","level":"update","message":"Value changed from 'a' to 'w'","old_value":"a","value":"w"},{"column":"numeric_foreign_column","level":"update","message":"Value changed from 1 to ''","old_value":"1","value":""},{"column":"other_foreign_column","level":"update","message":"Value changed from 'a' to 'z'","old_value":"a","value":"z"}] VALVE -12 table11 2 {"bar":{"messages":[],"valid":true,"value":"f"},"child":{"messages":[],"valid":true,"value":"b"},"foo":{"messages":[],"valid":true,"value":"e"},"parent":{"messages":[],"valid":true,"value":"c"},"xyzzy":{"messages":[],"valid":true,"value":"d"}} {"bar":{"messages":[],"valid":true,"value":"f"},"child":{"messages":[],"valid":true,"value":"b"},"foo":{"messages":[{"level":"error","message":"Values of foo must be unique","rule":"key:primary"}],"valid":false,"value":"d"},"parent":{"messages":[],"valid":true,"value":"c"},"xyzzy":{"messages":[],"valid":true,"value":"d"}} [{"column":"foo","level":"update","message":"Value changed from 'e' to 'd'","old_value":"e","value":"d"}] VALVE -13 table11 4 {"bar":{"messages":[],"valid":true,"value":"z"},"child":{"messages":[],"valid":true,"value":"f"},"foo":{"messages":[],"valid":true,"value":"e"},"parent":{"messages":[],"valid":true,"value":"g"},"xyzzy":{"messages":[],"valid":true,"value":"x"}} VALVE -14 table10 11 {"foreign_column":{"messages":[],"valid":true,"value":"i"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"9"},"other_foreign_column":{"messages":[],"valid":true,"value":"i"}} VALVE +1 table2 1 {"bar":{"messages":[],"valid":true,"value":""},"child":{"messages":[],"valid":true,"value":"a"},"foo":{"messages":[{"column":"foo","level":"error","message":"bar cannot be null if foo is not 
null","rule":"rule:foo-2","value":"5"},{"column":"foo","level":"error","message":"bar must be 'y' or 'z' if foo = 5","rule":"rule:foo-4","value":"5"}],"valid":false,"value":"5"},"parent":{"messages":[],"valid":true,"value":"b"},"xyzzy":{"messages":[],"valid":true,"value":"d"}} {"bar":{"messages":[{"level":"error","message":"An unrelated error","rule":"custom:unrelated"}],"valid":false,"value":"B"},"child":{"messages":[{"level":"error","message":"Values of child must be unique","rule":"tree:child-unique"}],"valid":false,"value":"b"},"foo":{"messages":[],"valid":true,"value":"1"},"parent":{"messages":[],"valid":true,"value":"f"},"xyzzy":{"messages":[{"level":"error","message":"Value 'w' of column xyzzy is not in table2.child","rule":"under:not-in-tree"}],"valid":false,"value":"w"}} [{"column":"bar","level":"update","message":"Value changed from '' to 'B'","old_value":"","value":"B"},{"column":"child","level":"update","message":"Value changed from 'a' to 'b'","old_value":"a","value":"b"},{"column":"foo","level":"update","message":"Value changed from 5 to 1","old_value":"5","value":"1"},{"column":"parent","level":"update","message":"Value changed from 'b' to 'f'","old_value":"b","value":"f"},{"column":"xyzzy","level":"update","message":"Value changed from 'd' to 'w'","old_value":"d","value":"w"}] VALVE +2 table3 11 {"id":{"messages":[],"valid":true,"value":"BFO:0000027"},"label":{"messages":[],"valid":true,"value":"bazaar"},"parent":{"messages":[{"level":"error","message":"An unrelated error","rule":"custom:unrelated"},{"level":"error","message":"Value 'barrie' of column parent is not in column label","rule":"tree:foreign"}],"valid":false,"value":"barrie"},"source":{"messages":[{"level":"error","message":"Value 'BFOBBER' of column source is not in table1.prefix","rule":"key:foreign"}],"valid":false,"value":"BFOBBER"},"type":{"messages":[],"valid":true,"value":"owl:Class"}} VALVE +3 table6 1 {"bar":{"messages":[],"valid":true,"value":""},"child":{"messages":[],"valid":true,"value":"1"},"foo":{"messages":[{"column":"foo","level":"error","message":"bar cannot be null if foo is not null","rule":"rule:foo-2","value":"e"},{"column":"foo","level":"error","message":"bar must be 25 or 26 if foo = 'e'","rule":"rule:foo-4","value":"e"}],"valid":false,"value":"e"},"parent":{"messages":[],"valid":true,"value":"2"},"xyzzy":{"messages":[],"valid":true,"value":"4"}} {"bar":{"messages":[{"level":"error","message":"An unrelated error","rule":"custom:unrelated"}],"valid":false,"value":"2"},"child":{"messages":[{"level":"error","message":"Values of child must be unique","rule":"tree:child-unique"}],"valid":false,"value":"2"},"foo":{"messages":[],"valid":true,"value":"a"},"parent":{"messages":[],"valid":true,"value":"6"},"xyzzy":{"messages":[{"level":"error","message":"Value '23' of column xyzzy is not in table6.child","rule":"under:not-in-tree"}],"valid":false,"value":"23"}} [{"column":"bar","level":"update","message":"Value changed from '' to 2","old_value":"","value":"2"},{"column":"child","level":"update","message":"Value changed from 1 to 2","old_value":"1","value":"2"},{"column":"foo","level":"update","message":"Value changed from 'e' to 'a'","old_value":"e","value":"a"},{"column":"parent","level":"update","message":"Value changed from 2 to 6","old_value":"2","value":"6"},{"column":"xyzzy","level":"update","message":"Value changed from 4 to 23","old_value":"4","value":"23"}] VALVE +4 table6 10 {"bar":{"messages":[{"level":"error","message":"An unrelated 
error","rule":"custom:unrelated"}],"valid":false,"value":"2"},"child":{"messages":[{"level":"error","message":"Values of child must be unique","rule":"tree:child-unique"}],"valid":false,"value":"2"},"foo":{"messages":[],"valid":true,"value":"a"},"parent":{"messages":[],"valid":true,"value":"6"},"xyzzy":{"messages":[{"level":"error","message":"Value '23' of column xyzzy is not in table6.child","rule":"under:not-in-tree"}],"valid":false,"value":"23"}} VALVE +5 table10 1 {"foreign_column":{"messages":[],"valid":true,"value":"a"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"1"},"other_foreign_column":{"messages":[],"valid":true,"value":"a"}} {"foreign_column":{"messages":[],"valid":true,"value":"w"},"numeric_foreign_column":{"messages":[{"level":"error","message":"numeric_foreign_column should be a positive or negative integer","rule":"datatype:integer"},{"level":"error","message":"numeric_foreign_column should be a line of text that does not begin or end with whitespace","rule":"datatype:trimmed_line"}],"valid":false,"value":""},"other_foreign_column":{"messages":[],"valid":true,"value":"z"}} [{"column":"foreign_column","level":"update","message":"Value changed from 'a' to 'w'","old_value":"a","value":"w"},{"column":"numeric_foreign_column","level":"update","message":"Value changed from 1 to ''","old_value":"1","value":""},{"column":"other_foreign_column","level":"update","message":"Value changed from 'a' to 'z'","old_value":"a","value":"z"}] VALVE +6 table11 2 {"bar":{"messages":[],"valid":true,"value":"f"},"child":{"messages":[],"valid":true,"value":"b"},"foo":{"messages":[],"valid":true,"value":"e"},"parent":{"messages":[],"valid":true,"value":"c"},"xyzzy":{"messages":[],"valid":true,"value":"d"}} {"bar":{"messages":[],"valid":true,"value":"f"},"child":{"messages":[],"valid":true,"value":"b"},"foo":{"messages":[{"level":"error","message":"Values of foo must be unique","rule":"key:primary"}],"valid":false,"value":"d"},"parent":{"messages":[],"valid":true,"value":"c"},"xyzzy":{"messages":[],"valid":true,"value":"d"}} [{"column":"foo","level":"update","message":"Value changed from 'e' to 'd'","old_value":"e","value":"d"}] VALVE +7 table11 4 {"bar":{"messages":[],"valid":true,"value":"z"},"child":{"messages":[],"valid":true,"value":"f"},"foo":{"messages":[],"valid":true,"value":"e"},"parent":{"messages":[],"valid":true,"value":"g"},"xyzzy":{"messages":[],"valid":true,"value":"x"}} VALVE +8 table10 9 {"foreign_column":{"messages":[],"valid":true,"value":"i"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"9"},"other_foreign_column":{"messages":[],"valid":true,"value":"i"}} VALVE +9 table10 10 {"foreign_column":{"messages":[],"valid":true,"value":"j"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"10"},"other_foreign_column":{"messages":[],"valid":true,"value":"j"}} VALVE VALVE +10 table10 8 {"foreign_column":{"messages":[],"valid":true,"value":"h"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"8"},"other_foreign_column":{"messages":[],"valid":true,"value":"h"}} {"foreign_column":{"messages":[],"valid":true,"value":"k"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"11"},"other_foreign_column":{"messages":[],"valid":true,"value":"k"}} [{"column":"foreign_column","level":"update","message":"Value changed from 'h' to 'k'","old_value":"h","value":"k"},{"column":"numeric_foreign_column","level":"update","message":"Value changed from 8 to 
11","old_value":"8","value":"11"},{"column":"other_foreign_column","level":"update","message":"Value changed from 'h' to 'k'","old_value":"h","value":"k"}] VALVE VALVE +11 table10 8 {"foreign_column":{"messages":[],"valid":true,"value":"h"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"8"},"other_foreign_column":{"messages":[],"valid":true,"value":"h"}} VALVE VALVE +12 table10 11 {"foreign_column":{"messages":[],"valid":true,"value":"j"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"10"},"other_foreign_column":{"messages":[],"valid":true,"value":"j"}} VALVE VALVE +13 table10 11 {"foreign_column":{"messages":[],"valid":true,"value":"j"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"10"},"other_foreign_column":{"messages":[],"valid":true,"value":"j"}} {"foreign_column":{"messages":[],"valid":true,"value":"k"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"11"},"other_foreign_column":{"messages":[],"valid":true,"value":"k"}} [{"column":"foreign_column","level":"update","message":"Value changed from 'j' to 'k'","old_value":"j","value":"k"},{"column":"numeric_foreign_column","level":"update","message":"Value changed from 10 to 11","old_value":"10","value":"11"},{"column":"other_foreign_column","level":"update","message":"Value changed from 'j' to 'k'","old_value":"j","value":"k"}] VALVE VALVE +14 table10 11 {"foreign_column":{"messages":[],"valid":true,"value":"k"},"numeric_foreign_column":{"messages":[],"valid":true,"value":"11"},"other_foreign_column":{"messages":[],"valid":true,"value":"k"}} VALVE VALVE diff --git a/test/guess_test_data/column.tsv b/test/guess_test_data/column.tsv index 2659b524..69e5dbf8 100644 --- a/test/guess_test_data/column.tsv +++ b/test/guess_test_data/column.tsv @@ -16,8 +16,7 @@ datatype transform empty word datatype condition empty datatype_condition datatype structure empty trimmed_line datatype description empty trimmed_text -datatype SQLite type empty datatype_name -datatype PostgreSQL type empty datatype_name +datatype SQL type empty datatype_name datatype RDF type empty datatype_name datatype HTML type empty datatype_name rule table table_name diff --git a/test/guess_test_data/column_expected.tsv b/test/guess_test_data/column_expected.tsv index f7e6a20e..d0bf3745 100644 --- a/test/guess_test_data/column_expected.tsv +++ b/test/guess_test_data/column_expected.tsv @@ -16,8 +16,7 @@ datatype transform empty word datatype condition empty datatype_condition datatype structure empty trimmed_line datatype description empty trimmed_text -datatype SQLite type empty datatype_name -datatype PostgreSQL type empty datatype_name +datatype SQL type empty datatype_name datatype RDF type empty datatype_name datatype HTML type empty datatype_name rule table table_name diff --git a/test/guess_test_data/datatype.tsv b/test/guess_test_data/datatype.tsv index c118588d..156b46d9 100644 --- a/test/guess_test_data/datatype.tsv +++ b/test/guess_test_data/datatype.tsv @@ -1,22 +1,22 @@ -datatype parent transform condition structure description SQLite type PostgreSQL type RDF type HTML type -CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE -IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI -column_name trimmed_line match(/\S([^\n]*\S)*/) a column name -datatype_condition line exclude(/\n/) a datatype condition specification -datatype_name word exclude(/\W/) a datatype name -description trimmed_text match(/\S(.*\S)*/) a brief description -empty text equals('') the empty 
string NULL NULL null -integer nonspace match(/-?\d+/) a positive or negative integer INTEGER INTEGER -label trimmed_line match(/\S([^\n]*\S)*/) -line text exclude(/\n/) a line of text input -natural_number integer match(/\d+/) a natural number, including zero INTEGER INTEGER -nonspace trimmed_line exclude(/\s/) text without whitespace -path line exclude(/\n/) a path to a file -prefix word exclude(/\W/) a prefix for a CURIE -suffix word exclude(/\W/) a suffix for a CURIE -table_name word exclude(/\W/) a table name -table_type word lowercase in('table', 'column', 'datatype') a table type -text any text TEXT TEXT xsd:string textarea -trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace -trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace -word nonspace exclude(/\W/) a single word: letters, numbers, underscore +datatype parent transform condition structure description SQL type RDF type HTML type +CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE +IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI +column_name trimmed_line match(/\S([^\n]*\S)*/) a column name +datatype_condition line exclude(/\n/) a datatype condition specification +datatype_name word exclude(/\W/) a datatype name +description trimmed_text match(/\S(.*\S)*/) a brief description +empty text equals('') the empty string NULL null +integer nonspace match(/-?\d+/) a positive or negative integer INTEGER +label trimmed_line match(/\S([^\n]*\S)*/) +line text exclude(/\n/) a line of text input +natural_number integer match(/\d+/) a natural number, including zero INTEGER +nonspace trimmed_line exclude(/\s/) text without whitespace +path line exclude(/\n/) a path to a file +prefix word exclude(/\W/) a prefix for a CURIE +suffix word exclude(/\W/) a suffix for a CURIE +table_name word exclude(/\W/) a table name +table_type word lowercase in('table', 'column', 'datatype') a table type +text any text TEXT xsd:string textarea +trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace +trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace +word nonspace exclude(/\W/) a single word: letters, numbers, underscore diff --git a/test/insert_update.sh b/test/insert_update.sh index 698c5c06..0c30e71d 100755 --- a/test/insert_update.sh +++ b/test/insert_update.sh @@ -1,20 +1,20 @@ #!/usr/bin/env bash -if [[ $# -lt 1 ]] +if [[ $# -lt 2 ]] then - echo "Usage: $(basename $0) DATABASE" + echo "Usage: $(basename $0) DATABASE TABLE_CONFIG" exit 1 fi db=$1 -shift +table_defs=$2 +shift 2 if [[ $# -gt 0 ]] then echo "Warning: Extra arguments: '$*' will be ignored" fi pwd=$(dirname $(readlink -f $0)) -export_script=$pwd/../scripts/export.py output_dir=$pwd/output expected_dir=$pwd/expected @@ -25,7 +25,7 @@ do table_path=$pwd/output/$table_path table_file=$(basename $table_path) table=${table_file%.*} - ${export_script} data $db $output_dir $table + ./valve --save $table --save_dir $output_dir $table_defs $db diff -q $expected_dir/${table}.tsv ${table_path} ret_value=$(expr $ret_value + $?) 
done diff --git a/test/perf_test_data/column.tsv b/test/perf_test_data/column.tsv index f7e6a20e..d0bf3745 100644 --- a/test/perf_test_data/column.tsv +++ b/test/perf_test_data/column.tsv @@ -16,8 +16,7 @@ datatype transform empty word datatype condition empty datatype_condition datatype structure empty trimmed_line datatype description empty trimmed_text -datatype SQLite type empty datatype_name -datatype PostgreSQL type empty datatype_name +datatype SQL type empty datatype_name datatype RDF type empty datatype_name datatype HTML type empty datatype_name rule table table_name diff --git a/test/perf_test_data/datatype.tsv b/test/perf_test_data/datatype.tsv index c118588d..156b46d9 100644 --- a/test/perf_test_data/datatype.tsv +++ b/test/perf_test_data/datatype.tsv @@ -1,22 +1,22 @@ -datatype parent transform condition structure description SQLite type PostgreSQL type RDF type HTML type -CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE -IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI -column_name trimmed_line match(/\S([^\n]*\S)*/) a column name -datatype_condition line exclude(/\n/) a datatype condition specification -datatype_name word exclude(/\W/) a datatype name -description trimmed_text match(/\S(.*\S)*/) a brief description -empty text equals('') the empty string NULL NULL null -integer nonspace match(/-?\d+/) a positive or negative integer INTEGER INTEGER -label trimmed_line match(/\S([^\n]*\S)*/) -line text exclude(/\n/) a line of text input -natural_number integer match(/\d+/) a natural number, including zero INTEGER INTEGER -nonspace trimmed_line exclude(/\s/) text without whitespace -path line exclude(/\n/) a path to a file -prefix word exclude(/\W/) a prefix for a CURIE -suffix word exclude(/\W/) a suffix for a CURIE -table_name word exclude(/\W/) a table name -table_type word lowercase in('table', 'column', 'datatype') a table type -text any text TEXT TEXT xsd:string textarea -trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace -trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace -word nonspace exclude(/\W/) a single word: letters, numbers, underscore +datatype parent transform condition structure description SQL type RDF type HTML type +CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE +IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI +column_name trimmed_line match(/\S([^\n]*\S)*/) a column name +datatype_condition line exclude(/\n/) a datatype condition specification +datatype_name word exclude(/\W/) a datatype name +description trimmed_text match(/\S(.*\S)*/) a brief description +empty text equals('') the empty string NULL null +integer nonspace match(/-?\d+/) a positive or negative integer INTEGER +label trimmed_line match(/\S([^\n]*\S)*/) +line text exclude(/\n/) a line of text input +natural_number integer match(/\d+/) a natural number, including zero INTEGER +nonspace trimmed_line exclude(/\s/) text without whitespace +path line exclude(/\n/) a path to a file +prefix word exclude(/\W/) a prefix for a CURIE +suffix word exclude(/\W/) a suffix for a CURIE +table_name word exclude(/\W/) a table name +table_type word lowercase in('table', 'column', 'datatype') a table type +text any text TEXT xsd:string textarea +trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace +trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with 
whitespace +word nonspace exclude(/\W/) a single word: letters, numbers, underscore diff --git a/test/random_test_data/column.tsv b/test/random_test_data/column.tsv index f7e6a20e..d0bf3745 100644 --- a/test/random_test_data/column.tsv +++ b/test/random_test_data/column.tsv @@ -16,8 +16,7 @@ datatype transform empty word datatype condition empty datatype_condition datatype structure empty trimmed_line datatype description empty trimmed_text -datatype SQLite type empty datatype_name -datatype PostgreSQL type empty datatype_name +datatype SQL type empty datatype_name datatype RDF type empty datatype_name datatype HTML type empty datatype_name rule table table_name diff --git a/test/random_test_data/datatype.tsv b/test/random_test_data/datatype.tsv index c118588d..156b46d9 100644 --- a/test/random_test_data/datatype.tsv +++ b/test/random_test_data/datatype.tsv @@ -1,22 +1,22 @@ -datatype parent transform condition structure description SQLite type PostgreSQL type RDF type HTML type -CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE -IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI -column_name trimmed_line match(/\S([^\n]*\S)*/) a column name -datatype_condition line exclude(/\n/) a datatype condition specification -datatype_name word exclude(/\W/) a datatype name -description trimmed_text match(/\S(.*\S)*/) a brief description -empty text equals('') the empty string NULL NULL null -integer nonspace match(/-?\d+/) a positive or negative integer INTEGER INTEGER -label trimmed_line match(/\S([^\n]*\S)*/) -line text exclude(/\n/) a line of text input -natural_number integer match(/\d+/) a natural number, including zero INTEGER INTEGER -nonspace trimmed_line exclude(/\s/) text without whitespace -path line exclude(/\n/) a path to a file -prefix word exclude(/\W/) a prefix for a CURIE -suffix word exclude(/\W/) a suffix for a CURIE -table_name word exclude(/\W/) a table name -table_type word lowercase in('table', 'column', 'datatype') a table type -text any text TEXT TEXT xsd:string textarea -trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace -trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace -word nonspace exclude(/\W/) a single word: letters, numbers, underscore +datatype parent transform condition structure description SQL type RDF type HTML type +CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE +IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI +column_name trimmed_line match(/\S([^\n]*\S)*/) a column name +datatype_condition line exclude(/\n/) a datatype condition specification +datatype_name word exclude(/\W/) a datatype name +description trimmed_text match(/\S(.*\S)*/) a brief description +empty text equals('') the empty string NULL null +integer nonspace match(/-?\d+/) a positive or negative integer INTEGER +label trimmed_line match(/\S([^\n]*\S)*/) +line text exclude(/\n/) a line of text input +natural_number integer match(/\d+/) a natural number, including zero INTEGER +nonspace trimmed_line exclude(/\s/) text without whitespace +path line exclude(/\n/) a path to a file +prefix word exclude(/\W/) a prefix for a CURIE +suffix word exclude(/\W/) a suffix for a CURIE +table_name word exclude(/\W/) a table name +table_type word lowercase in('table', 'column', 'datatype') a table type +text any text TEXT xsd:string textarea +trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or 
end with whitespace +trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace +word nonspace exclude(/\W/) a single word: letters, numbers, underscore diff --git a/test/round_trip.sh b/test/round_trip.sh index fb93e7a5..82368470 100755 --- a/test/round_trip.sh +++ b/test/round_trip.sh @@ -15,8 +15,11 @@ then fi pwd=$(dirname $(readlink -f $0)) -export_script=$pwd/../scripts/export.py output_dir=$pwd/output +valve="./valve" + +# Use valve to save all of th configured tables: +${valve} --save_all --save_dir ${output_dir} ${table_defs} $db num_tables=$(expr $(cat $table_defs | wc -l) - 1) table_paths=$(tail -$num_tables $table_defs | cut -f 2) @@ -28,7 +31,6 @@ do table_path=$pwd/$table_path table_file=$(basename $table_path) table=${table_file%.*} - ${export_script} data $db $output_dir $table diff --strip-trailing-cr -q ${table_path} $output_dir/${table}.tsv ret_value=$(expr $ret_value + $?) done diff --git a/test/src/column.tsv b/test/src/column.tsv index 9c6c8256..05707f63 100644 --- a/test/src/column.tsv +++ b/test/src/column.tsv @@ -16,8 +16,7 @@ datatype transform empty word datatype condition empty datatype_condition datatype structure empty trimmed_line datatype description empty trimmed_text -datatype SQLite type empty trimmed_line -datatype PostgreSQL type empty trimmed_line +datatype SQL type empty trimmed_line datatype RDF type empty trimmed_line datatype HTML type empty datatype_name rule table table_name diff --git a/test/src/datatype.tsv b/test/src/datatype.tsv index c162ca56..dd90b419 100644 --- a/test/src/datatype.tsv +++ b/test/src/datatype.tsv @@ -1,24 +1,24 @@ -datatype parent transform condition structure description SQLite type PostgreSQL type RDF type HTML type -CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE -IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI -column_name trimmed_line match(/\S([^\n]*\S)*/) a column name -datatype_condition line exclude(/\n/) a datatype condition specification -datatype_name word exclude(/\W/) a datatype name -description trimmed_text match(/\S(.*\S)*/) a brief description -empty text equals('') the empty string NULL NULL null -integer nonspace match(/-?\d+/) a positive or negative integer INTEGER INTEGER -label trimmed_line match(/\S([^\n]*\S)*/) -line text exclude(/\n/) a line of text input -natural_number integer match(/\d+/) a natural number, including zero INTEGER INTEGER -nonspace trimmed_line exclude(/\s/) text without whitespace -numeric nonspace match(/-?\d+(\.\d+)?/) a positive or negative number NUMERIC NUMERIC -path line exclude(/\n/) a path to a file -prefix word exclude(/\W/) a prefix for a CURIE -real nonspace match(/-?\d+(\.\d+)?/) a positive or negative real number REAL REAL -suffix word exclude(/\W/) a suffix for a CURIE -table_name word exclude(/\W/) a table name -table_type word lowercase in('table', 'column', 'datatype') a table type -text any text TEXT VARCHAR(100) xsd:string textarea -trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace -trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace -word nonspace exclude(/\W/) a single word: letters, numbers, underscore +datatype parent transform condition structure description SQL type RDF type HTML type +CURIE nonspace match(/\S+:\S+/) concat(prefix, ":", suffix) a Compact URI CURIE +IRI nonspace exclude(/\s/) an Internationalized Resource Identifier IRI +column_name trimmed_line match(/\S([^\n]*\S)*/) a 
column name +datatype_condition line exclude(/\n/) a datatype condition specification +datatype_name word exclude(/\W/) a datatype name +description trimmed_text match(/\S(.*\S)*/) a brief description +empty text equals('') the empty string NULL null +integer nonspace match(/-?\d+/) a positive or negative integer INTEGER +label trimmed_line match(/\S([^\n]*\S)*/) +line text exclude(/\n/) a line of text input +natural_number integer match(/\d+/) a natural number, including zero INTEGER +nonspace trimmed_line exclude(/\s/) text without whitespace +numeric nonspace match(/-?\d+(\.\d+)?/) a positive or negative number NUMERIC +path line exclude(/\n/) a path to a file +prefix word exclude(/\W/) a prefix for a CURIE +real nonspace match(/-?\d+(\.\d+)?/) a positive or negative real number REAL +suffix word exclude(/\W/) a suffix for a CURIE +table_name word exclude(/\W/) a table name +table_type word lowercase in('table', 'column', 'datatype') a table type +text any text TEXT xsd:string textarea +trimmed_line line match(/\S([^\n]*\S)*/) a line of text that does not begin or end with whitespace +trimmed_text text exclude(/^\s+|\s+$/) text that does not begin or end with whitespace +word nonspace exclude(/\W/) a single word: letters, numbers, underscore