implement save_all_tables(), save_tables(), and save_table(), use new ValveError type in Results, change export.py to export_messages.py
lmcmicu committed Jan 14, 2024
1 parent c8df0c9 commit 16b6d58
Showing 8 changed files with 520 additions and 470 deletions.
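The commit title names three new save functions and a new ValveError type, but their definitions are not among the files shown below. The following is only a rough sketch of how the new surface might be called: the save_* signatures are hypothetical, inferred from the commit title alone, while the four-argument `Valve::build()` and the `Result<(), ValveError>` return type are attested in the src/api_test.rs diff:

```rust
use ontodev_valve::{Valve, ValveError};

// Sketch only: the save_* methods are named in the commit title, but their
// signatures here are hypothetical and not taken from this diff.
async fn save_demo(table: &str, database: &str) -> Result<(), ValveError> {
    // Valve::build() now takes four arguments rather than five (see src/api_test.rs):
    let valve = Valve::build(table, database, false, false).await?;
    valve.save_all_tables()?; // hypothetical: save every configured table back to TSV
    valve.save_tables(&["table2", "table3"])?; // hypothetical: save a subset
    valve.save_table("table1")?; // hypothetical: save a single table
    Ok(())
}
```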
27 changes: 13 additions & 14 deletions Makefile
@@ -44,31 +44,30 @@ test/output:
 
 test: clean_test_db sqlite_test pg_test api_test random_test
 
-tables_to_test = column datatype rule table table1 table2 table3 table4 table5 table6 table7 table8 \
-	table9 table10 table11
+tables_to_test := $(shell cut -f 1 test/src/table.tsv)
 
 sqlite_test: build/valve.db test/src/table.tsv | test/output
 	@echo "Testing valve on sqlite ..."
 	test/round_trip.sh $^
-	scripts/export.py messages $< $| $(tables_to_test)
+	scripts/export_messages.py $< $| $(tables_to_test)
 	diff --strip-trailing-cr -q test/expected/messages.tsv test/output/messages.tsv
-	scripts/export.py messages --a1 $< $| $(tables_to_test)
+	scripts/export_messages.py --a1 $< $| $(tables_to_test)
 	diff --strip-trailing-cr -q test/expected/messages_a1.tsv test/output/messages.tsv
 	# The "pk" test is run on table7 only since it is the only table whose primary keys are all valid:
-	scripts/export.py messages --pk $< $| table7
+	scripts/export_messages.py --pk $< $| table7
 	diff --strip-trailing-cr -q test/expected/messages_pk.tsv test/output/messages.tsv
 	@echo "Test succeeded!"
 
pg_test: valve test/src/table.tsv | test/output
 	@echo "Testing valve on postgresql ..."
 	./$^ postgresql:///valve_postgres
 	test/round_trip.sh postgresql:///valve_postgres $(word 2,$^)
-	scripts/export.py messages postgresql:///valve_postgres $| $(tables_to_test)
+	scripts/export_messages.py postgresql:///valve_postgres $| $(tables_to_test)
 	diff --strip-trailing-cr -q test/expected/messages.tsv test/output/messages.tsv
-	scripts/export.py messages --a1 postgresql:///valve_postgres $| $(tables_to_test)
+	scripts/export_messages.py --a1 postgresql:///valve_postgres $| $(tables_to_test)
 	diff --strip-trailing-cr -q test/expected/messages_a1.tsv test/output/messages.tsv
 	# The "pk" test is run on table7 only since it is the only table whose primary keys are all valid:
-	scripts/export.py messages --pk postgresql:///valve_postgres $| table7
+	scripts/export_messages.py --pk postgresql:///valve_postgres $| table7
 	diff --strip-trailing-cr -q test/expected/messages_pk.tsv test/output/messages.tsv
 	@echo "Test succeeded!"
 
@@ -77,8 +76,8 @@ api_test: sqlite_api_test pg_api_test
 sqlite_api_test: valve test/src/table.tsv build/valve.db test/insert_update.sh | test/output
 	@echo "Testing API functions on sqlite ..."
 	./$< --api_test $(word 2,$^) $(word 3,$^)
-	$(word 4,$^) $(word 3,$^)
-	scripts/export.py messages $(word 3,$^) $| $(tables_to_test)
+	$(word 4,$^) $(word 3,$^) $(word 2,$^)
+	scripts/export_messages.py $(word 3,$^) $| $(tables_to_test)
 	diff --strip-trailing-cr -q test/expected/messages_after_api_test.tsv test/output/messages.tsv
 	echo "select \"history_id\", \"table\", \"row\", \"from\", \"to\", \"summary\", \"user\", \"undone_by\" from history where history_id < 15 order by history_id" | sqlite3 -header -tabs build/valve.db > test/output/history.tsv
 	diff --strip-trailing-cr -q test/expected/history.tsv test/output/history.tsv
@@ -91,8 +90,8 @@ pg_api_test: valve test/src/table.tsv test/insert_update.sh | test/output
 	@echo "Testing API functions on postgresql ..."
 	./$< $(word 2,$^) postgresql:///valve_postgres
 	./$< --api_test $(word 2,$^) postgresql:///valve_postgres
-	$(word 3,$^) postgresql:///valve_postgres
-	scripts/export.py messages postgresql:///valve_postgres $| $(tables_to_test)
+	$(word 3,$^) postgresql:///valve_postgres $(word 2,$^)
+	scripts/export_messages.py postgresql:///valve_postgres $| $(tables_to_test)
 	diff --strip-trailing-cr -q test/expected/messages_after_api_test.tsv test/output/messages.tsv
 	psql postgresql:///valve_postgres -c "COPY (select \"history_id\", \"table\", \"row\", \"from\", \"to\", \"summary\", \"user\", \"undone_by\" from history where history_id < 15 order by history_id) TO STDOUT WITH NULL AS ''" > test/output/history.tsv
 	tail -n +2 test/expected/history.tsv | diff --strip-trailing-cr -q test/output/history.tsv -
@@ -161,11 +160,11 @@ $(perf_test_db): valve perf_test_data $(perf_test_dir)/*.tsv | build $(perf_test
 	time -p ./$< --verbose $(perf_test_dir)/table.tsv $@
 
 sqlite_perf_test: build/valve_perf.db | test/output
-	time -p scripts/export.py messages $< $| $(tables_to_test)
+	time -p scripts/export_messages.py $< $| $(tables_to_test)
 
 pg_perf_test: valve $(perf_test_dir)/ontology | test/output
 	time -p ./$< --verbose $(perf_test_dir)/table.tsv postgresql:///valve_postgres
-	time -p scripts/export.py messages postgresql:///valve_postgres $| $(tables_to_test)
+	time -p scripts/export_messages.py postgresql:///valve_postgres $| $(tables_to_test)
 
 perf_test: sqlite_perf_test pg_perf_test
 
98 changes: 17 additions & 81 deletions scripts/export.py → scripts/export_messages.py
@@ -115,52 +115,6 @@ def get_column_order_and_info_for_sqlite(cursor, table):
     }
 
 
-def export_data(cursor, is_sqlite, args):
-    """
-    Given a database cursor, a flag indicating whether this is a sqlite or postgres db, and a
-    dictionary containing: an output directory, "output", and a list of tables, "tables": export all
-    of the given database tables to .tsv files in the output directory.
-    """
-    output_dir = os.path.normpath(args["output_dir"])
-    tables = args["tables"]
-
-    for table in tables:
-        try:
-            if is_sqlite:
-                columns_info = get_column_order_and_info_for_sqlite(cursor, table)
-            else:
-                columns_info = get_column_order_and_info_for_postgres(cursor, table)
-            unsorted_columns = columns_info["unsorted_columns"]
-
-            select = [f'"{column}"' for column in unsorted_columns]
-            select = ", ".join(select)
-
-            # Fetch the rows from the table and write them to a corresponding TSV file in the
-            # output directory:
-            cursor.execute(f'SELECT {select} FROM "{table}_text_view" ORDER BY "row_number"')
-            colnames = [d[0] for d in cursor.description]
-            rows = map(lambda r: dict(zip(colnames, r)), cursor)
-            fieldnames = [c for c in colnames if c != "row_number"]
-            with open(f"{output_dir}/{table}.tsv", "w", newline="\n") as csvfile:
-                writer = csv.DictWriter(
-                    csvfile,
-                    fieldnames=fieldnames,
-                    delimiter="\t",
-                    doublequote=False,
-                    strict=True,
-                    lineterminator="\n",
-                    quoting=csv.QUOTE_NONE,
-                    escapechar=None,
-                    quotechar=None,
-                )
-                writer.writeheader()
-                for row in rows:
-                    del row["row_number"]
-                    writer.writerow(row)
-        except sqlite3.OperationalError as e:
-            print(f"ERROR while exporting {table}: {e}", file=sys.stderr)
-
-
 def export_messages(cursor, is_sqlite, args):
     """
     Given a database cursor, a flag indicating whether this is a sqlite or postgres db, and a
@@ -289,42 +243,24 @@ def col_to_a1(column, columns):
 
 
 if __name__ == "__main__":
-    prog_parser = ArgumentParser(description="Database table export utility")
-    sub_parsers = prog_parser.add_subparsers(help="Possible sub-commands")
-
-    sub1 = sub_parsers.add_parser(
-        "data",
-        description="Export table data",
-        help="Export table data. For command-line options, run: `%(prog)s data --help`",
+    parser = ArgumentParser(description="Export Valve messages")
+    pgroup = parser.add_mutually_exclusive_group()
+    pgroup.add_argument("--a1", action="store_true", help="Output error messages in A1 format")
+    pgroup.add_argument("--pk", action="store_true", help="Identify rows using primary keys")
 
+    parser.add_argument(
+        "db",
+        help="""Either a database connection URL or a path to a SQLite database file. In the
+        case of a URL, you must use one of the following schemes: postgresql://<URL>
+        (for PostgreSQL), sqlite://<relative path> or file:<relative path> (for SQLite).
+        """,
     )
 
-    sub1.set_defaults(func=export_data)
-
-    sub2 = sub_parsers.add_parser(
-        "messages",
-        description="Export error messages",
-        help="Export error messages. For command-line options, run: `%(prog)s messages --help`",
+    parser.add_argument("output_dir", help="The name of the directory in which to save TSV files")
+    parser.add_argument(
+        "tables", metavar="table", nargs="+", help="The name of a table to export to TSV"
     )
-    sub2_group = sub2.add_mutually_exclusive_group()
-    sub2_group.add_argument("--a1", action="store_true", help="Output error messages in A1 format")
-    sub2_group.add_argument("--pk", action="store_true", help="Identify rows using primary keys")
-    sub2.set_defaults(func=export_messages)
-
-    for sub in [sub1, sub2]:
-        sub.add_argument(
-            "db",
-            help="""Either a database connection URL or a path to a SQLite database file. In the
-            case of a URL, you must use one of the following schemes: postgresql://<URL>
-            (for PostgreSQL), sqlite://<relative path> or file:<relative path> (for SQLite).
-            """,
-        )
-        sub.add_argument("output_dir", help="The name of the directory in which to save TSV files")
-        sub.add_argument(
-            "tables", metavar="table", nargs="+", help="The name of a table to export to TSV"
-        )
 
-    args = prog_parser.parse_args()
-    func = args.func
+    args = parser.parse_args()
     args = vars(args)
 
     if not os.path.isdir(args["output_dir"]):
@@ -336,7 +272,7 @@ def col_to_a1(column, columns):
     if db.startswith("postgresql://"):
         with psycopg2.connect(db) as conn:
             cursor = conn.cursor()
-            func(cursor, False, args)
+            export_messages(cursor, False, args)
     else:
         m = re.search(r"(^(file:|sqlite://))?(.+?)(\?.+)?$", db)
         if m:
@@ -348,7 +284,7 @@ def col_to_a1(column, columns):
            db = f"file:{path}{params}"
            with sqlite3.connect(db, uri=True) as conn:
                cursor = conn.cursor()
-                func(cursor, True, args)
+                export_messages(cursor, True, args)
        else:
            print(f"Could not parse database specification: {db}", file=sys.stderr)
            sys.exit(1)
32 changes: 17 additions & 15 deletions src/api_test.rs
@@ -1,10 +1,10 @@
-use ontodev_valve::{SerdeMap, Valve};
+use ontodev_valve::{SerdeMap, Valve, ValveError};
 use rand::distributions::{Alphanumeric, DistString, Distribution, Uniform};
 use rand::{random, thread_rng};
 use serde_json::json;
-use sqlx::{any::AnyPool, query as sqlx_query, Error::Configuration as SqlxCErr, Row, ValueRef};
+use sqlx::{any::AnyPool, query as sqlx_query, Row, ValueRef};
 
-async fn test_matching(valve: &Valve) -> Result<(), sqlx::Error> {
+async fn test_matching(valve: &Valve) -> Result<(), ValveError> {
     eprint!("Running test_matching() ... ");
 
     // Test the get_matching_values() function:
@@ -39,7 +39,7 @@ async fn test_matching(valve: &Valve) -> Result<(), sqlx::Error> {
     Ok(())
 }
 
-async fn test_idempotent_validate_and_update(valve: &Valve) -> Result<(), sqlx::Error> {
+async fn test_idempotent_validate_and_update(valve: &Valve) -> Result<(), ValveError> {
     eprint!("Running test_idempotent_validate_and_update() ... ");
 
     // We test that validate_row() is idempotent by running it multiple times on the same row:
@@ -76,7 +76,7 @@ async fn test_idempotent_validate_and_update(valve: &Valve) -> Result<(), sqlx::
     Ok(())
 }
 
-async fn test_validate_and_insert_1(valve: &Valve) -> Result<(), sqlx::Error> {
+async fn test_validate_and_insert_1(valve: &Valve) -> Result<(), ValveError> {
     eprint!("Running test_validate_and_insert_1() ... ");
 
     // Validate and insert a new row:
@@ -104,7 +104,7 @@ async fn test_validate_and_insert_1(valve: &Valve) -> Result<(), sqlx::Error> {
     Ok(())
 }
 
-async fn test_validate_and_update(valve: &Valve) -> Result<(), sqlx::Error> {
+async fn test_validate_and_update(valve: &Valve) -> Result<(), ValveError> {
     eprint!("Running test_validate_and_update() ... ");
 
     // Validate and update an existing row:
@@ -132,7 +132,7 @@ async fn test_validate_and_update(valve: &Valve) -> Result<(), sqlx::Error> {
     Ok(())
 }
 
-async fn test_validate_and_insert_2(valve: &Valve) -> Result<(), sqlx::Error> {
+async fn test_validate_and_insert_2(valve: &Valve) -> Result<(), ValveError> {
     eprint!("Running test_validate_and_insert_2() ... ");
 
     // Validate and insert a new row:
@@ -160,7 +160,7 @@ async fn test_validate_and_insert_2(valve: &Valve) -> Result<(), sqlx::Error> {
     Ok(())
 }
 
-async fn test_dependencies(valve: &Valve) -> Result<(), sqlx::Error> {
+async fn test_dependencies(valve: &Valve) -> Result<(), ValveError> {
     eprint!("Running test_dependencies() ... ");
 
     // Test cases for updates/inserts/deletes with dependencies.
@@ -211,7 +211,7 @@ enum DbOperation {
     Redo,
 }
 
-async fn generate_operation_sequence(pool: &AnyPool) -> Result<Vec<DbOperation>, sqlx::Error> {
+async fn generate_operation_sequence(pool: &AnyPool) -> Result<Vec<DbOperation>, ValveError> {
     /*
     Algorithm:
     ----------
@@ -308,7 +308,7 @@ async fn generate_operation_sequence(pool: &AnyPool) -> Result<Vec<DbOperation>,
     Ok(operations)
 }
 
-async fn test_randomized_api_test_with_undo_redo(valve: &Valve) -> Result<(), sqlx::Error> {
+async fn test_randomized_api_test_with_undo_redo(valve: &Valve) -> Result<(), ValveError> {
     // Randomly generate a number of insert/update/delete operations, possibly followed by undos
     // and/or redos.
     eprint!("Running test_randomized_api_test_with_undo_redo() ... ");
@@ -354,7 +354,7 @@ async fn test_randomized_api_test_with_undo_redo(valve: &Valve) -> Result<(), sq
                 let sql_row = query.fetch_one(&valve.pool).await?;
                 let raw_row_number = sql_row.try_get_raw("row_number")?;
                 if raw_row_number.is_null() {
-                    return Err(SqlxCErr("No rows in table1_view".into()));
+                    return Err(ValveError::DataError("No rows in table1_view".into()));
                 } else {
                     let row_number: i64 = sql_row.get("row_number");
                     let row_number = row_number as u32;
@@ -366,7 +366,7 @@ async fn test_randomized_api_test_with_undo_redo(valve: &Valve) -> Result<(), sq
                 let sql_row = query.fetch_one(&valve.pool).await?;
                 let raw_row_number = sql_row.try_get_raw("row_number")?;
                 if raw_row_number.is_null() {
-                    return Err(SqlxCErr("No rows in table1_view".into()));
+                    return Err(ValveError::DataError("No rows in table1_view".into()));
                 } else {
                     let row_number: i64 = sql_row.get("row_number");
                     let row_number = row_number as u32;
Expand All @@ -391,7 +391,7 @@ async fn test_randomized_api_test_with_undo_redo(valve: &Valve) -> Result<(), sq
Ok(())
}

async fn test_undo_redo(valve: &Valve) -> Result<(), sqlx::Error> {
async fn test_undo_redo(valve: &Valve) -> Result<(), ValveError> {
eprint!("Running test_undo_redo() ... ");

// Undo/redo tests
@@ -467,8 +467,8 @@ async fn test_undo_redo(valve: &Valve) -> Result<(), sqlx::Error> {
     Ok(())
 }
 
-pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Error> {
-    let valve = Valve::build(table, database, false, false, false).await?;
+pub async fn run_api_tests(table: &str, database: &str) -> Result<(), ValveError> {
+    let valve = Valve::build(table, database, false, false).await?;
     // NOTE that you must use an external script to fetch the data from the database and run a diff
     // against a known good sample to verify that these tests yield the expected results:
     test_matching(&valve).await?;
@@ -480,5 +480,7 @@ pub async fn run_api_tests(table: &str, database: &str) -> Result<(), sqlx::Erro
     test_undo_redo(&valve).await?;
     test_randomized_api_test_with_undo_redo(&valve).await?;
 
+    // TODO: Add some tests for the new API functions like save.
+
     Ok(())
 }
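Since the test functions above keep applying `?` to sqlx calls (e.g. `query.fetch_one(&valve.pool).await?`) while now returning `Result<(), ValveError>`, the new error type presumably converts from `sqlx::Error`. Here is a minimal sketch of what such a type could look like; only the `DataError` variant is attested in this diff, while the `DatabaseError` variant and the `From` impl are assumptions:

```rust
use std::fmt;

// A sketch of a unified error type. Only DataError is attested in this diff;
// DatabaseError and the From impl below are assumptions.
#[derive(Debug)]
pub enum ValveError {
    /// A problem with the data itself, e.g. "No rows in table1_view".
    DataError(String),
    /// A wrapped error from the underlying sqlx database layer.
    DatabaseError(sqlx::Error),
}

impl fmt::Display for ValveError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            ValveError::DataError(s) => write!(f, "data error: {}", s),
            ValveError::DatabaseError(e) => write!(f, "database error: {}", e),
        }
    }
}

impl std::error::Error for ValveError {}

// This conversion is what would let `query.fetch_one(&valve.pool).await?` be
// used unchanged inside a function returning Result<(), ValveError>:
impl From<sqlx::Error> for ValveError {
    fn from(e: sqlx::Error) -> Self {
        ValveError::DatabaseError(e)
    }
}
```

With a conversion like this in place, the diff's mechanical change — swapping `sqlx::Error` for `ValveError` in every test signature and `SqlxCErr(...)` for `ValveError::DataError(...)` — works without touching the `?` operators in the test bodies.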