Merge pull request #71 from ontodev/new-api
New API and other new features
lmcmicu authored Feb 5, 2024
2 parents 6f8c291 + 341c1cb commit 669d4db
Showing 21 changed files with 3,025 additions and 2,573 deletions.
48 changes: 28 additions & 20 deletions Makefile
@@ -12,7 +12,7 @@ MAKEFLAGS += --warn-undefined-variables
build:
mkdir build

.PHONY: doc readme valve_debug valve_release test sqlite_test pg_test api_test sqlite_api_test \
.PHONY: clean doc readme valve_debug valve_release test sqlite_test pg_test api_test sqlite_api_test \
pg_qpi_test random_test_data random_test sqlite_random_test pg_random_test guess_test_data \
perf_test_data sqlite_perf_test pg_perf_test perf_test

@@ -36,39 +36,38 @@ valve_debug:
cargo build
ln -s target/debug/ontodev_valve valve

build/valve.db: test/src/table.tsv clean valve | build
./valve $< $@
build/valve.db: valve test/src/table.tsv | build
./$^ $@

test/output:
mkdir -p test/output

test: sqlite_test pg_test api_test random_test
test: clean_test_db sqlite_test pg_test api_test random_test

tables_to_test = column datatype rule table table1 table2 table3 table4 table5 table6 table7 table8 \
table9 table10 table11
tables_to_test := $(shell cut -f 1 test/src/table.tsv)

sqlite_test: build/valve.db test/src/table.tsv | test/output
@echo "Testing valve on sqlite ..."
test/round_trip.sh $^
scripts/export.py messages $< $| $(tables_to_test)
scripts/export_messages.py $< $| $(tables_to_test)
diff --strip-trailing-cr -q test/expected/messages.tsv test/output/messages.tsv
scripts/export.py messages --a1 $< $| $(tables_to_test)
scripts/export_messages.py --a1 $< $| $(tables_to_test)
diff --strip-trailing-cr -q test/expected/messages_a1.tsv test/output/messages.tsv
# The "pk" test is run on table7 only since it is the only table whose primary keys are all valid:
scripts/export.py messages --pk $< $| table7
scripts/export_messages.py --pk $< $| table7
diff --strip-trailing-cr -q test/expected/messages_pk.tsv test/output/messages.tsv
@echo "Test succeeded!"

pg_test: valve test/src/table.tsv | test/output
@echo "Testing valve on postgresql ..."
./$^ postgresql:///valve_postgres
test/round_trip.sh postgresql:///valve_postgres $(word 2,$^)
scripts/export.py messages postgresql:///valve_postgres $| $(tables_to_test)
scripts/export_messages.py postgresql:///valve_postgres $| $(tables_to_test)
diff --strip-trailing-cr -q test/expected/messages.tsv test/output/messages.tsv
scripts/export.py messages --a1 postgresql:///valve_postgres $| $(tables_to_test)
scripts/export_messages.py --a1 postgresql:///valve_postgres $| $(tables_to_test)
diff --strip-trailing-cr -q test/expected/messages_a1.tsv test/output/messages.tsv
# The "pk" test is run on table7 only since it is the only table whose primary keys are all valid:
scripts/export.py messages --pk postgresql:///valve_postgres $| table7
scripts/export_messages.py --pk postgresql:///valve_postgres $| table7
diff --strip-trailing-cr -q test/expected/messages_pk.tsv test/output/messages.tsv
@echo "Test succeeded!"

@@ -77,22 +76,28 @@ api_test: sqlite_api_test pg_api_test
sqlite_api_test: valve test/src/table.tsv build/valve.db test/insert_update.sh | test/output
@echo "Testing API functions on sqlite ..."
./$< --api_test $(word 2,$^) $(word 3,$^)
$(word 4,$^) $(word 3,$^)
scripts/export.py messages $(word 3,$^) $| $(tables_to_test)
$(word 4,$^) $(word 3,$^) $(word 2,$^)
scripts/export_messages.py $(word 3,$^) $| $(tables_to_test)
diff --strip-trailing-cr -q test/expected/messages_after_api_test.tsv test/output/messages.tsv
echo "select \"history_id\", \"table\", \"row\", \"from\", \"to\", \"summary\", \"user\", \"undone_by\" from history where history_id < 15 order by history_id" | sqlite3 -header -tabs build/valve.db > test/output/history.tsv
diff --strip-trailing-cr -q test/expected/history.tsv test/output/history.tsv
# We drop all of the db tables because the schema for the next test (random test) is different
# from the schema used for this test.
./$< --drop_all $(word 2,$^) $(word 3,$^)
@echo "Test succeeded!"

pg_api_test: valve test/src/table.tsv test/insert_update.sh | test/output
@echo "Testing API functions on postgresql ..."
./$< $(word 2,$^) postgresql:///valve_postgres
./$< --api_test $(word 2,$^) postgresql:///valve_postgres
$(word 3,$^) postgresql:///valve_postgres
scripts/export.py messages postgresql:///valve_postgres $| $(tables_to_test)
$(word 3,$^) postgresql:///valve_postgres $(word 2,$^)
scripts/export_messages.py postgresql:///valve_postgres $| $(tables_to_test)
diff --strip-trailing-cr -q test/expected/messages_after_api_test.tsv test/output/messages.tsv
psql postgresql:///valve_postgres -c "COPY (select \"history_id\", \"table\", \"row\", \"from\", \"to\", \"summary\", \"user\", \"undone_by\" from history where history_id < 15 order by history_id) TO STDOUT WITH NULL AS ''" > test/output/history.tsv
tail -n +2 test/expected/history.tsv | diff --strip-trailing-cr -q test/output/history.tsv -
# We drop all of the db tables because the schema for the next test (random test) is different
# from the schema used for this test.
./$< --drop_all $(word 2,$^) postgresql:///valve_postgres
@echo "Test succeeded!"

sqlite_random_db = build/valve_random.db
@@ -106,13 +111,13 @@ $(random_test_dir)/ontology:
random_test_data: test/generate_random_test_data.py valve valve test/random_test_data/table.tsv | $(random_test_dir)/ontology
./$< $$(date +"%s") 100 5 $(word 3,$^) $|

sqlite_random_test: valve clean random_test_data | build test/output
sqlite_random_test: valve random_test_data | build test/output
@echo "Testing with random data on sqlite ..."
./$< $(random_test_dir)/table.tsv $(sqlite_random_db)
test/round_trip.sh $(sqlite_random_db) $(random_test_dir)/table.tsv
@echo "Test succeeded!"

pg_random_test: valve clean random_test_data | build test/output
pg_random_test: valve random_test_data | build test/output
@echo "Testing with random data on postgresql ..."
./$< $(random_test_dir)/table.tsv postgresql:///valve_postgres
test/round_trip.sh postgresql:///valve_postgres $(random_test_dir)/table.tsv
@@ -155,17 +160,20 @@ $(perf_test_db): valve perf_test_data $(perf_test_dir)/*.tsv | build $(perf_test
time -p ./$< --verbose $(perf_test_dir)/table.tsv $@

sqlite_perf_test: build/valve_perf.db | test/output
time -p scripts/export.py messages $< $| $(tables_to_test)
time -p scripts/export_messages.py $< $| $(tables_to_test)

pg_perf_test: valve $(perf_test_dir)/ontology | test/output
time -p ./$< --verbose $(perf_test_dir)/table.tsv postgresql:///valve_postgres
time -p scripts/export.py messages postgresql:///valve_postgres $| $(tables_to_test)
time -p scripts/export_messages.py postgresql:///valve_postgres $| $(tables_to_test)

perf_test: sqlite_perf_test pg_perf_test

clean:
rm -Rf build/valve.db* build/valve_random.db* test/output $(random_test_dir)/ontology valve

clean_test_db:
rm -Rf build/valve.db

clean_guess_db:
rm -Rf build/valve_guess.db

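Note that `tables_to_test` is now derived from the first column of `test/src/table.tsv` rather than hard-coded. For readers unfamiliar with GNU Make's `$(shell ...)`, here is a minimal Python sketch of the same computation (it mirrors `cut -f 1`, header line included):

```python
import csv

# Equivalent of: tables_to_test := $(shell cut -f 1 test/src/table.tsv)
# Collect the first tab-separated field of every line, as cut -f 1 does.
with open("test/src/table.tsv", newline="") as f:
    tables_to_test = [row[0] for row in csv.reader(f, delimiter="\t") if row]

print(" ".join(tables_to_test))
```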
3 changes: 3 additions & 0 deletions README.md
@@ -14,6 +14,9 @@ valve --help
```
to see command line options.

### API
See [Valve]

### Python bindings
See [valve.py](https://github.com/ontodev/valve.py)

98 changes: 17 additions & 81 deletions scripts/export.py → scripts/export_messages.py
@@ -115,52 +115,6 @@ def get_column_order_and_info_for_sqlite(cursor, table):
}


def export_data(cursor, is_sqlite, args):
"""
Given a database cursor, a flag indicating whether this is a sqlite or postgres db, and a
dictionary containing: an output directory, "output", and a list of tables, "tables": export all
of the given database tables to .tsv files in the output directory.
"""
output_dir = os.path.normpath(args["output_dir"])
tables = args["tables"]

for table in tables:
try:
if is_sqlite:
columns_info = get_column_order_and_info_for_sqlite(cursor, table)
else:
columns_info = get_column_order_and_info_for_postgres(cursor, table)
unsorted_columns = columns_info["unsorted_columns"]

select = [f'"{column}"' for column in unsorted_columns]
select = ", ".join(select)

# Fetch the rows from the table and write them to a corresponding TSV file in the
# output directory:
cursor.execute(f'SELECT {select} FROM "{table}_text_view" ORDER BY "row_number"')
colnames = [d[0] for d in cursor.description]
rows = map(lambda r: dict(zip(colnames, r)), cursor)
fieldnames = [c for c in colnames if c != "row_number"]
with open(f"{output_dir}/{table}.tsv", "w", newline="\n") as csvfile:
writer = csv.DictWriter(
csvfile,
fieldnames=fieldnames,
delimiter="\t",
doublequote=False,
strict=True,
lineterminator="\n",
quoting=csv.QUOTE_NONE,
escapechar=None,
quotechar=None,
)
writer.writeheader()
for row in rows:
del row["row_number"]
writer.writerow(row)
except sqlite3.OperationalError as e:
print(f"ERROR while exporting {table}: {e}", file=sys.stderr)


def export_messages(cursor, is_sqlite, args):
"""
Given a database cursor, a flag indicating whether this is a sqlite or postgres db, and a
@@ -289,42 +243,24 @@ def col_to_a1(column, columns):


if __name__ == "__main__":
prog_parser = ArgumentParser(description="Database table export utility")
sub_parsers = prog_parser.add_subparsers(help="Possible sub-commands")

sub1 = sub_parsers.add_parser(
"data",
description="Export table data",
help="Export table data. For command-line options, run: `%(prog)s data --help`",
parser = ArgumentParser(description="Export Valve messages")
pgroup = parser.add_mutually_exclusive_group()
pgroup.add_argument("--a1", action="store_true", help="Output error messages in A1 format")
pgroup.add_argument("--pk", action="store_true", help="Identify rows using primary keys")

parser.add_argument(
"db",
help="""Either a database connection URL or a path to a SQLite database file. In the
case of a URL, you must use one of the following schemes: potgresql://<URL>
(for postgreSQL), sqlite://<relative path> or file:<relative path> (for SQLite).
""",
)

sub1.set_defaults(func=export_data)

sub2 = sub_parsers.add_parser(
"messages",
description="Export error messages",
help="Export error messages. For command-line options, run: `%(prog)s messages --help`",
parser.add_argument("output_dir", help="The name of the directory in which to save TSV files")
parser.add_argument(
"tables", metavar="table", nargs="+", help="The name of a table to export to TSV"
)
sub2_group = sub2.add_mutually_exclusive_group()
sub2_group.add_argument("--a1", action="store_true", help="Output error messages in A1 format")
sub2_group.add_argument("--pk", action="store_true", help="Identify rows using primary keys")
sub2.set_defaults(func=export_messages)

for sub in [sub1, sub2]:
sub.add_argument(
"db",
help="""Either a database connection URL or a path to a SQLite database file. In the
case of a URL, you must use one of the following schemes: potgresql://<URL>
(for postgreSQL), sqlite://<relative path> or file:<relative path> (for SQLite).
""",
)
sub.add_argument("output_dir", help="The name of the directory in which to save TSV files")
sub.add_argument(
"tables", metavar="table", nargs="+", help="The name of a table to export to TSV"
)

args = prog_parser.parse_args()
func = args.func
args = parser.parse_args()
args = vars(args)

if not os.path.isdir(args["output_dir"]):
@@ -336,7 +272,7 @@ def col_to_a1(column, columns):
if db.startswith("postgresql://"):
with psycopg2.connect(db) as conn:
cursor = conn.cursor()
func(cursor, False, args)
export_messages(cursor, False, args)
else:
m = re.search(r"(^(file:|sqlite://))?(.+?)(\?.+)?$", db)
if m:
@@ -348,7 +284,7 @@
db = f"file:{path}{params}"
with sqlite3.connect(db, uri=True) as conn:
cursor = conn.cursor()
func(cursor, True, args)
export_messages(cursor, True, args)
else:
print(f"Could not parse database specification: {db}", file=sys.stderr)
sys.exit(1)
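
With the `data` subcommand removed, the renamed `export_messages.py` takes its database, output directory, and table names as positional arguments. A sketch of the invocations used by the Makefile targets above (the table names are examples from this repo's test schema; the test database must already exist):

```python
import subprocess

# Default export of validation messages for the named tables:
subprocess.run(
    ["scripts/export_messages.py", "build/valve.db", "test/output", "table1", "table2"],
    check=True,
)

# Same export, but with cell positions written in A1 notation:
subprocess.run(
    ["scripts/export_messages.py", "--a1", "build/valve.db", "test/output", "table1", "table2"],
    check=True,
)

# Identify rows by primary key; the Makefile runs this on table7 only:
subprocess.run(
    ["scripts/export_messages.py", "--pk", "build/valve.db", "test/output", "table7"],
    check=True,
)
```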
12 changes: 6 additions & 6 deletions scripts/guess.py
@@ -155,14 +155,14 @@ def get_higher_datatypes(datatype_hierarchies, universals, depth)

def get_sql_type(config, datatype):
"""Given the config map and the name of a datatype, climb the datatype tree (as required),
and return the first 'SQLite type' found."""
and return the first 'SQL type' found."""
if "datatype" not in config:
print("Missing datatypes in config")
sys.exit(1)
if datatype not in config["datatype"]:
return None
if config["datatype"][datatype].get("SQLite type"):
return config["datatype"][datatype]["SQLite type"]
if config["datatype"][datatype].get("SQL type"):
return config["datatype"][datatype]["SQL type"]
return get_sql_type(config, config["datatype"][datatype].get("parent"))


@@ -260,9 +260,9 @@ def is_match(datatype):
# If the datatype has no associated condition then it matches anything:
if not datatype.get("condition"):
return True
# If the SQLite type is NULL this datatype is ruled out:
sqlite_type = datatype.get("SQLite type")
if sqlite_type and sqlite_type.casefold() == "null":
# If the SQL type is NULL this datatype is ruled out:
sql_type = datatype.get("SQL type")
if sql_type and sql_type.casefold() == "null":
return False

condition = get_compiled_condition(datatype["condition"], config["parser"])
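The rename from "SQLite type" to "SQL type" runs through `get_sql_type()`, which climbs the datatype hierarchy via each datatype's `parent` until it finds a declared SQL type. A toy illustration under an assumed three-level hierarchy (the config below is made up for the example; only the key names mirror guess.py):

```python
# Hypothetical datatype hierarchy: "word" -> "trimmed_line" -> "text".
config = {
    "datatype": {
        "text": {"SQL type": "TEXT", "parent": None},
        "trimmed_line": {"SQL type": None, "parent": "text"},
        "word": {"SQL type": None, "parent": "trimmed_line"},
    }
}

def get_sql_type(config, datatype):
    # Climb the tree until a datatype declares a "SQL type" (as in guess.py above).
    if datatype not in config["datatype"]:
        return None
    if config["datatype"][datatype].get("SQL type"):
        return config["datatype"][datatype]["SQL type"]
    return get_sql_type(config, config["datatype"][datatype].get("parent"))

print(get_sql_type(config, "word"))  # -> TEXT (inherited from "text")
```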