Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prototype guess operation #69

Merged
merged 32 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
21dc326
require that actual and defined columns always match when the former …
lmcmicu Nov 6, 2023
c133071
read config from file when generating random data
lmcmicu Nov 7, 2023
33ce7af
fix path to valve in random data generation script
lmcmicu Nov 7, 2023
729a894
add guess test files
lmcmicu Nov 7, 2023
cdb657f
setup Makefile for guess tests
lmcmicu Nov 9, 2023
701cc84
tweaks to Makefile
lmcmicu Nov 9, 2023
474c7a5
initial version of guess prototype
lmcmicu Nov 9, 2023
d401835
guess nulltype
lmcmicu Nov 9, 2023
5a66118
guess primary/unique
lmcmicu Nov 9, 2023
64502b6
tweak
lmcmicu Nov 9, 2023
f02740e
annotate datatype (WIP)
lmcmicu Nov 11, 2023
5200b5f
add stubs for functions to retrieve from() structures
lmcmicu Nov 13, 2023
bc7cb2c
small optimization
lmcmicu Nov 13, 2023
6dfac26
add db parameter
lmcmicu Nov 13, 2023
4a83313
fix unsupported format error in help
lmcmicu Nov 13, 2023
c956c59
rename foreign stub
lmcmicu Nov 13, 2023
76be3bd
implement datatype guess
lmcmicu Nov 13, 2023
a2f64ee
call lstrip() on in() conditions
lmcmicu Nov 15, 2023
2a4db64
implement get_potential_foreign_columns()
lmcmicu Nov 15, 2023
e8e163a
implement get_froms()
lmcmicu Nov 15, 2023
43e48ec
do froms before uniques
lmcmicu Nov 15, 2023
d6e2862
textify from structures
lmcmicu Nov 16, 2023
70cc3d7
fix bugs that causes infinite loop and that attempt to dereference a …
lmcmicu Nov 16, 2023
0554aa6
optimize sampling
lmcmicu Nov 16, 2023
81d973a
tweak
lmcmicu Nov 16, 2023
ea0630a
also sort datatypes by depth
lmcmicu Nov 16, 2023
14e00a0
don't duplicate datatype check
lmcmicu Nov 16, 2023
2e12dda
make get_hierarchy_for_dt() an outer function
lmcmicu Nov 16, 2023
318da29
redesign algorithm for get_datatype()
lmcmicu Nov 20, 2023
0ea815f
add verbose flag
lmcmicu Nov 22, 2023
4a4ea32
write table and column config to db
lmcmicu Nov 26, 2023
f4495be
do not store label if it is the same as the normalized column name, a…
lmcmicu Nov 27, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 66 additions & 32 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@ MAKEFLAGS += --warn-undefined-variables
build:
mkdir build

.PHONY: doc time test sqlite_test pg_test
.PHONY: api_test sqlite_api_test pg_qpi_test
.PHONY: random_test_data random_test sqlite_random_test pg_random_test
# Targets that name commands rather than files. Declaring them phony makes
# sure they still run if a file of the same name ever appears.
# NOTE(review): the list previously contained `pg_qpi_test`; this is assumed to
# be a typo for `pg_api_test` (by analogy with `sqlite_api_test`) -- confirm
# against the actual target name.
.PHONY: doc readme valve_debug valve_release test sqlite_test pg_test api_test sqlite_api_test \
        pg_api_test random_test_data random_test sqlite_random_test pg_random_test guess_test_data \
        perf_test_data sqlite_perf_test pg_perf_test perf_test \
        clean cleanall clean_guess_db clean_guess_data clean_perf_db clean_perf_data


doc:
cargo doc --document-private-items
Expand All @@ -23,21 +24,28 @@ readme:
cargo readme --no-title > README.md

valve: src/*.rs src/*.lalrpop
@$(MAKE) valve_debug

valve_release:
rm -f valve
cargo build --release
ln -s target/release/ontodev_valve valve
# cargo build
# ln -s target/debug/ontodev_valve valve

build/valve.db: test/src/table.tsv valve clean | build
# Build the debug binary with cargo and point the `valve` symlink at it.
# Any existing `valve` entry is removed first, so the symlink is always
# recreated and targets target/debug/ontodev_valve.
valve_debug:
rm -f valve
cargo build
ln -s target/debug/ontodev_valve valve

build/valve.db: test/src/table.tsv clean valve | build
./valve $< $@

test/output:
mkdir -p test/output

test: sqlite_test pg_test api_test random_test

tables_to_test = column datatype rule table table1 table2 table3 table4 table5 table6 table7 table8 table9 table10 table11
tables_to_test = column datatype rule table table1 table2 table3 table4 table5 table6 table7 table8 \
table9 table10 table11

sqlite_test: build/valve.db test/src/table.tsv | test/output
@echo "Testing valve on sqlite ..."
Expand Down Expand Up @@ -93,10 +101,10 @@ random_test_dir = test/random_test_data
random_test: sqlite_random_test pg_random_test

$(random_test_dir)/ontology:
mkdir -p $(random_test_dir)/ontology
mkdir -p $@

random_test_data: test/generate_random_test_data.py | $(random_test_dir)/ontology
./$< $$(date +"%s") 100 5 $|
# Generate random test data using the current Unix time as the first argument
# to the generator script.
# Prerequisites, in order: the generator script ($<), the valve binary, and
# the table configuration file. $(word 3,$^) therefore expands to the table
# configuration, so keep that order if prerequisites are added. The order-only
# prerequisite ($|) is the ontology directory passed as the last argument.
random_test_data: test/generate_random_test_data.py valve $(random_test_dir)/table.tsv | $(random_test_dir)/ontology
	./$< $$(date +"%s") 100 5 $(word 3,$^) $|

sqlite_random_test: valve clean random_test_data | build test/output
@echo "Testing with random data on sqlite ..."
Expand All @@ -110,40 +118,66 @@ pg_random_test: valve clean random_test_data | build test/output
test/round_trip.sh postgresql:///valve_postgres $(random_test_dir)/table.tsv
@echo "Test succeeded!"

test/perf_test_data/ontology: test/generate_random_test_data.py
mkdir $@
./$< 1 10000 5 $@
guess_test_dir = test/guess_test_data
guess_test_db = build/valve_guess.db

# Generate $(guess_test_dir)/table1.tsv by running the generator script ($<)
# against $(guess_test_dir)/table.tsv.
# The target itself is filtered out of the *.tsv prerequisites: a bare
# `$(guess_test_dir)/*.tsv` glob also matches table1.tsv once it exists, which
# makes the target depend on itself (GNU make then drops the circular
# dependency with a warning).
$(guess_test_dir)/table1.tsv: test/generate_random_test_data.py valve \
		$(filter-out $(guess_test_dir)/table1.tsv,$(wildcard $(guess_test_dir)/*.tsv))
	./$< 0 30000 5 $(guess_test_dir)/table.tsv $(guess_test_dir)

# Create the ontology directory for the guess test data ($@ is the directory
# itself). Other rules use it as an order-only prerequisite.
$(guess_test_dir)/ontology:
mkdir -p $@

# Regenerate the guess test data. The recipe steps depend on their order:
#   1. confirm_overwrite.sh prompts before continuing when the ontology
#      directory already contains files; a non-zero exit aborts the recipe.
#   2. table1.tsv is deleted and the generator script ($<) is rerun with
#      table.tsv and $(guess_test_dir) as its last two arguments.
#   3. All *.tsv files in the ontology directory are deleted and the generator
#      is rerun with table_expected.tsv and the ontology directory ($|).
#   4. ontology/table1.tsv is deleted again.
# NOTE(review): the meaning of the generator arguments `0 30000 5` is not
# visible here (presumably seed, row count, and a third tuning parameter) --
# confirm against test/generate_random_test_data.py.
guess_test_data: test/generate_random_test_data.py $(guess_test_dir)/table1.tsv valve confirm_overwrite.sh $(guess_test_dir)/*.tsv | $(guess_test_dir)/ontology
./confirm_overwrite.sh $(guess_test_dir)/ontology
rm -f $(guess_test_dir)/table1.tsv
./$< 0 30000 5 $(guess_test_dir)/table.tsv $(guess_test_dir)
rm -f $(guess_test_dir)/ontology/*.tsv
./$< 0 30000 5 $(guess_test_dir)/table_expected.tsv $|
rm -f $(guess_test_dir)/ontology/table1.tsv

build/valve_perf.db: valve | test/perf_test_data/ontology build
@if [ -f $@ ]; \
then \
echo "'$@' exists but is out of date. To rebuild '$@', run \`make cleanperfdb\`" \
"before running \`make $@\`" ; \
false; \
fi
time -p ./$< --verbose test/perf_test_data/table.tsv $@
# Build the guess test database: delete any existing database file, then run
# valve ($<) on $(guess_test_dir)/table.tsv with the database ($@) as output.
# Because `guess_test_data` is declared in .PHONY and is a normal prerequisite
# here, this target is considered out of date on every invocation.
$(guess_test_db): valve guess_test_data $(guess_test_dir)/*.tsv | build $(guess_test_dir)/ontology
rm -f $@
./$< $(guess_test_dir)/table.tsv $@

perf_test_dir = test/perf_test_data
perf_test_db = build/valve_perf.db

# Create the ontology directory for the performance test data ($@ is the
# directory itself). Other rules use it as an order-only prerequisite.
$(perf_test_dir)/ontology:
mkdir -p $@

# Regenerate the performance test data:
#   1. confirm_overwrite.sh prompts before continuing when the ontology
#      directory already contains files; a non-zero exit aborts the recipe.
#   2. All *.tsv files in the ontology directory are deleted.
#   3. The generator script ($<) is run with the current Unix time as its
#      first argument, then `10000 5`, the table configuration, and the
#      ontology directory ($|).
# NOTE(review): the meaning of `10000 5` is not visible here (presumably row
# count and a tuning parameter) -- confirm against the generator script.
perf_test_data: test/generate_random_test_data.py valve confirm_overwrite.sh $(perf_test_dir)/*.tsv | $(perf_test_dir)/ontology
./confirm_overwrite.sh $(perf_test_dir)/ontology
rm -f $(perf_test_dir)/ontology/*.tsv
./$< $$(date +"%s") 10000 5 $(perf_test_dir)/table.tsv $|

# Build the performance test database: delete any existing database file,
# then run valve ($<) with --verbose on $(perf_test_dir)/table.tsv, timing
# the load with `time -p`.
# Because `perf_test_data` is declared in .PHONY and is a normal prerequisite
# here, this target is considered out of date on every invocation.
$(perf_test_db): valve perf_test_data $(perf_test_dir)/*.tsv | build $(perf_test_dir)/ontology
rm -f $@
time -p ./$< --verbose $(perf_test_dir)/table.tsv $@

.PHONY: sqlite_perf_test
sqlite_perf_test: build/valve_perf.db | test/output
time -p scripts/export.py messages $< $| $(tables_to_test)

.PHONY: pg_perf_test
pg_perf_test: valve test/perf_test_data/ontology | test/output
time -p ./$< --verbose test/perf_test_data/table.tsv postgresql:///valve_postgres
pg_perf_test: valve $(perf_test_dir)/ontology | test/output
time -p ./$< --verbose $(perf_test_dir)/table.tsv postgresql:///valve_postgres
time -p scripts/export.py messages postgresql:///valve_postgres $| $(tables_to_test)

.PHONY: perf_test
perf_test: sqlite_perf_test pg_perf_test

clean:
rm -Rf build/valve.db build/valve_random.db test/output $(random_test_dir)/ontology
rm -Rf build/valve.db* build/valve_random.db* test/output $(random_test_dir)/ontology valve

cleanperfdb:
clean_guess_db:
rm -Rf build/valve_guess.db

clean_guess_data:
rm -Rf $(guess_test_dir)/table1.tsv $(guess_test_dir)/ontology

clean_perf_db:
rm -Rf build/valve_perf.db

cleanperfdata:
rm -Rf test/perf_test_data/ontology
clean_perf_data:
rm -Rf $(perf_test_dir)/ontology

cleanall: clean cleanperfdb cleanperfdata
cleanall: clean clean_perf_db clean_perf_data clean_guess_db clean_guess_data
cargo clean
rm -Rf valve
rm -f valve
14 changes: 14 additions & 0 deletions confirm_overwrite.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env sh
#
# Ask for confirmation before the contents of a directory are overwritten.
#
# Usage: confirm_overwrite.sh DIR
#
# If DIR is an existing, non-empty directory, its contents are listed and the
# user is prompted. Exit status is 0 when the user answers exactly `y`, and 1
# for any other answer (including an empty answer or end of input). When DIR
# does not exist or is empty, nothing is printed and the script exits 0.

# Two separate tests joined with && replace the obsolescent `-a` operator.
# Quoting "$1" keeps paths containing spaces or glob characters intact.
if [ -d "$1" ] && [ -n "$(ls -A -- "$1")" ]
then
    # Pass the directory name and listing as printf arguments, not as part of
    # the format string, so that `%` or `\` in file names is printed literally.
    printf '%s already exists and contains the following files: %s\nAre you sure (y/n)? ' \
        "$1" "$(ls -A -m -w 0 -- "$1")"
    # -r stops read from interpreting backslashes in the answer.
    read -r enter
    # Quoted so that an empty answer is a plain "no" rather than a syntax
    # error inside `[`.
    if [ "$enter" = 'y' ]
    then
        exit 0
    else
        echo "Understood. Exiting with error code."
        exit 1
    fi
fi
Loading