-
Notifications
You must be signed in to change notification settings - Fork 4
/
Makefile
100 lines (76 loc) · 3.58 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# Recipes and $(shell ...) calls are run with bash.
SHELL := bash

# Tools invoked by the recipes below.
PROVCONVERT := utility/provconvert/bin/provconvert
PYTHON := venv/bin/python3

# Absolute path of the makefile that the per-dataset goals hand over to.
MAKEFILE_SINGLE := $(abspath dataset.makefile)

# Dataset names, and the matching folders under outputs/.
DATASETS := MIMIC-PXC7 CM-Buildings CM-Routes CM-RouteSets PG-T PG-D
OUTPUT_DIRS := $(addprefix outputs/,$(DATASETS))

# Log file name stamped with the UTC date and time at which make was started.
LOG_FILENAME := session_$(shell date -u +%Y%m%d.%H%M%S).txt
# First rule in the file, hence what a bare `make` runs: print a short
# description of the pipeline and its main targets.
help:
	@printf '%s\n' \
		"Provenance Kernels Evaluation Pipeline Makefile" \
		"Current supported targets: venv, data, kernels, experiments, plots"
# Setting up the Python virtual environment
# `venv` is a short alias for the activation script. The recipe below runs
# when that script is missing or older than scripts/requirements.txt.
venv: venv/bin/activate

# The install steps are chained with && so that a failed activation or a
# failed first `pip install` aborts the recipe instead of being masked by the
# exit status of the last command. The final touch marks the environment as
# newer than the requirements file.
venv/bin/activate: scripts/requirements.txt
	@echo "> Setting up the Python virtual environment"
	@test -d venv || python3 -m venv ./venv
	@. venv/bin/activate && \
		pip install -U setuptools pip wheel && \
		pip install -Ur scripts/requirements.txt
	@touch $@
# Setting up the datasets
# The MIMIC graphs are produced by a script run with the virtual environment's
# interpreter. The environment is therefore an order-only prerequisite: it is
# set up first when needed, but refreshing it does not by itself cause the
# graphs to be generated again.
datasets/MIMIC-PXC7: | venv/bin/activate
	@echo "> Generating MIMIC provenance graphs..."
	@$(PYTHON) scripts/mimic/generate_provenance.py
# The archived datasets all unpack the same way, so the five targets share a
# single recipe: $(@F) is the dataset name, $@.tar.gz the archive expected
# next to it, and $(@D) the datasets folder the archive is extracted into.
datasets/CM-Buildings datasets/CM-Routes datasets/CM-RouteSets datasets/PG-T datasets/PG-D:
	@echo "> Unpacking $(@F) graphs..."
	@tar -xzf $@.tar.gz --directory $(@D)
# Unpack every archived dataset (datasets/MIMIC-PXC7 is not part of this goal).
data: $(addprefix datasets/,CM-Buildings CM-Routes CM-RouteSets PG-T PG-D)
# Run the plots.ipynb notebook with the virtual environment's IPython once the
# experiments and the timings of the generated graphs are available.
plots: experiments
plots: outputs/timings_for_gen_graphs.pickled
	@echo "> Generating the plots and table included in the paper"
	@venv/bin/ipython -c "%run plots.ipynb"
# The following goals are run on each dataset through the per-dataset makefile
kernels types experiments plots \
clean-app-data clean-kernels clean-pickled-kernels \
clean-cached-experiments clean-experiments: $(DATASETS)

# Each dataset name is itself a goal: it re-invokes make on the per-dataset
# makefile, forwarding the goals given on the command line together with the
# dataset name.
$(DATASETS): venv data
	@echo "--------- Execute [$(MAKECMDGOALS)] on $@ dataset ---------"
	@date
	@$(MAKE) -f $(MAKEFILE_SINGLE) $(MAKECMDGOALS) DATASET=$@
# Bundle the result files found under outputs/, the session logs in outputs/
# and the plots folder into one archive.
#  - The archive is assembled under a temporary name and renamed at the end,
#    so a failure part-way through never leaves a partial target behind.
#  - Every step appends (-r) and result files are handed over with
#    `find -exec ... {} +`: a search that matches nothing runs no tar at all,
#    and a very long or space-containing file list cannot clobber or split
#    the archive.
#  - The log files are expanded into the positional parameters so that the
#    existence test still works when several logs are present.
outputs/results.tar:
	@rm -f $@.tmp
	@for name in scoring.pickled timings.pickled selected.csv cv_sets.pickled; do \
		find outputs -name "$$name" -exec tar -rvf $@.tmp {} + || exit 1; \
	done
	@set -- outputs/*.txt; [ ! -f "$$1" ] || tar -rvf $@.tmp "$$@"
	@tar -rvf $@.tmp plots/
	@mv -f $@.tmp $@
# Compress the archive. gzip replaces results.tar with results.tar.gz, so on a
# later run the prerequisite is missing, make rebuilds it and runs this recipe
# again; -f lets gzip overwrite the previous .tar.gz instead of refusing.
outputs/results.tar.gz: outputs/results.tar
	@gzip -f $<
# Convenience goal: build the compressed archive outputs/results.tar.gz.
save-results: outputs/results.tar.gz
# Time the experiments and save any console output to the log file.
# pipefail makes the recipe fail when the sub-make fails (otherwise only the
# exit status of tee would count), and 2>&1 routes error output through tee so
# that it reaches the log as well. Both rely on SHELL being bash.
run-experiments:
	@mkdir -p "outputs"
	@set -o pipefail; time $(MAKE) plots 2>&1 | tee outputs/$(LOG_FILENAME)
# Other maintenance goals
# Delete the virtual environment, the outputs and plots folders, and every
# *.pyc file below the current directory.
clean:
	rm -rf venv outputs plots
	find . -name "*.pyc" -delete
# Goals that are commands rather than files, grouped by purpose.
.PHONY: help data clean save-results run-experiments
.PHONY: kernels types experiments plots
.PHONY: clean-app-data clean-kernels clean-pickled-kernels
.PHONY: clean-cached-experiments clean-experiments
.PHONY: $(DATASETS)
# Measuring the time spent calculating provenance types on randomly generated provenance graphs
GENERATED_DIR := outputs/generated

# Values taken by the three varying fields of a generated file name
# <first>_<second>_entity_<fourth>_e1.json
gen_first  := 10 100 1000 10000 100000 1000000
gen_second := 1 2 4 8 16
gen_fourth := 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20

# Every combination of the values above, with the last field varying fastest.
GENERATED_GRAPHS := $(foreach a,$(gen_first),$(foreach b,$(gen_second),$(foreach d,$(gen_fourth),$(GENERATED_DIR)/$(a)_$(b)_entity_$(d)_e1.json)))
# Generate one graph file with provconvert. The generator argument is the file
# stem with every "_" replaced by ":" (10_1_entity_1_e1 -> 10:1:entity:1:e1).
# The output folder is created first, since no other rule in this file
# creates it.
$(GENERATED_DIR)/%.json:
	@echo Generating $@...
	@mkdir -p $(@D)
	@${PROVCONVERT} --generator $(subst _,:,$*) --outfile $@
# Run the measurement script over the folder of generated graphs, passing the
# target as its second argument. The script runs with the virtual
# environment's interpreter, so the environment is an order-only
# prerequisite: it is set up first when needed, without making its timestamp
# a reason to measure again.
outputs/timings_for_gen_graphs.pickled: ${GENERATED_GRAPHS} | venv/bin/activate
	@$(PYTHON) scripts/measure-types-generation-cost.py $(GENERATED_DIR) $@