
Commit

Merge branch 'main' into martin/ubuntu2204
Bouncner authored May 22, 2024
2 parents 8bab39b + 89bc5b3 commit 2291427
Showing 5 changed files with 41 additions and 41 deletions.
57 changes: 29 additions & 28 deletions .github/workflows/haupt.yml
@@ -27,21 +27,23 @@ jobs:
calibration_run: ${{ steps.calibration.outputs.calibration_run }}

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@master
with:
submodules: recursive

- name: Install dependencies for Act setup
if: ${{ env.ACT }}
run: |
sudo apt-get update -y -qq
sudo apt-get install -y -qq git build-essential cmake python3-pip
sudo apt-get update -y
sudo apt-get install -y git build-essential cmake python3-pip
- name: Install dependencies
run: |
sudo apt-get update -y -qq
sudo apt-get update -y
# We don't use Hyrise's install_dependencies script as it installs much more than is needed for this small setup.
sudo apt-get install -y -qq ninja-build libboost-all-dev postgresql-server-dev-all libtbb-dev systemtap-sdt-dev lld numactl
DEBIAN_FRONTEND=noninteractive sudo apt-get install -y ninja-build libboost-all-dev postgresql-server-dev-all libtbb-dev systemtap-sdt-dev lld numactl python3-venv
python3 -m venv ~/venv
source ~/venv/bin/activate
pip3 install -r python/requirements.txt --quiet
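A note on the virtual environment introduced above (a sketch, not part of the workflow): each "run:" block in GitHub Actions starts a fresh shell, so the activation only lasts for this step. A later step that needs the installed packages has to re-activate the environment, or expose it to every following step once:

# Option 1: re-activate at the top of the later step's run block
source ~/venv/bin/activate
# Option 2: put the venv on PATH for all subsequent steps of the job
echo "${HOME}/venv/bin" >> "$GITHUB_PATH"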
- name: Determine core and client counts for database comparison
@@ -56,8 +58,8 @@ jobs:
echo "CLIENT_COUNT=${client_count}" >> $GITHUB_ENV
echo "COMPARISON_RUNTIME=${comparison_runtime}" >> $GITHUB_ENV
echo "::set-output name=core_count::${core_count}"
echo "::set-output name=client_count::${client_count}"
echo "core_count=${core_count}" >> $GITHUB_OUTPUT
echo "client_count=${client_count}" >> $GITHUB_OUTPUT
- name: Build release server and plugins
if: env.SKIP_HYRISE == 'false'
@@ -91,7 +93,7 @@ jobs:
python3 encoding_selection_pipeline.py --calibration_dir=calibration/${calibration_run} --skip_phases selection
popd > /dev/null
echo "::set-output name=calibration_run::${calibration_run}"
echo "calibration_run=${calibration_run}" >> $GITHUB_OUTPUT
echo "CALIBRATION_RUN=${calibration_run}" >> $GITHUB_ENV
- name: Run encoding selection
@@ -124,19 +126,19 @@ jobs:
python3 runner.py --hyrise_server_path=../encoding_plugin/rel/ --execute=evaluation --configurations_dir "evaluation/${{ env.CALIBRATION_RUN }}/configurations__default/TPCH/LPCompressionSelection" --results_dir "evaluation/${{ env.CALIBRATION_RUN }}/results/TPCH/LPCompressionSelection" --scale_factor ${{ env.SCALE_FACTOR }} --single_benchmark=TPCH --port 5551 --cores=${{ env.CORE_COUNT }} --clients=${{ env.CLIENT_COUNT }}
popd
- name: Upload benchmark results (default)
uses: actions/upload-artifact@v2
- name: Upload benchmark results (non-constrained)
uses: actions/upload-artifact@master
if: env.SKIP_HYRISE == 'false'
with:
name: comparison_results_hyrise
name: comparison_results_hyrise_non-constrained
path: |
python/db_comparison_results/*.csv
- name: Upload benchmark results (default)
uses: actions/upload-artifact@v2
- name: Upload benchmark results (budget-constrained)
uses: actions/upload-artifact@master
if: env.SKIP_HYRISE == 'false'
with:
name: comparison_results_hyrise
name: comparison_results_hyrise_budget-constrained
path: |
python/evaluation/${{ env.CALIBRATION_RUN }}/results/TPCH/*.csv
python/evaluation/${{ env.CALIBRATION_RUN }}/results/TPCH/LPCompressionSelection/*.csv
@@ -151,13 +153,14 @@ jobs:
client_count: ${{ steps.core_client_counts.outputs.client_count }}

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@master

- uses: actions/checkout@master
if: env.SKIP_MONETDB == 'false'
with:
token: ${{ secrets.PAT }}
repository: MonetDB/MonetDB
ref: 'Sep2022_7' # checking out the latest tag as the current master does not compile with GCC 11 (as of 2022-11-17)
path: ./MonetDB

- uses: actions/checkout@master
@@ -186,8 +189,8 @@ jobs:
echo "CLIENT_COUNT=${client_count}" >> $GITHUB_ENV
echo "COMPARISON_RUNTIME=${comparison_runtime}" >> $GITHUB_ENV
echo "::set-output name=core_count::${core_count}"
echo "::set-output name=client_count::${client_count}"
echo "core_count=${core_count}" >> $GITHUB_OUTPUT
echo "client_count=${client_count}" >> $GITHUB_OUTPUT
- name: Install dependencies for Act setup
if: ${{ env.ACT }}
@@ -208,7 +211,6 @@ jobs:
mkdir rel
pushd rel
cmake -DCMAKE_INSTALL_PREFIX=~/monetdb_bin/ -DASSERT=OFF -DCMAKE_BUILD_TYPE=Release .. 1> /dev/null
cmake --build . 1> /dev/null
cmake --build . --target install
echo "${HOME}/monetdb_bin/bin" >> $GITHUB_PATH
popd
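The MonetDB build above now relies on the install target alone: with a default CMake setup, "install" already depends on the "all" target, so the separate build invocation that was dropped here was redundant. A condensed sketch of the same sequence (paths as in the workflow, output redirection omitted):

mkdir -p rel && cd rel
cmake -DCMAKE_INSTALL_PREFIX=~/monetdb_bin/ -DASSERT=OFF -DCMAKE_BUILD_TYPE=Release ..
cmake --build . --target install
echo "${HOME}/monetdb_bin/bin" >> "$GITHUB_PATH"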
@@ -218,12 +220,6 @@ jobs:
if: env.SKIP_MONETDB == 'false'
run: |
mkdir -p monetdb_farm
echo "pwd" $(pwd)
echo "PATH" $PATH
echo "HOME" "${HOME}"
ls -lisa "${HOME}/monetdb_bin/bin"
pushd tpch-scripts
./tpch_build.sh -s ${{ env.SCALE_FACTOR }} -f ~/monetdb_farm
popd
@@ -257,7 +253,7 @@ jobs:
popd
- name: Upload benchmark results
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@master
if: env.SKIP_DUCKDB == 'false' || env.SKIP_MONETDB == 'false'
with:
name: comparison_results
@@ -271,7 +267,7 @@ jobs:
runs-on: ubuntu-22.04

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@master
- uses: r-lib/actions/setup-r@v2

- uses: actions/download-artifact@master
@@ -281,7 +277,12 @@ jobs:

- uses: actions/download-artifact@master
with:
name: comparison_results_hyrise
name: comparison_results_hyrise_non-constrained
path: results_to_plot

- uses: actions/download-artifact@master
with:
name: comparison_results_hyrise_budget-constrained
path: results_to_plot

- name: Set environment variables
@@ -309,7 +310,7 @@ jobs:
shell: Rscript {0}

- name: Upload database comparison plot
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@master
with:
name: database_comparison
path: |
2 changes: 1 addition & 1 deletion encoding_plugin
15 changes: 7 additions & 8 deletions python/db_comparison_runner.py
@@ -70,8 +70,7 @@
assert (args.single_query_id is None or (args.single_query_id > 0 and args.single_query_id < 23)), "Unexpected query id"
assert (args.dbms != "duckdb" or Path("{}/tpch-dbgen/sf{}/nation.tbl".format(Path.home(), duckdb_scale_factor_string)).exists()), "Expecting TPC-H dbgen data to be present under fixed path."
assert (args.dbms != "monetdb" or Path("{}/monetdb_farm/SF-{}".format(Path.home(), monetdb_scale_factor_string)).exists()), "Expecting MonetDB farm for requested scale factor to be present under fixed path."
assert (args.dbms != "umbra" or Path("{}/umbra/sf{}".format(Path.home(), int(args.scale_factor))).exists()), "Expecting Umbra database file 'tpch.db' for requested scale factor to be present under fixed path."
assert (args.benchmark != "JOB" or args.dbms in ["hyrise"]), "For now, this script supports the Join Order Benchmark only for HANA and Hyrise."
assert (args.benchmark != "JOB" or args.dbms in ["hyrise"]), "For now, this script supports the Join Order Benchmark only for Hyrise."
assert (args.clients == 1 or args.time >= 300), "When multiple clients are set, a shuffled run is initiated which should last at least 300s."

if args.dbms == "duckdb" and args.clients > 1:
@@ -81,12 +80,12 @@
duckdb_load_commands = []
duckdb_load_commands.append("""CREATE TABLE nation ( n_nationkey INTEGER not null, n_name CHAR(25) not null, n_regionkey INTEGER not null, n_comment VARCHAR(152), PRIMARY KEY (N_NATIONKEY) );""")
duckdb_load_commands.append("""CREATE TABLE region ( r_regionkey INTEGER not null, r_name CHAR(25) not null, r_comment VARCHAR(152), PRIMARY KEY (R_REGIONKEY) );""")
duckdb_load_commands.append("""CREATE TABLE part ( p_partkey INTEGER not null, p_name VARCHAR(55) not null, p_mfgr CHAR(25) not null, p_brand CHAR(10) not null, p_type VARCHAR(25) not null, p_size INTEGER not null, p_container CHAR(10) not null, p_retailprice REAL not null, p_comment VARCHAR(23) not null, PRIMARY KEY (P_PARTKEY) );""")
duckdb_load_commands.append("""CREATE TABLE supplier ( s_suppkey INTEGER not null, s_name CHAR(25) not null, s_address VARCHAR(40) not null, s_nationkey INTEGER not null, s_phone CHAR(15) not null, s_acctbal REAL not null, s_comment VARCHAR(101) not null, PRIMARY KEY (S_SUPPKEY) );""")
duckdb_load_commands.append("""CREATE TABLE partsupp ( ps_partkey INTEGER not null, ps_suppkey INTEGER not null, ps_availqty INTEGER not null, ps_supplycost REAL not null, ps_comment VARCHAR(199) not null, PRIMARY KEY (PS_PARTKEY,PS_SUPPKEY) );""")
duckdb_load_commands.append("""CREATE TABLE customer ( c_custkey INTEGER not null, c_name VARCHAR(25) not null, c_address VARCHAR(40) not null, c_nationkey INTEGER not null, c_phone CHAR(15) not null, c_acctbal REAL not null, c_mktsegment CHAR(10) not null, c_comment VARCHAR(117) not null, PRIMARY KEY (C_CUSTKEY) );""")
duckdb_load_commands.append("""CREATE TABLE orders ( o_orderkey INTEGER not null, o_custkey INTEGER not null, o_orderstatus CHAR(1) not null, o_totalprice REAL not null, o_orderdate DATE not null, o_orderpriority CHAR(15) not null, o_clerk CHAR(15) not null, o_shippriority INTEGER not null, o_comment VARCHAR(79) not null, PRIMARY KEY (O_ORDERKEY) );""")
duckdb_load_commands.append("""CREATE TABLE lineitem ( l_orderkey INTEGER not null, l_partkey INTEGER not null, l_suppkey INTEGER not null, l_linenumber INTEGER not null, l_quantity REAL not null, l_extendedprice REAL not null, l_discount REAL not null, l_tax REAL not null, l_returnflag CHAR(1) not null, l_linestatus CHAR(1) not null, l_shipdate DATE not null, l_commitdate DATE not null, l_receiptdate DATE not null, l_shipinstruct CHAR(25) not null, l_shipmode CHAR(10) not null, l_comment VARCHAR(44) not null, PRIMARY KEY (L_ORDERKEY,L_LINENUMBER) );""")
duckdb_load_commands.append("""CREATE TABLE part ( p_partkey INTEGER not null, p_name VARCHAR(55) not null, p_mfgr CHAR(25) not null, p_brand CHAR(10) not null, p_type VARCHAR(25) not null, p_size INTEGER not null, p_container CHAR(10) not null, p_retailprice DECIMAL(12,2) not null, p_comment VARCHAR(23) not null, PRIMARY KEY (P_PARTKEY) );""")
duckdb_load_commands.append("""CREATE TABLE supplier ( s_suppkey INTEGER not null, s_name CHAR(25) not null, s_address VARCHAR(40) not null, s_nationkey INTEGER not null, s_phone CHAR(15) not null, s_acctbal DECIMAL(12,2) not null, s_comment VARCHAR(101) not null, PRIMARY KEY (S_SUPPKEY) );""")
duckdb_load_commands.append("""CREATE TABLE partsupp ( ps_partkey INTEGER not null, ps_suppkey INTEGER not null, ps_availqty INTEGER not null, ps_supplycost DECIMAL(12,2) not null, ps_comment VARCHAR(199) not null, PRIMARY KEY (PS_PARTKEY,PS_SUPPKEY) );""")
duckdb_load_commands.append("""CREATE TABLE customer ( c_custkey INTEGER not null, c_name VARCHAR(25) not null, c_address VARCHAR(40) not null, c_nationkey INTEGER not null, c_phone CHAR(15) not null, c_acctbal DECIMAL(12,2) not null, c_mktsegment CHAR(10) not null, c_comment VARCHAR(117) not null, PRIMARY KEY (C_CUSTKEY) );""")
duckdb_load_commands.append("""CREATE TABLE orders ( o_orderkey INTEGER not null, o_custkey INTEGER not null, o_orderstatus CHAR(1) not null, o_totalprice DECIMAL(12,2) not null, o_orderdate DATE not null, o_orderpriority CHAR(15) not null, o_clerk CHAR(15) not null, o_shippriority INTEGER not null, o_comment VARCHAR(79) not null, PRIMARY KEY (O_ORDERKEY) );""")
duckdb_load_commands.append("""CREATE TABLE lineitem ( l_orderkey INTEGER not null, l_partkey INTEGER not null, l_suppkey INTEGER not null, l_linenumber INTEGER not null, l_quantity DECIMAL(12,2) not null, l_extendedprice DECIMAL(12,2) not null, l_discount DECIMAL(12,2) not null, l_tax DECIMAL(12,2) not null, l_returnflag CHAR(1) not null, l_linestatus CHAR(1) not null, l_shipdate DATE not null, l_commitdate DATE not null, l_receiptdate DATE not null, l_shipinstruct CHAR(25) not null, l_shipmode CHAR(10) not null, l_comment VARCHAR(44) not null, PRIMARY KEY (L_ORDERKEY,L_LINENUMBER) );""")

duckdb_load_commands.append("""COPY nation FROM '{}/tpch-dbgen/sf{}/nation.tbl' ( DELIMITER '|');""".format(Path.home(), duckdb_scale_factor_string))
duckdb_load_commands.append("""COPY region FROM '{}/tpch-dbgen/sf{}/region.tbl' ( DELIMITER '|');""".format(Path.home(), duckdb_scale_factor_string))
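The schema change above replaces REAL with DECIMAL(12,2) for the TPC-H monetary columns. REAL is a 4-byte float and cannot represent typical TPC-H prices exactly, whereas DECIMAL(12,2) is an exact type (and matches what the TPC-H specification prescribes for these columns). A rough illustration, assuming the duckdb package from python/requirements.txt is installed (the literal value is made up):

python3 - <<'EOF'
import duckdb
con = duckdb.connect()  # in-memory database
real_val, dec_val = con.execute(
    "SELECT CAST(155189.49 AS REAL), CAST(155189.49 AS DECIMAL(12,2))"
).fetchone()
print(real_val)  # e.g. 155189.484375 -- rounded to the nearest float32
print(dec_val)   # 155189.49 -- exact, returned as decimal.Decimal
EOF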
2 changes: 1 addition & 1 deletion python/helpers/static_tpch_queries.py
@@ -24,7 +24,7 @@
"""select ps_partkey, sum(ps_supplycost * ps_availqty) as value from partsupp, supplier, nation where ps_suppkey = s_suppkey and s_nationkey = n_nationkey and n_name = 'GERMANY' group by ps_partkey having sum(ps_supplycost * ps_availqty) > ( select sum(ps_supplycost * ps_availqty) * 0.0001 from partsupp, supplier, nation where ps_suppkey = s_suppkey and s_nationkey = n_nationkey and n_name = 'GERMANY' ) order by value desc;""",
# Q12
"""select l_shipmode, sum(case when o_orderpriority = '1-URGENT' or o_orderpriority = '2-HIGH' then 1 else 0 end) as high_line_count, sum(case when o_orderpriority <> '1-URGENT' and o_orderpriority <> '2-HIGH' then 1 else 0 end) as low_line_count from orders, lineitem where o_orderkey = l_orderkey and l_shipmode in ('MAIL', 'SHIP') and l_commitdate < l_receiptdate and l_shipdate < l_commitdate and l_receiptdate >= date '1994-01-01' and l_receiptdate < date '1994-01-01' + interval '1' year group by l_shipmode order by l_shipmode;""",
# Q13: HANA does not support putting column aliases in the "as" part.
# Q13
"""select c_count, count(*) as custdist from ( select c_custkey as c_custkey, count(o_orderkey) as c_count from customer left outer join orders on c_custkey = o_custkey and o_comment not like '%special%requests%' group by c_custkey ) as c_orders group by c_count order by custdist desc, c_count desc;""",
# Q14
"""select 100.00 * sum(case when p_type like 'PROMO%' then l_extendedprice * (1 - l_discount) else 0 end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue from lineitem, part where l_partkey = p_partkey and l_shipdate >= date '1995-09-01' and l_shipdate < date '1995-09-01' + interval '1' month;""",
6 changes: 3 additions & 3 deletions python/requirements.txt
@@ -1,5 +1,5 @@
duckdb >= 0.3.2
joblib == 1.1.0
duckdb >= 0.5.1
joblib == 1.2.0
matplotlib == 3.5.1
multiprocess == 0.70.12.2
numpy == 1.22.2
@@ -8,5 +8,5 @@ pymonetdb == 1.5.0
psycopg2 == 2.9.3
PuLP == 2.6.0
scikit-learn == 1.0.2
#scikit-learn-intelex == 2021.5.3 # Used for fast regressions; might cause issues on the GitHub runners.
#scikit-learn-intelex == 2021.5.3 # Used for fast regressions; might cause issues on the GitHub runners (requires Intel CPU)
xgboost == 1.5.2
