Run Hyrise master with GH actions (#10)
Add Hyrise master to database comparison
Bouncner authored Jun 19, 2024
1 parent 2176dff commit 42c1d5b
Showing 9 changed files with 88 additions and 46 deletions.
56 changes: 44 additions & 12 deletions .github/workflows/haupt.yml
@@ -13,9 +13,10 @@ defaults:

env:
SKIP_HYRISE: false
SKIP_HYRISE_MASTER: false
SKIP_MONETDB: false
SKIP_DUCKDB: false
SCALE_FACTOR: 0.5
SCALE_FACTOR: 1
CMAKE_GENERATOR: Ninja

jobs:
@@ -45,10 +46,11 @@ jobs:
# Add repository for older python versions. We use 3.11 as there are several issues with 3.12 (e.g., removed distutils and pip problems).
sudo add-apt-repository ppa:deadsnakes/ppa --yes
# We don't use Hyrise's install_dependencies script as it includes much more than needed for this small setup here.
sudo apt-get install -y ninja-build libboost-all-dev postgresql-server-dev-16 libtbb-dev libreadline-dev libsqlite3-dev systemtap-sdt-dev lld numactl python3.11-full python3.11-venv
sudo apt-get install -y -qq ninja-build libboost-all-dev postgresql-server-dev-16 libtbb-dev libreadline-dev libsqlite3-dev systemtap-sdt-dev numactl python3.11-full python3.11-venv clang-17 lld-17
sudo update-alternatives --install /usr/bin/ld.lld ld.lld /usr/bin/ld.lld-17 90
python3.11 -m venv ~/venv
source ~/venv/bin/activate
python -m pip install -r python/requirements.txt --quiet
- name: Determine core and client counts for database comparison
id: core_client_counts
@@ -70,10 +72,10 @@
run: |
mkdir -p encoding_plugin/rel
pushd encoding_plugin/rel > /dev/null
# Erase all encoding types. Hurts performance but allows us to compile in release mode with GitHub runners.
# Further, we use the relaxed mode as there are several issues with newer compiler (fixed in Hyrise's master,
# but not in the project's code).
cmake -DCMAKE_BUILD_TYPE=Release -DHYRISE_RELAXED_BUILD=ON -DERASE_SEGMENT_TYPES=Dictionary,LZ4,RunLength,FSST,FrameOfReference,Unencoded,FixedStringDictionary ..
# We use the relaxed mode as there are several issues with newer compilers (fixed in Hyrise's master,
# but not in the project's code). On top of relaxed mode (i.e., not all warnings are errors), we
# silence warnings about deprecated declarations (atomic shared_ptr's).
cmake -DCMAKE_C_COMPILER=clang-17 -DCMAKE_CXX_COMPILER=clang++-17 -DCMAKE_BUILD_TYPE=Release -DCMAKE_UNITY_BUILD=ON -DHYRISE_RELAXED_BUILD=ON -DCMAKE_CXX_FLAGS="-Wno-deprecated-declarations" ..
cmake --build . --target hyriseServer WorkloadStatisticsPlugin WorkloadHandlerPlugin CommandExecutorPlugin DataCharacteristicsPlugin
popd > /dev/null
@@ -87,7 +89,7 @@
source ~/venv/bin/activate
pushd python > /dev/null
python3 runner.py --hyrise_server_path=../encoding_plugin/rel/ --base_benchmark_runs=1 --single_benchmark=TPC-H --execute=calibration --scale_factor ${{ env.SCALE_FACTOR }} --random_encoding_configs_count=3
popd > /dev/null
- name: Run calibration - learn runtime and size models
@@ -189,6 +191,14 @@ jobs:
repository: electrum/tpch-dbgen
path: ./tpch-dbgen

- uses: actions/checkout@master
if: env.SKIP_HYRISE_MASTER == 'false'
with:
token: ${{ secrets.PAT }}
repository: hyrise/hyrise
path: ./hyrise_master
submodules: recursive

- name: Determine client and core counts for database comparison
id: core_client_counts
run: |
@@ -213,7 +223,8 @@
- name: Install dependencies
run: |
sudo apt-get update -y -qq
sudo apt-get install -y -qq ninja-build libsqlite3-dev postgresql-server-dev-16 numactl bison python3-venv
sudo apt-get install -y -qq ninja-build libsqlite3-dev postgresql-server-dev-16 numactl bison python3-venv libboost-all-dev libtbb-dev libreadline-dev clang-17 lld-17
sudo update-alternatives --install /usr/bin/ld.lld ld.lld /usr/bin/ld.lld-17 90
python3 -m venv ~/venv
source ~/venv/bin/activate
pip3 install -r python/requirements.txt # Not using --quiet to log the installed DuckDB version.
@@ -259,7 +270,7 @@ jobs:
chmod 644 *.tbl
mkdir -p sf${{ env.SCALE_FACTOR }}
mv *.tbl sf${{ env.SCALE_FACTOR }}
popd
mv tpch-dbgen ~
@@ -273,9 +284,30 @@
python3 db_comparison_runner.py duckdb --cores=${{ env.CORE_COUNT }} --scale_factor=${{ env.SCALE_FACTOR }} --determine_size_only
popd
- name: Build Hyrise (master) release server
if: env.SKIP_HYRISE_MASTER == 'false'
run: |
mkdir -p hyrise_master/rel
pushd hyrise_master/rel > /dev/null
cmake -DCMAKE_C_COMPILER=clang-17 -DCMAKE_CXX_COMPILER=clang++-17 -DCMAKE_BUILD_TYPE=Release -DCMAKE_UNITY_BUILD=ON -DHYRISE_RELAXED_BUILD=ON ..
cmake --build . --target hyriseServer
popd > /dev/null
- name: Benchmark Hyrise (master, database comparison)
if: env.SKIP_HYRISE_MASTER == 'false'
run: |
pushd python
source ~/venv/bin/activate
python3 db_comparison_runner.py hyrise --hyrise_server_path=../hyrise_master/rel/ --cores=${{ env.CORE_COUNT }} --scale_factor=${{ env.SCALE_FACTOR }} --clients=${{ env.CLIENT_COUNT }} --time=${{ env.COMPARISON_RUNTIME }}
python3 db_comparison_runner.py hyrise --hyrise_server_path=../hyrise_master/rel/ --cores=${{ env.CORE_COUNT }} --scale_factor=${{ env.SCALE_FACTOR }} --determine_size_only
mv db_comparison_results/database_comparison__TPC-H__hyrise.csv db_comparison_results/database_comparison__TPC-H__hyrise_master.csv
mv db_comparison_results/size_hyrise.csv db_comparison_results/size_hyrise_master.csv
popd
- name: Upload benchmark results
uses: actions/upload-artifact@master
if: env.SKIP_DUCKDB == 'false' || env.SKIP_MONETDB == 'false'
if: env.SKIP_DUCKDB == 'false' || env.SKIP_MONETDB == 'false' || env.SKIP_HYRISE_MASTER == 'false'
with:
name: comparison_results
path: |
@@ -334,4 +366,4 @@ jobs:
with:
name: database_comparison
path: |
db_comparison.pdf
db_comparison*.pdf
21 changes: 13 additions & 8 deletions R/plot.R
@@ -7,8 +7,8 @@ scale_factor = Sys.getenv("SCALE_FACTOR")
run_name = Sys.getenv("CALIBRATION_RUN") # currently not used.
hyrise_core_count = Sys.getenv("HYRISE_CORE_COUNT")
hyrise_client_count = Sys.getenv("HYRISE_CLIENT_COUNT")
comparison_core_count = Sys.getenv("HYRISE_CORE_COUNT")
comparison_client_count = Sys.getenv("HYRISE_CLIENT_COUNT")
comparison_core_count = Sys.getenv("COMPARISON_CORE_COUNT")
comparison_client_count = Sys.getenv("COMPARISON_CLIENT_COUNT")

results_dir = paste0("results_to_plot")

@@ -38,13 +38,17 @@ hyrise_lp$is_geom_line <- TRUE
monet_runtimes <- read.csv(paste0(results_dir, "/database_comparison__TPC-H__monetdb.csv"))
hyrise_runtimes <- read.csv(paste0(results_dir, "/database_comparison__TPC-H__hyrise.csv"))
duckdb_runtimes <- read.csv(paste0(results_dir, "/database_comparison__TPC-H__duckdb.csv"))
hyrise_master_runtimes <- read.csv(paste0(results_dir, "/database_comparison__TPC-H__hyrise_master.csv"))
hyrise_master_runtimes$DATABASE_SYSTEM = "hyrise_master"

monet_size <- read.csv(paste0(results_dir, "/size_monetdb__SF", scale_factor, ".csv"))
hyrise_size <- read.csv(paste0(results_dir, "/size_hyrise__SF", scale_factor, ".csv"))
duckdb_size <- read.csv(paste0(results_dir, "/size_duckdb__SF", scale_factor, ".csv"))
monet_size <- read.csv(paste0(results_dir, "/size_monetdb.csv"))
hyrise_size <- read.csv(paste0(results_dir, "/size_hyrise.csv"))
duckdb_size <- read.csv(paste0(results_dir, "/size_duckdb.csv"))
hyrise_master_size <- read.csv(paste0(results_dir, "/size_hyrise_master.csv"))
hyrise_master_size$DATABASE_SYSTEM = "hyrise_master"

runtimes <- rbind(monet_runtimes, hyrise_runtimes, duckdb_runtimes)
sizes <- rbind(monet_size, hyrise_size, duckdb_size)
runtimes <- rbind(monet_runtimes, hyrise_runtimes, duckdb_runtimes, hyrise_master_runtimes)
sizes <- rbind(monet_size, hyrise_size, duckdb_size, hyrise_master_size)

runtimes_q_agg <- runtimes %>% group_by(DATABASE_SYSTEM, ITEM_NAME) %>% summarize(median_runtime = mean(RUNTIME_MS), .groups="keep")
runtimes_db_agg <- runtimes_q_agg %>% group_by(DATABASE_SYSTEM) %>% summarize(cumulative_runtime = sum(median_runtime), .groups="keep")
@@ -66,6 +70,7 @@ joined <- rbind(joined, first_lp)
joined$DATABASE_SYSTEM[which(joined$DATABASE_SYSTEM == "duckdb")] <- "DuckDB"
joined$DATABASE_SYSTEM[which(joined$DATABASE_SYSTEM == "monetdb")] <- "MonetDB"
joined$DATABASE_SYSTEM[which(joined$DATABASE_SYSTEM == "hyrise")] <- "Default Hyrise"
joined$DATABASE_SYSTEM[which(joined$DATABASE_SYSTEM == "hyrise_master")] <- "Hyrise Master"

max_size <- max(joined$size_gb)
max_throughput <- max(joined$runs_per_hour)
@@ -96,4 +101,4 @@ g <- ggplot(joined, aes(x=size_gb, y=runs_per_hour, group=DATABASE_SYSTEM, fill=
force = 0.5,
)

ggsave("db_comparison.pdf", g, width=7, height=5)
ggsave(paste0("db_comparison__", strftime(as.POSIXlt(Sys.time(), "UTC") , "%Y-%m-%d"),".pdf"), g, width=7, height=5)
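The plot file now carries a UTC date stamp so that monthly runs produce distinct files. A minimal Python sketch of the same naming scheme (illustrative only; the R line above is authoritative):

```python
from datetime import datetime, timezone

# Re-creation of the date-stamped filename in Python.
filename = "db_comparison__{}.pdf".format(
    datetime.now(timezone.utc).strftime("%Y-%m-%d"))
# e.g. "db_comparison__2024-06-19.pdf" -- matched by the workflow's
# updated "db_comparison*.pdf" artifact glob.
```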
21 changes: 14 additions & 7 deletions README.md
@@ -3,6 +3,13 @@

This repository contains source code and artifacts for the paper [**Robust and Budget-Constrained Encoding Configurations for In-Memory Database Systems**](https://www.vldb.org/pvldb/vol15/p780-boissier.pdf) (VLDB 2022).

<img src="https://github.com/hyrise/encoding_selection/assets/1745857/f14194f5-c48a-49d7-bc9f-c079e35c40db" width="300" alt="Exemplary results of a pipeline run, comparing against MonetDB and DuckDB." />


Once a month, we automatically execute the encoding system presented in the paper as a GitHub action and compare its performance against MonetDB and DuckDB[^1].
The latest results are plotted in the `database_comparison.zip` artifact of the [most recent run of the main branch](https://github.com/hyrise/encoding_selection/actions?query=branch%3Amain) (more information below).


In case you have any questions, please contact [Martin Boissier](https://hpi.de/plattner/people/phd-students/martin-boissier.html).


@@ -40,15 +47,15 @@ The whole encoding selection pipeline runs within GitHub actions to ease reproducibility.
The `hyrise_full_pipeline` job in the main workflow file [haupt.yml](https://github.com/hyrise/encoding_selection/blob/main/.github/workflows/haupt.yml#L20) lists all steps required from gathering calibration data, learning models, selecting configurations, to evaluating them.
Due to GitHub restrictions, the pipeline creates only a tiny data set (scale factor of 0.5).

For each run, we compare Hyrise against MonetDB and DuckDB[^1].
The results are plotted and stored in the artifacts of each run[^2].
For each run, we compare Hyrise against MonetDB and DuckDB[^2].
The results are plotted and stored in the artifacts of each run[^3].
Download `database_comparison(.zip)` of the last successful run for a plot of the TPC-H benchmark runs.


The code (both the plugins as well as the Python scripts) is extracted from a larger project.
Please excuse the often convoluted and bloated code.

Flowchart of the GitHub runner workflow[^3]:
Flowchart of the GitHub runner workflow:
```mermaid
flowchart LR;
Start --> setuph["Setup Hyrise Pipeline<br>(git, apt, pip, ...)"];
@@ -65,7 +72,9 @@ flowchart LR;
runduckdb --> plot;
```

[^1]: Please view the results with a huge grain of salt, especially the DuckDB results.
[^1]: Please note that the comparison is executed on GitHub action runners and is thus only meant to show reproducibility. We do not aim to establish a performance order with these action runs.

[^2]: Please view the results with a huge grain of salt, especially the DuckDB results.
We are huge fans of DuckDB and thus wanted to include it.
But the current benchmark script probably makes for an unfair comparison, as DuckDB's aim is more on single-user performance (i.e., data scientists/smartists).
Hyrise's focus is on concurrent OLTP/OLAP users.
@@ -74,8 +83,6 @@ Further, we cannot rule out that Python's GIL causes unexpected performance degradation.
We have talked to the DuckDB maintainers and decided to exclude DuckDB measurements from the paper for this reason.
In case you can help us to make a fair comparison, feel free to post a pull request.

[^2]: The plots are meant to show the reproducibility of the results, not to establish a fair comparison.
[^3]: The plots are meant to show the reproducibility of the results, not to establish a fair comparison.
To conduct a "fairer" comparison (cf. footnote on DuckDB), the pipeline needs to be run on a dedicated machine.
We have seen workflow runtimes on GitHub vary from 3h to over 6h (at which point the run is canceled by GitHub) for the same setup.

[^3]: Yes, I just wanted to integrate the flowchart for the sake of integrating a flowchart in Markdown. It isn't that interesting.
6 changes: 3 additions & 3 deletions python/db_comparison_runner.py
@@ -64,7 +64,7 @@
hyrise_server_path = Path(args.hyrise_server_path).expanduser().resolve()
assert (hyrise_server_path / "hyriseServer").exists(), "Please pass valid --hyrise_server_path"

monetdb_scale_factor_string = str(args.scale_factor).replace(".", "_")
monetdb_scale_factor_string = str(args.scale_factor).replace(".", "_") if float(int(args.scale_factor)) != args.scale_factor else str(int(args.scale_factor))
duckdb_scale_factor_string = int(args.scale_factor) if args.scale_factor >= 1.0 else args.scale_factor

assert (args.single_query_id is None or (args.single_query_id > 0 and args.single_query_id < 23)), "Unexpected query id"
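The new right-hand side for `monetdb_scale_factor_string` is dense; here is a small sketch of what it computes, wrapped in a hypothetical helper (assuming `args.scale_factor` is a float):

```python
def monetdb_scale_factor_string(scale_factor: float) -> str:
    # Integral scale factors (e.g., 1.0) are rendered as "1"; fractional
    # ones (e.g., 0.5) keep their digits with "." replaced by "_": "0_5".
    if float(int(scale_factor)) == scale_factor:
        return str(int(scale_factor))
    return str(scale_factor).replace(".", "_")

assert monetdb_scale_factor_string(1.0) == "1"
assert monetdb_scale_factor_string(0.5) == "0_5"
```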
@@ -190,7 +190,7 @@ def get_aggregated_table_size():
rows_fetched += len(rows)
print("{:,} rows.".format(rows_fetched), flush=True)

with open("db_comparison_results/size_{}__SF{}.csv".format(args.dbms, args.scale_factor), "w") as size_file:
with open("db_comparison_results/size_{}.csv".format(args.dbms), "w") as size_file:
size_file.write("DATABASE_SYSTEM,SCALE_FACTOR,SIZE_IN_BYTES\n")
cumulative_size = 0
if args.dbms == "monetdb":
Expand Down Expand Up @@ -297,7 +297,7 @@ def loop(thread_id, queries, query_id, start_time, successful_runs, timeout, is_
time_left = start_time + timeout - time.time()
if time_left < 0:
break
print('\rBenchmarking {}... {:.0f} seconds left'.format(query_name, time_left), end="")
print('\rBenchmarking {}... {:.0f} seconds left'.format(query_name, time_left), end="", flush=True)
time.sleep(min(10, time_left))

while True:
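For context on the added `flush=True`: carriage-return progress lines are invisible on CI runners without it, because stdout is block-buffered when it is not a TTY. A minimal sketch:

```python
import time

for seconds_left in (3, 2, 1):
    # Without flush=True these "\r" updates may sit in the stdout buffer
    # until the program exits when output is piped (as in CI logs).
    print("\rBenchmarking... {} seconds left".format(seconds_left),
          end="", flush=True)
    time.sleep(1)
print()
```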
2 changes: 1 addition & 1 deletion python/encoding_configuration_selector.py
@@ -250,7 +250,7 @@ def run_compression_selection_comparison(short_name, calibration_run, robustness

dictionary_size = workload['all_dictionary_size']
dictionary_runtime = workload['all_dictionary_runtime']
results_with_static_dictionary = results.append({"MODEL": "Static", "BUDGET": dictionary_size, "SIZE_IN_BYTES": dictionary_size, "CUMULATIVE_RUNTIME_MS": dictionary_runtime}, ignore_index=True)
results_with_static_dictionary = pd.concat([results, pd.DataFrame({"MODEL": ["Static"], "BUDGET": [dictionary_size], "SIZE_IN_BYTES": [dictionary_size], "CUMULATIVE_RUNTIME_MS": [dictionary_runtime]})])

# create static dictonary configuration
dict_configuration = np.zeros((workload['table_count'], workload['max_row_clusters'], workload['max_column_count']), dtype=np.int32)
2 changes: 1 addition & 1 deletion python/helpers/encoding_selection_helpers.py
@@ -189,7 +189,7 @@ def eval_and_append(_title, _result, _metric, _sum_dict, _df_plotting, plot=True
previous_results_path = Path(plot_file_name).parent / "model_evaluation.csv"
if previous_results_path.exists():
df_plotting_previous = pd.read_csv(previous_results_path)
df_plotting = df_plotting_previous.append(df_plotting, ignore_index=True)
df_plotting = pd.concat([df_plotting_previous, df_plotting])
df_plotting.to_csv(previous_results_path, index=False)


2 changes: 1 addition & 1 deletion python/requirements.txt
@@ -3,7 +3,7 @@ joblib == 1.4.2
matplotlib == 3.9.0
multiprocess == 0.70.16
numpy == 1.26.4
pandas == 1.5.3 # Not using 2.x as we widely use DataFrame.append(). Change to pd.concat would be easy though.
pandas == 2.2.2
pymonetdb == 1.8.1
psycopg2-binary == 2.9.9
PuLP == 2.8.0
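The pandas bump to 2.x is what drives the `DataFrame.append` → `pd.concat` rewrites throughout this commit: `append` was removed in pandas 2.0, as the old requirements comment anticipated. A minimal before/after sketch (illustrative column names):

```python
import pandas as pd

results = pd.DataFrame({"MODEL": ["LinearRegression"], "RUNTIME_MS": [12.3]})
row = {"MODEL": "Static", "RUNTIME_MS": 45.6}

# pandas 1.x (removed in 2.0):
#   results = results.append(row, ignore_index=True)

# pandas 2.x equivalent:
results = pd.concat([results, pd.DataFrame([row])], ignore_index=True)
```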
10 changes: 4 additions & 6 deletions python/selection/column_runtime_change_prediction_helper.py
@@ -105,7 +105,7 @@ def collect_unified_operator_runtimes_per_column(operator_name, operator_data_fo
candidates_left.columns = ["TABLE_NAME", "COLUMN_NAME", "DATA_TYPE"]
candidates_right = df[["RIGHT_TABLE_NAME", "RIGHT_COLUMN_NAME", "DATA_TYPE_RIGHT"]]
candidates_right.columns = ["TABLE_NAME", "COLUMN_NAME", "DATA_TYPE"]
candidates = candidates_left.append(candidates_right)
candidates = pd.concat([candidates_left, candidates_right])
else:
candidates = df[['TABLE_NAME', 'COLUMN_NAME', 'DATA_TYPE']]

@@ -266,10 +266,8 @@ def predict_join_baselines(df, runtime_models_folder, model = 'heteroscedastic',
f"(predicted for baseline: {baseline_execution_time:,.2f} ms, actual: {actual_execution_time:,.2f} ms).")

encoding_selection_helpers.adapt_negative_predictions(predictable, "prediction")

predictable["prediction_error"] = predictable.prediction - predictable.execution_time_ms

results = results.append(predictable[['QUERY_HASH', 'OPERATOR_HASH', 'JOIN_MODEL_TYPE', 'materialize_side', 'prediction']])
results = pd.concat([results, predictable[['QUERY_HASH', 'OPERATOR_HASH', 'JOIN_MODEL_TYPE', 'materialize_side', 'prediction']]])

return results
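A small aside on the pattern: calling `pd.concat` inside a loop re-copies the accumulated frame on every iteration. If hunks like this one sit on a hot path, the usual idiom is to collect the pieces in a list and concatenate once. A sketch with hypothetical stand-in frames:

```python
import pandas as pd

# Hypothetical stand-ins for the per-model prediction frames.
predictable_frames = [
    pd.DataFrame({"QUERY_HASH": ["q1"], "OPERATOR_HASH": ["o1"], "prediction": [1.2]}),
    pd.DataFrame({"QUERY_HASH": ["q2"], "OPERATOR_HASH": ["o2"], "prediction": [3.4]}),
]

# Collect pieces first, then concatenate once instead of growing
# `results` inside the loop.
pieces = [df[["QUERY_HASH", "OPERATOR_HASH", "prediction"]] for df in predictable_frames]
results = pd.concat(pieces, ignore_index=True)
```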

@@ -296,7 +294,7 @@ def collect_join_runtimes_per_column(operator_data_folder, runtime_models_folder
candidates_left.columns = ["TABLE_NAME", "COLUMN_NAME", "DATA_TYPE"]
candidates_right = df[["RIGHT_TABLE_NAME", "RIGHT_COLUMN_NAME", "DATA_TYPE_RIGHT"]]
candidates_right.columns = ["TABLE_NAME", "COLUMN_NAME", "DATA_TYPE"]
candidates = candidates_left.append(candidates_right)
candidates = pd.concat([candidates_left, candidates_right])

# Drop duplicates, but keep NAs as they mark materialized columns
candidates = candidates.drop_duplicates()
@@ -374,7 +372,7 @@ def collect_join_runtimes_per_column(operator_data_folder, runtime_models_folder
'VECTOR_COMPRESSION_TYPE',
'JOIN_MODEL_TYPE',
'materialize_side'], dropna=False).agg({'adapted_prediction': 'min'}).reset_index()
encoding_change_predictions = encoding_change_predictions.append(encoding_change_predictions_extract_grouped)
encoding_change_predictions = pd.concat([encoding_change_predictions, encoding_change_predictions_extract_grouped])

if len(encoding_change_predictions) > 0:
encoding_change_predictions.VECTOR_COMPRESSION_TYPE = encoding_change_predictions.VECTOR_COMPRESSION_TYPE.replace("", np.nan)
