# Workflow file for this run:
# "feat(hashjoin): Add fast row size estimation for hash probe" #37246

# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: "Ubuntu Benchmark"

# Triggers: pull requests that touch the listed paths, and every push to main.
on:
  pull_request:
    paths:
      - 'velox/**'
      - '!velox/docs/**'
      - 'third_party/**'
      - 'pyvelox/**'
      - '.github/workflows/benchmark.yml'
      - 'scripts/benchmark-requirements.txt'
  push:
    branches: [main]

permissions:
  contents: read

# Runs are grouped by workflow, repository and PR branch (or commit SHA when
# there is no PR branch); a newer run in the same group cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.repository }}-${{ github.head_ref || github.sha }}
  cancel-in-progress: true

defaults:
  run:
    shell: bash

jobs:
  # On pull requests: builds the basic benchmarks at the merge base (baseline)
  # and at the PR head (contender), runs both and compares the results.
  # On pushes: builds and runs the contender only.
  benchmark:
    if: github.repository == 'facebookincubator/velox'
    runs-on: 8-core-ubuntu
    env:
      CCACHE_DIR: "${{ github.workspace }}/.ccache/"
      CCACHE_BASEDIR: "${{ github.workspace }}"
      BINARY_DIR: "${{ github.workspace }}/benchmarks/"
      LINUX_DISTRO: "ubuntu"
      RESULTS_ROOT: "${{ github.workspace }}/benchmark-results"
      BASELINE_OUTPUT_PATH: "${{ github.workspace }}/benchmark-results/baseline/"
      CONTENDER_OUTPUT_PATH: "${{ github.workspace }}/benchmark-results/contender/"
    steps:
      # Falls back to the most recent "ccache-benchmark-*" entry when there is
      # no exact match for this commit.
      - name: "Restore ccache"
        uses: actions/cache/restore@v3
        id: restore-cache
        with:
          path: ".ccache"
          key: ccache-benchmark-${{ github.sha }}
          restore-keys: |
            ccache-benchmark-

      # Full history (fetch-depth: 0) is needed to compute the merge base below.
      - name: "Checkout Repo"
        if: ${{ github.event_name == 'pull_request' }}
        uses: actions/checkout@v3
        with:
          path: 'velox'
          repository: ${{ github.event.pull_request.head.repo.full_name }}
          ref: ${{ github.head_ref }}
          fetch-depth: 0
          submodules: 'recursive'

      - name: "Checkout Merge Base"
        if: ${{ github.event_name == 'pull_request' }}
        working-directory: velox
        env:
          # Branch names are untrusted input on pull requests: hand them to the
          # script through the environment instead of expanding them inline,
          # so they can never be interpreted as shell code.
          BASE_REF: ${{ github.base_ref }}
          HEAD_REF: ${{ github.head_ref }}
        run: |
          # Choose merge base from upstream main to avoid issues with
          # outdated fork branches
          git fetch origin
          git remote add upstream https://github.com/facebookincubator/velox
          git fetch upstream
          git status
          merge_base=$(git merge-base "upstream/${BASE_REF}" "origin/${HEAD_REF}") || \
            { echo "::error::Failed to find merge base"; exit 1; }
          echo "Merge Base: $merge_base"
          git checkout "$merge_base"
          git submodule update --init --recursive
          echo $(git log -n 1)

      - name: "Install dependencies"
        if: ${{ github.event_name == 'pull_request' }}
        run: source velox/scripts/setup-ubuntu.sh && install_apt_deps && install_duckdb

      - name: Build Baseline Benchmarks
        if: ${{ github.event_name == 'pull_request' }}
        working-directory: velox
        run: |
          n_cores=$(nproc)
          make benchmarks-basic-build NUM_THREADS=$n_cores MAX_HIGH_MEM_JOBS=$n_cores MAX_LINK_JOBS=$n_cores
          ccache -s
          mkdir -p ${BINARY_DIR}/baseline/
          cp -r --verbose _build/release/velox/benchmarks/basic/* ${BINARY_DIR}/baseline/

      - name: "Checkout Contender PR"
        if: ${{ github.event_name == 'pull_request' }}
        working-directory: velox
        run: |
          git checkout '${{ github.event.pull_request.head.sha }}'
          # A plain checkout leaves submodules at the merge-base revisions;
          # re-sync them so the contender builds against the PR's submodules.
          git submodule update --init --recursive

      - name: "Checkout Contender"
        if: ${{ github.event_name == 'push' }}
        uses: actions/checkout@v3
        with:
          path: 'velox'
          ref: ${{ github.sha }}
          submodules: 'recursive'

      # NOTE(review): unlike the baseline install step, this one does not call
      # install_duckdb, and it is the only install step that runs on push
      # events - confirm that this is intentional.
      - name: "Install dependencies"
        run: source velox/scripts/setup-ubuntu.sh && install_apt_deps

      - name: Build Contender Benchmarks
        working-directory: velox
        run: |
          n_cores=$(nproc)
          make benchmarks-basic-build NUM_THREADS=$n_cores MAX_HIGH_MEM_JOBS=$n_cores MAX_LINK_JOBS=$n_cores
          ccache -s
          mkdir -p ${BINARY_DIR}/contender/
          cp -r --verbose _build/release/velox/benchmarks/basic/* ${BINARY_DIR}/contender/

      - name: "Save ccache"
        uses: actions/cache/save@v3
        id: cache
        with:
          path: ".ccache"
          key: ccache-benchmark-${{ github.sha }}

      - name: "Install benchmark dependencies"
        run: |
          python3 -m pip install -r velox/scripts/benchmark-requirements.txt

      - name: "Run Benchmarks - Baseline"
        if: ${{ github.event_name == 'pull_request' }}
        working-directory: 'velox'
        run: |
          make benchmarks-basic-run \
            EXTRA_BENCHMARK_FLAGS="--binary_path ${BINARY_DIR}/baseline/ --output_path ${BASELINE_OUTPUT_PATH}"

      - name: "Run Benchmarks - Contender"
        working-directory: 'velox'
        run: |
          make benchmarks-basic-run \
            EXTRA_BENCHMARK_FLAGS="--binary_path ${BINARY_DIR}/contender/ --output_path ${CONTENDER_OUTPUT_PATH}"

      # Writes rerun_json_output_0.json, which seeds the rerun loop in the
      # next step. --do_not_fail is passed so the job continues to the reruns.
      - name: "Compare initial results"
        id: compare
        if: ${{ github.event_name == 'pull_request' }}
        run: |
          ./velox/scripts/benchmark-runner.py compare \
            --baseline_path ${BASELINE_OUTPUT_PATH} \
            --contender_path ${CONTENDER_OUTPUT_PATH} \
            --rerun_json_output "${RESULTS_ROOT}/rerun_json_output_0.json" \
            --do_not_fail

      # Up to five rerun rounds. Round i consumes the JSON written by round
      # i-1 and stops early once that file is missing or empty. Afterwards all
      # results (including the rerun sub-directories) are compared recursively.
      - name: "Rerun Benchmarks"
        if: ${{ github.event_name == 'pull_request'}}
        working-directory: 'velox'
        run: |
          for i in 1 2 3 4 5; do
            CURRENT_JSON_RERUN="${RESULTS_ROOT}/rerun_json_output_$((${i} - 1)).json"
            NEXT_JSON_RERUN="${RESULTS_ROOT}/rerun_json_output_${i}.json"
            if [ ! -s "${CURRENT_JSON_RERUN}" ]; then
              echo "::notice::Rerun iteration ${i} found empty file. Finalizing."
              break
            fi
            echo "::group::Rerun iteration: ${i}"
            make benchmarks-basic-run \
              EXTRA_BENCHMARK_FLAGS="--binary_path ${BINARY_DIR}/baseline/ --output_path ${BASELINE_OUTPUT_PATH}/rerun-${i}/ --rerun_json_input ${CURRENT_JSON_RERUN}"
            make benchmarks-basic-run \
              EXTRA_BENCHMARK_FLAGS="--binary_path ${BINARY_DIR}/contender/ --output_path ${CONTENDER_OUTPUT_PATH}/rerun-${i}/ --rerun_json_input ${CURRENT_JSON_RERUN}"
            ./scripts/benchmark-runner.py compare \
              --baseline_path ${BASELINE_OUTPUT_PATH}/rerun-${i}/ \
              --contender_path ${CONTENDER_OUTPUT_PATH}/rerun-${i}/ \
              --rerun_json_output ${NEXT_JSON_RERUN} \
              --do_not_fail
            echo "::endgroup::"
          done
          echo "::group::Compare final results"
          ./scripts/benchmark-runner.py compare \
            --baseline_path ${BASELINE_OUTPUT_PATH} \
            --contender_path ${CONTENDER_OUTPUT_PATH} \
            --recursive \
            --do_not_fail
          echo "::endgroup::"

      # Writes 0 when the run was not triggered by a pull request.
      - name: "Save PR number"
        run: echo "${{ github.event.pull_request.number || 0 }}" > pr_number.txt

      # upload-artifact v3 has been retired on github.com; v4 is required.
      - name: "Upload PR number"
        uses: actions/upload-artifact@v4
        with:
          path: "pr_number.txt"
          name: "pr_number"

      - name: "Upload result artifact"
        uses: actions/upload-artifact@v4
        with:
          path: "benchmark-results"
          name: "benchmark-results"