fix: Fix columns stats when insert serialized rows (#11910) #15437
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Workflow-level settings: triggers, manual-run inputs, shell defaults,
# token permissions, concurrency and the environment shared by every job.
name: "Fuzzer Jobs"

on:
  # Pull requests only trigger the workflow when build-relevant files change;
  # documentation-only changes under velox/docs are excluded.
  pull_request:
    paths:
      - "velox/**"
      - "!velox/docs/**"
      - "CMakeLists.txt"
      - "CMake/**"
      - "third_party/**"
      - "scripts/setup-ubuntu.sh"
      - "scripts/setup-helper-functions.sh"
      - ".github/workflows/scheduled.yml"

  # Same path filter as for pull requests, restricted to the main branch.
  push:
    branches:
      - "main"
    paths:
      - "velox/**"
      - "!velox/docs/**"
      - "CMakeLists.txt"
      - "CMake/**"
      - "third_party/**"
      - "scripts/setup-ubuntu.sh"
      - "scripts/setup-helper-functions.sh"
      - ".github/workflows/scheduled.yml"

  # Nightly run at 03:00 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: '0 3 * * *'

  # Manual runs; every input has a default so none has to be filled in.
  workflow_dispatch:
    inputs:
      ref:
        description: 'Ref to check out'
        default: 'main'
      numThreads:
        description: 'Number of threads'
        default: 16
      maxHighMemJobs:
        description: 'Number of high memory jobs'
        default: 8
      maxLinkJobs:
        description: 'Maximum number of link jobs'
        default: 4
      extraCMakeFlags:
        description: 'Additional CMake flags'
        default: ''
      duration:
        description: 'Duration of fuzzer run in seconds'
        default: 1800

defaults:
  run:
    shell: bash

permissions:
  contents: read

concurrency:
  # This will not cancel fuzzer runs on main (regardless of which trigger)
  # because the commit sha is part of the group there, but it uses the branch
  # name in PRs so that a new commit cancels the ongoing run.
  group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event_name == 'pull_request' && github.head_ref || github.sha }}
  cancel-in-progress: true

env:
  # Fuzzer run time in seconds. An explicit `duration` input wins; otherwise
  # every event except `schedule` gets 900 (15 minutes) and scheduled runs
  # get 1800 (30 minutes).
  DURATION: "${{ inputs.duration || ( github.event_name != 'schedule' && 900 || 1800 )}}"
  # Artifact retention in days: keep it minimal for PRs (1) and a bit longer
  # for everything else (3).
  RETENTION: "${{ github.event_name == 'pull_request' && 1 || 3 }}"
jobs: | |
# Builds the contender checkout in debug mode, compares its function
# signatures against a baseline taken from main, and uploads every fuzzer
# binary as an artifact for the downstream fuzzer jobs.
compile:
  name: Build
  # Prevent errors when forks fast-forward their main branch.
  if: ${{ github.repository == 'facebookincubator/velox' }}
  runs-on: 16-core-ubuntu
  container: ghcr.io/facebookincubator/velox-dev:centos9
  timeout-minutes: 120
  env:
    CCACHE_DIR: "${{ github.workspace }}/.ccache"
    # NOTE(review): the container image is centos9 and the Presto-java jobs in
    # this workflow set "centos" here -- confirm that "ubuntu" is intended.
    LINUX_DISTRO: "ubuntu"
    MAKEFLAGS: "NUM_THREADS=${{ inputs.numThreads || 16 }} MAX_HIGH_MEM_JOBS=${{ inputs.maxHighMemJobs || 8 }} MAX_LINK_JOBS=${{ inputs.maxLinkJobs || 4 }}"
  defaults:
    run:
      shell: bash
      working-directory: velox
  # All outputs are forwarded from the `sig-check` step; downstream jobs use
  # the *_bias values to choose between regular and biased fuzzer runs.
  outputs:
    presto_bias: ${{ steps.sig-check.outputs.presto_functions }}
    presto_error: ${{ steps.sig-check.outputs.presto_error }}
    spark_bias: ${{ steps.sig-check.outputs.spark_functions }}
    spark_error: ${{ steps.sig-check.outputs.spark_error }}
    presto_aggregate_bias: ${{ steps.sig-check.outputs.presto_aggregate_functions }}
    presto_aggregate_error: ${{ steps.sig-check.outputs.presto_aggregate_error }}
  steps:
    # Resolve the commit whose signatures serve as the baseline.
    - name: Get latest commit from main
      id: get-head
      if: ${{ github.event_name != 'schedule' }}
      working-directory: ${{ github.workspace }}
      env:
        GH_TOKEN: ${{ github.token }}
      run: |
        if [ '${{ github.event_name == 'push' }}' == "true" ]; then
          # get the parent commit of the current one to get the relevant function signatures
          head_main=$(gh api -q '.parents.[0].sha' '/repos/facebookincubator/velox/commits/${{ github.sha }}')
        else
          head_main=$(gh api -H "Accept: application/vnd.github.sha" /repos/facebookincubator/velox/commits/heads/main)
        fi
        echo "head_main=$head_main" >> "$GITHUB_OUTPUT"

    # Falls back to the current sha when `get-head` was skipped.
    - name: Get Function Signature Stash
      id: get-sig
      uses: assignUser/stash/restore@v1
      with:
        path: /tmp/signatures
        key: function-signatures-${{ steps.get-head.outputs.head_main || github.sha }}

    - name: Restore ccache
      uses: assignUser/stash/restore@v1
      with:
        path: "${{ env.CCACHE_DIR }}"
        key: ccache-fuzzer-centos

    - name: Fix git permissions
      working-directory: ${{ github.workspace }}
      # Usually actions/checkout does this but as we run in a container
      # it doesn't work
      run: |
        git config --global --add safe.directory "${GITHUB_WORKSPACE}/velox"
        git config --global --add safe.directory "${GITHUB_WORKSPACE}/velox_main"

    - name: Ensure Stash Dirs Exists
      working-directory: ${{ github.workspace }}
      run: |
        mkdir -p '${{ env.CCACHE_DIR }}'
        mkdir -p /tmp/signatures

    # The next three steps create the baseline signatures from main; they are
    # skipped on scheduled runs and whenever the signature stash was hit.
    - name: Checkout Main
      if: ${{ github.event_name != 'schedule' && steps.get-sig.outputs.stash-hit != 'true' }}
      uses: actions/checkout@v4
      with:
        ref: ${{ steps.get-head.outputs.head_main || 'main' }}
        path: velox_main

    - name: Build PyVelox
      if: ${{ github.event_name != 'schedule' && steps.get-sig.outputs.stash-hit != 'true' }}
      working-directory: velox_main
      run: |
        python3 -m venv .venv
        source .venv/bin/activate
        make python-build

    - name: Create Baseline Signatures
      if: ${{ github.event_name != 'schedule' && steps.get-sig.outputs.stash-hit != 'true' }}
      working-directory: velox_main
      run: |
        source .venv/bin/activate
        python3 -m pip install deepdiff
        python3 scripts/signature.py export --spark /tmp/signatures/spark_signatures_main.json
        python3 scripts/signature.py export --presto /tmp/signatures/presto_signatures_main.json
        python3 scripts/signature.py export_aggregates --presto /tmp/signatures/presto_aggregate_signatures_main.json

    # On pull requests the freshly created baseline is stashed under the
    # resolved main commit.
    - name: Save Function Signature Stash
      if: ${{ github.event_name == 'pull_request' && steps.get-sig.outputs.stash-hit != 'true' }}
      uses: assignUser/stash/save@v1
      with:
        path: /tmp/signatures
        key: function-signatures-${{ steps.get-head.outputs.head_main }}

    # `inputs.ref` is only populated for workflow_dispatch runs.
    - name: Checkout Contender
      uses: actions/checkout@v4
      with:
        path: velox
        submodules: 'recursive'
        ref: "${{ inputs.ref }}"

    - name: Zero Ccache Statistics
      run: |
        ccache -sz

    - name: Build
      env:
        EXTRA_CMAKE_FLAGS: "-DVELOX_ENABLE_ARROW=ON -DVELOX_BUILD_PYTHON_PACKAGE=ON ${{ inputs.extraCMakeFlags }}"
      run: |
        EXTRA_CMAKE_FLAGS="-DPYTHON_EXECUTABLE=$(which python3) $EXTRA_CMAKE_FLAGS"
        make debug

    - name: Ccache after
      run: ccache -s

    - name: Save ccache
      # see https://github.com/actions/upload-artifact/issues/543
      continue-on-error: true
      if: ${{ github.event_name != 'schedule' }}
      uses: assignUser/stash/save@v1
      with:
        path: "${{ env.CCACHE_DIR }}"
        key: ccache-fuzzer-centos

    # Installs the contender's Python package in editable mode, pointing it at
    # the debug build directory.
    - name: Build PyVelox
      if: ${{ github.event_name != 'schedule' }}
      env:
        VELOX_BUILD_DIR: "_build/debug"
      run: |
        python3 -m venv .venv
        source .venv/bin/activate
        python3 -m pip install -e .

    # Source of the job outputs declared above.
    - name: Create and test new function signatures
      id: sig-check
      if: ${{ github.event_name != 'schedule' }}
      run: |
        source .venv/bin/activate
        python3 -m pip install deepdiff
        python3 scripts/signature.py gh_bias_check presto spark
        python3 scripts/signature.py export_aggregates --presto /tmp/signatures/presto_aggregate_signatures_contender.json
        python3 scripts/signature.py bias_aggregates /tmp/signatures/presto_aggregate_signatures_main.json \
          /tmp/signatures/presto_aggregate_signatures_contender.json /tmp/signatures/presto_aggregate_bias_functions \
          /tmp/signatures/presto_aggregate_errors

    - name: Upload Signature Artifacts
      if: ${{ github.event_name != 'schedule' }}
      uses: actions/upload-artifact@v4
      with:
        name: signatures
        path: /tmp/signatures
        retention-days: "${{ env.RETENTION }}"

    # On pushes the contender signatures become the baseline for this commit.
    - name: Prepare signatures
      working-directory: /tmp/signatures
      if: ${{ github.event_name == 'push' }}
      run: |
        # Remove irrelevant artifacts
        rm -f *_bias_functions
        rm -f *_signatures_main.json
        # Rename signature files as 'main' files
        for f in *_signatures_contender.json; do
          mv "$f" "${f/_contender.json/_main.json}"
        done

    - name: Save Function Signature Stash
      if: ${{ github.event_name == 'push' }}
      uses: assignUser/stash/save@v1
      with:
        path: /tmp/signatures
        key: function-signatures-${{ github.sha }}

    # One artifact per fuzzer binary; the artifact names are what the
    # downstream jobs pass to actions/download-artifact.
    - name: Upload presto fuzzer
      uses: actions/upload-artifact@v4
      with:
        name: presto
        path: velox/_build/debug/velox/expression/fuzzer/velox_expression_fuzzer_test
        retention-days: "${{ env.RETENTION }}"

    - name: Upload spark expression fuzzer
      uses: actions/upload-artifact@v4
      with:
        name: spark_expression_fuzzer
        path: velox/_build/debug/velox/expression/fuzzer/spark_expression_fuzzer_test
        retention-days: "${{ env.RETENTION }}"

    - name: Upload spark aggregation fuzzer
      uses: actions/upload-artifact@v4
      with:
        name: spark_aggregation_fuzzer
        path: velox/_build/debug/velox/functions/sparksql/fuzzer/spark_aggregation_fuzzer_test
        retention-days: "${{ env.RETENTION }}"

    - name: Upload aggregation fuzzer
      uses: actions/upload-artifact@v4
      with:
        name: aggregation
        path: velox/_build/debug/velox/functions/prestosql/fuzzer/velox_aggregation_fuzzer_test
        retention-days: "${{ env.RETENTION }}"

    - name: Upload join fuzzer
      uses: actions/upload-artifact@v4
      with:
        name: join
        path: velox/_build/debug/velox/exec/tests/velox_join_fuzzer_test
        retention-days: "${{ env.RETENTION }}"

    - name: Upload exchange fuzzer
      uses: actions/upload-artifact@v4
      with:
        name: exchange
        path: velox/_build/debug/velox/exec/tests/velox_exchange_fuzzer_test
        retention-days: "${{ env.RETENTION }}"

    - name: Upload window fuzzer
      uses: actions/upload-artifact@v4
      with:
        name: window
        path: velox/_build/debug/velox/functions/prestosql/fuzzer/velox_window_fuzzer_test
        retention-days: "${{ env.RETENTION }}"

    - name: Upload cache fuzzer
      uses: actions/upload-artifact@v4
      with:
        name: cache_fuzzer
        path: velox/_build/debug/velox/exec/tests/velox_cache_fuzzer_test
        retention-days: "${{ env.RETENTION }}"

    - name: Upload row number fuzzer
      uses: actions/upload-artifact@v4
      with:
        name: row_number
        path: velox/_build/debug/velox/exec/tests/velox_row_number_fuzzer_test
        retention-days: "${{ env.RETENTION }}"

    - name: Upload writer fuzzer
      uses: actions/upload-artifact@v4
      with:
        name: writer
        path: velox/_build/debug/velox/functions/prestosql/fuzzer/velox_writer_fuzzer_test
        retention-days: "${{ env.RETENTION }}"
# Regular Presto expression fuzzer run. Skipped when the build job reports
# changed Presto function signatures (presto_bias == 'true').
presto-fuzzer-run:
  name: Presto Fuzzer
  needs: compile
  if: ${{ needs.compile.outputs.presto_bias != 'true' }}
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:centos9
  timeout-minutes: 120
  steps:
    # On pull requests, detect whether files relevant to Presto were touched
    # (paths whose next component starts with "test" are excluded).
    - id: changes
      if: github.event_name == 'pull_request'
      uses: dorny/paths-filter@v3
      with:
        filters: |
          presto:
            - 'velox/expression/!(test)**'
            - 'velox/exec/!(test)**'
            - 'velox/common/!(test)**'
            - 'velox/core/!(test)**'
            - 'velox/vector/!(test)**'

    # Writes DURATION to GITHUB_ENV for the steps that follow.
    - name: Set presto specific fuzzer duration
      env:
        # Run for 30 minutes instead of 15, when files relevant to presto are touched
        pr_duration: "${{ steps.changes.outputs.presto == 'true' && 1800 || 900 }}"
        # An explicit `duration` input wins; otherwise 30 minutes on push and
        # 60 minutes for the remaining (e.g. scheduled) runs.
        other_duration: "${{ inputs.duration || (github.event_name == 'push' && 1800 || 3600) }}"
        is_pr: "${{ github.event_name == 'pull_request' }}"
      run: |
        if [ "$is_pr" == "true" ]; then
          duration=$pr_duration
        else
          duration=$other_duration
        fi
        echo "DURATION=$duration" >> $GITHUB_ENV

    - name: Download presto fuzzer
      uses: actions/download-artifact@v4
      with:
        name: presto

    # Logs and repro files both end up under /tmp/fuzzer_repro.
    - name: Run Presto Fuzzer
      run: |
        mkdir -p /tmp/fuzzer_repro/logs/
        chmod -R 777 /tmp/fuzzer_repro
        chmod +x velox_expression_fuzzer_test
        ./velox_expression_fuzzer_test \
            --seed ${RANDOM} \
            --enable_variadic_signatures \
            --velox_fuzzer_enable_complex_types \
            --lazy_vector_generation_ratio 0.2 \
            --common_dictionary_wraps_generation_ratio=0.3 \
            --velox_fuzzer_enable_column_reuse \
            --velox_fuzzer_enable_expression_reuse \
            --max_expression_trees_per_step 2 \
            --retry_with_try \
            --enable_dereference \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/fuzzer_repro/logs \
            --repro_persist_path=/tmp/fuzzer_repro \
        && echo -e "\n\nFuzzer run finished successfully."

    # Uploaded whether the fuzzer passed or failed, unless the run was cancelled.
    - name: Archive production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: presto-fuzzer-failure-artifacts
        path: |
          /tmp/fuzzer_repro
# Presto expression fuzzer biased towards added/updated functions. Runs only
# when the build job reports changed Presto function signatures.
presto-bias-fuzzer:
  name: Presto Bias Fuzzer
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:centos9
  needs: compile
  if: ${{ needs.compile.outputs.presto_bias == 'true' }}
  timeout-minutes: 120
  steps:
    - name: Download presto expression fuzzer
      uses: actions/download-artifact@v4
      with:
        name: presto

    - name: Download Signatures
      uses: actions/download-artifact@v4
      with:
        name: signatures
        path: /tmp/signatures

    # The function tickets come from /tmp/signatures/presto_bias_functions.
    # The duration is fixed at 3600 seconds here rather than taken from
    # $DURATION.
    - name: Run Presto Expression Fuzzer
      run: |
        ls /tmp/signatures
        mkdir -p /tmp/presto_bias_fuzzer_repro/logs/
        chmod -R 777 /tmp/presto_bias_fuzzer_repro
        chmod +x velox_expression_fuzzer_test
        ./velox_expression_fuzzer_test \
            --seed ${RANDOM} \
            --lazy_vector_generation_ratio 0.2 \
            --common_dictionary_wraps_generation_ratio=0.3 \
            --assign_function_tickets $(cat /tmp/signatures/presto_bias_functions) \
            --duration_sec 3600 \
            --enable_variadic_signatures \
            --velox_fuzzer_enable_complex_types \
            --velox_fuzzer_enable_column_reuse \
            --velox_fuzzer_enable_expression_reuse \
            --max_expression_trees_per_step 2 \
            --retry_with_try \
            --enable_dereference \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/presto_bias_fuzzer_repro/logs \
            --repro_persist_path=/tmp/presto_bias_fuzzer_repro \
        && echo -e "\n\nPresto Fuzzer run finished successfully."

    # Step name corrected: this job archives Presto bias artifacts, not Spark
    # ones.
    - name: Archive Presto bias expression production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: presto-bias-fuzzer-failure-artifacts
        path: |
          /tmp/presto_bias_fuzzer_repro
# Runs the Spark aggregation fuzzer binary produced by the build job.
spark-aggregate-fuzzer-run:
  name: Spark Aggregate Fuzzer
  needs: compile
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:centos9
  timeout-minutes: 60
  steps:
    - name: Download spark aggregation fuzzer
      uses: actions/download-artifact@v4
      with:
        name: spark_aggregation_fuzzer

    # Logs and repro files are written below
    # /tmp/spark_aggregate_fuzzer_repro.
    - name: Run Spark Aggregate Fuzzer
      run: |
        mkdir -p /tmp/spark_aggregate_fuzzer_repro/logs/
        chmod -R 777 /tmp/spark_aggregate_fuzzer_repro
        chmod +x spark_aggregation_fuzzer_test
        ./spark_aggregation_fuzzer_test \
            --seed ${RANDOM} \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/spark_aggregate_fuzzer_repro/logs \
            --repro_persist_path=/tmp/spark_aggregate_fuzzer_repro \
        && echo -e "\n\nSpark Aggregation Fuzzer run finished successfully."

    # Uploaded unless the run was cancelled.
    - name: Archive Spark aggregate production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: spark-agg-fuzzer-failure-artifacts
        path: |
          /tmp/spark_aggregate_fuzzer_repro
# Spark expression fuzzer biased towards added/updated functions. Runs only
# when the build job reports changed Spark function signatures.
spark-bias-fuzzer:
  name: Spark Bias Fuzzer
  needs: compile
  if: ${{ needs.compile.outputs.spark_bias == 'true' }}
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:centos9
  timeout-minutes: 120
  steps:
    - name: Download spark expression fuzzer
      uses: actions/download-artifact@v4
      with:
        name: spark_expression_fuzzer

    - name: Download Signatures
      uses: actions/download-artifact@v4
      with:
        name: signatures
        path: /tmp/signatures

    # The function tickets come from /tmp/signatures/spark_bias_functions.
    - name: Run Spark Expression Fuzzer
      run: |
        ls /tmp/signatures
        mkdir -p /tmp/spark_bias_fuzzer_repro/logs/
        chmod -R 777 /tmp/spark_bias_fuzzer_repro
        chmod +x spark_expression_fuzzer_test
        ./spark_expression_fuzzer_test \
            --seed ${RANDOM} \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/spark_bias_fuzzer_repro/logs \
            --assign_function_tickets $(cat /tmp/signatures/spark_bias_functions) \
            --repro_persist_path=/tmp/spark_bias_fuzzer_repro \
        && echo -e "\n\nSpark Fuzzer run finished successfully."

    # The artifact name is shared with the regular Spark fuzzer job; the two
    # jobs have opposite `if` conditions on spark_bias, so only one of them
    # runs per workflow run.
    - name: Archive Spark expression production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: spark-fuzzer-failure-artifacts
        path: |
          /tmp/spark_bias_fuzzer_repro
# Regular Spark expression fuzzer run. Skipped when the build job reports
# changed Spark function signatures (spark_bias == 'true').
spark-fuzzer:
  name: Spark Fuzzer
  needs: compile
  if: ${{ needs.compile.outputs.spark_bias != 'true' }}
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:centos9
  timeout-minutes: 120
  steps:
    - name: Download spark expression fuzzer
      uses: actions/download-artifact@v4
      with:
        name: spark_expression_fuzzer

    # Logs and repro files are written below /tmp/spark_fuzzer_repro.
    - name: Run Spark Expression Fuzzer
      run: |
        mkdir -p /tmp/spark_fuzzer_repro/logs/
        chmod -R 777 /tmp/spark_fuzzer_repro
        chmod +x spark_expression_fuzzer_test
        ./spark_expression_fuzzer_test \
            --seed ${RANDOM} \
            --enable_variadic_signatures \
            --lazy_vector_generation_ratio 0.2 \
            --velox_fuzzer_enable_column_reuse \
            --velox_fuzzer_enable_expression_reuse \
            --max_expression_trees_per_step 2 \
            --retry_with_try \
            --enable_dereference \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/spark_fuzzer_repro/logs \
            --repro_persist_path=/tmp/spark_fuzzer_repro \
        && echo -e "\n\nSpark Fuzzer run finished successfully."

    # Uploaded unless the run was cancelled.
    - name: Archive Spark expression production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: spark-fuzzer-failure-artifacts
        path: |
          /tmp/spark_fuzzer_repro
# Runs the join fuzzer against a Presto server started inside the
# presto-java container.
presto-java-join-fuzzer-run:
  name: Join Fuzzer
  needs: compile
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:presto-java
  timeout-minutes: 120
  env:
    CCACHE_DIR: "${{ github.workspace }}/.ccache/"
    LINUX_DISTRO: "centos"
  steps:
    - name: Download join fuzzer
      uses: actions/download-artifact@v4
      with:
        name: join

    # The checkout provides scripts/presto/etc/hive.properties used below.
    - name: Checkout Repo
      uses: actions/checkout@v4
      with:
        path: velox
        submodules: recursive
        ref: "${{ inputs.ref }}"

    - name: Fix git permissions
      # Usually actions/checkout does this but as we run in a container
      # it doesn't work
      run: git config --global --add safe.directory ${GITHUB_WORKSPACE}/velox

    # Installs the Hive catalog config, launches Presto in the background
    # (server output goes to /tmp/server.log), creates the hive.tpch schema
    # and then runs the fuzzer from the workspace root.
    - name: Run Join Fuzzer
      run: |
        cd velox
        cp ./scripts/presto/etc/hive.properties $PRESTO_HOME/etc/catalog
        ls -lR $PRESTO_HOME/etc
        $PRESTO_HOME/bin/launcher run -v > /tmp/server.log 2>&1 &
        # Sleep for 60 seconds to allow Presto server to start.
        sleep 60
        /opt/presto-cli --server 127.0.0.1:8080 --execute 'CREATE SCHEMA hive.tpch;'
        cd -
        mkdir -p /tmp/join_fuzzer_repro/logs/
        chmod -R 777 /tmp/join_fuzzer_repro
        chmod +x velox_join_fuzzer_test
        ./velox_join_fuzzer_test \
            --seed ${RANDOM} \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/join_fuzzer_repro/logs \
            --presto_url=http://127.0.0.1:8080 \
            --req_timeout_ms=2000 \
        && echo -e "\n\nJoin fuzzer run finished successfully."

    # Fuzzer output plus the Presto server log, uploaded unless cancelled.
    - name: Archive join production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: presto-sot-join-fuzzer-failure-artifacts
        path: |
          /tmp/join_fuzzer_repro
          /tmp/server.log
# Runs the exchange fuzzer binary produced by the build job.
exchange-fuzzer-run:
  name: Exchange Fuzzer
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:centos9
  needs: compile
  timeout-minutes: 120
  steps:
    - name: Download exchange fuzzer
      uses: actions/download-artifact@v4
      with:
        name: exchange

    # Prints vm.max_map_count for diagnostics, then runs the fuzzer with logs
    # and repro files below /tmp/exchange_fuzzer_repro.
    - name: Run exchange Fuzzer
      run: |
        cat /proc/sys/vm/max_map_count
        mkdir -p /tmp/exchange_fuzzer_repro/logs/
        chmod -R 777 /tmp/exchange_fuzzer_repro
        chmod +x velox_exchange_fuzzer_test
        # The success message starts with "\n\n": the previous "\n\E" was
        # interpreted by `echo -e` as an escape character and swallowed the
        # leading "E".
        ./velox_exchange_fuzzer_test \
            --seed ${RANDOM} \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/exchange_fuzzer_repro/logs \
            --repro_path=/tmp/exchange_fuzzer_repro \
        && echo -e "\n\nExchange fuzzer run finished successfully."

    # Uploaded unless the run was cancelled.
    - name: Archive Exchange production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: exchange-fuzzer-failure-artifacts
        path: |
          /tmp/exchange_fuzzer_repro
# Runs the row number fuzzer binary produced by the build job.
row-number-fuzzer-run:
  name: RowNumber Fuzzer
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:centos9
  needs: compile
  timeout-minutes: 120
  steps:
    - name: Download row number fuzzer
      uses: actions/download-artifact@v4
      with:
        name: row_number

    # Prints vm.max_map_count for diagnostics, then runs the fuzzer with its
    # logs below /tmp/row_fuzzer_repro.
    - name: Run RowNumber Fuzzer
      run: |
        cat /proc/sys/vm/max_map_count
        mkdir -p /tmp/row_fuzzer_repro/logs/
        chmod -R 777 /tmp/row_fuzzer_repro
        chmod +x velox_row_number_fuzzer_test
        # The success message starts with "\n\n"; the previous "\n\R" printed
        # a stray backslash instead of a second newline.
        ./velox_row_number_fuzzer_test \
            --seed ${RANDOM} \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/row_fuzzer_repro/logs \
        && echo -e "\n\nRow number fuzzer run finished successfully."

    # Uploaded unless the run was cancelled.
    - name: Archive row number production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: row-fuzzer-failure-artifacts
        path: |
          /tmp/row_fuzzer_repro
# Runs the cache fuzzer binary produced by the build job.
cache-fuzzer-run:
  name: Cache Fuzzer
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:centos9
  needs: compile
  timeout-minutes: 120
  steps:
    - name: Download cache fuzzer
      uses: actions/download-artifact@v4
      with:
        name: cache_fuzzer

    # Logs are written below /tmp/cache_fuzzer_test.
    - name: Run cache Fuzzer
      run: |
        mkdir -p /tmp/cache_fuzzer_test/logs/
        chmod -R 777 /tmp/cache_fuzzer_test
        chmod +x velox_cache_fuzzer_test
        # The success message starts with "\n\n"; the previous "\n\C" printed
        # a stray backslash instead of a second newline.
        ./velox_cache_fuzzer_test \
            --seed ${RANDOM} \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/cache_fuzzer_test/logs \
        && echo -e "\n\nCache fuzzer run finished successfully."

    # Uploaded unless the run was cancelled.
    - name: Archive Cache production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: cache-fuzzer-test-logs
        path: |
          /tmp/cache_fuzzer_test
# Runs the aggregation fuzzer against a Presto server started inside the
# presto-java container.
presto-java-aggregation-fuzzer-run:
  name: Aggregation Fuzzer with Presto as source of truth
  needs: compile
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:presto-java
  timeout-minutes: 120
  env:
    CCACHE_DIR: "${{ github.workspace }}/.ccache/"
    LINUX_DISTRO: "centos"
  steps:
    - name: Download aggregation fuzzer
      uses: actions/download-artifact@v4
      with:
        name: aggregation

    # The checkout provides scripts/presto/etc/hive.properties used below.
    - name: Checkout Repo
      uses: actions/checkout@v4
      with:
        path: velox
        submodules: recursive
        ref: "${{ inputs.ref }}"

    - name: Fix git permissions
      # Usually actions/checkout does this but as we run in a container
      # it doesn't work
      run: git config --global --add safe.directory ${GITHUB_WORKSPACE}/velox

    # Installs the Hive catalog config, launches Presto in the background
    # (server output goes to /tmp/server.log), creates the hive.tpch schema
    # and then runs the fuzzer from the workspace root.
    - name: Run Aggregate Fuzzer
      run: |
        cd velox
        cp ./scripts/presto/etc/hive.properties $PRESTO_HOME/etc/catalog
        ls -lR $PRESTO_HOME/etc
        $PRESTO_HOME/bin/launcher run -v > /tmp/server.log 2>&1 &
        # Sleep for 60 seconds to allow Presto server to start.
        sleep 60
        /opt/presto-cli --server 127.0.0.1:8080 --execute 'CREATE SCHEMA hive.tpch;'
        cd -
        mkdir -p /tmp/aggregate_fuzzer_repro/logs/
        chmod -R 777 /tmp/aggregate_fuzzer_repro
        chmod +x velox_aggregation_fuzzer_test
        ./velox_aggregation_fuzzer_test \
            --seed ${RANDOM} \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/aggregate_fuzzer_repro/logs \
            --repro_persist_path=/tmp/aggregate_fuzzer_repro \
            --enable_sorted_aggregations=true \
            --presto_url=http://127.0.0.1:8080 \
        && echo -e "\n\nAggregation fuzzer run finished successfully."

    # Fuzzer output plus the Presto server log, uploaded unless cancelled.
    - name: Archive aggregate production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: presto-sot-aggregate-fuzzer-failure-artifacts
        path: |
          /tmp/aggregate_fuzzer_repro
          /tmp/server.log
# Expression fuzzer restricted to the added/updated Presto functions, verified
# against a Presto server. Runs only when the build job reports changed Presto
# function signatures.
presto-java-only-bias-function-expression-fuzzer-run:
  name: Biased Expression Fuzzer with Only Added/Updated Functions and Presto as source of truth
  needs: compile
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:presto-java
  timeout-minutes: 120
  if: ${{ needs.compile.outputs.presto_bias == 'true' }}
  env:
    CCACHE_DIR: "${{ github.workspace }}/.ccache/"
    LINUX_DISTRO: "centos"
  steps:
    - name: Download presto expression fuzzer
      uses: actions/download-artifact@v4
      with:
        name: presto

    # The checkout provides scripts/presto/etc/hive.properties used below.
    - name: "Checkout Repo"
      uses: actions/checkout@v4
      with:
        path: velox
        submodules: 'recursive'
        ref: "${{ inputs.ref }}"

    - name: Fix git permissions
      # Usually actions/checkout does this but as we run in a container
      # it doesn't work
      run: git config --global --add safe.directory ${GITHUB_WORKSPACE}/velox

    - name: Download Signatures
      uses: actions/download-artifact@v4
      with:
        name: signatures
        path: /tmp/signatures

    # Starts Presto, derives the plain function names from the
    # "name=tickets" list in presto_bias_functions and passes them to the
    # fuzzer through --only.
    - name: Run Bias Expression Fuzzer
      run: |
        cd velox
        cp ./scripts/presto/etc/hive.properties $PRESTO_HOME/etc/catalog
        ls -lR $PRESTO_HOME/etc
        $PRESTO_HOME/bin/launcher run -v > /tmp/server.log 2>&1 &
        # Sleep for 60 seconds to allow Presto server to start.
        sleep 60
        /opt/presto-cli --server 127.0.0.1:8080 --execute 'CREATE SCHEMA hive.tpch;'
        cd -
        mkdir -p /tmp/presto_only_bias_function_fuzzer_repro/logs/
        chmod -R 777 /tmp/presto_only_bias_function_fuzzer_repro
        chmod +x velox_expression_fuzzer_test
        echo "Biased functions:"
        cat /tmp/signatures/presto_bias_functions
        # Convert the list of function names with tickets into a list of function names only.
        # Expansions are quoted so that entries are neither word-split nor
        # glob-expanded by the shell.
        function_names=""
        IFS=',' read -r -a array <<< "$(cat /tmp/signatures/presto_bias_functions)"
        for x in "${array[@]}"; do
          if [ -n "$function_names" ]; then
            function_names+=","
          fi
          function_names+=$(echo "$x" | cut -d '=' -f 1)
        done
        echo "Biased function names: $function_names"
        echo "Running Fuzzer for $DURATION"
        ./velox_expression_fuzzer_test \
            --seed ${RANDOM} \
            --lazy_vector_generation_ratio 0.2 \
            --common_dictionary_wraps_generation_ratio=0.3 \
            --only="$function_names" \
            --duration_sec $DURATION \
            --enable_variadic_signatures \
            --velox_fuzzer_enable_complex_types \
            --velox_fuzzer_enable_column_reuse \
            --velox_fuzzer_enable_expression_reuse \
            --max_expression_trees_per_step 2 \
            --retry_with_try \
            --batch_size=6 \
            --presto_url=http://127.0.0.1:8080 \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/presto_only_bias_function_fuzzer_repro/logs \
            --repro_persist_path=/tmp/presto_only_bias_function_fuzzer_repro \
        && echo -e "\n\nPresto Fuzzer run finished successfully."

    # Uploaded unless the run was cancelled.
    - name: Archive Presto only-bias-function expression fuzzer production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: presto-only-bias-function-fuzzer-failure-artifacts
        path: |
          /tmp/presto_only_bias_function_fuzzer_repro
# Aggregation fuzzer restricted to the functions listed in
# presto_aggregate_bias_functions, verified against a Presto server. Runs only
# when the build job reports changed Presto aggregate signatures.
presto-bias-java-aggregation-fuzzer-run:
  name: Biased Aggregation Fuzzer with Presto as source of truth
  needs: compile
  if: ${{ needs.compile.outputs.presto_aggregate_bias == 'true' }}
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:presto-java
  timeout-minutes: 120
  env:
    CCACHE_DIR: "${{ github.workspace }}/.ccache/"
    LINUX_DISTRO: "centos"
  steps:
    - name: Download aggregation fuzzer
      uses: actions/download-artifact@v4
      with:
        name: aggregation

    # The checkout provides scripts/presto/etc/hive.properties used below.
    - name: Checkout Repo
      uses: actions/checkout@v4
      with:
        path: velox
        submodules: recursive
        ref: "${{ inputs.ref }}"

    - name: Fix git permissions
      # Usually actions/checkout does this but as we run in a container
      # it doesn't work
      run: git config --global --add safe.directory ${GITHUB_WORKSPACE}/velox

    - name: Download Signatures
      uses: actions/download-artifact@v4
      with:
        name: signatures
        path: /tmp/signatures

    # Starts Presto, prints the downloaded signature files and the biased
    # function list for diagnostics, then runs the fuzzer with --only set to
    # the content of presto_aggregate_bias_functions.
    - name: Run Bias Aggregate Fuzzer
      run: |
        cd velox
        cp ./scripts/presto/etc/hive.properties $PRESTO_HOME/etc/catalog
        ls -lR $PRESTO_HOME/etc
        $PRESTO_HOME/bin/launcher run -v > /tmp/server.log 2>&1 &
        # Sleep for 60 seconds to allow Presto server to start.
        sleep 60
        /opt/presto-cli --server 127.0.0.1:8080 --execute 'CREATE SCHEMA hive.tpch;'
        cd -
        mkdir -p /tmp/aggregate_fuzzer_repro/logs/
        chmod -R 777 /tmp/aggregate_fuzzer_repro
        chmod +x velox_aggregation_fuzzer_test
        echo "signatures folder"
        ls /tmp/signatures/
        echo "Biased functions:"
        cat /tmp/signatures/presto_aggregate_bias_functions
        echo "Running Fuzzer for $DURATION"
        ./velox_aggregation_fuzzer_test \
            --seed ${RANDOM} \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/aggregate_fuzzer_repro/logs \
            --repro_persist_path=/tmp/aggregate_fuzzer_repro \
            --enable_sorted_aggregations=true \
            --only=$(cat /tmp/signatures/presto_aggregate_bias_functions) \
            --presto_url=http://127.0.0.1:8080 \
        && echo -e "\n\nAggregation fuzzer run finished successfully."

    # Fuzzer output plus the Presto server log, uploaded unless cancelled.
    - name: Archive bias aggregate production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: presto-bias-sot-aggregate-fuzzer-failure-artifacts
        path: |
          /tmp/aggregate_fuzzer_repro
          /tmp/server.log
# Fails the workflow when the build job flagged function signature errors,
# printing the recorded error file first. Not run on scheduled runs.
surface-signature-errors:
  name: Signature Changes
  needs: compile
  if: ${{ github.event_name != 'schedule' }}
  runs-on: ubuntu-latest
  steps:
    - name: Download Signatures
      uses: actions/download-artifact@v4
      with:
        name: signatures
        path: /tmp/signatures

    # NOTE(review): the build job also exposes a `spark_error` output that no
    # step here consumes -- confirm whether that is intentional.
    - name: Surface Presto function signature errors
      if: ${{ needs.compile.outputs.presto_error == 'true' }}
      run: |
        cat /tmp/signatures/presto_errors
        exit 1

    - name: Surface Aggregate function signature errors
      if: ${{ needs.compile.outputs.presto_aggregate_error == 'true' }}
      run: |
        cat /tmp/signatures/presto_aggregate_errors
        exit 1
# Runs the window fuzzer against a Presto server started inside the
# presto-java container.
presto-java-window-fuzzer-run:
  name: Window Fuzzer with Presto as source of truth
  needs: compile
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:presto-java
  timeout-minutes: 120
  env:
    CCACHE_DIR: "${{ github.workspace }}/.ccache/"
    LINUX_DISTRO: "centos"
  steps:
    - name: Download window fuzzer
      uses: actions/download-artifact@v4
      with:
        name: window

    # The checkout provides scripts/presto/etc/hive.properties used below.
    - name: Checkout Repo
      uses: actions/checkout@v4
      with:
        path: velox
        submodules: recursive
        ref: "${{ inputs.ref }}"

    - name: Fix git permissions
      # Usually actions/checkout does this but as we run in a container
      # it doesn't work
      run: git config --global --add safe.directory ${GITHUB_WORKSPACE}/velox

    # Installs the Hive catalog config, launches Presto in the background
    # (server output goes to /tmp/server.log), creates the hive.tpch schema
    # and then runs the fuzzer from the workspace root.
    - name: Run Window Fuzzer
      run: |
        cd velox
        cp ./scripts/presto/etc/hive.properties $PRESTO_HOME/etc/catalog
        ls -lR $PRESTO_HOME/etc
        $PRESTO_HOME/bin/launcher run -v > /tmp/server.log 2>&1 &
        # Sleep for 60 seconds to allow Presto server to start.
        sleep 60
        /opt/presto-cli --server 127.0.0.1:8080 --execute 'CREATE SCHEMA hive.tpch;'
        cd -
        mkdir -p /tmp/window_fuzzer_repro/logs/
        chmod -R 777 /tmp/window_fuzzer_repro
        chmod +x velox_window_fuzzer_test
        ./velox_window_fuzzer_test \
            --seed ${RANDOM} \
            --duration_sec $DURATION \
            --batch_size=50 \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/window_fuzzer_repro/logs \
            --repro_persist_path=/tmp/window_fuzzer_repro \
            --enable_window_reference_verification \
            --presto_url=http://127.0.0.1:8080 \
        && echo -e "\n\nWindow fuzzer run finished successfully."

    # Fuzzer output plus the Presto server log, uploaded unless cancelled.
    - name: Archive window production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: presto-sot-window-fuzzer-failure-artifacts
        path: |
          /tmp/window_fuzzer_repro
          /tmp/server.log
# Runs the writer fuzzer against a Presto server started inside the
# presto-java container.
presto-java-writer-fuzzer-run:
  name: Writer Fuzzer with Presto as source of truth
  needs: compile
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:presto-java
  timeout-minutes: 120
  env:
    CCACHE_DIR: "${{ github.workspace }}/.ccache/"
    LINUX_DISTRO: "centos"
  steps:
    - name: Download writer fuzzer
      uses: actions/download-artifact@v4
      with:
        name: writer

    # The checkout provides scripts/presto/etc/hive.properties used below.
    - name: "Checkout Repo"
      uses: actions/checkout@v4
      with:
        path: velox
        submodules: 'recursive'
        ref: "${{ inputs.ref }}"

    - name: Fix git permissions
      # Usually actions/checkout does this but as we run in a container
      # it doesn't work
      run: git config --global --add safe.directory ${GITHUB_WORKSPACE}/velox

    # Installs the Hive catalog config, prints the JVM config, /var/log and
    # the CLI version for diagnostics, launches Presto in the background
    # (server output goes to /tmp/server.log), creates the hive.tpch schema
    # and then runs the fuzzer from the workspace root.
    - name: "Run Writer Fuzzer"
      run: |
        cd velox
        cp ./scripts/presto/etc/hive.properties $PRESTO_HOME/etc/catalog
        ls -lR $PRESTO_HOME/etc
        echo "jvm config content:"
        cat $PRESTO_HOME/etc/jvm.config
        $PRESTO_HOME/bin/launcher run -v > /tmp/server.log 2>&1 &
        ls -lR /var/log
        # Sleep for 60 seconds to allow Presto server to start.
        sleep 60
        /opt/presto-cli --version
        /opt/presto-cli --server 127.0.0.1:8080 --execute 'CREATE SCHEMA hive.tpch;'
        cd -
        mkdir -p /tmp/writer_fuzzer_repro/logs/
        chmod -R 777 /tmp/writer_fuzzer_repro
        chmod +x velox_writer_fuzzer_test
        # The success message starts with "\n\n"; the previous "\n\W" printed
        # a stray backslash instead of a second newline.
        ./velox_writer_fuzzer_test \
            --seed ${RANDOM} \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --req_timeout_ms 60000 \
            --log_dir=/tmp/writer_fuzzer_repro/logs \
            --presto_url=http://127.0.0.1:8080 \
        && echo -e "\n\nWriter fuzzer run finished successfully."

    # Fuzzer output, the Presto server log and /var/log, uploaded unless the
    # run was cancelled.
    - name: Archive writer production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: presto-sot-writer-fuzzer-failure-artifacts
        path: |
          /tmp/writer_fuzzer_repro
          /tmp/server.log
          /var/log
# Scheduled runs only: calls the reusable Linux build workflow with its
# `use-clang` input set to true.
linux-clang:
  name: Build with Clang
  if: ${{ github.event_name == 'schedule' }}
  uses: ./.github/workflows/linux-build-base.yml
  with:
    use-clang: true