Skip to content

feat: Separate null count and minmax from column stats #15219

feat: Separate null count and minmax from column stats

feat: Separate null count and minmax from column stats #15219

Workflow file for this run

# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: "Fuzzer Jobs"

# Triggers: pull requests and pushes to main that touch build-relevant paths,
# a nightly schedule, and manual dispatch with tunable build/fuzzer settings.
on:
  pull_request:
    paths:
      - "velox/**"
      - "!velox/docs/**"
      - "CMakeLists.txt"
      - "CMake/**"
      - "third_party/**"
      - "scripts/setup-ubuntu.sh"
      - "scripts/setup-helper-functions.sh"
      - ".github/workflows/scheduled.yml"

  push:
    branches:
      - "main"
    paths:
      - "velox/**"
      - "!velox/docs/**"
      - "CMakeLists.txt"
      - "CMake/**"
      - "third_party/**"
      - "scripts/setup-ubuntu.sh"
      - "scripts/setup-helper-functions.sh"
      - ".github/workflows/scheduled.yml"

  schedule:
    # Once a day at 03:00 (GitHub evaluates cron expressions in UTC).
    - cron: '0 3 * * *'

  workflow_dispatch:
    inputs:
      ref:
        description: 'Ref to check out'
        default: 'main'
      numThreads:
        description: 'Number of threads'
        default: 16
      maxHighMemJobs:
        description: 'Number of high memory jobs'
        default: 8
      maxLinkJobs:
        description: 'Maximum number of link jobs'
        default: 4
      extraCMakeFlags:
        description: 'Additional CMake flags'
        default: ''
      duration:
        description: 'Duration of fuzzer run in seconds'
        default: 1800

defaults:
  run:
    shell: bash

permissions:
  contents: read

concurrency:
  # This will not cancel fuzzer runs on main (regardless of which trigger)
  # because the commit sha is part of the group, but it uses the branch
  # name in PRs to cancel ongoing runs when a new commit is pushed.
  group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event_name == 'pull_request' && github.head_ref || github.sha }}
  cancel-in-progress: true

env:
  # Fuzzer run time in seconds: the dispatch input when given, otherwise
  # 30 minutes for scheduled runs and 15 minutes for every other trigger.
  DURATION: "${{ inputs.duration || ( github.event_name != 'schedule' && 900 || 1800 )}}"
  # Artifact retention in days: minimal for PRs, a bit longer for all other runs.
  RETENTION: "${{ github.event_name == 'pull_request' && 1 || 3 }}"

jobs:
# Builds the contender in debug mode, compares its function signatures with
# the baseline from main, and uploads the fuzzer binaries (plus signature
# files) as artifacts for the downstream fuzzer jobs.
compile:
  name: Build
  # prevent errors when forks ff their main branch
  if: ${{ github.repository == 'facebookincubator/velox' }}
  runs-on: 16-core-ubuntu
  container: ghcr.io/facebookincubator/velox-dev:centos9
  timeout-minutes: 120
  env:
    CCACHE_DIR: "${{ github.workspace }}/.ccache"
    # NOTE(review): the container image is centos9 while this says "ubuntu";
    # confirm what the build scripts expect before changing it.
    LINUX_DISTRO: "ubuntu"
    MAKEFLAGS: "NUM_THREADS=${{ inputs.numThreads || 16 }} MAX_HIGH_MEM_JOBS=${{ inputs.maxHighMemJobs || 8 }} MAX_LINK_JOBS=${{ inputs.maxLinkJobs || 4 }}"
  defaults:
    run:
      shell: bash
      working-directory: velox
  # Results of the signature check; downstream jobs use them to pick between
  # the regular and the biased fuzzer variants and to surface errors.
  outputs:
    presto_bias: ${{ steps.sig-check.outputs.presto_functions }}
    presto_error: ${{ steps.sig-check.outputs.presto_error }}
    spark_bias: ${{ steps.sig-check.outputs.spark_functions }}
    spark_error: ${{ steps.sig-check.outputs.spark_error }}
    presto_aggregate_bias: ${{ steps.sig-check.outputs.presto_aggregate_functions }}
    presto_aggregate_error: ${{ steps.sig-check.outputs.presto_aggregate_error }}
  steps:
    - name: Get latest commit from main
      if: ${{ github.event_name != 'schedule' }}
      working-directory: ${{ github.workspace }}
      env:
        GH_TOKEN: ${{ github.token }}
      id: get-head
      run: |
        if [ '${{ github.event_name == 'push' }}' == "true" ]; then
          # get the parent commit of the current one to get the relevant function signatures
          head_main=$(gh api -q '.parents.[0].sha' '/repos/facebookincubator/velox/commits/${{ github.sha }}')
        else
          head_main=$(gh api -H "Accept: application/vnd.github.sha" /repos/facebookincubator/velox/commits/heads/main)
        fi
        echo "head_main=$head_main" >> "$GITHUB_OUTPUT"

    - name: Get Function Signature Stash
      uses: assignUser/stash/restore@v1
      id: get-sig
      with:
        path: /tmp/signatures
        key: function-signatures-${{ steps.get-head.outputs.head_main || github.sha }}

    - name: Restore ccache
      uses: assignUser/stash/restore@v1
      with:
        path: "${{ env.CCACHE_DIR }}"
        key: ccache-fuzzer-centos

    - name: Fix git permissions
      working-directory: ${{ github.workspace }}
      # Usually actions/checkout does this but as we run in a container
      # it doesn't work
      run: |
        git config --global --add safe.directory "${GITHUB_WORKSPACE}/velox"
        git config --global --add safe.directory "${GITHUB_WORKSPACE}/velox_main"

    - name: Ensure Stash Dirs Exists
      working-directory: ${{ github.workspace }}
      run: |
        mkdir -p '${{ env.CCACHE_DIR }}'
        mkdir -p /tmp/signatures

    # The next three steps build the baseline signatures from main; they are
    # skipped on scheduled runs and when the signature stash was restored.
    - name: Checkout Main
      if: ${{ github.event_name != 'schedule' && steps.get-sig.outputs.stash-hit != 'true' }}
      uses: actions/checkout@v4
      with:
        ref: ${{ steps.get-head.outputs.head_main || 'main' }}
        path: velox_main

    - name: Build PyVelox
      if: ${{ github.event_name != 'schedule' && steps.get-sig.outputs.stash-hit != 'true' }}
      working-directory: velox_main
      run: |
        python3 -m venv .venv
        source .venv/bin/activate
        make python-build

    - name: Create Baseline Signatures
      if: ${{ github.event_name != 'schedule' && steps.get-sig.outputs.stash-hit != 'true' }}
      working-directory: velox_main
      run: |
        source .venv/bin/activate
        python3 -m pip install deepdiff
        python3 scripts/signature.py export --spark /tmp/signatures/spark_signatures_main.json
        python3 scripts/signature.py export --presto /tmp/signatures/presto_signatures_main.json
        python3 scripts/signature.py export_aggregates --presto /tmp/signatures/presto_aggregate_signatures_main.json

    - name: Save Function Signature Stash
      if: ${{ github.event_name == 'pull_request' && steps.get-sig.outputs.stash-hit != 'true' }}
      uses: assignUser/stash/save@v1
      with:
        path: /tmp/signatures
        key: function-signatures-${{ steps.get-head.outputs.head_main }}

    - name: Checkout Contender
      uses: actions/checkout@v4
      with:
        path: velox
        submodules: 'recursive'
        ref: "${{ inputs.ref }}"

    - name: Zero Ccache Statistics
      run: |
        ccache -sz

    - name: Build
      env:
        EXTRA_CMAKE_FLAGS: "-DVELOX_ENABLE_ARROW=ON -DVELOX_BUILD_PYTHON_PACKAGE=ON ${{ inputs.extraCMakeFlags }}"
      run: |
        EXTRA_CMAKE_FLAGS="-DPYTHON_EXECUTABLE=$(which python3) $EXTRA_CMAKE_FLAGS"
        make debug

    - name: Ccache after
      run: ccache -s

    - name: Save ccache
      # see https://github.com/actions/upload-artifact/issues/543
      continue-on-error: true
      if: ${{ github.event_name != 'schedule' }}
      uses: assignUser/stash/save@v1
      with:
        path: "${{ env.CCACHE_DIR }}"
        key: ccache-fuzzer-centos

    - name: Build PyVelox
      if: ${{ github.event_name != 'schedule' }}
      env:
        VELOX_BUILD_DIR: "_build/debug"
      run: |
        python3 -m venv .venv
        source .venv/bin/activate
        python3 -m pip install -e .

    - name: Create and test new function signatures
      if: ${{ github.event_name != 'schedule' }}
      id: sig-check
      run: |
        source .venv/bin/activate
        python3 -m pip install deepdiff
        python3 scripts/signature.py gh_bias_check presto spark
        python3 scripts/signature.py export_aggregates --presto /tmp/signatures/presto_aggregate_signatures_contender.json
        python3 scripts/signature.py bias_aggregates /tmp/signatures/presto_aggregate_signatures_main.json \
            /tmp/signatures/presto_aggregate_signatures_contender.json /tmp/signatures/presto_aggregate_bias_functions \
            /tmp/signatures/presto_aggregate_errors

    - name: Upload Signature Artifacts
      if: ${{ github.event_name != 'schedule' }}
      uses: actions/upload-artifact@v4
      with:
        name: signatures
        path: /tmp/signatures
        retention-days: "${{ env.RETENTION }}"

    # On pushes the contender signatures become the baseline ("main") files
    # that are stashed under the pushed commit's sha.
    - name: Prepare signatures
      working-directory: /tmp/signatures
      if: ${{ github.event_name == 'push' }}
      run: |
        # Remove irrelevant artifacts
        rm -f *_bias_functions
        rm -f *_signatures_main.json
        # Rename signature files as 'main' files
        for f in *_signatures_contender.json; do
          mv "$f" "${f/_contender.json/_main.json}"
        done

    - name: Save Function Signature Stash
      if: ${{ github.event_name == 'push' }}
      uses: assignUser/stash/save@v1
      with:
        path: /tmp/signatures
        key: function-signatures-${{ github.sha }}

    # One artifact per fuzzer binary; the fuzzer jobs download them by name.
    - name: Upload presto fuzzer
      uses: actions/upload-artifact@v4
      with:
        name: presto
        path: velox/_build/debug/velox/expression/fuzzer/velox_expression_fuzzer_test
        retention-days: "${{ env.RETENTION }}"

    - name: Upload spark expression fuzzer
      uses: actions/upload-artifact@v4
      with:
        name: spark_expression_fuzzer
        path: velox/_build/debug/velox/expression/fuzzer/spark_expression_fuzzer_test
        retention-days: "${{ env.RETENTION }}"

    - name: Upload spark aggregation fuzzer
      uses: actions/upload-artifact@v4
      with:
        name: spark_aggregation_fuzzer
        path: velox/_build/debug/velox/functions/sparksql/fuzzer/spark_aggregation_fuzzer_test
        retention-days: "${{ env.RETENTION }}"

    - name: Upload aggregation fuzzer
      uses: actions/upload-artifact@v4
      with:
        name: aggregation
        path: velox/_build/debug/velox/functions/prestosql/fuzzer/velox_aggregation_fuzzer_test
        retention-days: "${{ env.RETENTION }}"

    - name: Upload join fuzzer
      uses: actions/upload-artifact@v4
      with:
        name: join
        path: velox/_build/debug/velox/exec/tests/velox_join_fuzzer_test
        retention-days: "${{ env.RETENTION }}"

    - name: Upload exchange fuzzer
      uses: actions/upload-artifact@v4
      with:
        name: exchange
        path: velox/_build/debug/velox/exec/tests/velox_exchange_fuzzer_test
        retention-days: "${{ env.RETENTION }}"

    - name: Upload window fuzzer
      uses: actions/upload-artifact@v4
      with:
        name: window
        path: velox/_build/debug/velox/functions/prestosql/fuzzer/velox_window_fuzzer_test
        retention-days: "${{ env.RETENTION }}"

    - name: Upload row number fuzzer
      uses: actions/upload-artifact@v4
      with:
        name: row_number
        path: velox/_build/debug/velox/exec/tests/velox_row_number_fuzzer_test
        retention-days: "${{ env.RETENTION }}"

    - name: Upload writer fuzzer
      uses: actions/upload-artifact@v4
      with:
        name: writer
        path: velox/_build/debug/velox/functions/prestosql/fuzzer/velox_writer_fuzzer_test
        retention-days: "${{ env.RETENTION }}"
# Regular Presto expression fuzzer. Skipped when the compile job's
# `presto_bias` output is 'true'.
presto-fuzzer-run:
  name: Presto Fuzzer
  needs: compile
  if: needs.compile.outputs.presto_bias != 'true'
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:centos9
  timeout-minutes: 120
  steps:
    # Detect (on PRs only) whether non-test files of the listed directories
    # were changed.
    - id: changes
      if: github.event_name == 'pull_request'
      uses: dorny/paths-filter@v3
      with:
        filters: |
          presto:
            - 'velox/expression/!(test)**'
            - 'velox/exec/!(test)**'
            - 'velox/common/!(test)**'
            - 'velox/core/!(test)**'
            - 'velox/vector/!(test)**'

    - name: Set presto specific fuzzer duration
      env:
        is_pr: "${{ github.event_name == 'pull_request' }}"
        # PRs: 30 minutes when the `presto` filter matched, 15 minutes otherwise.
        pr_duration: "${{ steps.changes.outputs.presto == 'true' && 1800 || 900 }}"
        # Other triggers: the dispatch input if set, else 30 minutes on push
        # and 60 minutes for the rest (e.g. scheduled runs).
        other_duration: "${{ inputs.duration || (github.event_name == 'push' && 1800 || 3600) }}"
      run: |
        if [ "$is_pr" == "true" ]; then
          duration=$pr_duration
        else
          duration=$other_duration
        fi
        echo "DURATION=$duration" >> $GITHUB_ENV

    - name: Download presto fuzzer
      uses: actions/download-artifact@v4
      with:
        name: presto

    - name: Run Presto Fuzzer
      run: |
        mkdir -p /tmp/fuzzer_repro/logs/
        chmod -R 777 /tmp/fuzzer_repro
        chmod +x velox_expression_fuzzer_test
        ./velox_expression_fuzzer_test \
            --seed ${RANDOM} \
            --enable_variadic_signatures \
            --velox_fuzzer_enable_complex_types \
            --lazy_vector_generation_ratio 0.2 \
            --common_dictionary_wraps_generation_ratio=0.3 \
            --velox_fuzzer_enable_column_reuse \
            --velox_fuzzer_enable_expression_reuse \
            --max_expression_trees_per_step 2 \
            --retry_with_try \
            --enable_dereference \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/fuzzer_repro/logs \
            --repro_persist_path=/tmp/fuzzer_repro \
        && echo -e "\n\nFuzzer run finished successfully."

    # Uploaded on success and failure alike; only skipped on cancellation.
    - name: Archive production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: presto-fuzzer-failure-artifacts
        path: |
          /tmp/fuzzer_repro
# Presto expression fuzzer biased towards the functions listed in the
# `presto_bias_functions` signature file. Runs only when the compile job's
# `presto_bias` output is 'true'.
presto-bias-fuzzer:
  name: Presto Bias Fuzzer
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:centos9
  needs: compile
  if: ${{ needs.compile.outputs.presto_bias == 'true' }}
  timeout-minutes: 120
  steps:
    - name: Download presto expression fuzzer
      uses: actions/download-artifact@v4
      with:
        name: presto

    - name: Download Signatures
      uses: actions/download-artifact@v4
      with:
        name: signatures
        path: /tmp/signatures

    - name: Run Presto Expression Fuzzer
      # The file content is passed as a single quoted argument so that it can
      # never be word-split or glob-expanded by the shell.
      run: |
        ls /tmp/signatures
        mkdir -p /tmp/presto_bias_fuzzer_repro/logs/
        chmod -R 777 /tmp/presto_bias_fuzzer_repro
        chmod +x velox_expression_fuzzer_test
        ./velox_expression_fuzzer_test \
            --seed ${RANDOM} \
            --lazy_vector_generation_ratio 0.2 \
            --common_dictionary_wraps_generation_ratio=0.3 \
            --assign_function_tickets "$(cat /tmp/signatures/presto_bias_functions)" \
            --duration_sec 3600 \
            --enable_variadic_signatures \
            --velox_fuzzer_enable_complex_types \
            --velox_fuzzer_enable_column_reuse \
            --velox_fuzzer_enable_expression_reuse \
            --max_expression_trees_per_step 2 \
            --retry_with_try \
            --enable_dereference \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/presto_bias_fuzzer_repro/logs \
            --repro_persist_path=/tmp/presto_bias_fuzzer_repro \
        && echo -e "\n\nPresto Fuzzer run finished successfully."

    # Uploaded on success and failure alike; only skipped on cancellation.
    - name: Archive Presto bias expression production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: presto-bias-fuzzer-failure-artifacts
        path: |
          /tmp/presto_bias_fuzzer_repro
# Runs the Spark aggregation fuzzer binary built by the compile job for
# $DURATION seconds and archives its logs and repro files.
spark-aggregate-fuzzer-run:
  name: Spark Aggregate Fuzzer
  needs: compile
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:centos9
  timeout-minutes: 60
  steps:
    - name: Download spark aggregation fuzzer
      uses: actions/download-artifact@v4
      with:
        name: spark_aggregation_fuzzer

    - name: Run Spark Aggregate Fuzzer
      run: |
        mkdir -p /tmp/spark_aggregate_fuzzer_repro/logs/
        chmod -R 777 /tmp/spark_aggregate_fuzzer_repro
        chmod +x spark_aggregation_fuzzer_test
        ./spark_aggregation_fuzzer_test \
            --seed ${RANDOM} \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/spark_aggregate_fuzzer_repro/logs \
            --repro_persist_path=/tmp/spark_aggregate_fuzzer_repro \
        && echo -e "\n\nSpark Aggregation Fuzzer run finished successfully."

    # Uploaded on success and failure alike; only skipped on cancellation.
    - name: Archive Spark aggregate production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: spark-agg-fuzzer-failure-artifacts
        path: |
          /tmp/spark_aggregate_fuzzer_repro
# Spark expression fuzzer biased towards the functions listed in the
# `spark_bias_functions` signature file. Runs only when the compile job's
# `spark_bias` output is 'true'.
spark-bias-fuzzer:
  name: Spark Bias Fuzzer
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:centos9
  needs: compile
  if: ${{ needs.compile.outputs.spark_bias == 'true' }}
  timeout-minutes: 120
  steps:
    - name: Download spark expression fuzzer
      uses: actions/download-artifact@v4
      with:
        name: spark_expression_fuzzer

    - name: Download Signatures
      uses: actions/download-artifact@v4
      with:
        name: signatures
        path: /tmp/signatures

    - name: Run Spark Expression Fuzzer
      # The file content is passed as a single quoted argument so that it can
      # never be word-split or glob-expanded by the shell.
      run: |
        ls /tmp/signatures
        mkdir -p /tmp/spark_bias_fuzzer_repro/logs/
        chmod -R 777 /tmp/spark_bias_fuzzer_repro
        chmod +x spark_expression_fuzzer_test
        ./spark_expression_fuzzer_test \
            --seed ${RANDOM} \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/spark_bias_fuzzer_repro/logs \
            --assign_function_tickets "$(cat /tmp/signatures/spark_bias_functions)" \
            --repro_persist_path=/tmp/spark_bias_fuzzer_repro \
        && echo -e "\n\nSpark Fuzzer run finished successfully."

    # Uploaded on success and failure alike; only skipped on cancellation.
    # NOTE(review): the artifact name is the same as the one used by the
    # `spark-fuzzer` job. The two jobs have opposite `if` conditions, so
    # they should never both upload in one run -- confirm before renaming.
    - name: Archive Spark expression production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: spark-fuzzer-failure-artifacts
        path: |
          /tmp/spark_bias_fuzzer_repro
# Regular Spark expression fuzzer. Skipped when the compile job's
# `spark_bias` output is 'true'.
spark-fuzzer:
  name: Spark Fuzzer
  needs: compile
  if: needs.compile.outputs.spark_bias != 'true'
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:centos9
  timeout-minutes: 120
  steps:
    - name: Download spark expression fuzzer
      uses: actions/download-artifact@v4
      with:
        name: spark_expression_fuzzer

    - name: Run Spark Expression Fuzzer
      run: |
        mkdir -p /tmp/spark_fuzzer_repro/logs/
        chmod -R 777 /tmp/spark_fuzzer_repro
        chmod +x spark_expression_fuzzer_test
        ./spark_expression_fuzzer_test \
            --seed ${RANDOM} \
            --enable_variadic_signatures \
            --lazy_vector_generation_ratio 0.2 \
            --velox_fuzzer_enable_column_reuse \
            --velox_fuzzer_enable_expression_reuse \
            --max_expression_trees_per_step 2 \
            --retry_with_try \
            --enable_dereference \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/spark_fuzzer_repro/logs \
            --repro_persist_path=/tmp/spark_fuzzer_repro \
        && echo -e "\n\nSpark Fuzzer run finished successfully."

    # Uploaded on success and failure alike; only skipped on cancellation.
    - name: Archive Spark expression production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: spark-fuzzer-failure-artifacts
        path: |
          /tmp/spark_fuzzer_repro
# Join fuzzer that is pointed at a local Presto server (--presto_url)
# started inside the presto-java container.
presto-java-join-fuzzer-run:
  name: Join Fuzzer
  needs: compile
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:presto-java
  timeout-minutes: 120
  env:
    CCACHE_DIR: "${{ github.workspace }}/.ccache/"
    LINUX_DISTRO: "centos"
  steps:
    - name: Download join fuzzer
      uses: actions/download-artifact@v4
      with:
        name: join

    # The checkout provides scripts/presto/etc/hive.properties used below.
    - name: "Checkout Repo"
      uses: actions/checkout@v4
      with:
        ref: "${{ inputs.ref }}"
        path: velox
        submodules: 'recursive'

    # actions/checkout normally marks the directory as safe, but that does
    # not work when the job runs in a container.
    - name: Fix git permissions
      run: git config --global --add safe.directory ${GITHUB_WORKSPACE}/velox

    - name: Run Join Fuzzer
      run: |
        cd velox
        cp ./scripts/presto/etc/hive.properties $PRESTO_HOME/etc/catalog
        ls -lR $PRESTO_HOME/etc
        $PRESTO_HOME/bin/launcher run -v > /tmp/server.log 2>&1 &
        # Give the Presto server 60 seconds to come up.
        sleep 60
        /opt/presto-cli --server 127.0.0.1:8080 --execute 'CREATE SCHEMA hive.tpch;'
        cd -
        mkdir -p /tmp/join_fuzzer_repro/logs/
        chmod -R 777 /tmp/join_fuzzer_repro
        chmod +x velox_join_fuzzer_test
        ./velox_join_fuzzer_test \
            --seed ${RANDOM} \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/join_fuzzer_repro/logs \
            --presto_url=http://127.0.0.1:8080 \
            --req_timeout_ms=2000 \
        && echo -e "\n\nJoin fuzzer run finished successfully."

    # Fuzzer output plus the Presto server log; skipped only on cancellation.
    - name: Archive join production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: presto-sot-join-fuzzer-failure-artifacts
        path: |
          /tmp/join_fuzzer_repro
          /tmp/server.log
# Runs the exchange fuzzer binary built by the compile job for $DURATION
# seconds and archives its logs and repro files.
exchange-fuzzer-run:
  name: Exchange Fuzzer
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:centos9
  needs: compile
  timeout-minutes: 120
  steps:
    - name: Download exchange fuzzer
      uses: actions/download-artifact@v4
      with:
        name: exchange

    - name: Run exchange Fuzzer
      # The success message starts with "\n\n": the previous "\n\E" was
      # interpreted by `echo -e` as newline + ESC and swallowed the "E".
      run: |
        cat /proc/sys/vm/max_map_count
        mkdir -p /tmp/exchange_fuzzer_repro/logs/
        chmod -R 777 /tmp/exchange_fuzzer_repro
        chmod +x velox_exchange_fuzzer_test
        ./velox_exchange_fuzzer_test \
            --seed ${RANDOM} \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/exchange_fuzzer_repro/logs \
            --repro_path=/tmp/exchange_fuzzer_repro \
        && echo -e "\n\nExchange fuzzer run finished successfully."

    # Uploaded on success and failure alike; only skipped on cancellation.
    - name: Archive Exchange production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: exchange-fuzzer-failure-artifacts
        path: |
          /tmp/exchange_fuzzer_repro
# Runs the row-number fuzzer binary built by the compile job for $DURATION
# seconds and archives its logs.
row-number-fuzzer-run:
  name: RowNumber Fuzzer
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:centos9
  needs: compile
  timeout-minutes: 120
  steps:
    - name: Download row number fuzzer
      uses: actions/download-artifact@v4
      with:
        name: row_number

    - name: Run RowNumber Fuzzer
      # The success message starts with "\n\n": the previous "\n\R" printed
      # a stray backslash because "\R" is not an `echo -e` escape.
      run: |
        cat /proc/sys/vm/max_map_count
        mkdir -p /tmp/row_fuzzer_repro/logs/
        chmod -R 777 /tmp/row_fuzzer_repro
        chmod +x velox_row_number_fuzzer_test
        ./velox_row_number_fuzzer_test \
            --seed ${RANDOM} \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/row_fuzzer_repro/logs \
        && echo -e "\n\nRow number fuzzer run finished successfully."

    # Uploaded on success and failure alike; only skipped on cancellation.
    - name: Archive row number production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: row-fuzzer-failure-artifacts
        path: |
          /tmp/row_fuzzer_repro
# Aggregation fuzzer that is pointed at a local Presto server (--presto_url)
# started inside the presto-java container.
presto-java-aggregation-fuzzer-run:
  name: Aggregation Fuzzer with Presto as source of truth
  needs: compile
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:presto-java
  timeout-minutes: 120
  env:
    CCACHE_DIR: "${{ github.workspace }}/.ccache/"
    LINUX_DISTRO: "centos"
  steps:
    - name: Download aggregation fuzzer
      uses: actions/download-artifact@v4
      with:
        name: aggregation

    # The checkout provides scripts/presto/etc/hive.properties used below.
    - name: "Checkout Repo"
      uses: actions/checkout@v4
      with:
        ref: "${{ inputs.ref }}"
        path: velox
        submodules: 'recursive'

    # actions/checkout normally marks the directory as safe, but that does
    # not work when the job runs in a container.
    - name: Fix git permissions
      run: git config --global --add safe.directory ${GITHUB_WORKSPACE}/velox

    - name: "Run Aggregate Fuzzer"
      run: |
        cd velox
        cp ./scripts/presto/etc/hive.properties $PRESTO_HOME/etc/catalog
        ls -lR $PRESTO_HOME/etc
        $PRESTO_HOME/bin/launcher run -v > /tmp/server.log 2>&1 &
        # Give the Presto server 60 seconds to come up.
        sleep 60
        /opt/presto-cli --server 127.0.0.1:8080 --execute 'CREATE SCHEMA hive.tpch;'
        cd -
        mkdir -p /tmp/aggregate_fuzzer_repro/logs/
        chmod -R 777 /tmp/aggregate_fuzzer_repro
        chmod +x velox_aggregation_fuzzer_test
        ./velox_aggregation_fuzzer_test \
            --seed ${RANDOM} \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/aggregate_fuzzer_repro/logs \
            --repro_persist_path=/tmp/aggregate_fuzzer_repro \
            --enable_sorted_aggregations=true \
            --presto_url=http://127.0.0.1:8080 \
        && echo -e "\n\nAggregation fuzzer run finished successfully."

    # Fuzzer output plus the Presto server log; skipped only on cancellation.
    - name: Archive aggregate production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: presto-sot-aggregate-fuzzer-failure-artifacts
        path: |
          /tmp/aggregate_fuzzer_repro
          /tmp/server.log
# Expression fuzzer restricted (--only) to the functions listed in the
# `presto_bias_functions` signature file and pointed at a local Presto
# server. Runs only when the compile job's `presto_bias` output is 'true'.
presto-java-only-bias-function-expression-fuzzer-run:
  name: Biased Expression Fuzzer with Only Added/Updated Functions and Presto as source of truth
  needs: compile
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:presto-java
  timeout-minutes: 120
  if: ${{ needs.compile.outputs.presto_bias == 'true' }}
  env:
    CCACHE_DIR: "${{ github.workspace }}/.ccache/"
    LINUX_DISTRO: "centos"
  steps:
    - name: Download presto expression fuzzer
      uses: actions/download-artifact@v4
      with:
        name: presto

    - name: "Checkout Repo"
      uses: actions/checkout@v4
      with:
        path: velox
        submodules: 'recursive'
        ref: "${{ inputs.ref }}"

    - name: Fix git permissions
      # Usually actions/checkout does this but as we run in a container
      # it doesn't work
      run: git config --global --add safe.directory ${GITHUB_WORKSPACE}/velox

    - name: Download Signatures
      uses: actions/download-artifact@v4
      with:
        name: signatures
        path: /tmp/signatures

    - name: Run Bias Expression Fuzzer
      run: |
        cd velox
        cp ./scripts/presto/etc/hive.properties $PRESTO_HOME/etc/catalog
        ls -lR $PRESTO_HOME/etc
        $PRESTO_HOME/bin/launcher run -v > /tmp/server.log 2>&1 &
        # Sleep for 60 seconds to allow Presto server to start.
        sleep 60
        /opt/presto-cli --server 127.0.0.1:8080 --execute 'CREATE SCHEMA hive.tpch;'
        cd -
        mkdir -p /tmp/presto_only_bias_function_fuzzer_repro/logs/
        chmod -R 777 /tmp/presto_only_bias_function_fuzzer_repro
        chmod +x velox_expression_fuzzer_test
        echo "Biased functions:"
        cat /tmp/signatures/presto_bias_functions
        # Convert the comma-separated list of function names with tickets
        # (name=tickets) into a comma-separated list of function names only.
        function_names=""
        # The here-string keeps a trailing newline, so `read` succeeds even
        # when the file itself does not end with one.
        IFS=',' read -r -a array <<< "$(cat /tmp/signatures/presto_bias_functions)"
        for x in "${array[@]}"; do
          if [ -n "$function_names" ]; then
            function_names+=","
          fi
          # Keep only the part before the first '='.
          function_names+="${x%%=*}"
        done
        echo "Biased function names: $function_names"
        echo "Running Fuzzer for $DURATION"
        ./velox_expression_fuzzer_test \
            --seed ${RANDOM} \
            --lazy_vector_generation_ratio 0.2 \
            --common_dictionary_wraps_generation_ratio=0.3 \
            --only="$function_names" \
            --duration_sec $DURATION \
            --enable_variadic_signatures \
            --velox_fuzzer_enable_complex_types \
            --velox_fuzzer_enable_column_reuse \
            --velox_fuzzer_enable_expression_reuse \
            --max_expression_trees_per_step 2 \
            --retry_with_try \
            --batch_size=6 \
            --presto_url=http://127.0.0.1:8080 \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/presto_only_bias_function_fuzzer_repro/logs \
            --repro_persist_path=/tmp/presto_only_bias_function_fuzzer_repro \
        && echo -e "\n\nPresto Fuzzer run finished successfully."

    # Uploaded on success and failure alike; only skipped on cancellation.
    - name: Archive Presto only-bias-function expression fuzzer production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: presto-only-bias-function-fuzzer-failure-artifacts
        path: |
          /tmp/presto_only_bias_function_fuzzer_repro
# Aggregation fuzzer restricted (--only) to the functions listed in the
# `presto_aggregate_bias_functions` signature file and pointed at a local
# Presto server. Runs only when the compile job's `presto_aggregate_bias`
# output is 'true'.
presto-bias-java-aggregation-fuzzer-run:
  name: Biased Aggregation Fuzzer with Presto as source of truth
  needs: compile
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:presto-java
  timeout-minutes: 120
  if: ${{ needs.compile.outputs.presto_aggregate_bias == 'true' }}
  env:
    CCACHE_DIR: "${{ github.workspace }}/.ccache/"
    LINUX_DISTRO: "centos"
  steps:
    - name: Download aggregation fuzzer
      uses: actions/download-artifact@v4
      with:
        name: aggregation

    - name: "Checkout Repo"
      uses: actions/checkout@v4
      with:
        path: velox
        submodules: 'recursive'
        ref: "${{ inputs.ref }}"

    - name: Fix git permissions
      # Usually actions/checkout does this but as we run in a container
      # it doesn't work
      run: git config --global --add safe.directory ${GITHUB_WORKSPACE}/velox

    - name: Download Signatures
      uses: actions/download-artifact@v4
      with:
        name: signatures
        path: /tmp/signatures

    - name: "Run Bias Aggregate Fuzzer"
      # The function list is passed as one quoted argument so that it can
      # never be word-split or glob-expanded by the shell.
      run: |
        cd velox
        cp ./scripts/presto/etc/hive.properties $PRESTO_HOME/etc/catalog
        ls -lR $PRESTO_HOME/etc
        $PRESTO_HOME/bin/launcher run -v > /tmp/server.log 2>&1 &
        # Sleep for 60 seconds to allow Presto server to start.
        sleep 60
        /opt/presto-cli --server 127.0.0.1:8080 --execute 'CREATE SCHEMA hive.tpch;'
        cd -
        mkdir -p /tmp/aggregate_fuzzer_repro/logs/
        chmod -R 777 /tmp/aggregate_fuzzer_repro
        chmod +x velox_aggregation_fuzzer_test
        echo "signatures folder"
        ls /tmp/signatures/
        echo "Biased functions:"
        cat /tmp/signatures/presto_aggregate_bias_functions
        echo "Running Fuzzer for $DURATION"
        ./velox_aggregation_fuzzer_test \
            --seed ${RANDOM} \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/aggregate_fuzzer_repro/logs \
            --repro_persist_path=/tmp/aggregate_fuzzer_repro \
            --enable_sorted_aggregations=true \
            --only="$(cat /tmp/signatures/presto_aggregate_bias_functions)" \
            --presto_url=http://127.0.0.1:8080 \
        && echo -e "\n\nAggregation fuzzer run finished successfully."

    # Fuzzer output plus the Presto server log; skipped only on cancellation.
    - name: Archive bias aggregate production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: presto-bias-sot-aggregate-fuzzer-failure-artifacts
        path: |
          /tmp/aggregate_fuzzer_repro
          /tmp/server.log
# Fails the workflow (outside scheduled runs) when the compile job flagged
# function signature errors, printing the recorded error files.
surface-signature-errors:
  name: Signature Changes
  if: ${{ github.event_name != 'schedule' }}
  needs: compile
  runs-on: ubuntu-latest
  steps:
    - name: Download Signatures
      uses: actions/download-artifact@v4
      with:
        name: signatures
        path: /tmp/signatures

    - name: Surface Presto function signature errors
      if: ${{ needs.compile.outputs.presto_error == 'true' }}
      run: |
        cat /tmp/signatures/presto_errors
        exit 1

    # `!cancelled()` lets this step run even after the Presto step above has
    # failed, so both kinds of errors are reported in a single run.
    - name: Surface Aggregate function signature errors
      if: ${{ !cancelled() && needs.compile.outputs.presto_aggregate_error == 'true' }}
      run: |
        cat /tmp/signatures/presto_aggregate_errors
        exit 1
# Window fuzzer that is pointed at a local Presto server (--presto_url)
# started inside the presto-java container.
presto-java-window-fuzzer-run:
  name: Window Fuzzer with Presto as source of truth
  needs: compile
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:presto-java
  timeout-minutes: 120
  env:
    CCACHE_DIR: "${{ github.workspace }}/.ccache/"
    LINUX_DISTRO: "centos"
  steps:
    - name: Download window fuzzer
      uses: actions/download-artifact@v4
      with:
        name: window

    # The checkout provides scripts/presto/etc/hive.properties used below.
    - name: "Checkout Repo"
      uses: actions/checkout@v4
      with:
        ref: "${{ inputs.ref }}"
        path: velox
        submodules: 'recursive'

    # actions/checkout normally marks the directory as safe, but that does
    # not work when the job runs in a container.
    - name: Fix git permissions
      run: git config --global --add safe.directory ${GITHUB_WORKSPACE}/velox

    - name: "Run Window Fuzzer"
      run: |
        cd velox
        cp ./scripts/presto/etc/hive.properties $PRESTO_HOME/etc/catalog
        ls -lR $PRESTO_HOME/etc
        $PRESTO_HOME/bin/launcher run -v > /tmp/server.log 2>&1 &
        # Give the Presto server 60 seconds to come up.
        sleep 60
        /opt/presto-cli --server 127.0.0.1:8080 --execute 'CREATE SCHEMA hive.tpch;'
        cd -
        mkdir -p /tmp/window_fuzzer_repro/logs/
        chmod -R 777 /tmp/window_fuzzer_repro
        chmod +x velox_window_fuzzer_test
        ./velox_window_fuzzer_test \
            --seed ${RANDOM} \
            --duration_sec $DURATION \
            --batch_size=50 \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --log_dir=/tmp/window_fuzzer_repro/logs \
            --repro_persist_path=/tmp/window_fuzzer_repro \
            --enable_window_reference_verification \
            --presto_url=http://127.0.0.1:8080 \
        && echo -e "\n\nWindow fuzzer run finished successfully."

    # Fuzzer output plus the Presto server log; skipped only on cancellation.
    - name: Archive window production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: presto-sot-window-fuzzer-failure-artifacts
        path: |
          /tmp/window_fuzzer_repro
          /tmp/server.log
# Writer fuzzer that is pointed at a local Presto server (--presto_url)
# started inside the presto-java container.
presto-java-writer-fuzzer-run:
  name: Writer Fuzzer with Presto as source of truth
  needs: compile
  runs-on: ubuntu-latest
  container: ghcr.io/facebookincubator/velox-dev:presto-java
  timeout-minutes: 120
  env:
    CCACHE_DIR: "${{ github.workspace }}/.ccache/"
    LINUX_DISTRO: "centos"
  steps:
    - name: Download writer fuzzer
      uses: actions/download-artifact@v4
      with:
        name: writer

    - name: "Checkout Repo"
      uses: actions/checkout@v4
      with:
        path: velox
        submodules: 'recursive'
        ref: "${{ inputs.ref }}"

    - name: Fix git permissions
      # Usually actions/checkout does this but as we run in a container
      # it doesn't work
      run: git config --global --add safe.directory ${GITHUB_WORKSPACE}/velox

    - name: "Run Writer Fuzzer"
      # The success message starts with "\n\n": the previous "\n\W" printed
      # a stray backslash because "\W" is not an `echo -e` escape.
      run: |
        cd velox
        cp ./scripts/presto/etc/hive.properties $PRESTO_HOME/etc/catalog
        ls -lR $PRESTO_HOME/etc
        echo "jvm config content:"
        cat $PRESTO_HOME/etc/jvm.config
        $PRESTO_HOME/bin/launcher run -v > /tmp/server.log 2>&1 &
        ls -lR /var/log
        # Sleep for 60 seconds to allow Presto server to start.
        sleep 60
        /opt/presto-cli --version
        /opt/presto-cli --server 127.0.0.1:8080 --execute 'CREATE SCHEMA hive.tpch;'
        cd -
        mkdir -p /tmp/writer_fuzzer_repro/logs/
        chmod -R 777 /tmp/writer_fuzzer_repro
        chmod +x velox_writer_fuzzer_test
        ./velox_writer_fuzzer_test \
            --seed ${RANDOM} \
            --duration_sec $DURATION \
            --minloglevel=0 \
            --stderrthreshold=2 \
            --req_timeout_ms 60000 \
            --log_dir=/tmp/writer_fuzzer_repro/logs \
            --presto_url=http://127.0.0.1:8080 \
        && echo -e "\n\nWriter fuzzer run finished successfully."

    # Fuzzer output, the Presto server log and /var/log; skipped only on
    # cancellation.
    - name: Archive writer production artifacts
      if: ${{ !cancelled() }}
      uses: actions/upload-artifact@v4
      with:
        name: presto-sot-writer-fuzzer-failure-artifacts
        path: |
          /tmp/writer_fuzzer_repro
          /tmp/server.log
          /var/log
# Scheduled runs additionally call the reusable linux-build-base workflow
# with `use-clang` enabled.
linux-clang:
  name: Build with Clang
  if: github.event_name == 'schedule'
  uses: ./.github/workflows/linux-build-base.yml
  with:
    use-clang: true