Skip to content

Add support in AggregationFuzzer to compare against reference DB for non deterministic functions on unsorted inputs #90

Add support in AggregationFuzzer to compare against reference DB for non deterministic functions on unsorted inputs

Add support in AggregationFuzzer to compare against reference DB for non deterministic functions on unsorted inputs #90

Workflow file for this run

# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: "Experimental Fuzzer Jobs"
on:
pull_request:
paths:
- ".github/workflows/experimental.yml"
workflow_dispatch:
inputs:
ref:
description: 'Ref to checkout out'
default: 'main'
numThreads:
description: 'Number of threads'
default: 16
maxHighMemJobs:
description: 'Number of high memory jobs'
default: 8
maxLinkJobs:
description: 'Maximum number of link jobs'
default: 4
extraCMakeFlags:
description: 'Additional CMake flags'
default: ''
defaults:
run:
shell: bash
permissions:
contents: read
jobs:
compile:
runs-on: 8-core
timeout-minutes: 120
env:
CCACHE_DIR: "${{ github.workspace }}/.ccache/"
CCACHE_BASEDIR: "${{ github.workspace }}"
LINUX_DISTRO: "ubuntu"
steps:
- name: "Restore ccache"
uses: actions/cache@v3
with:
path: "${{ env.CCACHE_DIR }}"
# We are using the benchmark ccache as it has all
# required features enabled, so no need to create a new one
key: ccache-benchmark-${{ github.sha }}
restore-keys: |
ccache-benchmark-
- name: "Checkout Repo"
uses: actions/checkout@v3
with:
path: velox
submodules: 'recursive'
ref: "${{ inputs.ref || 'main' }}"
- name: "Install dependencies"
run: cd velox && source ./scripts/setup-ubuntu.sh
- name: "Build"
run: |
cd velox
make debug NUM_THREADS="${{ inputs.numThreads || 8 }}" MAX_HIGH_MEM_JOBS="${{ inputs.maxHighMemJobs || 8 }}" MAX_LINK_JOBS="${{ inputs.maxLinkJobs || 4 }}" EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON ${{ inputs.extraCMakeFlags }}"
ccache -s
- name: Upload aggregation fuzzer
uses: actions/upload-artifact@v3
with:
name: aggregation
path: velox/_build/debug/velox/functions/prestosql/fuzzer/velox_aggregation_fuzzer_test
- name: Upload spark aggregation fuzzer
uses: actions/upload-artifact@v3
with:
name: spark_aggregation_fuzzer
path: velox/_build/debug/velox/functions/sparksql/fuzzer/spark_aggregation_fuzzer_test
- name: Upload aggregation fuzzer
uses: actions/upload-artifact@v3
with:
name: aggregation
path: velox/_build/debug/velox/functions/prestosql/fuzzer/velox_aggregation_fuzzer_test
- name: Upload join fuzzer
uses: actions/upload-artifact@v3
with:
name: join
path: velox/_build/debug/velox/exec/tests/velox_join_fuzzer_test
presto-java-aggregation-fuzzer-run:
runs-on: 8-core
container: ghcr.io/facebookincubator/velox-dev:presto-java
timeout-minutes: 120
env:
CCACHE_DIR: "${{ github.workspace }}/.ccache/"
CCACHE_BASEDIR: "${{ github.workspace }}"
LINUX_DISTRO: "centos"
steps:
- name: "Restore ccache"
uses: actions/cache@v3
with:
path: "${{ env.CCACHE_DIR }}"
# We are using the benchmark ccache as it has all
# required features enabled, so no need to create a new one
key: ccache-presto-${{ github.sha }}
restore-keys: |
ccache-presto-
- name: "Checkout Repo"
uses: actions/checkout@v3
with:
path: velox
submodules: 'recursive'
ref: "${{ inputs.ref || 'main' }}"
- name: "Build"
run: |
cd velox
source /opt/rh/gcc-toolset-9/enable
make debug NUM_THREADS="${{ inputs.numThreads || 8 }}" MAX_HIGH_MEM_JOBS="${{ inputs.maxHighMemJobs || 8 }}" MAX_LINK_JOBS="${{ inputs.maxLinkJobs || 4 }}" EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_ARROW=ON ${{ inputs.extraCMakeFlags }}"
ccache -s
- name: "Run Aggregate Fuzzer"
run: |
cd velox
cp ./scripts/etc/hive.properties $PRESTO_HOME/etc/catalog
ls -lR $PRESTO_HOME/etc
/opt/start-prestojava.sh > /tmp/server.log 2>&1 &
# Sleep for 60 seconds to allow Presto server to start.
sleep 60
/opt/presto-cli --server 127.0.0.1:8080 --execute 'CREATE SCHEMA hive.tpch;'
mkdir -p /tmp/aggregate_fuzzer_repro/
rm -rfv /tmp/aggregate_fuzzer_repro/*
chmod -R 777 /tmp/aggregate_fuzzer_repro
_build/debug/velox/functions/prestosql/fuzzer/velox_aggregation_fuzzer_test \
--seed ${RANDOM} \
--duration_sec 3600 \
--logtostderr=1 \
--minloglevel=0 \
--repro_persist_path=/tmp/aggregate_fuzzer_repro \
--enable_sorted_aggregations=true \
--presto_url=http://127.0.0.1:8080 \
&& echo -e "\n\nAggregation fuzzer run finished successfully."
- name: Archive aggregate production artifacts
if: always()
uses: actions/upload-artifact@v3
with:
name: aggregate-fuzzer-failure-artifacts
path: |
/tmp/aggregate_fuzzer_repro
/tmp/server.log
linux-spark-fuzzer-run:
runs-on: ubuntu-latest
needs: compile
timeout-minutes: 120
steps:
- name: "Checkout Repo"
uses: actions/checkout@v3
with:
ref: "${{ inputs.ref || 'main' }}"
- name: "Install dependencies"
run: source ./scripts/setup-ubuntu.sh
- name: Download spark aggregation fuzzer
uses: actions/download-artifact@v3
with:
name: spark_aggregation_fuzzer
- name: "Run Spark Aggregate Fuzzer"
run: |
mkdir -p /tmp/spark_aggregate_fuzzer_repro/
chmod -R 777 /tmp/spark_aggregate_fuzzer_repro
chmod +x spark_aggregation_fuzzer_test
./spark_aggregation_fuzzer_test \
--seed ${RANDOM} \
--duration_sec 1800 \
--logtostderr=1 \
--minloglevel=0 \
--repro_persist_path=/tmp/spark_aggregate_fuzzer_repro \
--enable_sorted_aggregations=true \
&& echo -e "\n\nSpark Aggregation Fuzzer run finished successfully."
- name: Archive Spark aggregate production artifacts
if: always()
uses: actions/upload-artifact@v3
with:
name: spark-agg-fuzzer-failure-artifacts
path: |
/tmp/spark_aggregate_fuzzer_repro
linux-join-fuzzer-run:
runs-on: ubuntu-latest
needs: compile
timeout-minutes: 120
steps:
- name: "Checkout Repo"
uses: actions/checkout@v3
with:
ref: "${{ inputs.ref || 'main' }}"
- name: "Install dependencies"
run: source ./scripts/setup-ubuntu.sh
- name: Download join fuzzer
uses: actions/download-artifact@v3
with:
name: join
- name: "Run Join Fuzzer"
run: |
ls /lib64
mkdir -p /tmp/join_fuzzer_repro/
rm -rfv /tmp/join_fuzzer_repro/*
chmod -R 777 /tmp/join_fuzzer_repro
chmod +x velox_join_fuzzer_test
./velox_join_fuzzer_test \
--seed ${RANDOM} \
--duration_sec 1800 \
--logtostderr=1 \
--minloglevel=0 \
&& echo -e "\n\nAggregation fuzzer run finished successfully."
- name: Archive aggregate production artifacts
if: always()
uses: actions/upload-artifact@v3
with:
name: join-fuzzer-failure-artifacts
path: |
/tmp/join_fuzzer_repro