# Captured from the workflow run page titled:
# "fix: use arrow's schema instead of spark's for local rel (#3602) #1595"

# Profiles the TPC-H benchmark with py-spy.
# Triggered by pushes to main and by manual dispatch.
name: Run Profiling on the TPCH Benchmark
on:
  push:
    branches:
      - main
  workflow_dispatch:
# Workflow-level environment variables, visible to every step of every job.
# All values are quoted so they are unambiguously strings.
env:
  DAFT_ANALYTICS_ENABLED: '0'
  # Passed as --scale_factor / --num_parts to the TPC-H scripts and used in
  # the data cache key.
  TPCH_SCALE_FACTOR: '4'
  TPCH_NUM_PARTS: '32'
  # Interpreter version requested from actions/setup-python.
  PYTHON_VERSION: '3.9'
  UV_SYSTEM_PYTHON: '1'
jobs:
  # Builds the Rust library in release mode, generates TPC-H data, records two
  # py-spy profiles (regular and GIL-only) and uploads them as one artifact.
  profile-daft:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    strategy:
      fail-fast: false
    steps:
      - uses: actions/checkout@v4
      - uses: moonrepo/setup-rust@v1
        with:
          # setup-rust's own cache is switched off; Swatinem/rust-cache is
          # configured in the following step.
          cache: false
      - uses: Swatinem/rust-cache@v2
        with:
          key: ${{ runner.os }}-profile
          cache-all-crates: 'true'
      - name: Set up Python ${{ env.PYTHON_VERSION }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Setup Virtual Env
        # Appending venv/bin to GITHUB_PATH puts the venv's executables on
        # PATH for all subsequent steps.
        run: |
          python -m venv venv
          echo "$GITHUB_WORKSPACE/venv/bin" >> $GITHUB_PATH
          pip install uv
      - name: Install dependencies
        run: |
          uv pip install -r requirements-dev.txt
      - name: Build Rust Library
        # NOTE(review): `source activate` has no path; presumably bash finds
        # venv/bin/activate through PATH (set up above) - confirm.
        run: |
          source activate
          maturin develop --release
      - uses: actions/cache@v4
        env:
          cache-name: profiling-cache-tpch-data
        with:
          path: data/tpch-dbgen
          # The key changes with the scale factor, the partition count, and
          # the hash of the TPC-H test and benchmarking sources.
          key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ env.TPCH_SCALE_FACTOR }}-${{ env.TPCH_NUM_PARTS }}-${{ hashFiles('tests/integration/test_tpch.py', 'benchmarking/tpch/**') }}
      - name: Generate TPCH Data
        # This step has no `if:` on the cache result, so the generator script
        # is invoked on every run.
        run: python benchmarking/tpch/data_generation.py --scale_factor=${{ env.TPCH_SCALE_FACTOR }} --num_parts=${{ env.TPCH_NUM_PARTS }} --generate_parquet
      - name: Run Profiling on TPCH Benchmark
        env:
          DAFT_DEVELOPER_USE_THREAD_POOL: '0'
        # `|| true` forces exit status 0, so a py-spy or benchmark failure
        # neither fails this step nor triggers the Slack notification below.
        run: |
          py-spy record --native --function -o tpch-${{github.run_id}}.txt -f speedscope -- python benchmarking/tpch/__main__.py --scale_factor=${{ env.TPCH_SCALE_FACTOR }} --num_parts=${{ env.TPCH_NUM_PARTS }} --skip_warmup --skip_questions=11,12,13,14,15,16,17,18,19,20,21,22 || true
      - name: Run GIL Profiling on TPCH Benchmark
        env:
          DAFT_DEVELOPER_USE_THREAD_POOL: '0'
        # Same invocation as the previous step plus `--gil`, written to a
        # separate tpch-gil-* output file. `|| true` applies here as well.
        run: |
          py-spy record --native --function --gil -o tpch-gil-${{github.run_id}}.txt -f speedscope -- python benchmarking/tpch/__main__.py --scale_factor=${{ env.TPCH_SCALE_FACTOR }} --num_parts=${{ env.TPCH_NUM_PARTS }} --skip_warmup --skip_questions=11,12,13,14,15,16,17,18,19,20,21,22 || true
      - name: Upload Profile
        uses: actions/upload-artifact@v4
        with:
          name: speedscope-profile
          # The glob matches both tpch-<run_id>.txt and tpch-gil-<run_id>.txt.
          path: tpch-*.txt
      - name: Send Slack notification on failure
        # NOTE(review): the action reference was corrupted to
        # "slackapi/[email protected]" (e-mail obfuscation residue). The v1
        # line is assumed because the webhook is configured through the
        # SLACK_WEBHOOK_* env vars; confirm the exact tag against repository
        # history.
        uses: slackapi/slack-github-action@v1.24.0
        if: failure()
        with:
          payload: |
            {
              "blocks": [
                {
                  "type": "section",
                  "text": {
                    "type": "mrkdwn",
                    "text": ":rotating_light: Run Profiling on the TPCH Benchmark <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|workflow> *FAILED* :rotating_light:"
                  }
                }
              ]
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
          SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK