Skip to content

feat: arrow based table state and checkpoint handling #4564

feat: arrow based table state and checkpoint handling

feat: arrow based table state and checkpoint handling #4564

Workflow file for this run

name: python_build
on:
push:
branches: [main]
pull_request:
branches: [main]
defaults:
run:
working-directory: ./python
jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.7
- name: Check Python
run: |
pip install ruff black mypy types-dataclasses typing-extensions
make check-python
- name: Install minimal stable with clippy and rustfmt
uses: actions-rs/toolchain@v1
with:
profile: default
toolchain: stable
override: true
- name: Check Rust
run: make check-rust
test-minimal:
name: Python Build (Python 3.7 PyArrow 8.0.0)
runs-on: ubuntu-latest
env:
RUSTFLAGS: "-C debuginfo=line-tables-only"
CARGO_INCREMENTAL: 0
# use the same environment we have for python release
container: quay.io/pypa/manylinux2014_x86_64:2022-09-24-4f086d0
steps:
# actions/checkout@v3 is a node action, which runs on a fairly new
# version of node. however, manylinux environment's glibc is too old for
# that version of the node. so we will have to use v1 instead, which is a
# docker based action.
- uses: actions/checkout@v1
- name: Install latest nightly
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
components: rustfmt, clippy
- uses: Swatinem/rust-cache@v2
- name: Enable manylinux Python targets
run: |
echo "/opt/python/cp37-cp37m/bin" >> $GITHUB_PATH
- name: Build and install deltalake
run: |
# Needed for openssl build
yum install -y perl-IPC-Cmd
pip install virtualenv
virtualenv venv
source venv/bin/activate
make setup
# Install minimum PyArrow version
pip install -e .[pandas,devel] pyarrow==8.0.0
env:
RUSTFLAGS: "-C debuginfo=line-tables-only"
- name: Run tests
run: |
source venv/bin/activate
make unit-test
# - name: Run Integration tests
# run: |
# py.test --cov tests -m integration
test:
name: Python Build (Python 3.10 PyArrow latest)
runs-on: ubuntu-latest
env:
RUSTFLAGS: "-C debuginfo=1"
CARGO_INCREMENTAL: 0
steps:
- uses: actions/checkout@v3
- name: Install latest nightly
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
components: rustfmt, clippy
- uses: Swatinem/rust-cache@v2
- uses: actions/setup-python@v3
with:
python-version: "3.10"
- name: Start emulated services
run: docker-compose up -d
- name: Build and install deltalake
run: |
pip install virtualenv
virtualenv venv
source venv/bin/activate
make develop
- name: Download Data Acceptance Tests (DAT) files
run: make setup-dat
- name: Run tests
run: |
source venv/bin/activate
python -m pytest -m '((s3 or azure) and integration) or not integration and not benchmark'
- name: Test without pandas
run: |
source venv/bin/activate
pip uninstall --yes pandas
python -m pytest -m "not pandas and not integration and not benchmark"
pip install pandas
- name: Build Sphinx documentation
run: |
source venv/bin/activate
make build-documentation
benchmark:
name: Python Benchmark
runs-on: ubuntu-latest
env:
RUSTFLAGS: "-C debuginfo=line-tables-only"
CARGO_INCREMENTAL: 0
steps:
- uses: actions/checkout@v2
- name: Install latest nightly
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
components: rustfmt, clippy
- uses: Swatinem/rust-cache@v2
- uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Build deltalake in release mode
run: |
pip install virtualenv
virtualenv venv
source venv/bin/activate
MATURIN_EXTRA_ARGS=--release make develop
# Download previous benchmark result from cache (if exists)
- name: Download previous benchmark data
uses: actions/cache@v2
with:
path: ./cache
key: ${{ runner.os }}-benchmark
- name: Run benchmark
run: |
source venv/bin/activate
pytest tests/test_benchmark.py -m benchmark --benchmark-json output.json
- name: Store benchmark result
uses: benchmark-action/github-action-benchmark@v1
with:
tool: "pytest"
output-file-path: python/output.json
external-data-json-path: ./cache/benchmark-data.json
fail-on-alert: true
test-pyspark:
name: PySpark Integration Tests
runs-on: ubuntu-latest
env:
RUSTFLAGS: "-C debuginfo=line-tables-only"
CARGO_INCREMENTAL: 0
steps:
- uses: actions/checkout@v3
- name: Install latest nightly
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
components: rustfmt, clippy
- uses: Swatinem/rust-cache@v2
- uses: actions/setup-python@v3
with:
python-version: "3.10"
- uses: actions/setup-java@v2
with:
distribution: "zulu"
java-version: "11"
- name: Build and install deltalake
run: |
pip install virtualenv
virtualenv venv
source venv/bin/activate
make develop-pyspark
- name: Run tests
run: |
source venv/bin/activate
make test-pyspark
multi-python-running:
name: Running with Python ${{ matrix.python-version }}
runs-on: ubuntu-latest
env:
RUSTFLAGS: "-C debuginfo=line-tables-only"
CARGO_INCREMENTAL: 0
strategy:
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Build and install deltalake
run: |
pip install virtualenv
virtualenv venv
source venv/bin/activate
make setup
maturin develop
- name: Run deltalake
run: |
source venv/bin/activate
python -c 'import deltalake'