Skip to content

docs: delta lake arrow integration page #4858

docs: delta lake arrow integration page

docs: delta lake arrow integration page #4858

Workflow file for this run

name: python_build
on:
push:
branches: [main]
pull_request:
branches: [main]
defaults:
run:
working-directory: ./python
jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Check Python
run: |
pip install ruff black mypy types-dataclasses typing-extensions
make check-python
- name: Install minimal stable with clippy and rustfmt
uses: actions-rs/toolchain@v1
with:
profile: default
toolchain: stable
override: true
- name: Check Rust
run: make check-rust
test-minimal:
name: Python Build (Python 3.8 PyArrow 8.0.0)
runs-on: ubuntu-latest
env:
RUSTFLAGS: "-C debuginfo=line-tables-only"
CARGO_INCREMENTAL: 0
steps:
- uses: actions/checkout@v3
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Install latest nightly
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
components: rustfmt, clippy
- uses: Swatinem/rust-cache@v2
- name: Build and install deltalake
run: |
pip install virtualenv
virtualenv venv
source venv/bin/activate
make setup
# Install minimum PyArrow version
pip install -e .[pandas,devel] pyarrow==8.0.0
env:
RUSTFLAGS: "-C debuginfo=line-tables-only"
- name: Run tests
run: |
source venv/bin/activate
make unit-test
# - name: Run Integration tests
# run: |
# py.test --cov tests -m integration
test:
name: Python Build (Python 3.10 PyArrow latest)
runs-on: ubuntu-latest
env:
RUSTFLAGS: "-C debuginfo=1"
CARGO_INCREMENTAL: 0
steps:
- uses: actions/checkout@v3
- name: Install latest nightly
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
components: rustfmt, clippy
- uses: Swatinem/rust-cache@v2
- uses: actions/setup-python@v3
with:
python-version: "3.10"
- name: Start emulated services
run: docker-compose up -d
- name: Build and install deltalake
run: |
pip install virtualenv
virtualenv venv
source venv/bin/activate
make develop
- name: Download Data Acceptance Tests (DAT) files
run: make setup-dat
- name: Run tests
run: |
source venv/bin/activate
python -m pytest -m '((s3 or azure) and integration) or not integration and not benchmark' --doctest-modules
- name: Test without pandas
run: |
source venv/bin/activate
pip uninstall --yes pandas
python -m pytest -m "not pandas and not integration and not benchmark"
pip install pandas
- name: Build Sphinx documentation
run: |
source venv/bin/activate
make build-documentation
benchmark:
name: Python Benchmark
runs-on: ubuntu-latest
env:
RUSTFLAGS: "-C debuginfo=line-tables-only"
CARGO_INCREMENTAL: 0
steps:
- uses: actions/checkout@v2
- name: Install latest nightly
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
components: rustfmt, clippy
- uses: Swatinem/rust-cache@v2
- uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Build deltalake in release mode
run: |
pip install virtualenv
virtualenv venv
source venv/bin/activate
MATURIN_EXTRA_ARGS=--release make develop
# Download previous benchmark result from cache (if exists)
- name: Download previous benchmark data
uses: actions/cache@v2
with:
path: ./cache
key: ${{ runner.os }}-benchmark
- name: Run benchmark
run: |
source venv/bin/activate
pytest tests/test_benchmark.py -m benchmark --benchmark-json output.json
- name: Store benchmark result
uses: benchmark-action/github-action-benchmark@v1
with:
tool: "pytest"
output-file-path: python/output.json
external-data-json-path: ./cache/benchmark-data.json
fail-on-alert: true
test-pyspark:
name: PySpark Integration Tests
runs-on: ubuntu-latest
env:
RUSTFLAGS: "-C debuginfo=line-tables-only"
CARGO_INCREMENTAL: 0
steps:
- uses: actions/checkout@v3
- name: Install latest nightly
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
components: rustfmt, clippy
- uses: Swatinem/rust-cache@v2
- uses: actions/setup-python@v3
with:
python-version: "3.10"
- uses: actions/setup-java@v2
with:
distribution: "zulu"
java-version: "11"
- name: Build and install deltalake
run: |
pip install virtualenv
virtualenv venv
source venv/bin/activate
make develop-pyspark
- name: Run tests
run: |
source venv/bin/activate
make test-pyspark
multi-python-running:
name: Running with Python ${{ matrix.python-version }}
runs-on: ubuntu-latest
env:
RUSTFLAGS: "-C debuginfo=line-tables-only"
CARGO_INCREMENTAL: 0
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Build and install deltalake
run: |
pip install virtualenv
virtualenv venv
source venv/bin/activate
make setup
maturin develop
- name: Run deltalake
run: |
source venv/bin/activate
python -c 'import deltalake'