Skip to content

Commit

Permalink
Merge branch 'main' into move-unity
Browse files Browse the repository at this point in the history
  • Loading branch information
hntd187 authored Dec 15, 2024
2 parents de21779 + a51d75a commit cb3fb18
Show file tree
Hide file tree
Showing 12 changed files with 1,296 additions and 103 deletions.
13 changes: 10 additions & 3 deletions .github/actions/setup-env/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ name: "Setup Python and Rust Environment"
description: "Set up Python, virtual environment, and Rust toolchain"

inputs:

python-version:
description: "The Python version to set up"
required: true
Expand All @@ -17,7 +16,15 @@ runs:
using: "composite"

steps:

- name: checkout
uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v3
with:
enable-cache: true
cache-dependency-glob: "python/uv.lock"

- name: Set up Python ${{ inputs.python-version }}
uses: actions/setup-python@v4
with:
Expand All @@ -31,4 +38,4 @@ runs:
override: true
components: rustfmt, clippy

- uses: Swatinem/rust-cache@v2
- uses: Swatinem/rust-cache@v2
5 changes: 1 addition & 4 deletions .github/workflows/python_benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ jobs:

- name: Build deltalake in release mode
run: |
python -m venv venv
source venv/bin/activate
MATURIN_EXTRA_ARGS=--release make develop
# Download previous benchmark result from cache (if exists)
Expand All @@ -41,8 +39,7 @@ jobs:

- name: Run benchmark
run: |
source venv/bin/activate
pytest tests/test_benchmark.py -m benchmark --benchmark-json output.json
uv run pytest tests/test_benchmark.py -m benchmark --benchmark-json output.json
- name: Store benchmark result
uses: benchmark-action/github-action-benchmark@v1
Expand Down
47 changes: 13 additions & 34 deletions .github/workflows/python_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@ jobs:

- name: Check Python
run: |
python -m venv venv
source venv/bin/activate
pip install ruff==0.5.2 mypy==1.10.1 types-dataclasses typing-extensions
uv pip install ruff==0.5.2 mypy==1.10.1 types-dataclasses typing-extensions --system
make check-python
- name: Check Rust
Expand All @@ -47,18 +45,14 @@ jobs:

- name: Build and install deltalake
run: |
python -m venv venv
source venv/bin/activate
make setup
# Install minimum PyArrow version
pip install -e .[pandas,devel] pyarrow==16.0.0
uv sync --extra devel --extra pandas
uv pip install pyarrow==16.0.0
env:
RUSTFLAGS: "-C debuginfo=line-tables-only"

- name: Run tests
run: |
source venv/bin/activate
make unit-test
run: make unit-test

test:
name: Python Build (Python 3.10 PyArrow latest)
Expand All @@ -77,25 +71,19 @@ jobs:
run: docker compose up -d

- name: Build and install deltalake
run: |
python -m venv venv
source venv/bin/activate
make develop
run: make develop

- name: Download Data Acceptance Tests (DAT) files
run: make setup-dat

- name: Run tests
run: |
source venv/bin/activate
python -m pytest -m '((s3 or azure) and integration) or not integration and not benchmark' --doctest-modules
run: uv run pytest -m '((s3 or azure) and integration) or not integration and not benchmark' --doctest-modules

- name: Test without pandas
run: |
source venv/bin/activate
pip uninstall --yes pandas
python -m pytest -m "not pandas and not integration and not benchmark"
pip install pandas
uv pip uninstall pandas
uv run pytest -m "not pandas and not integration and not benchmark"
uv pip install pandas
test-pyspark:
name: PySpark Integration Tests
Expand All @@ -116,15 +104,10 @@ jobs:
java-version: "11"

- name: Build and install deltalake
run: |
python -m venv venv
source venv/bin/activate
make develop-pyspark
run: make develop-pyspark

- name: Run tests
run: |
source venv/bin/activate
make test-pyspark
run: make test-pyspark

multi-python-running:
name: Running with Python ${{ matrix.python-version }}
Expand All @@ -147,12 +130,8 @@ jobs:

- name: Build and install deltalake
run: |
python -m venv venv
source venv/bin/activate
make setup
maturin develop
uv sync
- name: Run deltalake
run: |
source venv/bin/activate
python -c 'import deltalake'
uv run python -c 'import deltalake'
6 changes: 6 additions & 0 deletions crates/core/src/operations/transaction/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ use futures::future::BoxFuture;
use object_store::path::Path;
use object_store::Error as ObjectStoreError;
use serde_json::Value;
use tracing::warn;

use self::conflict_checker::{TransactionInfo, WinningCommitSummary};
use crate::checkpoints::{cleanup_expired_logs_for, create_checkpoint_for};
Expand Down Expand Up @@ -702,6 +703,11 @@ impl PostCommit<'_> {
log_store: &LogStoreRef,
version: i64,
) -> DeltaResult<()> {
if !table_state.load_config().require_files {
warn!("Checkpoint creation in post_commit_hook has been skipped due to table being initialized without files.");
return Ok(());
}

let checkpoint_interval = table_state.config().checkpoint_interval() as i64;
if ((version + 1) % checkpoint_interval) == 0 {
create_checkpoint_for(version, table_state, log_store.as_ref()).await?
Expand Down
6 changes: 6 additions & 0 deletions crates/core/src/protocol/checkpoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,12 @@ pub async fn create_checkpoint_for(
state: &DeltaTableState,
log_store: &dyn LogStore,
) -> Result<(), ProtocolError> {
if !state.load_config().require_files {
return Err(ProtocolError::Generic(
"Table has not yet been initialized with files, therefore creating a checkpoint is not possible.".to_string()
));
}

if version != state.version() {
error!(
"create_checkpoint_for called with version {version} but table state contains: {}. The table state may need to be reloaded",
Expand Down
4 changes: 3 additions & 1 deletion python/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,6 @@ docs/build
*.so

# dat data
dat-data
dat-data

wheels/
22 changes: 11 additions & 11 deletions python/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,25 @@

## Workflow

Most of the workflow is based on the `Makefile` and the `maturin` CLI tool.
Most of the workflow is based on the `Makefile`, the [`maturin`](https://www.maturin.rs/) CLI tool, and [`uv`](https://docs.astral.sh/uv/).

#### Setup your local environment with virtualenv
### Setup your local environment with virtualenv

```bash
make setup-venv
make setup
```

#### Activate it
### Activate it

```bash
source ./venv/bin/activate
source ./.venv/bin/activate
```

#### Ready to develop with maturin
### Ready to develop with maturin

[maturin](https://github.com/PyO3/maturin) is used to build the Python package.
Install delta-rs in the current virtualenv

Install delta-rs in the current virtualenv:

```bash
make develop
Expand Down Expand Up @@ -53,12 +55,10 @@ make unit-test
1. Make a new PR to update the version in pyproject.toml.
2. Once merged, push a tag of the format `python-vX.Y.Z`. This will trigger CI
to create and publish release artifacts.
3. In GitHub, create a new release based on the new tag. For release notes,
3. In GitHub, create a new release based on the new tag. For release notes,
use the generator as a starting point, but please revise them for brevity.
Remove anything that is dev-facing only (chores), and bring all important
changes to the top, leaving less important changes (such as dependabot
changes to the top, leaving less important changes (such as dependabot
updates) at the bottom.
4. Once the artifacts are showing up in PyPI, announce the release in the delta-rs
Slack channel. Be sure to give a shout-out to the new contributors.


40 changes: 18 additions & 22 deletions python/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,10 @@ MATURIN_VERSION := $(shell grep 'requires =' pyproject.toml | cut -d= -f2- | tr
PACKAGE_VERSION := $(shell grep version Cargo.toml | head -n 1 | awk '{print $$3}' | tr -d '"' )
DAT_VERSION := 0.0.2

.PHONY: setup-venv
setup-venv: ## Setup the virtualenv
$(info --- Setup virtualenv ---)
python -m venv $(VENV)

.PHONY: setup
setup: ## Setup the requirements
$(info --- Setup dependencies ---)
pip install "$(MATURIN_VERSION)"
uv sync --extra devel --extra pandas

.PHONY: setup-dat
setup-dat: ## Download DAT test files
Expand All @@ -28,33 +23,34 @@ setup-dat: ## Download DAT test files
.PHONY: build
build: setup ## Build Python binding of delta-rs
$(info --- Build Python binding ---)
maturin build $(MATURIN_EXTRA_ARGS)
uvx --from 'maturin[zig]' maturin build $(MATURIN_EXTRA_ARGS)

.PHONY: develop
develop: setup ## Install Python binding of delta-rs
$(info --- Develop with Python binding ---)
maturin develop --extras=devel,pandas $(MATURIN_EXTRA_ARGS)
uvx --from 'maturin[zig]' maturin develop --extras=devel,pandas $(MATURIN_EXTRA_ARGS)

.PHONY: install
install: build ## Install Python binding of delta-rs
$(info --- Uninstall Python binding ---)
pip uninstall -y deltalake
uv pip uninstall deltalake
$(info --- Install Python binding ---)
$(eval TARGET_WHEEL := $(shell ls ../target/wheels/deltalake-${PACKAGE_VERSION}-*.whl))
pip install $(TARGET_WHEEL)[devel,pandas]
uv pip install $(TARGET_WHEEL)[devel,pandas]

.PHONY: develop-pyspark
develop-pyspark: setup
develop-pyspark:
uv sync --all-extras
$(info --- Develop with Python binding ---)
maturin develop --extras=devel,pandas,pyspark $(MATURIN_EXTRA_ARGS)
uvx --from 'maturin[zig]' maturin develop --extras=devel,pandas,pyspark $(MATURIN_EXTRA_ARGS)

.PHONY: format
format: ## Format the code
$(info --- Rust format ---)
cargo fmt
$(info --- Python format ---)
ruff check . --fix
ruff format .
uv run ruff check . --fix
uv run ruff format .

.PHONY: check-rust
check-rust: ## Run check on Rust
Expand All @@ -66,35 +62,35 @@ check-rust: ## Run check on Rust
.PHONY: check-python
check-python: ## Run check on Python
$(info Check Python format)
ruff format --check --diff .
uv run ruff format --check --diff .
$(info Check Python linting)
ruff check .
uv run ruff check .
$(info Check Python mypy)
mypy
uv run mypy

.PHONY: unit-test
unit-test: ## Run unit test
$(info --- Run Python unit-test ---)
python -m pytest --doctest-modules
uv run pytest --doctest-modules

.PHONY: test-cov
test-cov: ## Create coverage report
$(info --- Run Python unit-test ---)
python -m pytest --doctest-modules --cov --cov-config=pyproject.toml --cov-report=term --cov-report=html
uv run pytest --doctest-modules --cov --cov-config=pyproject.toml --cov-report=term --cov-report=html

.PHONY: test-pyspark
test-pyspark:
python -m pytest -m 'pyspark and integration'
uv run pytest -m 'pyspark and integration'

.PHONY: build-documentation
build-documentation: ## Build documentation with Sphinx
$(info --- Run build of the Sphinx documentation ---)
sphinx-build -Wn -b html -d ./docs/build/doctrees ./docs/source ./docs/build/html
uv run sphinx-build -Wn -b html -d ./docs/build/doctrees ./docs/source ./docs/build/html

.PHONY: build-docs
build-docs: ## Build documentation with mkdocs
$(info --- Run build of the documentation ---)
(cd ..; pip install -r docs/requirements.txt; mkdocs build)
(cd ..; uv pip install -r docs/requirements.txt; mkdocs build)

.PHONY: clean
clean: ## Run clean
Expand Down
Loading

0 comments on commit cb3fb18

Please sign in to comment.