diff --git a/.github/workflows/python_build.yml b/.github/workflows/python_build.yml index 2e6d3bf782..223c13f531 100644 --- a/.github/workflows/python_build.yml +++ b/.github/workflows/python_build.yml @@ -18,7 +18,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v2 with: - python-version: 3.7 + python-version: 3.8 - name: Check Python run: | @@ -36,20 +36,19 @@ jobs: run: make check-rust test-minimal: - name: Python Build (Python 3.7 PyArrow 8.0.0) + name: Python Build (Python 3.8 PyArrow 8.0.0) runs-on: ubuntu-latest env: RUSTFLAGS: "-C debuginfo=line-tables-only" CARGO_INCREMENTAL: 0 - # use the same environment we have for python release - container: quay.io/pypa/manylinux2014_x86_64:2022-09-24-4f086d0 steps: - # actions/checkout@v3 is a node action, which runs on a fairly new - # version of node. however, manylinux environment's glibc is too old for - # that version of the node. so we will have to use v1 instead, which is a - # docker based action. - - uses: actions/checkout@v1 + - uses: actions/checkout@v3 + + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: 3.8 - name: Install latest nightly uses: actions-rs/toolchain@v1 @@ -60,14 +59,8 @@ jobs: - uses: Swatinem/rust-cache@v2 - - name: Enable manylinux Python targets - run: | - echo "/opt/python/cp37-cp37m/bin" >> $GITHUB_PATH - - name: Build and install deltalake run: | - # Needed for openssl build - yum install -y perl-IPC-Cmd pip install virtualenv virtualenv venv source venv/bin/activate @@ -238,7 +231,7 @@ jobs: strategy: matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v3 diff --git a/README.md b/README.md index a22ba8a295..9aa98823c6 100644 --- a/README.md +++ b/README.md @@ -41,10 +41,10 @@ The Delta Lake project aims to unlock the power of the Deltalake for as many use by providing native low-level APIs aimed at developers and integrators, as well as a high-level operations API that lets you query, inspect, and operate your Delta Lake with ease. -| Source | Downloads | Installation Command | Docs | -| --------------------- | --------------------------------- | ----------------------- | --------------- | -| **[PyPi][pypi]** | [![Downloads][pypi-dl]][pypi] | `pip install deltalake` | [Docs][py-docs] | -| **[Crates.io][pypi]** | [![Downloads][crates-dl]][crates] | `cargo add deltalake` | [Docs][rs-docs] | +| Source | Downloads | Installation Command | Docs | +| ----------------------- | --------------------------------- | ----------------------- | --------------- | +| **[PyPi][pypi]** | [![Downloads][pypi-dl]][pypi] | `pip install deltalake` | [Docs][py-docs] | +| **[Crates.io][crates]** | [![Downloads][crates-dl]][crates] | `cargo add deltalake` | [Docs][rs-docs] | [pypi]: https://pypi.org/project/deltalake/ [pypi-dl]: https://img.shields.io/pypi/dm/deltalake?style=flat-square&color=00ADD4 @@ -130,36 +130,36 @@ of features outlined in the Delta [protocol][protocol] is also [tracked](#protoc ### Cloud Integrations -| Storage | Rust | Python | Comment | -| -------------------- | :-------------------: | :-------------------: | ----------------------------------- | -| Local | ![done] | ![done] | | -| S3 - AWS | ![done] | ![done] | requires lock for concurrent writes | -| S3 - MinIO | ![done] | ![done] | requires lock for concurrent writes | -| S3 - R2 | ![done] | ![done] | requires lock for concurrent writes | -| Azure Blob | ![done] | ![done] | | -| Azure ADLS Gen2 | ![done] | ![done] | | -| Microsoft OneLake | ![done] | ![done] | | -| Google Cloud Storage | ![done] | ![done] | | +| Storage | Rust | Python | Comment | +| -------------------- | :-----: | :-----: | ----------------------------------- | +| Local | ![done] | ![done] | | +| S3 - AWS | ![done] | ![done] | requires lock for concurrent writes | +| S3 - MinIO | ![done] | ![done] | requires lock for concurrent writes | +| S3 - R2 | ![done] | ![done] | requires lock for concurrent writes | +| Azure Blob | ![done] | ![done] | | +| Azure ADLS Gen2 | ![done] | ![done] | | +| Microsoft OneLake | ![done] | ![done] | | +| Google Cloud Storage | ![done] | ![done] | | ### Supported Operations -| Operation | Rust | Python | Description | -| --------------------- | :----------------------: | :-----------------: | ------------------------------------------- | -| Create | ![done] | ![done] | Create a new table | -| Read | ![done] | ![done] | Read data from a table | -| Vacuum | ![done] | ![done] | Remove unused files and log entries | -| Delete - partitions | | ![done] | Delete a table partition | -| Delete - predicates | ![done] | ![done] | Delete data based on a predicate | -| Optimize - compaction | ![done] | ![done] | Harmonize the size of data file | -| Optimize - Z-order | ![done] | ![done] | Place similar data into the same file | +| Operation | Rust | Python | Description | +| --------------------- | :----------------------: | :----------------------: | ------------------------------------------- | +| Create | ![done] | ![done] | Create a new table | +| Read | ![done] | ![done] | Read data from a table | +| Vacuum | ![done] | ![done] | Remove unused files and log entries | +| Delete - partitions | | ![done] | Delete a table partition | +| Delete - predicates | ![done] | ![done] | Delete data based on a predicate | +| Optimize - compaction | ![done] | ![done] | Harmonize the size of data file | +| Optimize - Z-order | ![done] | ![done] | Place similar data into the same file | | Merge | [![semi-done]][merge-rs] | [![semi-done]][merge-py] | Merge two tables (limited to full re-write) | -| FS check | ![done] | ![done] | Remove corrupted files from table | +| FS check | ![done] | ![done] | Remove corrupted files from table | ### Protocol Support Level | Writer Version | Requirement | Status | | -------------- | --------------------------------------------- | :------------------: | -| Version 2 | Append Only Tables | ![done] +| Version 2 | Append Only Tables | ![done] | | Version 2 | Column Invariants | ![done] | | Version 3 | Enforce `delta.checkpoint.writeStatsAsJson` | [![open]][writer-rs] | | Version 3 | Enforce `delta.checkpoint.writeStatsAsStruct` | [![open]][writer-rs] | diff --git a/python/Cargo.toml b/python/Cargo.toml index 12e64d8fef..2c85fcb262 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -14,9 +14,6 @@ name = "deltalake" crate-type = ["cdylib"] doc = false -[package.metadata.maturin] -name = "deltalake._internal" - [dependencies] # arrow arrow-schema = { workspace = true, features = ["serde"] } @@ -43,7 +40,7 @@ reqwest = { version = "*", features = ["native-tls-vendored"] } [dependencies.pyo3] version = "0.19" -features = ["extension-module", "abi3", "abi3-py37"] +features = ["extension-module", "abi3", "abi3-py38"] [dependencies.deltalake] path = "../crates/deltalake" diff --git a/python/pyproject.toml b/python/pyproject.toml index cc525fef50..09a7e4b37c 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["maturin>=0.14,<0.15"] +requires = ["maturin>=1,<2"] build-backend = "maturin" [project] @@ -7,18 +7,18 @@ name = "deltalake" description = "Native Delta Lake Python binding based on delta-rs with Pandas integration" readme = "README.md" license = {file = "LICENSE.txt"} -requires-python = ">=3.7" +requires-python = ">=3.8" keywords = ["deltalake", "delta", "datalake", "pandas", "arrow"] classifiers = [ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11" + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12" ] dependencies = [ - "pyarrow>=8", - 'typing-extensions;python_version<"3.8"', + "pyarrow>=8" ] [project.optional-dependencies] @@ -49,6 +49,9 @@ pyspark = [ documentation = "https://delta-io.github.io/delta-rs/" repository = "https://github.com/delta-io/delta-rs/tree/main/python/" +[tool.maturin] +module-name = "deltalake._internal" + [tool.mypy] files = "deltalake/*.py" exclude = "^tests"