Merge branch 'main' into docs-optimize-vacuum
rtyler authored Nov 17, 2023
2 parents 079ad6f + c15d768 commit 9abc86b
Showing 38 changed files with 1,140 additions and 614 deletions.
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
@@ -1,7 +1,7 @@
crates/ @wjones127 @roeap @rtyler
delta-inspect/ @wjones127 @rtyler
proofs/ @houqp
python/ @wjones127 @fvaleye @roeap
python/ @wjones127 @fvaleye @roeap @ion-elgreco
tlaplus/ @houqp
.github/ @wjones127 @rtyler
docs/ @MrPowers
4 changes: 3 additions & 1 deletion .github/workflows/docs.yml
@@ -35,13 +35,15 @@ jobs:
with:
src: docs/src/python

build:
build-deploy:
needs:
[
lint,
markdown-link-check,
]
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- uses: actions/checkout@v3

25 changes: 25 additions & 0 deletions .github/workflows/issue_comments.yml
@@ -0,0 +1,25 @@
name: Comment Commands
on:
  issue_comment:
    types: created

permissions:
  issues: write

jobs:
  issue_assign:
    runs-on: ubuntu-latest
    if: (!github.event.issue.pull_request) && github.event.comment.body == 'take'
    concurrency:
      # Only run one at a time per user
      group: ${{ github.actor }}-issue-assign
    steps:
      - run: |
          CODE=$(curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -LI https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees/${{ github.event.comment.user.login }} -o /dev/null -w '%{http_code}\n' -s)
          if [ "$CODE" -eq "204" ]
          then
            echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}"
            curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees
          else
            echo "Issue ${{ github.event.issue.number }} cannot be assigned to ${{ github.event.comment.user.login }}"
          fi
25 changes: 9 additions & 16 deletions .github/workflows/python_build.yml
@@ -18,7 +18,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.7
python-version: 3.8

- name: Check Python
run: |
@@ -36,20 +36,19 @@ jobs:
run: make check-rust

test-minimal:
name: Python Build (Python 3.7 PyArrow 8.0.0)
name: Python Build (Python 3.8 PyArrow 8.0.0)
runs-on: ubuntu-latest
env:
RUSTFLAGS: "-C debuginfo=line-tables-only"
CARGO_INCREMENTAL: 0

# use the same environment we have for python release
container: quay.io/pypa/manylinux2014_x86_64:2022-09-24-4f086d0
steps:
# actions/checkout@v3 is a node action, which runs on a fairly new
# version of node. however, manylinux environment's glibc is too old for
# that version of the node. so we will have to use v1 instead, which is a
# docker based action.
- uses: actions/checkout@v1
- uses: actions/checkout@v3

- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.8

- name: Install latest nightly
uses: actions-rs/toolchain@v1
@@ -60,14 +59,8 @@ jobs:

- uses: Swatinem/rust-cache@v2

- name: Enable manylinux Python targets
run: |
echo "/opt/python/cp37-cp37m/bin" >> $GITHUB_PATH
- name: Build and install deltalake
run: |
# Needed for openssl build
yum install -y perl-IPC-Cmd
pip install virtualenv
virtualenv venv
source venv/bin/activate
@@ -238,7 +231,7 @@ jobs:

strategy:
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v3
2 changes: 0 additions & 2 deletions .github/workflows/python_release.yml
@@ -128,8 +128,6 @@ jobs:
release-pypi-windows,
]
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Trigger the docs release event
uses: peter-evans/repository-dispatch@v2
4 changes: 4 additions & 0 deletions CONTRIBUTING.md
@@ -7,3 +7,7 @@ Please take note of our [code of conduct](CODE_OF_CONDUCT.md).
If you want to start contributing, first look at our good first issues: https://github.com/delta-io/delta-rs/contribute

If you want to contribute something more substantial, see our "Projects seeking contributors" section on our roadmap: https://github.com/delta-io/delta-rs/issues/1128

## Claiming an issue

If you want to claim an issue to work on, you can write the word `take` as a comment in it and you will be automatically assigned.
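For reference, a minimal sketch of what the `take` command's workflow (added above in `.github/workflows/issue_comments.yml`) does under the hood, written as a standalone shell session. The repository path `OWNER/REPO`, issue number `1234`, username `octocat`, and the `GITHUB_TOKEN` variable are placeholders, not values from this commit.

```bash
# Sketch only: placeholder owner/repo/issue/user; GITHUB_TOKEN must hold a token
# that is allowed to assign issues on the repository.
STATUS=$(curl -s -o /dev/null -w '%{http_code}' -L -I \
  -H "Authorization: token $GITHUB_TOKEN" \
  "https://api.github.com/repos/OWNER/REPO/issues/1234/assignees/octocat")

if [ "$STATUS" -eq 204 ]; then
  # A 204 from the assignee check means the user can be assigned.
  curl -s \
    -H "Authorization: token $GITHUB_TOKEN" \
    -d '{"assignees": ["octocat"]}' \
    "https://api.github.com/repos/OWNER/REPO/issues/1234/assignees"
else
  echo "octocat cannot be assigned to issue 1234"
fi
```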
31 changes: 16 additions & 15 deletions Cargo.toml
@@ -19,23 +19,24 @@ debug = "line-tables-only"

[workspace.dependencies]
# arrow
arrow = { version = "47" }
arrow-array = { version = "47" }
arrow-buffer = { version = "47" }
arrow-cast = { version = "47" }
arrow-ord = { version = "47" }
arrow-row = { version = "47" }
arrow-schema = { version = "47" }
arrow-select = { version = "47" }
parquet = { version = "47" }
arrow = { version = "48.0.1" }
arrow-array = { version = "48.0.1" }
arrow-buffer = { version = "48.0.1" }
arrow-cast = { version = "48.0.1" }
arrow-ord = { version = "48.0.1" }
arrow-row = { version = "48.0.1" }
arrow-schema = { version = "48.0.1" }
arrow-select = { version = "48.0.1" }
parquet = { version = "48.0.1" }

# datafusion
datafusion = { version = "32" }
datafusion-expr = { version = "32" }
datafusion-common = { version = "32" }
datafusion-proto = { version = "32" }
datafusion-sql = { version = "32" }
datafusion-physical-expr = { version = "32" }
datafusion = { version = "33.0.0" }
datafusion-expr = { version = "33.0.0" }
datafusion-common = { version = "33.0.0" }
datafusion-proto = { version = "33.0.0" }
datafusion-sql = { version = "33.0.0" }
datafusion-physical-expr = { version = "33.0.0" }


# serde
serde = { version = "1", features = ["derive"] }
50 changes: 25 additions & 25 deletions README.md
@@ -41,10 +41,10 @@ The Delta Lake project aims to unlock the power of the Deltalake for as many use
by providing native low-level APIs aimed at developers and integrators, as well as a high-level operations
API that lets you query, inspect, and operate your Delta Lake with ease.

| Source | Downloads | Installation Command | Docs |
| --------------------- | --------------------------------- | ----------------------- | --------------- |
| **[PyPi][pypi]** | [![Downloads][pypi-dl]][pypi] | `pip install deltalake` | [Docs][py-docs] |
| **[Crates.io][pypi]** | [![Downloads][crates-dl]][crates] | `cargo add deltalake` | [Docs][rs-docs] |
| Source | Downloads | Installation Command | Docs |
| ----------------------- | --------------------------------- | ----------------------- | --------------- |
| **[PyPi][pypi]** | [![Downloads][pypi-dl]][pypi] | `pip install deltalake` | [Docs][py-docs] |
| **[Crates.io][crates]** | [![Downloads][crates-dl]][crates] | `cargo add deltalake` | [Docs][rs-docs] |

[pypi]: https://pypi.org/project/deltalake/
[pypi-dl]: https://img.shields.io/pypi/dm/deltalake?style=flat-square&color=00ADD4
@@ -130,36 +130,36 @@ of features outlined in the Delta [protocol][protocol] is also [tracked](#protoc

### Cloud Integrations

| Storage | Rust | Python | Comment |
| -------------------- | :-------------------: | :-------------------: | ----------------------------------- |
| Local | ![done] | ![done] | |
| S3 - AWS | ![done] | ![done] | requires lock for concurrent writes |
| S3 - MinIO | ![done] | ![done] | requires lock for concurrent writes |
| S3 - R2 | ![done] | ![done] | requires lock for concurrent writes |
| Azure Blob | ![done] | ![done] | |
| Azure ADLS Gen2 | ![done] | ![done] | |
| Microsoft OneLake | ![done] | ![done] | |
| Google Cloud Storage | ![done] | ![done] | |
| Storage | Rust | Python | Comment |
| -------------------- | :-----: | :-----: | ----------------------------------- |
| Local | ![done] | ![done] | |
| S3 - AWS | ![done] | ![done] | requires lock for concurrent writes |
| S3 - MinIO | ![done] | ![done] | requires lock for concurrent writes |
| S3 - R2 | ![done] | ![done] | requires lock for concurrent writes |
| Azure Blob | ![done] | ![done] | |
| Azure ADLS Gen2 | ![done] | ![done] | |
| Microsoft OneLake | ![done] | ![done] | |
| Google Cloud Storage | ![done] | ![done] | |

### Supported Operations

| Operation | Rust | Python | Description |
| --------------------- | :----------------------: | :-----------------: | ------------------------------------------- |
| Create | ![done] | ![done] | Create a new table |
| Read | ![done] | ![done] | Read data from a table |
| Vacuum | ![done] | ![done] | Remove unused files and log entries |
| Delete - partitions | | ![done] | Delete a table partition |
| Delete - predicates | ![done] | ![done] | Delete data based on a predicate |
| Optimize - compaction | ![done] | ![done] | Harmonize the size of data file |
| Optimize - Z-order | ![done] | ![done] | Place similar data into the same file |
| Operation | Rust | Python | Description |
| --------------------- | :----------------------: | :----------------------: | ------------------------------------------- |
| Create | ![done] | ![done] | Create a new table |
| Read | ![done] | ![done] | Read data from a table |
| Vacuum | ![done] | ![done] | Remove unused files and log entries |
| Delete - partitions | | ![done] | Delete a table partition |
| Delete - predicates | ![done] | ![done] | Delete data based on a predicate |
| Optimize - compaction | ![done] | ![done] | Harmonize the size of data file |
| Optimize - Z-order | ![done] | ![done] | Place similar data into the same file |
| Merge | [![semi-done]][merge-rs] | [![semi-done]][merge-py] | Merge two tables (limited to full re-write) |
| FS check | ![done] | ![done] | Remove corrupted files from table |
| FS check | ![done] | ![done] | Remove corrupted files from table |

### Protocol Support Level

| Writer Version | Requirement | Status |
| -------------- | --------------------------------------------- | :------------------: |
| Version 2 | Append Only Tables | ![done]
| Version 2 | Append Only Tables | ![done] |
| Version 2 | Column Invariants | ![done] |
| Version 3 | Enforce `delta.checkpoint.writeStatsAsJson` | [![open]][writer-rs] |
| Version 3 | Enforce `delta.checkpoint.writeStatsAsStruct` | [![open]][writer-rs] |
3 changes: 2 additions & 1 deletion crates/deltalake-core/Cargo.toml
@@ -111,7 +111,7 @@ reqwest = { version = "0.11.18", default-features = false, features = [
# Datafusion
dashmap = { version = "5", optional = true }

sqlparser = { version = "0.38", optional = true }
sqlparser = { version = "0.39", optional = true }

# NOTE dependencies only for integration tests
fs_extra = { version = "1.3.0", optional = true }
@@ -130,6 +130,7 @@ tempfile = "3"
tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
utime = "0.3"
hyper = { version = "0.14", features = ["server"] }
criterion = "0.5"

[features]
azure = ["object_store/azure"]
4 changes: 2 additions & 2 deletions crates/deltalake-core/benches/read_checkpoint.rs
@@ -1,6 +1,6 @@
use criterion::{criterion_group, criterion_main, Criterion};
use deltalake::table::state::DeltaTableState;
use deltalake::DeltaTableConfig;
use deltalake_core::table::state::DeltaTableState;
use deltalake_core::DeltaTableConfig;
use std::fs::File;
use std::io::Read;

4 changes: 4 additions & 0 deletions crates/deltalake-core/src/delta_datafusion/expr.rs
@@ -70,6 +70,10 @@ impl<'a> ContextProvider for DeltaContextProvider<'a> {
    fn get_window_meta(&self, name: &str) -> Option<Arc<datafusion_expr::WindowUDF>> {
        self.state.window_functions().get(name).cloned()
    }

    fn get_table_source(&self, _name: TableReference) -> DFResult<Arc<dyn TableSource>> {
        unimplemented!()
    }
}

/// Parse a string predicate into an `Expr`