Skip to content

Commit

Permalink
Merge branch 'main' into patch-1
Browse files Browse the repository at this point in the history
  • Loading branch information
rtyler authored Nov 30, 2023
2 parents ed1f531 + c0b652e commit c75a085
Show file tree
Hide file tree
Showing 121 changed files with 7,680 additions and 3,791 deletions.
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
crates/ @wjones127 @roeap @rtyler
delta-inspect/ @wjones127 @rtyler
proofs/ @houqp
python/ @wjones127 @fvaleye @roeap
python/ @wjones127 @fvaleye @roeap @ion-elgreco
tlaplus/ @houqp
.github/ @wjones127 @rtyler
docs/ @MrPowers
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,11 @@ jobs:

- name: Run tests with rustls (default)
run: |
cargo test -p deltalake --features integration_test,azure,s3,gcs,datafusion
cargo test --features integration_test,azure,s3,gcs,datafusion
- name: Run tests with native-tls
run: |
cargo test -p deltalake --no-default-features --features integration_test,s3-native-tls,datafusion
cargo test --no-default-features --features integration_test,s3-native-tls,datafusion
parquet2_test:
runs-on: ubuntu-latest
Expand Down
26 changes: 12 additions & 14 deletions .github/workflows/dev_pr/labeler.yml
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
rust:
- delta-inspect/**/*
- proofs/**/*
- rust/**/*

binding/python:
- python/**/*

ci:
- .github/**.*

Expand All @@ -16,17 +8,23 @@ documentation:
- CONTRIBUTING.md
- python/docs/**/*

storage/aws:
- aws/**/*

delta-inspect:
- delta-inspect/**/*

binding/rust:
- rust/**/*

proofs:
- proofs/**/*

tlaplus:
- tlaplus/**/*

binding/python:
- python/**/*

binding/rust:
- crates/**/*

crate/core:
- crates/deltalake-core/**/*

crate/sql:
- crates/deltalake-sql/**/*
4 changes: 3 additions & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,15 @@ jobs:
with:
src: docs/src/python

build:
build-deploy:
needs:
[
lint,
markdown-link-check,
]
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- uses: actions/checkout@v3

Expand Down
25 changes: 25 additions & 0 deletions .github/workflows/issue_comments.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Lets a contributor claim an issue by commenting the single word `take` on it.
name: Comment Commands
on:
  issue_comment:
    types: created

# The job only needs to modify issue assignees.
permissions:
  issues: write

jobs:
  issue_assign:
    runs-on: ubuntu-latest
    # Ignore comments made on pull requests; react only when the whole comment
    # body is exactly `take`.
    if: (!github.event.issue.pull_request) && github.event.comment.body == 'take'
    concurrency:
      # Only run one at a time per user
      group: ${{ github.actor }}-issue-assign
    steps:
      # Event data and the token are handed to the script through environment
      # variables instead of being expanded into the script text, so their
      # values are never parsed as shell code.
      - env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}
          ISSUE_NUMBER: ${{ github.event.issue.number }}
          ASSIGNEE: ${{ github.event.comment.user.login }}
        run: |
          ASSIGNEES_URL="https://api.github.com/repos/${REPO}/issues/${ISSUE_NUMBER}/assignees"
          # Ask the API about this commenter first; only the HTTP status code is kept.
          CODE=$(curl -H "Authorization: token ${GH_TOKEN}" -LI "${ASSIGNEES_URL}/${ASSIGNEE}" -o /dev/null -w '%{http_code}\n' -s)
          # A 204 response is treated as "this user may be assigned".
          if [ "$CODE" -eq "204" ]
          then
            echo "Assigning issue ${ISSUE_NUMBER} to ${ASSIGNEE}"
            curl -H "Authorization: token ${GH_TOKEN}" -d "{\"assignees\": [\"${ASSIGNEE}\"]}" "${ASSIGNEES_URL}"
          else
            echo "Issue ${ISSUE_NUMBER} cannot be assigned to ${ASSIGNEE}"
          fi
27 changes: 10 additions & 17 deletions .github/workflows/python_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.7
python-version: 3.8

- name: Check Python
run: |
Expand All @@ -36,20 +36,19 @@ jobs:
run: make check-rust

test-minimal:
name: Python Build (Python 3.7 PyArrow 8.0.0)
name: Python Build (Python 3.8 PyArrow 8.0.0)
runs-on: ubuntu-latest
env:
RUSTFLAGS: "-C debuginfo=line-tables-only"
CARGO_INCREMENTAL: 0

# use the same environment we have for python release
container: quay.io/pypa/manylinux2014_x86_64:2022-09-24-4f086d0
steps:
# actions/checkout@v3 is a node action, which runs on a fairly new
# version of node. however, manylinux environment's glibc is too old for
# that version of the node. so we will have to use v1 instead, which is a
# docker based action.
- uses: actions/checkout@v1
- uses: actions/checkout@v3

- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.8

- name: Install latest nightly
uses: actions-rs/toolchain@v1
Expand All @@ -60,14 +59,8 @@ jobs:

- uses: Swatinem/rust-cache@v2

- name: Enable manylinux Python targets
run: |
echo "/opt/python/cp37-cp37m/bin" >> $GITHUB_PATH
- name: Build and install deltalake
run: |
# Needed for openssl build
yum install -y perl-IPC-Cmd
pip install virtualenv
virtualenv venv
source venv/bin/activate
Expand Down Expand Up @@ -125,7 +118,7 @@ jobs:
- name: Run tests
run: |
source venv/bin/activate
python -m pytest -m '((s3 or azure) and integration) or not integration and not benchmark'
python -m pytest -m '((s3 or azure) and integration) or not integration and not benchmark' --doctest-modules
- name: Test without pandas
run: |
Expand Down Expand Up @@ -238,7 +231,7 @@ jobs:

strategy:
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v3
Expand Down
2 changes: 0 additions & 2 deletions .github/workflows/python_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,6 @@ jobs:
release-pypi-windows,
]
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Trigger the docs release event
uses: peter-evans/repository-dispatch@v2
Expand Down
79 changes: 79 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,82 @@ Please take note of our [code of conduct](CODE_OF_CONDUCT.md).
If you want to start contributing, first look at our good first issues: https://github.com/delta-io/delta-rs/contribute

If you want to contribute something more substantial, see our "Projects seeking contributors" section on our roadmap: https://github.com/delta-io/delta-rs/issues/1128

## Claiming an issue

If you want to claim an issue to work on, you can write the word `take` as a comment in it and you will be automatically assigned.

## Quick start

- Install Rust, e.g. as described [here](https://doc.rust-lang.org/cargo/getting-started/installation.html)
- Have a compatible Python version installed (check `python/pyproject.toml` for current requirement)
- Create a Python virtual environment (required for development builds), e.g. as described [here](https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/)
- Build the project for development (this requires an active virtual environment and will also install `deltalake` in that virtual environment)
```
cd python
make develop
```

- Run some Python code, e.g. to run a specific test
```
python -m pytest tests/test_writer.py -s -k "test_with_deltalake_schema"
```

- Run some Rust code, e.g. run an example
```
cd crates/deltalake
cargo run --example basic_operations
```

## Run the docs locally
*This serves your local contents of docs via a web browser, handy for checking what they look like if you are making changes to docs or docstrings*
```
(cd python; make develop)
pip install -r docs/requirements.txt
mkdocs serve
```

## To make a pull request (PR)
- Make sure all the following steps run/pass locally before submitting a PR
```
cargo fmt -- --check
cd python
make check-rust
make check-python
make develop
make unit-test
make build-docs
```

## Developing in VSCode

*These are just some basic steps/components to get you started, there are many other very useful extensions for VSCode*

- For a better Rust development experience, install the [Rust extension](https://marketplace.visualstudio.com/items?itemName=1YiB.rust-bundle)
- For debugging Rust code, install [CodeLLDB](https://marketplace.visualstudio.com/items?itemName=vadimcn.vscode-lldb). The extension should even create Debug launch configurations for the project if you allow it, an easy way to get started. Just set a breakpoint and run the relevant configuration.
- For debugging from Python into Rust, follow this procedure:
1. Add this to `.vscode/launch.json`
```
{
"type": "lldb",
"request": "attach",
"name": "LLDB Attach to Python",
"program": "${command:python.interpreterPath}",
"pid": "${command:pickMyProcess}",
"args": [],
"stopOnEntry": false,
"environment": [],
"externalConsole": true,
"MIMode": "lldb",
"cwd": "${workspaceFolder}"
}
```
2. Add a `breakpoint()` statement somewhere in your Python code (main function or at any point in Python code you know will be executed when you run it)
3. Add a breakpoint in Rust code in VSCode editor where you want to drop into the debugger
4. Run the relevant Python code function in your terminal, execution should drop into the Python debugger showing `PDB` prompt
5. Run the following in that prompt to get the Python process ID: `import os; os.getpid()`
6. Run the `LLDB Attach to Python` from the `Run and Debug` panel of VSCode. This will prompt you for a Process ID to attach to, enter the Python process ID obtained earlier (this will also be in the dropdown but that dropdown will have many process IDs)
7. LLDB may take a couple of seconds to attach to the process
8. When the debugger is attached to the process (you will notice the debugger panels get filled with extra info), enter `c`+Enter in the `PDB` prompt in your terminal - the execution should continue until the breakpoint in Rust code is hit. From this point it's a standard debugging process.


31 changes: 16 additions & 15 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,24 @@ debug = "line-tables-only"

[workspace.dependencies]
# arrow
arrow = { version = "47" }
arrow-array = { version = "47" }
arrow-buffer = { version = "47" }
arrow-cast = { version = "47" }
arrow-ord = { version = "47" }
arrow-row = { version = "47" }
arrow-schema = { version = "47" }
arrow-select = { version = "47" }
parquet = { version = "47" }
arrow = { version = "48.0.1" }
arrow-array = { version = "48.0.1" }
arrow-buffer = { version = "48.0.1" }
arrow-cast = { version = "48.0.1" }
arrow-ord = { version = "48.0.1" }
arrow-row = { version = "48.0.1" }
arrow-schema = { version = "48.0.1" }
arrow-select = { version = "48.0.1" }
parquet = { version = "48.0.1" }

# datafusion
datafusion = { version = "32" }
datafusion-expr = { version = "32" }
datafusion-common = { version = "32" }
datafusion-proto = { version = "32" }
datafusion-sql = { version = "32" }
datafusion-physical-expr = { version = "32" }
datafusion = { version = "33.0.0" }
datafusion-expr = { version = "33.0.0" }
datafusion-common = { version = "33.0.0" }
datafusion-proto = { version = "33.0.0" }
datafusion-sql = { version = "33.0.0" }
datafusion-physical-expr = { version = "33.0.0" }


# serde
serde = { version = "1", features = ["derive"] }
Expand Down
Loading

0 comments on commit c75a085

Please sign in to comment.