From b074d22524d85598e6e414b3a80c9906af09cb1c Mon Sep 17 00:00:00 2001 From: Denny Lee Date: Thu, 30 Nov 2023 23:13:49 -0800 Subject: [PATCH 1/7] Update pyarrow to 14.0.1 --- python/CONTRIBUTING.md | 8 ++++---- python/Cargo.toml | 2 +- python/pyproject.toml | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/CONTRIBUTING.md b/python/CONTRIBUTING.md index 66d8ed623c..2bf638c51f 100644 --- a/python/CONTRIBUTING.md +++ b/python/CONTRIBUTING.md @@ -7,12 +7,12 @@ Most of the workflow is based on the `Makefile` and the `maturin` CLI tool. #### Setup your local environment with virtualenv ```bash -$ make setup-venv +make setup-venv ``` #### Activate it ```bash -$ source ./venv/bin/activate +source ./venv/bin/activate ``` #### Ready to develop with maturin @@ -21,13 +21,13 @@ $ source ./venv/bin/activate Install delta-rs in the current virtualenv ```bash -$ make develop +make develop ``` Then, list all the available tasks ```bash -$ make help +make help ``` Format: diff --git a/python/Cargo.toml b/python/Cargo.toml index 5194a2fc22..0ba44e5dbd 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "deltalake-python" -version = "0.13.0" +version = "0.13.1" authors = ["Qingping Hou ", "Will Jones "] homepage = "https://github.com/delta-io/delta-rs" license = "Apache-2.0" diff --git a/python/pyproject.toml b/python/pyproject.toml index 6ffe4ca14c..8215946a9c 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -18,7 +18,7 @@ classifiers = [ "Programming Language :: Python :: 3.12" ] dependencies = [ - "pyarrow>=8" + "pyarrow>=14.0.1" ] [project.optional-dependencies] @@ -42,7 +42,7 @@ devel = [ pyspark = [ "pyspark", "delta-spark", - "numpy==1.22.2" # pyspark is no compatible with latest numpy + "numpy==1.22.2" # pyspark is not compatible with latest numpy ] [project.urls] From 54d60894e47ea24d02e392c231762eab21454533 Mon Sep 17 00:00:00 2001 From: Denny Lee Date: Fri, 1 Dec 2023 17:06:50 -0800 Subject: [PATCH 2/7] Update to include hotfix instead of forcing 14.0.1 --- python/pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 8215946a9c..2a413f901f 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -18,7 +18,8 @@ classifiers = [ "Programming Language :: Python :: 3.12" ] dependencies = [ - "pyarrow>=14.0.1" + "pyarrow>=8", + "pyarrow-hotfix", ] [project.optional-dependencies] From 81505b8c866f4b53ef3a589bfe0aaeefd456cb3e Mon Sep 17 00:00:00 2001 From: Denny Lee Date: Sun, 10 Dec 2023 13:11:50 -0800 Subject: [PATCH 3/7] Include pyarrow_hotfix by mypy fails --- python/deltalake/_internal.pyi | 1 + python/deltalake/fs.py | 1 + python/deltalake/schema.py | 1 + python/deltalake/table.py | 1 + python/deltalake/writer.py | 1 + python/pyproject.toml | 3 +-- python/tests/pyspark_integration/utils.py | 1 + 7 files changed, 7 insertions(+), 2 deletions(-) diff --git a/python/deltalake/_internal.pyi b/python/deltalake/_internal.pyi index e1f5288b81..1314245df4 100644 --- a/python/deltalake/_internal.pyi +++ b/python/deltalake/_internal.pyi @@ -8,6 +8,7 @@ else: import pyarrow import pyarrow.fs as fs +import pyarrow_hotfix # noqa: F401; addresses CVE-2023-47248; # type: ignore from deltalake.writer import AddAction diff --git a/python/deltalake/fs.py b/python/deltalake/fs.py index 12e33f40e3..2a3eb5accf 100644 --- a/python/deltalake/fs.py +++ b/python/deltalake/fs.py @@ -1,6 +1,7 @@ from typing import Dict, List, Optional import pyarrow as pa +import pyarrow_hotfix # noqa: F401; addresses CVE-2023-47248; # type: ignore from pyarrow.fs import FileInfo, FileSelector, FileSystemHandler from ._internal import DeltaFileSystemHandler diff --git a/python/deltalake/schema.py b/python/deltalake/schema.py index a22725fdc5..992fda0dbd 100644 --- a/python/deltalake/schema.py +++ b/python/deltalake/schema.py @@ -2,6 +2,7 @@ import pyarrow as pa import pyarrow.dataset as ds +import pyarrow_hotfix # noqa: F401; addresses CVE-2023-47248; # type: ignore from ._internal import ArrayType as ArrayType from ._internal import Field as Field diff --git a/python/deltalake/table.py b/python/deltalake/table.py index 3ac28acf88..b02a90b1f3 100644 --- a/python/deltalake/table.py +++ b/python/deltalake/table.py @@ -22,6 +22,7 @@ import pyarrow import pyarrow.dataset as ds import pyarrow.fs as pa_fs +import pyarrow_hotfix # noqa: F401; addresses CVE-2023-47248; # type: ignore from pyarrow.dataset import ( Expression, FileSystemDataset, diff --git a/python/deltalake/writer.py b/python/deltalake/writer.py index bb69fee457..af0982e925 100644 --- a/python/deltalake/writer.py +++ b/python/deltalake/writer.py @@ -34,6 +34,7 @@ import pyarrow as pa import pyarrow.dataset as ds import pyarrow.fs as pa_fs +import pyarrow_hotfix # noqa: F401; addresses CVE-2023-47248; # type: ignore from pyarrow.lib import RecordBatchReader from ._internal import DeltaDataChecker as _DeltaDataChecker diff --git a/python/pyproject.toml b/python/pyproject.toml index 2a413f901f..e9fc7389af 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -71,7 +71,6 @@ warn_return_any = false implicit_reexport = true strict_equality = true - [tool.black] include = '\.pyi?$' exclude = "venv" @@ -102,4 +101,4 @@ markers = [ "azure: marks tests as integration tests with Azure Blob Store", "pandas: marks tests that require pandas", "pyspark: marks tests that require pyspark", -] +] \ No newline at end of file diff --git a/python/tests/pyspark_integration/utils.py b/python/tests/pyspark_integration/utils.py index 5ec23317a0..1860e8fb39 100644 --- a/python/tests/pyspark_integration/utils.py +++ b/python/tests/pyspark_integration/utils.py @@ -1,6 +1,7 @@ from typing import List import pyarrow as pa +import pyarrow_hotfix # noqa: F401; addresses CVE-2023-47248; # type: ignore try: import delta From 8e08cfd2cb23ccaf76a0612de2ff77eda4cee28d Mon Sep 17 00:00:00 2001 From: Denny Lee Date: Sun, 10 Dec 2023 13:16:20 -0800 Subject: [PATCH 4/7] Update cargo to match --- python/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/Cargo.toml b/python/Cargo.toml index 0ba44e5dbd..a9936a483c 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "deltalake-python" -version = "0.13.1" +version = "0.14.0" authors = ["Qingping Hou ", "Will Jones "] homepage = "https://github.com/delta-io/delta-rs" license = "Apache-2.0" From edba4fea45bb1688d1d9df11862c8c85e047f2fd Mon Sep 17 00:00:00 2001 From: Denny Lee Date: Sun, 10 Dec 2023 15:48:40 -0800 Subject: [PATCH 5/7] Adding stub for mypy tests --- python/stubs/pyarrow_hotfix/fs.pyi | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 python/stubs/pyarrow_hotfix/fs.pyi diff --git a/python/stubs/pyarrow_hotfix/fs.pyi b/python/stubs/pyarrow_hotfix/fs.pyi new file mode 100644 index 0000000000..3dfd4db2f1 --- /dev/null +++ b/python/stubs/pyarrow_hotfix/fs.pyi @@ -0,0 +1,11 @@ +from typing import Any + +FileSystem: Any +PyFileSystem: Any +BufferReader: Any +FileInfo: Any +FileType: Any +FileSelector: Any + +class FileSystemHandler: + pass From 970ca7a60dfed1b1e694b9cdb4de53bab0aa5f66 Mon Sep 17 00:00:00 2001 From: ion-elgreco <15728914+ion-elgreco@users.noreply.github.com> Date: Thu, 14 Dec 2023 00:21:42 +0100 Subject: [PATCH 6/7] import only in init --- python/deltalake/__init__.py | 2 ++ python/deltalake/_internal.pyi | 1 - python/deltalake/fs.py | 1 - python/deltalake/schema.py | 1 - python/deltalake/table.py | 1 - python/deltalake/writer.py | 1 - python/stubs/pyarrow_hotfix/__init__.pyi | 0 python/stubs/pyarrow_hotfix/fs.pyi | 11 ----------- python/tests/pyspark_integration/utils.py | 1 - 9 files changed, 2 insertions(+), 17 deletions(-) create mode 100644 python/stubs/pyarrow_hotfix/__init__.pyi delete mode 100644 python/stubs/pyarrow_hotfix/fs.pyi diff --git a/python/deltalake/__init__.py b/python/deltalake/__init__.py index b10a708309..4f74cba93d 100644 --- a/python/deltalake/__init__.py +++ b/python/deltalake/__init__.py @@ -1,3 +1,5 @@ +import pyarrow_hotfix # noqa: F401; addresses CVE-2023-47248; # type: ignore + from ._internal import __version__ as __version__ from ._internal import rust_core_version as rust_core_version from .data_catalog import DataCatalog as DataCatalog diff --git a/python/deltalake/_internal.pyi b/python/deltalake/_internal.pyi index 68503c1cc9..228488d91a 100644 --- a/python/deltalake/_internal.pyi +++ b/python/deltalake/_internal.pyi @@ -8,7 +8,6 @@ else: import pyarrow import pyarrow.fs as fs -import pyarrow_hotfix # noqa: F401; addresses CVE-2023-47248; # type: ignore from deltalake.writer import AddAction diff --git a/python/deltalake/fs.py b/python/deltalake/fs.py index 2a3eb5accf..12e33f40e3 100644 --- a/python/deltalake/fs.py +++ b/python/deltalake/fs.py @@ -1,7 +1,6 @@ from typing import Dict, List, Optional import pyarrow as pa -import pyarrow_hotfix # noqa: F401; addresses CVE-2023-47248; # type: ignore from pyarrow.fs import FileInfo, FileSelector, FileSystemHandler from ._internal import DeltaFileSystemHandler diff --git a/python/deltalake/schema.py b/python/deltalake/schema.py index 992fda0dbd..a22725fdc5 100644 --- a/python/deltalake/schema.py +++ b/python/deltalake/schema.py @@ -2,7 +2,6 @@ import pyarrow as pa import pyarrow.dataset as ds -import pyarrow_hotfix # noqa: F401; addresses CVE-2023-47248; # type: ignore from ._internal import ArrayType as ArrayType from ._internal import Field as Field diff --git a/python/deltalake/table.py b/python/deltalake/table.py index a2d6189fb6..e7b7613599 100644 --- a/python/deltalake/table.py +++ b/python/deltalake/table.py @@ -24,7 +24,6 @@ import pyarrow import pyarrow.dataset as ds import pyarrow.fs as pa_fs -import pyarrow_hotfix # noqa: F401; addresses CVE-2023-47248; # type: ignore from pyarrow.dataset import ( Expression, FileSystemDataset, diff --git a/python/deltalake/writer.py b/python/deltalake/writer.py index af0982e925..bb69fee457 100644 --- a/python/deltalake/writer.py +++ b/python/deltalake/writer.py @@ -34,7 +34,6 @@ import pyarrow as pa import pyarrow.dataset as ds import pyarrow.fs as pa_fs -import pyarrow_hotfix # noqa: F401; addresses CVE-2023-47248; # type: ignore from pyarrow.lib import RecordBatchReader from ._internal import DeltaDataChecker as _DeltaDataChecker diff --git a/python/stubs/pyarrow_hotfix/__init__.pyi b/python/stubs/pyarrow_hotfix/__init__.pyi new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/stubs/pyarrow_hotfix/fs.pyi b/python/stubs/pyarrow_hotfix/fs.pyi deleted file mode 100644 index 3dfd4db2f1..0000000000 --- a/python/stubs/pyarrow_hotfix/fs.pyi +++ /dev/null @@ -1,11 +0,0 @@ -from typing import Any - -FileSystem: Any -PyFileSystem: Any -BufferReader: Any -FileInfo: Any -FileType: Any -FileSelector: Any - -class FileSystemHandler: - pass diff --git a/python/tests/pyspark_integration/utils.py b/python/tests/pyspark_integration/utils.py index 1860e8fb39..5ec23317a0 100644 --- a/python/tests/pyspark_integration/utils.py +++ b/python/tests/pyspark_integration/utils.py @@ -1,7 +1,6 @@ from typing import List import pyarrow as pa -import pyarrow_hotfix # noqa: F401; addresses CVE-2023-47248; # type: ignore try: import delta From a4fc4dc93228256683bfbc962751c70a707e858c Mon Sep 17 00:00:00 2001 From: ion-elgreco <15728914+ion-elgreco@users.noreply.github.com> Date: Thu, 14 Dec 2023 00:27:35 +0100 Subject: [PATCH 7/7] move into table --- python/deltalake/__init__.py | 2 -- python/deltalake/table.py | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/python/deltalake/__init__.py b/python/deltalake/__init__.py index 4f74cba93d..b10a708309 100644 --- a/python/deltalake/__init__.py +++ b/python/deltalake/__init__.py @@ -1,5 +1,3 @@ -import pyarrow_hotfix # noqa: F401; addresses CVE-2023-47248; # type: ignore - from ._internal import __version__ as __version__ from ._internal import rust_core_version as rust_core_version from .data_catalog import DataCatalog as DataCatalog diff --git a/python/deltalake/table.py b/python/deltalake/table.py index e7b7613599..a2d6189fb6 100644 --- a/python/deltalake/table.py +++ b/python/deltalake/table.py @@ -24,6 +24,7 @@ import pyarrow import pyarrow.dataset as ds import pyarrow.fs as pa_fs +import pyarrow_hotfix # noqa: F401; addresses CVE-2023-47248; # type: ignore from pyarrow.dataset import ( Expression, FileSystemDataset,