Skip to content

Commit

Permalink
extend write_deltalake to accept both PyArrow and Deltalake schema
Browse files Browse the repository at this point in the history
Signed-off-by: Nikolay Ulmasov <[email protected]>
  • Loading branch information
r3stl355 committed Nov 18, 2023
1 parent daa700e commit 2d67292
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 2 deletions.
5 changes: 4 additions & 1 deletion python/deltalake/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
)
from urllib.parse import unquote

from deltalake import Schema
from deltalake.fs import DeltaStorageHandler

from ._util import encode_partition_value
Expand Down Expand Up @@ -73,7 +74,7 @@ def write_deltalake(
RecordBatchReader,
],
*,
schema: Optional[pa.Schema] = None,
schema: Optional[Union[pa.Schema, Schema]] = None,
partition_by: Optional[Union[List[str], str]] = None,
filesystem: Optional[pa_fs.FileSystem] = None,
mode: Literal["error", "append", "overwrite", "ignore"] = "error",
Expand Down Expand Up @@ -179,6 +180,8 @@ def write_deltalake(
raise ValueError("You must provide schema if data is Iterable")
else:
schema = data.schema
elif isinstance(schema, Schema):
schema = schema.to_pyarrow()

if filesystem is not None:
raise NotImplementedError("Filesystem support is not yet implemented. #570")
Expand Down
10 changes: 9 additions & 1 deletion python/tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from pyarrow.dataset import ParquetFileFormat, ParquetReadOptions
from pyarrow.lib import RecordBatchReader

from deltalake import DeltaTable, write_deltalake
from deltalake import DeltaTable, Schema, write_deltalake
from deltalake.exceptions import CommitFailedError, DeltaProtocolError
from deltalake.table import ProtocolVersions
from deltalake.writer import try_get_table_and_table_uri
Expand Down Expand Up @@ -950,3 +950,11 @@ def test_float_values(tmp_path: pathlib.Path):
assert actions["min"].field("x2")[0].as_py() is None
assert actions["max"].field("x2")[0].as_py() == 1.0
assert actions["null_count"].field("x2")[0].as_py() == 1


def test_with_deltalake_schema(tmp_path: pathlib.Path, sample_data: pa.Table):
    """Write using a deltalake ``Schema`` and check the stored schema.

    The PyArrow schema of ``sample_data`` is converted with
    ``Schema.from_pyarrow`` and passed to ``write_deltalake``; the schema
    read back from the resulting table, converted to PyArrow, must equal
    the original PyArrow schema.
    """
    expected_schema = sample_data.schema
    deltalake_schema = Schema.from_pyarrow(expected_schema)

    write_deltalake(tmp_path, sample_data, schema=deltalake_schema)

    stored_schema = DeltaTable(tmp_path).schema().to_pyarrow()
    assert stored_schema == expected_schema

0 comments on commit 2d67292

Please sign in to comment.