Skip to content

Commit

Permalink
align compression levels
Browse files Browse the repository at this point in the history
  • Loading branch information
ion-elgreco committed Nov 7, 2023
1 parent 293586e commit 96355f0
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 4 deletions.
4 changes: 2 additions & 2 deletions python/deltalake/_internal.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class RawDeltaTable:
target_size: Optional[int],
max_concurrent_tasks: Optional[int],
min_commit_interval: Optional[int],
compression: Literal["snappy", "gzip", "brotli", "lz4", "zstd"],
compression: str,
) -> str: ...
def z_order_optimize(
self,
Expand All @@ -74,7 +74,7 @@ class RawDeltaTable:
max_concurrent_tasks: Optional[int],
max_spill_size: Optional[int],
min_commit_interval: Optional[int],
compression: Literal["snappy", "gzip", "brotli", "lz4", "zstd"],
compression: str,
) -> str: ...
def restore(
self,
Expand Down
17 changes: 15 additions & 2 deletions python/deltalake/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,15 @@ class ProtocolVersions(NamedTuple):
FilterType = Union[FilterConjunctionType, FilterDNFType]


def _get_parquet_rs_compression(compression: str):
"""Returns compression into parquet-rs compression format with pyarrow default compression level"""
if compression in ["gzip", "brotli", "zstd"]:
compression = (
f"{compression}({pyarrow.Codec.default_compression_level(compression)})"
)
return compression


def _check_contains_null(value: Any) -> bool:
"""
Check if target contains nullish value.
Expand Down Expand Up @@ -1377,12 +1386,14 @@ def compact(
if isinstance(min_commit_interval, timedelta):
min_commit_interval = int(min_commit_interval.total_seconds())

compress = _get_parquet_rs_compression(compression)

metrics = self.table._table.compact_optimize(
partition_filters,
target_size,
max_concurrent_tasks,
min_commit_interval,
compression,
compress,
)
self.table.update_incremental()
return json.loads(metrics)
Expand Down Expand Up @@ -1432,14 +1443,16 @@ def z_order(
if isinstance(min_commit_interval, timedelta):
min_commit_interval = int(min_commit_interval.total_seconds())

compress = _get_parquet_rs_compression(compression)

metrics = self.table._table.z_order_optimize(
list(columns),
partition_filters,
target_size,
max_concurrent_tasks,
max_spill_size,
min_commit_interval,
compression,
compress,
)
self.table.update_incremental()
return json.loads(metrics)

0 comments on commit 96355f0

Please sign in to comment.