From 62d488786ef7a0e896b610e00d98b43fc064e0fa Mon Sep 17 00:00:00 2001 From: Jun Date: Tue, 27 Aug 2024 18:45:46 -0400 Subject: [PATCH] docs: fix documentation about max_spill_size --- crates/core/src/operations/optimize.rs | 4 ++-- python/deltalake/table.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/core/src/operations/optimize.rs b/crates/core/src/operations/optimize.rs index a84955c81d..64566a0fa3 100644 --- a/crates/core/src/operations/optimize.rs +++ b/crates/core/src/operations/optimize.rs @@ -203,9 +203,9 @@ pub struct OptimizeBuilder<'a> { commit_properties: CommitProperties, /// Whether to preserve insertion order within files (default false) preserve_insertion_order: bool, - /// Max number of concurrent tasks (default is number of cpus) + /// Maximum number of concurrent tasks (default is number of cpus) max_concurrent_tasks: usize, - /// Maximum number of bytes that are allowed to spill to disk + /// Maximum number of bytes allowed in memory before spilling to disk max_spill_size: usize, /// Optimize type optimize_type: OptimizeType, diff --git a/python/deltalake/table.py b/python/deltalake/table.py index bccd3a2d7e..3aba38bff0 100644 --- a/python/deltalake/table.py +++ b/python/deltalake/table.py @@ -1928,7 +1928,7 @@ def z_order( max_concurrent_tasks: the maximum number of concurrent tasks to use for file compaction. Defaults to number of CPUs. More concurrent tasks can make compaction faster, but will also use more memory. - max_spill_size: the maximum number of bytes to spill to disk. Defaults to 20GB. + max_spill_size: the maximum number of bytes allowed in memory before spilling to disk. Defaults to 20GB. min_commit_interval: minimum interval in seconds or as timedeltas before a new commit is created. Interval is useful for long running executions. Set to 0 or timedelta(0), if you want a commit per partition.