From 534bbbb57aab15030f0e13349a9b1149ed8afbfc Mon Sep 17 00:00:00 2001
From: Fu Chen
Date: Wed, 5 Jul 2023 18:07:09 +0800
Subject: [PATCH] [CELEBORN-767][DOC] Update the docs of `celeborn.client.spark.push.sort.memory.threshold`

### What changes were proposed in this pull request?

As title

### Why are the changes needed?

To clarify the usage of conf `celeborn.client.spark.push.sort.memory.threshold`

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Pass GA

Closes #1680 from cfmcgrady/docs.

Authored-by: Fu Chen
Signed-off-by: zky.zhoukeyong
(cherry picked from commit 3af5c231c70fb1771619f04989815508412f40bb)
Signed-off-by: zky.zhoukeyong
---
 .../apache/celeborn/common/CelebornConf.scala | 21 +++++++++++--------
 docs/configuration/client.md                  |  2 +-
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala b/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
index b24b7c03a25..210c45682a6 100644
--- a/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
+++ b/common/src/main/scala/org/apache/celeborn/common/CelebornConf.scala
@@ -3270,15 +3270,6 @@ object CelebornConf extends Logging {
       .longConf
       .createWithDefault(500000)
 
-  val CLIENT_PUSH_SORT_MEMORY_THRESHOLD: ConfigEntry[Long] =
-    buildConf("celeborn.client.spark.push.sort.memory.threshold")
-      .withAlternative("celeborn.push.sortMemory.threshold")
-      .categories("client")
-      .doc("When SortBasedPusher use memory over the threshold, will trigger push data.")
-      .version("0.3.0")
-      .bytesConf(ByteUnit.BYTE)
-      .createWithDefaultString("64m")
-
   val CLIENT_PUSH_SORT_PIPELINE_ENABLED: ConfigEntry[Boolean] =
     buildConf("celeborn.client.spark.push.sort.pipeline.enabled")
       .withAlternative("celeborn.push.sort.pipeline.enabled")
@@ -3289,6 +3280,18 @@ object CelebornConf extends Logging {
       .booleanConf
       .createWithDefault(false)
 
+  val CLIENT_PUSH_SORT_MEMORY_THRESHOLD: ConfigEntry[Long] =
+    buildConf("celeborn.client.spark.push.sort.memory.threshold")
+      .withAlternative("celeborn.push.sortMemory.threshold")
+      .categories("client")
+      .doc("When SortBasedPusher use memory over the threshold, will trigger push data. If the" +
+        s" pipeline push feature is enabled (`${CLIENT_PUSH_SORT_PIPELINE_ENABLED.key}=true`)," +
+        " the SortBasedPusher will trigger a data push when the memory usage exceeds half of the" +
+        " threshold(by default, 32m).")
+      .version("0.3.0")
+      .bytesConf(ByteUnit.BYTE)
+      .createWithDefaultString("64m")
+
   val TEST_ALTERNATIVE: OptionalConfigEntry[String] =
     buildConf("celeborn.test.alternative.key")
       .withAlternative("celeborn.test.alternative.deprecatedKey")
diff --git a/docs/configuration/client.md b/docs/configuration/client.md
index d88c1b64beb..2fc8fb1d29e 100644
--- a/docs/configuration/client.md
+++ b/docs/configuration/client.md
@@ -89,7 +89,7 @@ license: |
 | celeborn.client.shuffle.partitionSplit.mode | SOFT | soft: the shuffle file size might be larger than split threshold. hard: the shuffle file size will be limited to split threshold. | 0.3.0 |
 | celeborn.client.shuffle.partitionSplit.threshold | 1G | Shuffle file size threshold, if file size exceeds this, trigger split. | 0.3.0 |
 | celeborn.client.shuffle.rangeReadFilter.enabled | false | If a spark application have skewed partition, this value can set to true to improve performance. | 0.2.0 |
-| celeborn.client.spark.push.sort.memory.threshold | 64m | When SortBasedPusher use memory over the threshold, will trigger push data. | 0.3.0 |
+| celeborn.client.spark.push.sort.memory.threshold | 64m | When SortBasedPusher use memory over the threshold, will trigger push data. If the pipeline push feature is enabled (`celeborn.client.spark.push.sort.pipeline.enabled=true`), the SortBasedPusher will trigger a data push when the memory usage exceeds half of the threshold(by default, 32m). | 0.3.0 |
 | celeborn.client.spark.push.sort.pipeline.enabled | false | Whether to enable pipelining for sort based shuffle writer. If true, double buffering will be used to pipeline push | 0.3.0 |
 | celeborn.client.spark.push.unsafeRow.fastWrite.enabled | true | This is Celeborn's optimization on UnsafeRow for Spark and it's true by default. If you have changed UnsafeRow's memory layout set this to false. | 0.2.2 |
 | celeborn.client.spark.shuffle.forceFallback.enabled | false | Whether force fallback shuffle to Spark's default. | 0.3.0 |
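
For reference, a minimal sketch of how a user might override this threshold from a Spark application, assuming Celeborn is already configured as the remote shuffle service and that client settings are passed through `SparkConf` with the usual `spark.` prefix; the app name and values below are illustrative only:

```scala
import org.apache.spark.sql.SparkSession

// Illustrative only: raise the SortBasedPusher threshold from the 64m default to 128m.
// With pipelining enabled, a push would then trigger at roughly half the threshold (64m).
val spark = SparkSession
  .builder()
  .appName("celeborn-sort-push-threshold-demo")
  .config("spark.celeborn.client.spark.push.sort.memory.threshold", "128m")
  .config("spark.celeborn.client.spark.push.sort.pipeline.enabled", "true")
  .getOrCreate()
```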