From 036db74b97f8f5b447bc1d689e9f8081af47604c Mon Sep 17 00:00:00 2001
From: Xi Lyu
Date: Thu, 3 Oct 2024 10:24:06 +0200
Subject: [PATCH] [SPARK-47341][SQL] Fix inaccurate documentation of
 RuntimeConfig#get
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What changes were proposed in this pull request?

The existing documentation of `RuntimeConfig.get()` is misleading:

* The `get(key: String)` method does not throw any exception when the key is not set, as long as the config entry has a default value; it simply returns the `defaultValue` of the `ConfigEntry`. A `NoSuchElementException` is thrown only if the config entry has no default value.
* The `get(key: String, default: String)` method ignores the `defaultValue` of its `ConfigEntry` and returns the given `default` parameter if the key is not set.
* The `getOption(key: String)` method returns the `defaultValue` of its `ConfigEntry` if the config is not set, instead of `None`.

An example to demonstrate the behaviour:

The first line makes sure the config is not set.
```
scala> spark.conf.unset("spark.sql.session.timeZone")
```
The following call returns `Etc/UTC` and does not throw any exception.
```
scala> spark.conf.get("spark.sql.session.timeZone")
res1: String = "Etc/UTC"
```
The following call returns `Some("Etc/UTC")` instead of `None`.
```
scala> spark.conf.getOption("spark.sql.session.timeZone")
res2: Option[String] = Some(value = "Etc/UTC")
```
The following call returns `Europe/Berlin`, ignoring the entry's default value. The current documentation, however, only says that the value for the key is returned, without mentioning that the entry's default value is ignored when the config is not explicitly set.
```
scala> spark.conf.get("spark.sql.session.timeZone", "Europe/Berlin")
res3: String = "Europe/Berlin"
```
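For comparison, a key that is neither set nor backed by a `ConfigEntry` with a default value still throws. The snippet below is only a sketch, run in the same shell session and using a made-up key `spark.some.unknown.key` purely for illustration:
```
import scala.util.Try

// No registered default to fall back on, so get(key) fails with a NoSuchElementException.
assert(Try(spark.conf.get("spark.some.unknown.key")).isFailure)

// The two-argument get(key, default) still returns the caller-supplied default.
assert(spark.conf.get("spark.some.unknown.key", "fallback") == "fallback")

// getOption(key) returns None rather than throwing.
assert(spark.conf.getOption("spark.some.unknown.key").isEmpty)
```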
In this PR, the documentation is fixed and a new test case is added.

### Why are the changes needed?

The incorrect documentation is likely to mislead users into unexpected behaviour if they rely on it.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

New test case in `RuntimeConfigSuite`.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #48264 from xi-db/SPARK-49798-fix-runtimeconfig-doc.

Lead-authored-by: Xi Lyu
Co-authored-by: Xi Lyu <159039256+xi-db@users.noreply.github.com>
Signed-off-by: Max Gekk
---
 .../sql/internal/ConnectRuntimeConfig.scala   |  2 +-
 .../org/apache/spark/sql/RuntimeConfig.scala  | 13 +++++++----
 .../sql/internal/RuntimeConfigImpl.scala      |  2 +-
 .../apache/spark/sql/RuntimeConfigSuite.scala | 22 ++++++++++++++++++-
 4 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/ConnectRuntimeConfig.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/ConnectRuntimeConfig.scala
index 7578e2424fb42..be1a13cb2fed2 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/ConnectRuntimeConfig.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/ConnectRuntimeConfig.scala
@@ -38,7 +38,7 @@ class ConnectRuntimeConfig private[sql] (client: SparkConnectClient)
   }
 
   /** @inheritdoc */
-  @throws[NoSuchElementException]("if the key is not set")
+  @throws[NoSuchElementException]("if the key is not set and there is no default value")
   def get(key: String): String = getOption(key).getOrElse {
     throw new NoSuchElementException(key)
   }
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala b/sql/api/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala
index 23a2774ebc3a5..9e6e0e97f0302 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala
@@ -54,17 +54,21 @@ abstract class RuntimeConfig {
   }
 
   /**
-   * Returns the value of Spark runtime configuration property for the given key.
+   * Returns the value of Spark runtime configuration property for the given key. If the key is
+   * not set yet, return its default value if possible, otherwise `NoSuchElementException` will be
+   * thrown.
    *
    * @throws java.util.NoSuchElementException
    *   if the key is not set and does not have a default value
    * @since 2.0.0
    */
-  @throws[NoSuchElementException]("if the key is not set")
+  @throws[NoSuchElementException]("if the key is not set and there is no default value")
   def get(key: String): String
 
   /**
-   * Returns the value of Spark runtime configuration property for the given key.
+   * Returns the value of Spark runtime configuration property for the given key. If the key is
+   * not set yet, return the user given `default`. This is useful when its default value defined
+   * by Apache Spark is not the desired one.
    *
    * @since 2.0.0
    */
@@ -78,7 +82,8 @@ abstract class RuntimeConfig {
   def getAll: Map[String, String]
 
   /**
-   * Returns the value of Spark runtime configuration property for the given key.
+   * Returns the value of Spark runtime configuration property for the given key. If the key is
+   * not set yet, return its default value if possible, otherwise `None` will be returned.
    *
    * @since 2.0.0
    */
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/RuntimeConfigImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/RuntimeConfigImpl.scala
index f25ca387db299..0ef879387727a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/RuntimeConfigImpl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/RuntimeConfigImpl.scala
@@ -42,7 +42,7 @@ class RuntimeConfigImpl private[sql](val sqlConf: SQLConf = new SQLConf) extends
   }
 
   /** @inheritdoc */
-  @throws[NoSuchElementException]("if the key is not set")
+  @throws[NoSuchElementException]("if the key is not set and there is no default value")
   def get(key: String): String = {
     sqlConf.getConfString(key)
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala
index 352197f96acb6..009fe55664a2b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.internal.config
-import org.apache.spark.sql.internal.RuntimeConfigImpl
+import org.apache.spark.sql.internal.{RuntimeConfigImpl, SQLConf}
 import org.apache.spark.sql.internal.SQLConf.CHECKPOINT_LOCATION
 import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE
 
@@ -81,4 +81,24 @@ class RuntimeConfigSuite extends SparkFunSuite {
     }
     assert(ex.getMessage.contains("Spark config"))
   }
+
+  test("set and get a config with defaultValue") {
+    val conf = newConf()
+    val key = SQLConf.SESSION_LOCAL_TIMEZONE.key
+    // By default, the value when getting an unset config entry is its defaultValue.
+    assert(conf.get(key) == SQLConf.SESSION_LOCAL_TIMEZONE.defaultValue.get)
+    assert(conf.getOption(key).contains(SQLConf.SESSION_LOCAL_TIMEZONE.defaultValue.get))
+    // Get the unset config entry with a different default value, which should return the given
+    // default parameter.
+    assert(conf.get(key, "Europe/Amsterdam") == "Europe/Amsterdam")
+
+    // Set a config entry.
+    conf.set(key, "Europe/Berlin")
+    // Get the set config entry.
+    assert(conf.get(key) == "Europe/Berlin")
+    // Unset the config entry.
+    conf.unset(key)
+    // Get the unset config entry, which should return its defaultValue again.
+    assert(conf.get(key) == SQLConf.SESSION_LOCAL_TIMEZONE.defaultValue.get)
+  }
 }