From 9ea0a1b35dd155a34f601a830c1ca5f76aaf3421 Mon Sep 17 00:00:00 2001 From: senmiaoliu Date: Fri, 8 Sep 2023 10:52:32 +0800 Subject: [PATCH] [KYUUBI #5244][Improvement] Make engineAliveMaxFailCount configurable ### _Why are the changes needed?_ close #5244 ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before making a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No Closes #5251 from lsm1/branch-kyuubi-5244. Closes #5244 bcadaa53a [senmiaoliu] rename a6c92773d [senmiaoliu] fix style 1f38fa711 [senmiaoliu] fix style 3ff57ff8b [senmiaoliu] Make engineAliveMaxFailCount configurable Authored-by: senmiaoliu Signed-off-by: Shaoyun Chen --- docs/configuration/settings.md | 1 + .../scala/org/apache/kyuubi/config/KyuubiConf.scala | 8 ++++++++ .../apache/kyuubi/session/KyuubiSessionImpl.scala | 12 ++++++------ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/docs/configuration/settings.md b/docs/configuration/settings.md index d6d1425487d..add63544dd5 100644 --- a/docs/configuration/settings.md +++ b/docs/configuration/settings.md @@ -414,6 +414,7 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | kyuubi.session.conf.ignore.list || A comma-separated list of ignored keys. If the client connection contains any of them, the key and the corresponding value will be removed silently during engine bootstrap and connection setup. Note that this rule is for server-side protection defined via administrators to prevent some essential configs from tampering but will not forbid users to set dynamic configurations via SET syntax. 
| set | 1.2.0 | | kyuubi.session.conf.profile | <undefined> | Specify a profile to load session-level configurations from `$KYUUBI_CONF_DIR/kyuubi-session-<profile>.conf`. This configuration will be ignored if the file does not exist. This configuration only takes effect when `kyuubi.session.conf.advisor` is set as `org.apache.kyuubi.session.FileSessionConfAdvisor`. | string | 1.7.0 | | kyuubi.session.conf.restrict.list || A comma-separated list of restricted keys. If the client connection contains any of them, the connection will be rejected explicitly during engine bootstrap and connection setup. Note that this rule is for server-side protection defined via administrators to prevent some essential configs from tampering but will not forbid users to set dynamic configurations via SET syntax. | set | 1.2.0 | +| kyuubi.session.engine.alive.max.failures | 3 | The maximum number of failures allowed for the engine. | int | 1.8.0 | | kyuubi.session.engine.alive.probe.enabled | false | Whether to enable the engine alive probe, it true, we will create a companion thrift client that keeps sending simple requests to check whether the engine is alive. | boolean | 1.6.0 | | kyuubi.session.engine.alive.probe.interval | PT10S | The interval for engine alive probe. | duration | 1.6.0 | | kyuubi.session.engine.alive.timeout | PT2M | The timeout for engine alive. If there is no alive probe success in the last timeout window, the engine will be marked as no-alive. 
| duration | 1.6.0 | diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala index bbbb73b9546..61d97da9ebb 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala @@ -1423,6 +1423,14 @@ object KyuubiConf { .timeConf .createWithDefault(Duration.ofSeconds(15).toMillis) + val ENGINE_ALIVE_MAX_FAILURES: ConfigEntry[Int] = + buildConf("kyuubi.session.engine.alive.max.failures") + .doc("The maximum number of failures allowed for the engine.") + .version("1.8.0") + .intConf + .checkValue(_ > 0, "Must be positive") + .createWithDefault(3) + val ENGINE_ALIVE_PROBE_ENABLED: ConfigEntry[Boolean] = buildConf("kyuubi.session.engine.alive.probe.enabled") .doc("Whether to enable the engine alive probe, it true, we will create a companion thrift" + diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionImpl.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionImpl.scala index 67eb6c86e7b..6dd1810a8de 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionImpl.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/session/KyuubiSessionImpl.scala @@ -287,10 +287,10 @@ class KyuubiSessionImpl( } @volatile private var engineLastAlive: Long = _ - val engineAliveTimeout = sessionConf.get(KyuubiConf.ENGINE_ALIVE_TIMEOUT) - val aliveProbeEnabled = sessionConf.get(KyuubiConf.ENGINE_ALIVE_PROBE_ENABLED) - var engineAliveMaxFailCount = 3 - var engineAliveFailCount = 0 + private val engineAliveTimeout = sessionConf.get(KyuubiConf.ENGINE_ALIVE_TIMEOUT) + private val aliveProbeEnabled = sessionConf.get(KyuubiConf.ENGINE_ALIVE_PROBE_ENABLED) + private val engineAliveMaxFailCount = sessionConf.get(KyuubiConf.ENGINE_ALIVE_MAX_FAILURES) + private var engineAliveFailCount = 0 def checkEngineConnectionAlive(): Boolean = 
{ try { @@ -306,7 +306,7 @@ class KyuubiSessionImpl( engineAliveFailCount = engineAliveFailCount + 1 if (now - engineLastAlive > engineAliveTimeout && engineAliveFailCount >= engineAliveMaxFailCount) { - error(s"The engineRef[${engine.getEngineRefId}] is marked as not alive " + error(s"The engineRef[${engine.getEngineRefId()}] is marked as not alive " + s"due to a lack of recent successful alive probes. " + s"The time since last successful probe: " + s"${now - engineLastAlive} ms exceeds the timeout of $engineAliveTimeout ms. " @@ -315,7 +315,7 @@ class KyuubiSessionImpl( false } else { warn( - s"The engineRef[${engine.getEngineRefId}] alive probe fails, " + + s"The engineRef[${engine.getEngineRefId()}] alive probe fails, " + s"${now - engineLastAlive} ms exceeds timeout $engineAliveTimeout ms, " + s"and has failed $engineAliveFailCount times.", e)