From 615b7470f11db49abd6ef3b2e2de01791c699fa1 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Thu, 25 Jan 2024 22:32:30 +0800 Subject: [PATCH 1/3] Speed up GetTables operation --- docs/configuration/settings.md | 2 +- .../engine/spark/util/SparkCatalogUtils.scala | 48 +++++++++++++------ .../org/apache/kyuubi/config/KyuubiConf.scala | 3 +- 3 files changed, 37 insertions(+), 16 deletions(-) diff --git a/docs/configuration/settings.md b/docs/configuration/settings.md index b203b0bdacb..a6efe608574 100644 --- a/docs/configuration/settings.md +++ b/docs/configuration/settings.md @@ -392,7 +392,7 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co | Key | Default | Meaning | Type | Since | |--------------------------------------------------|---------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| -| kyuubi.operation.getTables.ignoreTableProperties | false | Speed up the `GetTables` operation by returning table identities only. | boolean | 1.8.0 | +| kyuubi.operation.getTables.ignoreTableProperties | false | Speed up the `GetTables` operation by ignoring `tableTypes` query criteria, and returning table identities only. | boolean | 1.8.0 | | kyuubi.operation.idle.timeout | PT3H | Operation will be closed when it's not accessed for this duration of time | duration | 1.0.0 | | kyuubi.operation.interrupt.on.cancel | true | When true, all running tasks will be interrupted if one cancels a query. 
When false, all running tasks will remain until finished. | boolean | 1.2.0 | | kyuubi.operation.language | SQL | Choose a programing language for the following inputs | string | 1.5.0 | diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/util/SparkCatalogUtils.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/util/SparkCatalogUtils.scala index 18a14494e85..e9797cbed3a 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/util/SparkCatalogUtils.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/util/SparkCatalogUtils.scala @@ -173,21 +173,41 @@ object SparkCatalogUtils extends Logging { databases.flatMap { db => val identifiers = catalog.listTables(db, tablePattern, includeLocalTempViews = false) - catalog.getTablesByName(identifiers) - .filter(t => isMatchedTableType(tableTypes, t.tableType.name)).map { t => - val typ = if (t.tableType.name == VIEW) VIEW else TABLE - Row( - catalogName, - t.database, - t.identifier.table, - typ, - t.comment.getOrElse(""), - null, - null, - null, - null, - null) + if (ignoreTableProperties) { + identifiers.map { + case TableIdentifier( + table: String, + database: Option[String], + catalog: Option[String]) => + Row( + catalog.getOrElse(catalogName), + database.getOrElse("default"), + table, + TABLE, // ignore tableTypes criteria and simply treat all table type as TABLE + "", + null, + null, + null, + null, + null) } + } else { + catalog.getTablesByName(identifiers) + .filter(t => isMatchedTableType(tableTypes, t.tableType.name)).map { t => + val typ = if (t.tableType.name == VIEW) VIEW else TABLE + Row( + catalogName, + t.database, + t.identifier.table, + typ, + t.comment.getOrElse(""), + null, + null, + null, + null, + null) + } + } } case tc: TableCatalog => val tp = tablePattern.r.pattern diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala 
b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala index 3eedfdded91..d68bc2646e2 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala @@ -3433,7 +3433,8 @@ object KyuubiConf { val OPERATION_GET_TABLES_IGNORE_TABLE_PROPERTIES: ConfigEntry[Boolean] = buildConf("kyuubi.operation.getTables.ignoreTableProperties") - .doc("Speed up the `GetTables` operation by returning table identities only.") + .doc("Speed up the `GetTables` operation by ignoring `tableTypes` query criteria, " + + "and returning table identities only.") .version("1.8.0") .booleanConf .createWithDefault(false) From 405b12484c7d3287e760d0a62c14f027f81caab4 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Thu, 25 Jan 2024 22:57:37 +0800 Subject: [PATCH 2/3] fix --- .../kyuubi/engine/spark/util/SparkCatalogUtils.scala | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/util/SparkCatalogUtils.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/util/SparkCatalogUtils.scala index e9797cbed3a..b47234f7f1e 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/util/SparkCatalogUtils.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/util/SparkCatalogUtils.scala @@ -163,8 +163,8 @@ object SparkCatalogUtils extends Logging { val namespaces = listNamespacesWithPattern(catalog, schemaPattern) catalog match { case builtin if builtin.name() == SESSION_CATALOG => - val catalog = spark.sessionState.catalog - val databases = catalog.listDatabases(schemaPattern) + val sessionCatalog = spark.sessionState.catalog + val databases = sessionCatalog.listDatabases(schemaPattern) def isMatchedTableType(tableTypes: Set[String], tableType: String): Boolean = { val typ = if 
(tableType.equalsIgnoreCase(VIEW)) VIEW else TABLE @@ -172,7 +172,8 @@ object SparkCatalogUtils extends Logging { } databases.flatMap { db => - val identifiers = catalog.listTables(db, tablePattern, includeLocalTempViews = false) + val identifiers = + sessionCatalog.listTables(db, tablePattern, includeLocalTempViews = false) if (ignoreTableProperties) { identifiers.map { case TableIdentifier( @@ -192,7 +193,7 @@ object SparkCatalogUtils extends Logging { null) } } else { - catalog.getTablesByName(identifiers) + sessionCatalog.getTablesByName(identifiers) .filter(t => isMatchedTableType(tableTypes, t.tableType.name)).map { t => val typ = if (t.tableType.name == VIEW) VIEW else TABLE Row( From 058001c6fb2748fa31df673870e63a5341dd254e Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Mon, 29 Jan 2024 12:11:49 +0800 Subject: [PATCH 3/3] fix --- .../engine/spark/util/SparkCatalogUtils.scala | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/util/SparkCatalogUtils.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/util/SparkCatalogUtils.scala index b47234f7f1e..b55319830b5 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/util/SparkCatalogUtils.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/util/SparkCatalogUtils.scala @@ -175,22 +175,18 @@ object SparkCatalogUtils extends Logging { val identifiers = sessionCatalog.listTables(db, tablePattern, includeLocalTempViews = false) if (ignoreTableProperties) { - identifiers.map { - case TableIdentifier( - table: String, - database: Option[String], - catalog: Option[String]) => - Row( - catalog.getOrElse(catalogName), - database.getOrElse("default"), - table, - TABLE, // ignore tableTypes criteria and simply treat all table type as TABLE - "", - null, - null, - null, - null, - null) + 
identifiers.map { ti: TableIdentifier => + Row( + catalogName, + ti.database.getOrElse("default"), + ti.table, + TABLE, // ignore tableTypes criteria and simply treat all table types as TABLE + "", + null, + null, + null, + null, + null) } } else { sessionCatalog.getTablesByName(identifiers)