From 35a910a43e939e4816ce9df1d0371415fdd9384d Mon Sep 17 00:00:00 2001 From: Tigran Manasyan Date: Tue, 30 Jan 2024 13:15:35 +0800 Subject: [PATCH] [KYUUBI #5674] Add PostgreSQL as backend database for kyuubi metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # :mag: Description ## Issue References 🔗 This pull request fixes #5674 ## Describe Your Solution 🔧 Currently, Kyuubi supports the use of SQLite and MySQL to store metadata. Although PostgreSQL can be supported through the 'CUSTOM' method, there is a certain usage cost for users, therefore this MR adds support for PostgreSQL as a backend database for kyuubi metadata. ## Types of changes :bookmark: - [ ] Bugfix (non-breaking change which fixes an issue) - [x] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) ## Test Plan 🧪 #### Behavior Without This Pull Request :coffin: #### Behavior With This Pull Request :tada: #### Related Unit Tests `org.apache.kyuubi.server.metadata.jdbc.JDBCMetadataStoreSuite` --- # Checklist 📝 - [x] This patch was not authored or co-authored using [Generative Tooling](https://www.apache.org/legal/generative-tooling.html) **Be nice. Be informative.** Closes #6027 from tigrulya-exe/feature/5674-postgresql-metadata-backend. 
Closes #5674 111658002 [Tigran Manasyan] Fix metastore db type option comment 44a22bfcf [Tigran Manasyan] Fix dependency list and metastore db type option comment b638c8942 [Tigran Manasyan] Add PostgreSQL as backend database for kyuubi metadata Authored-by: Tigran Manasyan Signed-off-by: Cheng Pan --- dev/dependencyList | 2 + docs/configuration/settings.md | 36 +++++------ .../org/apache/kyuubi/util/JdbcUtils.scala | 1 + kyuubi-server/pom.xml | 5 ++ ...metadata-store-schema-1.9.0.postgresql.sql | 59 +++++++++++++++++++ .../server/metadata/jdbc/DatabaseType.scala | 2 +- .../metadata/jdbc/JDBCMetadataStore.scala | 2 + .../metadata/jdbc/JDBCMetadataStoreConf.scala | 1 + .../metadata/jdbc/JdbcDatabaseDialect.scala | 1 + .../jdbc/JDBCMetadataStoreSuite.scala | 1 + 10 files changed, 91 insertions(+), 19 deletions(-) create mode 100644 kyuubi-server/src/main/resources/sql/postgresql/metadata-store-schema-1.9.0.postgresql.sql diff --git a/dev/dependencyList b/dev/dependencyList index 0a60d8b73d6..a3f7d7b914d 100644 --- a/dev/dependencyList +++ b/dev/dependencyList @@ -26,6 +26,7 @@ arrow-format/12.0.0//arrow-format-12.0.0.jar arrow-memory-core/12.0.0//arrow-memory-core-12.0.0.jar arrow-memory-netty/12.0.0//arrow-memory-netty-12.0.0.jar arrow-vector/12.0.0//arrow-vector-12.0.0.jar +checker-qual/3.31.0//checker-qual-3.31.0.jar classgraph/4.8.138//classgraph-4.8.138.jar commons-codec/1.15//commons-codec-1.15.jar commons-collections/3.2.2//commons-collections-3.2.2.jar @@ -169,6 +170,7 @@ okio/1.15.0//okio-1.15.0.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar perfmark-api/0.26.0//perfmark-api-0.26.0.jar +postgresql/42.6.0//postgresql-42.6.0.jar proto-google-common-protos/2.22.0//proto-google-common-protos-2.22.0.jar protobuf-java-util/3.21.7//protobuf-java-util-3.21.7.jar protobuf-java/3.21.7//protobuf-java-3.21.7.jar diff --git a/docs/configuration/settings.md b/docs/configuration/settings.md index a7e6520577e..b3a9ff337db 
100644 --- a/docs/configuration/settings.md +++ b/docs/configuration/settings.md @@ -357,24 +357,24 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co ### Metadata -| Key | Default | Meaning | Type | Since | -|-------------------------------------------------|----------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| -| kyuubi.metadata.cleaner.enabled | true | Whether to clean the metadata periodically. If it is enabled, Kyuubi will clean the metadata that is in the terminate state with max age limitation. | boolean | 1.6.0 | -| kyuubi.metadata.cleaner.interval | PT30M | The interval to check and clean expired metadata. | duration | 1.6.0 | -| kyuubi.metadata.max.age | PT72H | The maximum age of metadata, the metadata exceeding the age will be cleaned. | duration | 1.6.0 | -| kyuubi.metadata.recovery.threads | 10 | The number of threads for recovery from the metadata store when the Kyuubi server restarts. | int | 1.6.0 | -| kyuubi.metadata.request.async.retry.enabled | true | Whether to retry in async when metadata request failed. When true, return success response immediately even the metadata request failed, and schedule it in background until success, to tolerate long-time metadata store outages w/o blocking the submission request. 
| boolean | 1.7.0 | -| kyuubi.metadata.request.async.retry.queue.size | 65536 | The maximum queue size for buffering metadata requests in memory when the external metadata storage is down. Requests will be dropped if the queue exceeds. Only take affect when kyuubi.metadata.request.async.retry.enabled is `true`. | int | 1.6.0 | -| kyuubi.metadata.request.async.retry.threads | 10 | Number of threads in the metadata request async retry manager thread pool. Only take affect when kyuubi.metadata.request.async.retry.enabled is `true`. | int | 1.6.0 | -| kyuubi.metadata.request.retry.interval | PT5S | The interval to check and trigger the metadata request retry tasks. | duration | 1.6.0 | -| kyuubi.metadata.store.class | org.apache.kyuubi.server.metadata.jdbc.JDBCMetadataStore | Fully qualified class name for server metadata store. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.database.schema.init | true | Whether to init the JDBC metadata store database schema. | boolean | 1.6.0 | -| kyuubi.metadata.store.jdbc.database.type | SQLITE | The database type for server jdbc metadata store.
  • (Deprecated) DERBY: Apache Derby, JDBC driver `org.apache.derby.jdbc.AutoloadedDriver`.
  • SQLITE: SQLite3, JDBC driver `org.sqlite.JDBC`.
  • MYSQL: MySQL, JDBC driver `com.mysql.cj.jdbc.Driver` (fallback `com.mysql.jdbc.Driver`).
  • CUSTOM: User-defined database type, need to specify corresponding JDBC driver.
  • Note that: The JDBC datasource is powered by HiKariCP, for datasource properties, please specify them with the prefix: kyuubi.metadata.store.jdbc.datasource. For example, kyuubi.metadata.store.jdbc.datasource.connectionTimeout=10000. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.driver | <undefined> | JDBC driver class name for server jdbc metadata store. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.password || The password for server JDBC metadata store. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.priority.enabled | false | Whether to enable the priority scheduling for batch impl v2. When false, ignore kyuubi.batch.priority and use the FIFO ordering strategy for batch job scheduling. Note: this feature may cause significant performance issues when using MySQL 5.7 as the metastore backend due to the lack of support for mixed order index. See more details at KYUUBI #5329. | boolean | 1.8.0 | -| kyuubi.metadata.store.jdbc.url | jdbc:sqlite:<KYUUBI_HOME>/kyuubi_state_store.db | The JDBC url for server JDBC metadata store. By default, it is a SQLite database url, and the state information is not shared across Kyuubi instances. To enable high availability for multiple kyuubi instances, please specify a production JDBC url. Note: this value support the variables substitution: ``. | string | 1.6.0 | -| kyuubi.metadata.store.jdbc.user || The username for server JDBC metadata store. 
| string | 1.6.0 | +| Key | Default | Meaning | Type | Since | +|-------------------------------------------------|----------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|-------| +| kyuubi.metadata.cleaner.enabled | true | Whether to clean the metadata periodically. If it is enabled, Kyuubi will clean the metadata that is in the terminate state with max age limitation. | boolean | 1.6.0 | +| kyuubi.metadata.cleaner.interval | PT30M | The interval to check and clean expired metadata. | duration | 1.6.0 | +| kyuubi.metadata.max.age | PT72H | The maximum age of metadata, the metadata exceeding the age will be cleaned. | duration | 1.6.0 | +| kyuubi.metadata.recovery.threads | 10 | The number of threads for recovery from the metadata store when the Kyuubi server restarts. | int | 1.6.0 | +| kyuubi.metadata.request.async.retry.enabled | true | Whether to retry in async when metadata request failed. When true, return success response immediately even the metadata request failed, and schedule it in background until success, to tolerate long-time metadata store outages w/o blocking the submission request. 
| boolean | 1.7.0 | +| kyuubi.metadata.request.async.retry.queue.size | 65536 | The maximum queue size for buffering metadata requests in memory when the external metadata storage is down. Requests will be dropped if the queue exceeds. Only take affect when kyuubi.metadata.request.async.retry.enabled is `true`. | int | 1.6.0 | +| kyuubi.metadata.request.async.retry.threads | 10 | Number of threads in the metadata request async retry manager thread pool. Only take affect when kyuubi.metadata.request.async.retry.enabled is `true`. | int | 1.6.0 | +| kyuubi.metadata.request.retry.interval | PT5S | The interval to check and trigger the metadata request retry tasks. | duration | 1.6.0 | +| kyuubi.metadata.store.class | org.apache.kyuubi.server.metadata.jdbc.JDBCMetadataStore | Fully qualified class name for server metadata store. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.database.schema.init | true | Whether to init the JDBC metadata store database schema. | boolean | 1.6.0 | +| kyuubi.metadata.store.jdbc.database.type | SQLITE | The database type for server jdbc metadata store.
    • (Deprecated) DERBY: Apache Derby, JDBC driver `org.apache.derby.jdbc.AutoloadedDriver`.
    • SQLITE: SQLite3, JDBC driver `org.sqlite.JDBC`.
    • MYSQL: MySQL, JDBC driver `com.mysql.cj.jdbc.Driver` (fallback `com.mysql.jdbc.Driver`).
    • POSTGRESQL: PostgreSQL, JDBC driver `org.postgresql.Driver`.
    • CUSTOM: User-defined database type, need to specify corresponding JDBC driver.
    • Note that: The JDBC datasource is powered by HiKariCP, for datasource properties, please specify them with the prefix: kyuubi.metadata.store.jdbc.datasource. For example, kyuubi.metadata.store.jdbc.datasource.connectionTimeout=10000. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.driver | <undefined> | JDBC driver class name for server jdbc metadata store. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.password || The password for server JDBC metadata store. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.priority.enabled | false | Whether to enable the priority scheduling for batch impl v2. When false, ignore kyuubi.batch.priority and use the FIFO ordering strategy for batch job scheduling. Note: this feature may cause significant performance issues when using MySQL 5.7 as the metastore backend due to the lack of support for mixed order index. See more details at KYUUBI #5329. | boolean | 1.8.0 | +| kyuubi.metadata.store.jdbc.url | jdbc:sqlite:<KYUUBI_HOME>/kyuubi_state_store.db | The JDBC url for server JDBC metadata store. By default, it is a SQLite database url, and the state information is not shared across Kyuubi instances. To enable high availability for multiple kyuubi instances, please specify a production JDBC url. Note: this value support the variables substitution: ``. | string | 1.6.0 | +| kyuubi.metadata.store.jdbc.user || The username for server JDBC metadata store. 
| string | 1.6.0 | ### Metrics diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/util/JdbcUtils.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/util/JdbcUtils.scala index 4951004b671..a2597395bd0 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/util/JdbcUtils.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/util/JdbcUtils.scala @@ -115,6 +115,7 @@ object JdbcUtils extends Logging { val duplicatedKeyKeywords = Seq( "duplicate key value in a unique or primary key constraint or unique index", // Derby "Duplicate entry", // MySQL + "duplicate key value violates unique constraint", // PostgreSQL "A UNIQUE constraint failed" // SQLite ) duplicatedKeyKeywords.exists(cause.getMessage.contains) diff --git a/kyuubi-server/pom.xml b/kyuubi-server/pom.xml index 09b89bb0e7a..fe5d7364c66 100644 --- a/kyuubi-server/pom.xml +++ b/kyuubi-server/pom.xml @@ -272,6 +272,11 @@ sqlite-jdbc + + org.postgresql + postgresql + + io.trino trino-client diff --git a/kyuubi-server/src/main/resources/sql/postgresql/metadata-store-schema-1.9.0.postgresql.sql b/kyuubi-server/src/main/resources/sql/postgresql/metadata-store-schema-1.9.0.postgresql.sql new file mode 100644 index 00000000000..4c7ab0dc937 --- /dev/null +++ b/kyuubi-server/src/main/resources/sql/postgresql/metadata-store-schema-1.9.0.postgresql.sql @@ -0,0 +1,59 @@ +CREATE TABLE IF NOT EXISTS metadata( + key_id bigserial PRIMARY KEY, + identifier varchar(36) NOT NULL, + session_type varchar(32) NOT NULL, + real_user varchar(255) NOT NULL, + user_name varchar(255) NOT NULL, + ip_address varchar(128), + kyuubi_instance varchar(1024), + state varchar(128) NOT NULL, + resource varchar(1024), + class_name varchar(1024), + request_name varchar(1024), + request_conf text, + request_args text, + create_time bigint NOT NULL, + engine_type varchar(32) NOT NULL, + cluster_manager varchar(128), + engine_open_time bigint, + engine_id varchar(128), + engine_name text, + engine_url varchar(1024), + engine_state 
varchar(32), + engine_error text, + end_time bigint, + priority int NOT NULL DEFAULT 10, + peer_instance_closed boolean DEFAULT FALSE +); + +COMMENT ON COLUMN metadata.key_id IS 'the auto increment key id'; +COMMENT ON COLUMN metadata.identifier IS 'the identifier id, which is an UUID'; +COMMENT ON COLUMN metadata.session_type IS 'the session type, SQL or BATCH'; +COMMENT ON COLUMN metadata.real_user IS 'the real user'; +COMMENT ON COLUMN metadata.user_name IS 'the user name, might be a proxy user'; +COMMENT ON COLUMN metadata.ip_address IS 'the client ip address'; +COMMENT ON COLUMN metadata.kyuubi_instance IS 'the kyuubi instance that creates this'; +COMMENT ON COLUMN metadata.state IS 'the session state'; +COMMENT ON COLUMN metadata.resource IS 'the main resource'; +COMMENT ON COLUMN metadata.class_name IS 'the main class name'; +COMMENT ON COLUMN metadata.request_name IS 'the request name'; +COMMENT ON COLUMN metadata.request_conf IS 'the request config map'; +COMMENT ON COLUMN metadata.request_args IS 'the request arguments'; +COMMENT ON COLUMN metadata.create_time IS 'the metadata create time'; +COMMENT ON COLUMN metadata.engine_type IS 'the engine type'; +COMMENT ON COLUMN metadata.cluster_manager IS 'the engine cluster manager'; +COMMENT ON COLUMN metadata.engine_open_time IS 'the engine open time'; +COMMENT ON COLUMN metadata.engine_id IS 'the engine application id'; +COMMENT ON COLUMN metadata.engine_name IS 'the engine application name'; +COMMENT ON COLUMN metadata.engine_url IS 'the engine tracking url'; +COMMENT ON COLUMN metadata.engine_state IS 'the engine application state'; +COMMENT ON COLUMN metadata.engine_error IS 'the engine application diagnose'; +COMMENT ON COLUMN metadata.end_time IS 'the metadata end time'; +COMMENT ON COLUMN metadata.priority IS 'the application priority, high value means high priority'; +COMMENT ON COLUMN metadata.peer_instance_closed IS 'closed by peer kyuubi instance'; + +CREATE UNIQUE INDEX unique_identifier_index ON 
metadata(identifier); +CREATE INDEX user_name_index ON metadata(user_name); +CREATE INDEX engine_type_index ON metadata(engine_type); +CREATE INDEX create_time_index ON metadata(create_time); +CREATE INDEX priority_create_time_index ON metadata(priority DESC, create_time ASC); \ No newline at end of file diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/DatabaseType.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/DatabaseType.scala index 67d6686d17e..b5f9dcef7d9 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/DatabaseType.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/DatabaseType.scala @@ -20,5 +20,5 @@ package org.apache.kyuubi.server.metadata.jdbc object DatabaseType extends Enumeration { type DatabaseType = Value - val DERBY, MYSQL, CUSTOM, SQLITE = Value + val DERBY, MYSQL, CUSTOM, SQLITE, POSTGRESQL = Value } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala index 0a6d402296b..1baaa2f6685 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStore.scala @@ -57,6 +57,7 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { case SQLITE => driverClassOpt.getOrElse("org.sqlite.JDBC") case DERBY => driverClassOpt.getOrElse("org.apache.derby.jdbc.AutoloadedDriver") case MYSQL => driverClassOpt.getOrElse(mysqlDriverClass) + case POSTGRESQL => driverClassOpt.getOrElse("org.postgresql.Driver") case CUSTOM => driverClassOpt.getOrElse( throw new IllegalArgumentException("No jdbc driver defined")) } @@ -65,6 +66,7 @@ class JDBCMetadataStore(conf: KyuubiConf) extends MetadataStore with Logging { case DERBY => new DerbyDatabaseDialect case 
SQLITE => new SQLiteDatabaseDialect case MYSQL => new MySQLDatabaseDialect + case POSTGRESQL => new PostgreSQLDatabaseDialect case CUSTOM => new GenericDatabaseDialect } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala index e2b06541ddc..3b29fe18adb 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreConf.scala @@ -56,6 +56,7 @@ object JDBCMetadataStoreConf { "
    • SQLITE: SQLite3, JDBC driver `org.sqlite.JDBC`.
    • " + "
    • MYSQL: MySQL, JDBC driver `com.mysql.cj.jdbc.Driver` " + "(fallback `com.mysql.jdbc.Driver`).
    • " + + "
    • POSTGRESQL: PostgreSQL, JDBC driver `org.postgresql.Driver`.
    • " + "
    • CUSTOM: User-defined database type, need to specify corresponding JDBC driver.
    • " + " Note that: The JDBC datasource is powered by HiKariCP, for datasource properties," + " please specify them with the prefix: kyuubi.metadata.store.jdbc.datasource." + diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JdbcDatabaseDialect.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JdbcDatabaseDialect.scala index 69bd36519e1..221fd13a441 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JdbcDatabaseDialect.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/server/metadata/jdbc/JdbcDatabaseDialect.scala @@ -35,3 +35,4 @@ class GenericDatabaseDialect extends JdbcDatabaseDialect { class SQLiteDatabaseDialect extends GenericDatabaseDialect {} class MySQLDatabaseDialect extends GenericDatabaseDialect {} +class PostgreSQLDatabaseDialect extends GenericDatabaseDialect {} diff --git a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreSuite.scala b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreSuite.scala index 2ee082a1d2b..0da09d6751c 100644 --- a/kyuubi-server/src/test/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreSuite.scala +++ b/kyuubi-server/src/test/scala/org/apache/kyuubi/server/metadata/jdbc/JDBCMetadataStoreSuite.scala @@ -55,6 +55,7 @@ class JDBCMetadataStoreSuite extends KyuubiFunSuite { test("test get init schema stream") { assert(jdbcMetadataStore.getInitSchema(DatabaseType.DERBY).isDefined) assert(jdbcMetadataStore.getInitSchema(DatabaseType.MYSQL).isDefined) + assert(jdbcMetadataStore.getInitSchema(DatabaseType.POSTGRESQL).isDefined) assert(jdbcMetadataStore.getInitSchema(DatabaseType.CUSTOM).isEmpty) }