From bbfb599d094dfd0afffbdb0d4bcf4b4a6822ed0f Mon Sep 17 00:00:00 2001
From: guyulong
Date: Wed, 17 Jul 2024 11:41:12 +0800
Subject: [PATCH 1/9] fix bug [Bug report] spark-connector: run sql 'describe table t1' encounter SparkSQLDriver ERROR #4137

---
 .../datastrato/gravitino/rel/types/Type.java  |  2 ++
 .../datastrato/gravitino/rel/types/Types.java | 29 +++++++++++++++++--
 gradle/wrapper/gradle-wrapper.properties      |  2 +-
 3 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/api/src/main/java/com/datastrato/gravitino/rel/types/Type.java b/api/src/main/java/com/datastrato/gravitino/rel/types/Type.java
index ed49ae1845f..f0929314289 100644
--- a/api/src/main/java/com/datastrato/gravitino/rel/types/Type.java
+++ b/api/src/main/java/com/datastrato/gravitino/rel/types/Type.java
@@ -53,6 +53,8 @@ enum Name {
   TIME,
   /** The timestamp type. */
   TIMESTAMP,
+  /** The timestamp_ntz type. */
+  TimestampNTZType,
   /** The interval year type. */
   INTERVAL_YEAR,
   /** The interval month type. */
diff --git a/api/src/main/java/com/datastrato/gravitino/rel/types/Types.java b/api/src/main/java/com/datastrato/gravitino/rel/types/Types.java
index ca805a304b3..2e36de416e1 100644
--- a/api/src/main/java/com/datastrato/gravitino/rel/types/Types.java
+++ b/api/src/main/java/com/datastrato/gravitino/rel/types/Types.java
@@ -349,7 +349,7 @@ public String simpleString() {

   /** The timestamp type in Gravitino. */
   public static class TimestampType extends Type.DateTimeType {
-    private static final TimestampType INSTANCE_WITHOUT_TIME_ZONE = new TimestampType(false);
+    private static final TimestampNTZType INSTANCE_WITHOUT_TIME_ZONE = new TimestampNTZType(true);
     private static final TimestampType INSTANCE_WITH_TIME_ZONE = new TimestampType(true);

     /** @return A {@link TimestampType} with time zone. */
@@ -358,7 +358,7 @@ public static TimestampType withTimeZone() {
     }

     /** @return A {@link TimestampType} without time zone. */
-    public static TimestampType withoutTimeZone() {
+    public static TimestampNTZType withoutTimeZone() {
       return INSTANCE_WITHOUT_TIME_ZONE;
     }

@@ -385,6 +385,31 @@ public String simpleString() {
     }
   }

+  /** The TimestampNTZType type in Gravitino. */
+  public static class TimestampNTZType extends Type.DateTimeType {
+    private final boolean withoutTimeZone;
+
+    private TimestampNTZType(boolean withoutTimeZone) {
+      this.withoutTimeZone = withoutTimeZone;
+    }
+
+    /** @return True if the timestamp type has time zone, false otherwise. */
+    public boolean hasTimeZone() {
+      return !withoutTimeZone;
+    }
+
+    @Override
+    public Name name() {
+      return Name.TimestampNTZType;
+    }
+
+    /** @return The simple string representation of the timestamp_ntz type. */
+    @Override
+    public String simpleString() {
+      return withoutTimeZone ? "timestamp_ntz" : "timestamp";
+    }
+  }
+
   /** The interval year type in Gravitino.
*/ public static class IntervalYearType extends Type.IntervalType { private static final IntervalYearType INSTANCE = new IntervalYearType(); diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 3f788ef6b57..d489cbf239c 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -20,7 +20,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists # checksum was taken from https://gradle.org/release-checksums distributionSha256Sum=38f66cd6eef217b4c35855bb11ea4e9fbc53594ccccb5fb82dfd317ef8c2c5a3 -distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip +distributionUrl=file://C:/gradle/gradle-8.9-all.zip networkTimeout=10000 zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists From ddc3b3d4b1b217881bb149bdb4cd6428010e9b07 Mon Sep 17 00:00:00 2001 From: guyulong Date: Wed, 17 Jul 2024 11:43:21 +0800 Subject: [PATCH 2/9] rollback --- gradle/wrapper/gradle-wrapper.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index d489cbf239c..3f788ef6b57 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -20,7 +20,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists # checksum was taken from https://gradle.org/release-checksums distributionSha256Sum=38f66cd6eef217b4c35855bb11ea4e9fbc53594ccccb5fb82dfd317ef8c2c5a3 -distributionUrl=file://C:/gradle/gradle-8.9-all.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.2-bin.zip networkTimeout=10000 zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists From 64d7f17c776682718aa1a1c30c733d314602c2d8 Mon Sep 17 00:00:00 2001 From: guyulong Date: Mon, 22 Jul 2024 09:18:54 +0800 Subject: [PATCH 3/9] fix bug [Bug report] spark-connector: run sql 'describe table t1' encounter SparkSQLDriver ERROR #4137 --- api/src/main/java/org/apache/gravitino/rel/types/Type.java | 2 +- api/src/main/java/org/apache/gravitino/rel/types/Types.java | 4 ++-- docs/spark-connector/spark-connector.md | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/api/src/main/java/org/apache/gravitino/rel/types/Type.java b/api/src/main/java/org/apache/gravitino/rel/types/Type.java index a9fdb2048b2..582da73fd88 100644 --- a/api/src/main/java/org/apache/gravitino/rel/types/Type.java +++ b/api/src/main/java/org/apache/gravitino/rel/types/Type.java @@ -54,7 +54,7 @@ enum Name { /** The timestamp type. */ TIMESTAMP, /** The timestamp_ntz type. */ - TimestampNTZType, + TIMESTAMPNTZ, /** The interval year type. */ INTERVAL_YEAR, /** The interval month type. */ diff --git a/api/src/main/java/org/apache/gravitino/rel/types/Types.java b/api/src/main/java/org/apache/gravitino/rel/types/Types.java index 7b45e3c08c0..207dc0195a6 100644 --- a/api/src/main/java/org/apache/gravitino/rel/types/Types.java +++ b/api/src/main/java/org/apache/gravitino/rel/types/Types.java @@ -385,7 +385,7 @@ public String simpleString() { } } - /** The TimestampNTZType type in Gravitino. */ + /** The Timestamp_ntz type in Gravitino. */ public static class TimestampNTZType extends Type.DateTimeType { private final boolean withoutTimeZone; @@ -400,7 +400,7 @@ public boolean hasTimeZone() { @Override public Name name() { - return Name.TimestampNTZType; + return Name.TIMESTAMPNTZ; } /** @return The simple string representation of the timestamp_ntz type. 
*/
diff --git a/docs/spark-connector/spark-connector.md b/docs/spark-connector/spark-connector.md
index 905bacaa82d..ad7fd2933a5 100644
--- a/docs/spark-connector/spark-connector.md
+++ b/docs/spark-connector/spark-connector.md
@@ -92,7 +92,7 @@ Gravitino spark connector support the following datatype mapping between Spark a
 | `CharType` | `char` | 0.5.0 |
 | `VarcharType` | `varchar` | 0.5.0 |
 | `TimestampType` | `timestamp` | 0.5.0 |
-| `TimestampType` | `timestamp` | 0.5.0 |
+| `TimestampNTZType` | `timestampntz` | 0.5.0 |
 | `DateType` | `date` | 0.5.0 |
 | `BinaryType` | `binary` | 0.5.0 |
 | `ArrayType` | `array` | 0.5.0 |

From 5fec7ba0063b14e0854221b79189067ceb081663 Mon Sep 17 00:00:00 2001
From: guyulong
Date: Tue, 23 Jul 2024 09:33:39 +0800
Subject: [PATCH 4/9] fix bug [Bug report] spark-connector: run sql 'describe table t1' encounter SparkSQLDriver ERROR #4137

---
 .../org/apache/gravitino/rel/types/Type.java  |  2 -
 .../org/apache/gravitino/rel/types/Types.java | 29 +-------------
 docs/spark-connector/spark-connector.md       | 38 +++++++++----------
 3 files changed, 21 insertions(+), 48 deletions(-)

diff --git a/api/src/main/java/org/apache/gravitino/rel/types/Type.java b/api/src/main/java/org/apache/gravitino/rel/types/Type.java
index 582da73fd88..bfb7408dd90 100644
--- a/api/src/main/java/org/apache/gravitino/rel/types/Type.java
+++ b/api/src/main/java/org/apache/gravitino/rel/types/Type.java
@@ -53,8 +53,6 @@ enum Name {
   TIME,
   /** The timestamp type. */
   TIMESTAMP,
-  /** The timestamp_ntz type. */
-  TIMESTAMPNTZ,
   /** The interval year type. */
   INTERVAL_YEAR,
   /** The interval month type. */
diff --git a/api/src/main/java/org/apache/gravitino/rel/types/Types.java b/api/src/main/java/org/apache/gravitino/rel/types/Types.java
index 207dc0195a6..2355f7ebfe3 100644
--- a/api/src/main/java/org/apache/gravitino/rel/types/Types.java
+++ b/api/src/main/java/org/apache/gravitino/rel/types/Types.java
@@ -349,7 +349,7 @@ public String simpleString() {

   /** The timestamp type in Gravitino. */
   public static class TimestampType extends Type.DateTimeType {
-    private static final TimestampNTZType INSTANCE_WITHOUT_TIME_ZONE = new TimestampNTZType(true);
+    private static final TimestampType INSTANCE_WITHOUT_TIME_ZONE = new TimestampType(false);
     private static final TimestampType INSTANCE_WITH_TIME_ZONE = new TimestampType(true);

     /** @return A {@link TimestampType} with time zone. */
@@ -358,7 +358,7 @@ public static TimestampType withTimeZone() {
     }

     /** @return A {@link TimestampType} without time zone. */
-    public static TimestampNTZType withoutTimeZone() {
+    public static TimestampType withoutTimeZone() {
       return INSTANCE_WITHOUT_TIME_ZONE;
     }

@@ -385,31 +385,6 @@ public String simpleString() {
     }
   }

-  /** The Timestamp_ntz type in Gravitino. */
-  public static class TimestampNTZType extends Type.DateTimeType {
-    private final boolean withoutTimeZone;
-
-    private TimestampNTZType(boolean withoutTimeZone) {
-      this.withoutTimeZone = withoutTimeZone;
-    }
-
-    /** @return True if the timestamp type has time zone, false otherwise. */
-    public boolean hasTimeZone() {
-      return !withoutTimeZone;
-    }
-
-    @Override
-    public Name name() {
-      return Name.TIMESTAMPNTZ;
-    }
-
-    /** @return The simple string representation of the timestamp_ntz type. */
-    @Override
-    public String simpleString() {
-      return withoutTimeZone ? "timestamp_ntz" : "timestamp";
-    }
-  }
-
   /** The interval year type in Gravitino.
*/
 public static class IntervalYearType extends Type.IntervalType {
   private static final IntervalYearType INSTANCE = new IntervalYearType();
diff --git a/docs/spark-connector/spark-connector.md b/docs/spark-connector/spark-connector.md
index ad7fd2933a5..ca9192dccef 100644
--- a/docs/spark-connector/spark-connector.md
+++ b/docs/spark-connector/spark-connector.md
@@ -78,23 +78,23 @@ The command `SHOW CATALOGS` will only display the Spark default catalog, named s

 Gravitino spark connector support the following datatype mapping between Spark and Gravitino.

-| Spark Data Type | Gravitino Data Type | Since Version |
-|-----------------|---------------------|---------------|
-| `BooleanType` | `boolean` | 0.5.0 |
-| `ByteType` | `byte` | 0.5.0 |
-| `ShortType` | `short` | 0.5.0 |
-| `IntegerType` | `integer` | 0.5.0 |
-| `LongType` | `long` | 0.5.0 |
-| `FloatType` | `float` | 0.5.0 |
-| `DoubleType` | `double` | 0.5.0 |
-| `DecimalType` | `decimal` | 0.5.0 |
-| `StringType` | `string` | 0.5.0 |
-| `CharType` | `char` | 0.5.0 |
-| `VarcharType` | `varchar` | 0.5.0 |
-| `TimestampType` | `timestamp` | 0.5.0 |
+| Spark Data Type    | Gravitino Data Type | Since Version |
+|--------------------|---------------------|---------------|
+| `BooleanType`      | `boolean`           | 0.5.0         |
+| `ByteType`         | `byte`              | 0.5.0         |
+| `ShortType`        | `short`             | 0.5.0         |
+| `IntegerType`      | `integer`           | 0.5.0         |
+| `LongType`         | `long`              | 0.5.0         |
+| `FloatType`        | `float`             | 0.5.0         |
+| `DoubleType`       | `double`            | 0.5.0         |
+| `DecimalType`      | `decimal`           | 0.5.0         |
+| `StringType`       | `string`            | 0.5.0         |
+| `CharType`         | `char`              | 0.5.0         |
+| `VarcharType`      | `varchar`           | 0.5.0         |
+| `TimestampType`    | `timestamp`         | 0.5.0         |
 | `TimestampNTZType` | `timestampntz` | 0.5.0 |
-| `DateType` | `date` | 0.5.0 |
-| `BinaryType` | `binary` | 0.5.0 |
-| `ArrayType` | `array` | 0.5.0 |
-| `MapType` | `map` | 0.5.0 |
-| `StructType` | `struct` | 0.5.0 |
+| `DateType`         | `date`              | 0.5.0         |
+| `BinaryType`       | `binary`            | 0.5.0         |
+| `ArrayType`        | `array`             | 0.5.0         |
+| `MapType`          | `map`               | 0.5.0         |
+| `StructType`       | `struct`            | 0.5.0         |

From c38663542b0323949288ca5c71c9e974165ddf56 Mon Sep 17 00:00:00 2001
From: guyulong
Date: Sat, 27 Jul 2024 09:22:58 +0800
Subject: [PATCH 5/9] modify Spark and Gravitino data type mapping

---
 docs/spark-connector/spark-connector.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/spark-connector/spark-connector.md b/docs/spark-connector/spark-connector.md
index ca9192dccef..d4e9db3135f 100644
--- a/docs/spark-connector/spark-connector.md
+++ b/docs/spark-connector/spark-connector.md
@@ -92,7 +92,7 @@ Gravitino spark connector support the following datatype mapping between Spark a
 | `CharType` | `char` | 0.5.0 |
 | `VarcharType` | `varchar` | 0.5.0 |
 | `TimestampType` | `timestamp` | 0.5.0 |
-| `TimestampNTZType` | `timestampntz` | 0.5.0 |
+| `TimestampNTZType` | `timestamp` | 0.5.0 |
 | `DateType` | `date` | 0.5.0 |
 | `BinaryType` | `binary` | 0.5.0 |
 | `ArrayType` | `array` | 0.5.0 |

From 00b463908968e4774e3041be68ddfc1f0da9edff Mon Sep 17 00:00:00 2001
From: guyulong
Date: Mon, 29 Jul 2024 14:28:12 +0800
Subject: [PATCH 6/9] modify Spark and Gravitino data type mapping

---
 docs/spark-connector/spark-connector.md | 40 ++++++++++++-------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/docs/spark-connector/spark-connector.md b/docs/spark-connector/spark-connector.md
index d4e9db3135f..49ec2748c34 100644
--- a/docs/spark-connector/spark-connector.md
+++ b/docs/spark-connector/spark-connector.md
@@ -78,23
+78,23 @@ The command `SHOW CATALOGS` will only display the Spark default catalog, named s Gravitino spark connector support the following datatype mapping between Spark and Gravitino. -| Spark Data Type | Gravitino Data Type | Since Version | -|--------------------|---------------------|---------------| -| `BooleanType` | `boolean` | 0.5.0 | -| `ByteType` | `byte` | 0.5.0 | -| `ShortType` | `short` | 0.5.0 | -| `IntegerType` | `integer` | 0.5.0 | -| `LongType` | `long` | 0.5.0 | -| `FloatType` | `float` | 0.5.0 | -| `DoubleType` | `double` | 0.5.0 | -| `DecimalType` | `decimal` | 0.5.0 | -| `StringType` | `string` | 0.5.0 | -| `CharType` | `char` | 0.5.0 | -| `VarcharType` | `varchar` | 0.5.0 | -| `TimestampType` | `timestamp` | 0.5.0 | -| `TimestampNTZType` | `timestamp` | 0.5.0 | -| `DateType` | `date` | 0.5.0 | -| `BinaryType` | `binary` | 0.5.0 | -| `ArrayType` | `array` | 0.5.0 | -| `MapType` | `map` | 0.5.0 | -| `StructType` | `struct` | 0.5.0 | +| Spark Data Type | Gravitino Data Type | Since Version | +|--------------------|-------------------------------|---------------| +| `BooleanType` | `boolean` | 0.5.0 | +| `ByteType` | `byte` | 0.5.0 | +| `ShortType` | `short` | 0.5.0 | +| `IntegerType` | `integer` | 0.5.0 | +| `LongType` | `long` | 0.5.0 | +| `FloatType` | `float` | 0.5.0 | +| `DoubleType` | `double` | 0.5.0 | +| `DecimalType` | `decimal` | 0.5.0 | +| `StringType` | `string` | 0.5.0 | +| `CharType` | `char` | 0.5.0 | +| `VarcharType` | `varchar` | 0.5.0 | +| `TimestampType` | `timestamp with time zone` | 0.5.0 | +| `TimestampNTZType` | `timestamp without time zone` | 0.5.0 | +| `DateType` | `date` | 0.5.0 | +| `BinaryType` | `binary` | 0.5.0 | +| `ArrayType` | `array` | 0.5.0 | +| `MapType` | `map` | 0.5.0 | +| `StructType` | `struct` | 0.5.0 | From 8a38dba972a99a806e8a8de62ef1c3ae5b10188e Mon Sep 17 00:00:00 2001 From: guyulong Date: Sat, 28 Sep 2024 10:00:14 +0800 Subject: [PATCH 7/9] Fix the data type mapping in some catalog doc --- docs/apache-hive-catalog.md | 4 +-- docs/lakehouse-iceberg-catalog.md | 4 +-- docs/lakehouse-paimon-catalog.md | 44 +++++++++++++++---------------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/docs/apache-hive-catalog.md b/docs/apache-hive-catalog.md index 6197c476cb8..cc5c01fecb0 100644 --- a/docs/apache-hive-catalog.md +++ b/docs/apache-hive-catalog.md @@ -119,10 +119,10 @@ The following table lists the data types mapped from the Hive catalog to Graviti | `interval_year_month` | `interval_year` | 0.2.0 | | `interval_day_time` | `interval_day` | 0.2.0 | | `binary` | `binary` | 0.2.0 | -| `array` | `array` | 0.2.0 | +| `array` | `list` | 0.2.0 | | `map` | `map` | 0.2.0 | | `struct` | `struct` | 0.2.0 | -| `uniontype` | `uniontype` | 0.2.0 | +| `uniontype` | `union` | 0.2.0 | :::info Since 0.6.0, the data types other than listed above are mapped to Gravitino **[External Type](./manage-relational-metadata-using-gravitino.md#external-type)** that represents an unresolvable data type from the Hive catalog. diff --git a/docs/lakehouse-iceberg-catalog.md b/docs/lakehouse-iceberg-catalog.md index 7552e0f195a..1f77eb9eddd 100644 --- a/docs/lakehouse-iceberg-catalog.md +++ b/docs/lakehouse-iceberg-catalog.md @@ -287,7 +287,7 @@ Apache Iceberg doesn't support Gravitino `EvenDistribution` type. 
|-----------------------------|-----------------------------| | `Struct` | `Struct` | | `Map` | `Map` | -| `Array` | `Array` | +| `List` | `Array` | | `Boolean` | `Boolean` | | `Integer` | `Integer` | | `Long` | `Long` | @@ -300,7 +300,7 @@ Apache Iceberg doesn't support Gravitino `EvenDistribution` type. | `TimestampType withoutZone` | `TimestampType withoutZone` | | `Decimal` | `Decimal` | | `Fixed` | `Fixed` | -| `BinaryType` | `Binary` | +| `Binary` | `Binary` | | `UUID` | `UUID` | :::info diff --git a/docs/lakehouse-paimon-catalog.md b/docs/lakehouse-paimon-catalog.md index 6eabd3e8fcd..fa1e04523a4 100644 --- a/docs/lakehouse-paimon-catalog.md +++ b/docs/lakehouse-paimon-catalog.md @@ -127,28 +127,28 @@ Paimon Table primary key constraint should not be same with partition fields, th ### Table column types -| Gravitino Type | Apache Paimon Type | -|-------------------------------|--------------------------------| -| `Sturct` | `Row` | -| `Map` | `Map` | -| `Array` | `Array` | -| `Boolean` | `Boolean` | -| `Byte` | `TinyInt` | -| `Short` | `SmallInt` | -| `Integer` | `Int` | -| `Long` | `BigInt` | -| `Float` | `Float` | -| `Double` | `Double` | -| `Decimal` | `Decimal` | -| `String` | `VarChar(Integer.MAX_VALUE)` | -| `VarChar` | `VarChar` | -| `FixedChar` | `Char` | -| `Date` | `Date` | -| `Time` | `Time` | -| `TimestampType withZone` | `LocalZonedTimestamp` | -| `TimestampType withoutZone` | `Timestamp` | -| `Binary` | `Binary` | -| `Fixed` | `VarBinary` | +| Gravitino Type | Apache Paimon Type | +|-----------------------------|--------------------------------| +| `Struct` | `Row` | +| `Map` | `Map` | +| `List` | `Array` | +| `Boolean` | `Boolean` | +| `Byte` | `TinyInt` | +| `Short` | `SmallInt` | +| `Integer` | `Int` | +| `Long` | `BigInt` | +| `Float` | `Float` | +| `Double` | `Double` | +| `Decimal` | `Decimal` | +| `String` | `VarChar(Integer.MAX_VALUE)` | +| `VarChar` | `VarChar` | +| `FixedChar` | `Char` | +| `Date` | `Date` | +| `Time` | `Time` | +| `TimestampType withZone` | `LocalZonedTimestamp` | +| `TimestampType withoutZone` | `Timestamp` | +| `Binary` | `Binary` | +| `Fixed` | `VarBinary` | :::info Gravitino doesn't support Paimon `MultisetType` type. From bc61489bbef6bf323add329fb95fa94464f323e2 Mon Sep 17 00:00:00 2001 From: guyulong Date: Wed, 2 Oct 2024 19:19:37 +0800 Subject: [PATCH 8/9] [Improvement] GravitinoClient needs metalake to obtain version --- .../gravitino/client/GravitinoClient.java | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoClient.java b/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoClient.java index a8a46ff8f47..9f3edb5bc59 100644 --- a/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoClient.java +++ b/clients/client-java/src/main/java/org/apache/gravitino/client/GravitinoClient.java @@ -59,7 +59,7 @@ public class GravitinoClient extends GravitinoClientBase implements SupportsCatalogs, TagOperations { - private final GravitinoMetalake metalake; + private static GravitinoMetalake metalake = null; /** * Constructs a new GravitinoClient with the given URI, authenticator and AuthDataProvider. @@ -82,6 +82,24 @@ private GravitinoClient( this.metalake = loadMetalake(metalakeName); } + /** + * Constructs a new GravitinoClient with the given URI, authenticator and AuthDataProvider. + * + * @param uri The base URI for the Gravitino API. 
+   * @param authDataProvider The provider of the data which is used for authentication.
+   * @param checkVersion Whether to check the version of the Gravitino server. Gravitino does not
+   *     support the case that the client-side version is higher than the server-side version.
+   * @param headers The base header for Gravitino API.
+   * @throws NoSuchMetalakeException if the metalake with specified name does not exist.
+   */
+  private GravitinoClient(
+      String uri,
+      AuthDataProvider authDataProvider,
+      boolean checkVersion,
+      Map<String, String> headers) {
+    super(uri, authDataProvider, checkVersion, headers);
+  }
+
   /**
    * Get the current metalake object
    *
@@ -494,7 +512,7 @@ public GravitinoClient build() {
         metalakeName != null && !metalakeName.isEmpty(),
         "The argument 'metalakeName' must be a valid name");

-    return new GravitinoClient(uri, metalakeName, authDataProvider, checkVersion, headers);
+    return new GravitinoClient(uri, authDataProvider, checkVersion, headers);
   }
 }
}

From aaadf9ce6ccebf0bfe3d5cdf17e3f8c4cb594bc8 Mon Sep 17 00:00:00 2001
From: guyulong
Date: Sat, 19 Oct 2024 09:31:43 +0800
Subject: [PATCH 9/9] [Improvement] use incubating version in docs#5114

---
 docs/apache-hive-catalog.md                    | 28 +++----
 docs/flink-connector/flink-catalog-hive.md     | 14 ++--
 docs/flink-connector/flink-connector.md        | 60 +++++++--------
 docs/hadoop-catalog.md                         | 12 +--
 docs/how-to-use-gvfs.md                        | 23 +++---
 docs/iceberg-rest-service.md                   | 77 +++++++++----------
 docs/security/authorization-pushdown.md        | 16 ++--
 docs/security/how-to-authenticate.md           | 24 +++---
 docs/spark-connector/spark-catalog-iceberg.md  | 22 +++---
 docs/spark-connector/spark-integration-test.md | 18 ++---
 10 files changed, 146 insertions(+), 148 deletions(-)

diff --git a/docs/apache-hive-catalog.md b/docs/apache-hive-catalog.md
index 53659355da5..732183b3d34 100644
--- a/docs/apache-hive-catalog.md
+++ b/docs/apache-hive-catalog.md
@@ -133,25 +133,25 @@ Since 0.6.0-incubating, the data types other than listed above are mapped to Gra

Table properties supply or set metadata for the underlying Hive tables. The following table lists predefined table properties for a Hive table. Additionally, you can define your own key-value pair properties and transmit them to the underlying Hive database.

| Property Name | Description | Default Value | Required | Since version |
|--------------------|--------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------|----------|---------------|
| `location` | The location for table storage, such as `/user/hive/warehouse/test_table`. | HMS uses the database location as the parent directory by default. | No | 0.2.0 |
| `table-type` | Type of the table. Valid values include `MANAGED_TABLE` and `EXTERNAL_TABLE`. | `MANAGED_TABLE` | No | 0.2.0 |
| `format` | The table file format. Valid values include `TEXTFILE`, `SEQUENCEFILE`, `RCFILE`, `ORC`, `PARQUET`, `AVRO`, `JSON`, `CSV`, and `REGEX`. | `TEXTFILE` | No | 0.2.0 |
| `input-format` | The input format class for the table, such as `org.apache.hadoop.hive.ql.io.orc.OrcInputFormat`. | The property `format` sets the default value `org.apache.hadoop.mapred.TextInputFormat` and can change it to a different default. | No | 0.2.0 |
| `output-format` | The output format class for the table, such as `org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat`.
| The property `format` sets the default value `org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat` and can change it to a different default. | No | 0.2.0 | -| `serde-lib` | The serde library class for the table, such as `org.apache.hadoop.hive.ql.io.orc.OrcSerde`. | The property `format` sets the default value `org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe` and can change it to a different default. | No | 0.2.0 | +| Property Name | Description | Default Value | Required | Since version | +|--------------------|--------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------|----------|---------------| +| `location` | The location for table storage, such as `/user/hive/warehouse/test_table`. | HMS uses the database location as the parent directory by default. | No | 0.2.0 | +| `table-type` | Type of the table. Valid values include `MANAGED_TABLE` and `EXTERNAL_TABLE`. | `MANAGED_TABLE` | No | 0.2.0 | +| `format` | The table file format. Valid values include `TEXTFILE`, `SEQUENCEFILE`, `RCFILE`, `ORC`, `PARQUET`, `AVRO`, `JSON`, `CSV`, and `REGEX`. | `TEXTFILE` | No | 0.2.0 | +| `input-format` | The input format class for the table, such as `org.apache.hadoop.hive.ql.io.orc.OrcInputFormat`. | The property `format` sets the default value `org.apache.hadoop.mapred.TextInputFormat` and can change it to a different default. | No | 0.2.0 | +| `output-format` | The output format class for the table, such as `org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat`. | The property `format` sets the default value `org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat` and can change it to a different default. | No | 0.2.0 | +| `serde-lib` | The serde library class for the table, such as `org.apache.hadoop.hive.ql.io.orc.OrcSerde`. | The property `format` sets the default value `org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe` and can change it to a different default. | No | 0.2.0 | | `serde.parameter.` | The prefix of the serde parameter, such as `"serde.parameter.orc.create.index" = "true"`, indicating `ORC` serde lib to create row indexes | (none) | No | 0.2.0 | Hive automatically adds and manages some reserved properties. Users aren't allowed to set these properties. -| Property Name | Description | Since Version | -|-------------------------|---------------------------------------------------|---------------| +| Property Name | Description | Since Version | +|-------------------------|-------------------------------------------------|---------------| | `comment` | Used to store a table comment. | 0.2.0 | -| `numFiles` | Used to store the number of files in the table. | 0.2.0 | -| `totalSize` | Used to store the total size of the table. | 0.2.0 | -| `EXTERNAL` | Indicates whether the table is external. | 0.2.0 | -| `transient_lastDdlTime` | Used to store the last DDL time of the table. | 0.2.0 | +| `numFiles` | Used to store the number of files in the table. | 0.2.0 | +| `totalSize` | Used to store the total size of the table. | 0.2.0 | +| `EXTERNAL` | Indicates whether the table is external. | 0.2.0 | +| `transient_lastDdlTime` | Used to store the last DDL time of the table. 
| 0.2.0 | ### Table indexes diff --git a/docs/flink-connector/flink-catalog-hive.md b/docs/flink-connector/flink-catalog-hive.md index 136dac3ed20..bbf0d5e9c3c 100644 --- a/docs/flink-connector/flink-catalog-hive.md +++ b/docs/flink-connector/flink-catalog-hive.md @@ -59,13 +59,13 @@ The configuration of Flink Hive Connector is the same with the original Flink Hi Gravitino catalog property names with the prefix `flink.bypass.` are passed to Flink Hive connector. For example, using `flink.bypass.hive-conf-dir` to pass the `hive-conf-dir` to the Flink Hive connector. The validated catalog properties are listed below. Any other properties with the prefix `flink.bypass.` in Gravitino Catalog will be ignored by Gravitino Flink Connector. -| Property name in Gravitino catalog properties | Flink Hive connector configuration | Description | Since Version | -|-----------------------------------------------|------------------------------------|-----------------------|---------------| -| `flink.bypass.default-database` | `default-database` | Hive default database | 0.6.0 | -| `flink.bypass.hive-conf-dir` | `hive-conf-dir` | Hive conf dir | 0.6.0 | -| `flink.bypass.hive-version` | `hive-version` | Hive version | 0.6.0 | -| `flink.bypass.hadoop-conf-dir` | `hadoop-conf-dir` | Hadoop conf dir | 0.6.0 | -| `metastore.uris` | `hive.metastore.uris` | Hive metastore uri | 0.6.0 | +| Property name in Gravitino catalog properties | Flink Hive connector configuration | Description | Since Version | +|-----------------------------------------------|------------------------------------|-----------------------|------------------| +| `flink.bypass.default-database` | `default-database` | Hive default database | 0.6.0-incubating | +| `flink.bypass.hive-conf-dir` | `hive-conf-dir` | Hive conf dir | 0.6.0-incubating | +| `flink.bypass.hive-version` | `hive-version` | Hive version | 0.6.0-incubating | +| `flink.bypass.hadoop-conf-dir` | `hadoop-conf-dir` | Hadoop conf dir | 0.6.0-incubating | +| `metastore.uris` | `hive.metastore.uris` | Hive metastore uri | 0.6.0-incubating | :::caution You can set other hadoop properties (with the prefix `hadoop.`, `dfs.`, `fs.`, `hive.`) in Gravitino Catalog properties. If so, it will override diff --git a/docs/flink-connector/flink-connector.md b/docs/flink-connector/flink-connector.md index 639dd0d682a..b6a25d92125 100644 --- a/docs/flink-connector/flink-connector.md +++ b/docs/flink-connector/flink-connector.md @@ -26,11 +26,11 @@ This capability allows users to perform federation queries, accessing data from 1. [Build](../how-to-build.md) or [download](https://mvnrepository.com/artifact/org.apache.gravitino/gravitino-flink-connector-runtime-1.18) the Gravitino flink connector runtime jar, and place it to the classpath of Flink. 2. Configure the Flink configuration to use the Gravitino flink connector. -| Property | Type | Default Value | Description | Required | Since Version | -|--------------------------------------------------|--------|-------------------|----------------------------------------------------------------------|----------|---------------| -| table.catalog-store.kind | string | generic_in_memory | The Catalog Store name, it should set to `gravitino`. | Yes | 0.6.0 | -| table.catalog-store.gravitino.gravitino.metalake | string | (none) | The metalake name that flink connector used to request to Gravitino. | Yes | 0.6.0 | -| table.catalog-store.gravitino.gravitino.uri | string | (none) | The uri of Gravitino server address. 
| Yes | 0.6.0 | +| Property | Type | Default Value | Description | Required | Since Version | +|--------------------------------------------------|--------|-------------------|----------------------------------------------------------------------|----------|------------------| +| table.catalog-store.kind | string | generic_in_memory | The Catalog Store name, it should set to `gravitino`. | Yes | 0.6.0-incubating | +| table.catalog-store.gravitino.gravitino.metalake | string | (none) | The metalake name that flink connector used to request to Gravitino. | Yes | 0.6.0-incubating | +| table.catalog-store.gravitino.gravitino.uri | string | (none) | The uri of Gravitino server address. | Yes | 0.6.0-incubating | Set the flink configuration in flink-conf.yaml. ```yaml @@ -66,28 +66,28 @@ SELECT * FROM hive_students; Gravitino flink connector support the following datatype mapping between Flink and Gravitino. -| Flink Type | Gravitino Type | Since Version | -|----------------------------------|-------------------------------|---------------| -| `array` | `array` | 0.6.0 | -| `bigint` | `long` | 0.6.0 | -| `binary` | `fixed` | 0.6.0 | -| `boolean` | `boolean` | 0.6.0 | -| `char` | `char` | 0.6.0 | -| `date` | `date` | 0.6.0 | -| `decimal` | `decimal` | 0.6.0 | -| `double` | `double` | 0.6.0 | -| `float` | `float` | 0.6.0 | -| `integer` | `integer` | 0.6.0 | -| `map` | `map` | 0.6.0 | -| `null` | `null` | 0.6.0 | -| `row` | `struct` | 0.6.0 | -| `smallint` | `short` | 0.6.0 | -| `time` | `time` | 0.6.0 | -| `timestamp` | `timestamp without time zone` | 0.6.0 | -| `timestamp without time zone` | `timestamp without time zone` | 0.6.0 | -| `timestamp with time zone` | `timestamp with time zone` | 0.6.0 | -| `timestamp with local time zone` | `timestamp with time zone` | 0.6.0 | -| `timestamp_ltz` | `timestamp with time zone` | 0.6.0 | -| `tinyint` | `byte` | 0.6.0 | -| `varbinary` | `binary` | 0.6.0 | -| `varchar` | `string` | 0.6.0 | +| Flink Type | Gravitino Type | Since Version | +|----------------------------------|-------------------------------|------------------| +| `array` | `list` | 0.6.0-incubating | +| `bigint` | `long` | 0.6.0-incubating | +| `binary` | `fixed` | 0.6.0-incubating | +| `boolean` | `boolean` | 0.6.0-incubating | +| `char` | `char` | 0.6.0-incubating | +| `date` | `date` | 0.6.0-incubating | +| `decimal` | `decimal` | 0.6.0-incubating | +| `double` | `double` | 0.6.0-incubating | +| `float` | `float` | 0.6.0-incubating | +| `integer` | `integer` | 0.6.0-incubating | +| `map` | `map` | 0.6.0-incubating | +| `null` | `null` | 0.6.0-incubating | +| `row` | `struct` | 0.6.0-incubating | +| `smallint` | `short` | 0.6.0-incubating | +| `time` | `time` | 0.6.0-incubating | +| `timestamp` | `timestamp without time zone` | 0.6.0-incubating | +| `timestamp without time zone` | `timestamp without time zone` | 0.6.0-incubating | +| `timestamp with time zone` | `timestamp with time zone` | 0.6.0-incubating | +| `timestamp with local time zone` | `timestamp with time zone` | 0.6.0-incubating | +| `timestamp_ltz` | `timestamp with time zone` | 0.6.0-incubating | +| `tinyint` | `byte` | 0.6.0-incubating | +| `varbinary` | `binary` | 0.6.0-incubating | +| `varchar` | `string` | 0.6.0-incubating | diff --git a/docs/hadoop-catalog.md b/docs/hadoop-catalog.md index d28e6d93b04..57c17b0eec5 100644 --- a/docs/hadoop-catalog.md +++ b/docs/hadoop-catalog.md @@ -82,12 +82,12 @@ Refer to [Schema operation](./manage-fileset-metadata-using-gravitino.md#schema- ### Fileset properties -| Property name | 
Description | Default value | Required | Since Version |
|---------------------------------------|--------------------------------------------------------------------------------------------------------|--------------------------|----------|------------------|
| `authentication.impersonation-enable` | Whether to enable impersonation for the Hadoop catalog fileset. | The parent(schema) value | No | 0.6.0-incubating |
| `authentication.type` | The type of authentication for Hadoop catalog fileset, currently we only support `kerberos`, `simple`. | The parent(schema) value | No | 0.6.0-incubating |
| `authentication.kerberos.principal` | The principal of the Kerberos authentication for the fileset. | The parent(schema) value | No | 0.6.0-incubating |
| `authentication.kerberos.keytab-uri` | The URI of the keytab for the Kerberos authentication for the fileset. | The parent(schema) value | No | 0.6.0-incubating |

### Fileset operations

diff --git a/docs/how-to-use-gvfs.md b/docs/how-to-use-gvfs.md
index 7a98271d41c..5cece06a173 100644
--- a/docs/how-to-use-gvfs.md
+++ b/docs/how-to-use-gvfs.md
@@ -335,18 +335,17 @@ to recompile the native libraries like `libhdfs` and others, and completely repl

### Configuration

| Configuration item | Description | Default value | Required | Since version |
|----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|-----------------------------------|------------------|
| `server_uri` | The Gravitino server uri, e.g. `http://localhost:8090`. | (none) | Yes | 0.6.0-incubating |
| `metalake_name` | The metalake name which the fileset belongs to. | (none) | Yes | 0.6.0-incubating |
| `cache_size` | The cache capacity of the Gravitino Virtual File System. | `20` | No | 0.6.0-incubating |
| `cache_expired_time` | The value of time that the cache expires after accessing in the Gravitino Virtual File System. The value is in `seconds`. | `3600` | No | 0.6.0-incubating |
| `auth_type` | The auth type to initialize the Gravitino client to use with the Gravitino Virtual File System. Currently supports `simple` and `oauth2` auth types. | `simple` | No | 0.6.0-incubating |
| `oauth2_server_uri` | The auth server URI for the Gravitino client when using `oauth2` auth type. | (none) | Yes if you use `oauth2` auth type | 0.7.0-incubating |
| `oauth2_credential` | The auth credential for the Gravitino client when using `oauth2` auth type. | (none) | Yes if you use `oauth2` auth type | 0.7.0-incubating |
| `oauth2_path` | The auth server path for the Gravitino client when using `oauth2` auth type. Please remove the first slash `/` from the path, for example `oauth/token`. | (none) | Yes if you use `oauth2` auth type | 0.7.0-incubating |
| `oauth2_scope` | The auth scope for the Gravitino client when using `oauth2` auth type with the Gravitino Virtual File System. | (none) | Yes if you use `oauth2` auth type | 0.7.0-incubating |

You can configure these properties when obtaining the `Gravitino Virtual FileSystem` in Python like this:

diff --git a/docs/iceberg-rest-service.md b/docs/iceberg-rest-service.md
index 4217350dac9..7a1cf3d6b31 100644
--- a/docs/iceberg-rest-service.md
+++ b/docs/iceberg-rest-service.md
@@ -102,13 +102,13 @@ The detailed configuration items are as follows:

Gravitino Iceberg REST service supports using static access-key-id and secret-access-key to access S3 data.

| Configuration item | Description | Default value | Required | Since Version |
|-----------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|------------------|
| `gravitino.iceberg-rest.io-impl` | The IO implementation for `FileIO` in Iceberg, use `org.apache.iceberg.aws.s3.S3FileIO` for S3. | (none) | No | 0.6.0-incubating |
| `gravitino.iceberg-rest.s3-access-key-id` | The static access key ID used to access S3 data.
| (none) | No | 0.6.0 | -| `gravitino.iceberg-rest.s3-secret-access-key` | The static secret access key used to access S3 data. | (none) | No | 0.6.0 | -| `gravitino.iceberg-rest.s3-endpoint` | An alternative endpoint of the S3 service, This could be used for S3FileIO with any s3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud. | (none) | No | 0.6.0 | -| `gravitino.iceberg-rest.s3-region` | The region of the S3 service, like `us-west-2`. | (none) | No | 0.6.0 | +| Configuration item | Description | Default value | Required | Since Version | +|-----------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|------------------| +| `gravitino.iceberg-rest.io-impl` | The IO implementation for `FileIO` in Iceberg, use `org.apache.iceberg.aws.s3.S3FileIO` for S3. | (none) | No | 0.6.0-incubating | +| `gravitino.iceberg-rest.s3-access-key-id` | The static access key ID used to access S3 data. | (none) | No | 0.6.0-incubating | +| `gravitino.iceberg-rest.s3-secret-access-key` | The static secret access key used to access S3 data. | (none) | No | 0.6.0-incubating | +| `gravitino.iceberg-rest.s3-endpoint` | An alternative endpoint of the S3 service, This could be used for S3FileIO with any s3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud. | (none) | No | 0.6.0-incubating | +| `gravitino.iceberg-rest.s3-region` | The region of the S3 service, like `us-west-2`. | (none) | No | 0.6.0-incubating | For other Iceberg s3 properties not managed by Gravitino like `s3.sse.type`, you could config it directly by `gravitino.iceberg-rest.s3.sse.type`. @@ -120,12 +120,12 @@ To configure the JDBC catalog backend, set the `gravitino.iceberg-rest.warehouse Gravitino Iceberg REST service supports using static access-key-id and secret-access-key to access OSS data. -| Configuration item | Description | Default value | Required | Since Version | -|------------------------------------------------|-------------------------------------------------------------------------------------------------------|---------------|----------|---------------| -| `gravitino.iceberg-rest.io-impl` | The IO implementation for `FileIO` in Iceberg, use `org.apache.iceberg.aliyun.oss.OSSFileIO` for OSS. | (none) | No | 0.6.0 | -| `gravitino.iceberg-rest.oss-access-key-id` | The static access key ID used to access OSS data. | (none) | No | 0.7.0 | -| `gravitino.iceberg-rest.oss-secret-access-key` | The static secret access key used to access OSS data. | (none) | No | 0.7.0 | -| `gravitino.iceberg-rest.oss-endpoint` | The endpoint of Aliyun OSS service. | (none) | No | 0.7.0 | +| Configuration item | Description | Default value | Required | Since Version | +|------------------------------------------------|-------------------------------------------------------------------------------------------------------|---------------|----------|------------------| +| `gravitino.iceberg-rest.io-impl` | The IO implementation for `FileIO` in Iceberg, use `org.apache.iceberg.aliyun.oss.OSSFileIO` for OSS. | (none) | No | 0.6.0-incubating | +| `gravitino.iceberg-rest.oss-access-key-id` | The static access key ID used to access OSS data. 
| (none) | No | 0.7.0-incubating | +| `gravitino.iceberg-rest.oss-secret-access-key` | The static secret access key used to access OSS data. | (none) | No | 0.7.0-incubating | +| `gravitino.iceberg-rest.oss-endpoint` | The endpoint of Aliyun OSS service. | (none) | No | 0.7.0-incubating | For other Iceberg OSS properties not managed by Gravitino like `client.security-token`, you could config it directly by `gravitino.iceberg-rest.client.security-token`. @@ -137,9 +137,9 @@ Please set the `gravitino.iceberg-rest.warehouse` parameter to `oss://{bucket_na Supports using google credential file to access GCS data. -| Configuration item | Description | Default value | Required | Since Version | -|----------------------------------|----------------------------------------------------------------------------------------------------|---------------|----------|---------------| -| `gravitino.iceberg-rest.io-impl` | The io implementation for `FileIO` in Iceberg, use `org.apache.iceberg.gcp.gcs.GCSFileIO` for GCS. | (none) | No | 0.6.0 | +| Configuration item | Description | Default value | Required | Since Version | +|----------------------------------|----------------------------------------------------------------------------------------------------|---------------|----------|------------------| +| `gravitino.iceberg-rest.io-impl` | The io implementation for `FileIO` in Iceberg, use `org.apache.iceberg.gcp.gcs.GCSFileIO` for GCS. | (none) | No | 0.6.0-incubating | For other Iceberg GCS properties not managed by Gravitino like `gcs.project-id`, you could config it directly by `gravitino.iceberg-rest.gcs.project-id`. @@ -161,9 +161,9 @@ Builds with Hadoop 2.10.x. There may be compatibility issues when accessing Hado For other storages that are not managed by Gravitino directly, you can manage them through custom catalog properties. -| Configuration item | Description | Default value | Required | Since Version | -|----------------------------------|-----------------------------------------------------------------------------------------|---------------|----------|---------------| -| `gravitino.iceberg-rest.io-impl` | The IO implementation for `FileIO` in Iceberg, please use the full qualified classname. | (none) | No | 0.6.0 | +| Configuration item | Description | Default value | Required | Since Version | +|----------------------------------|-----------------------------------------------------------------------------------------|---------------|----------|------------------| +| `gravitino.iceberg-rest.io-impl` | The IO implementation for `FileIO` in Iceberg, please use the full qualified classname. | (none) | No | 0.6.0-incubating | To pass custom properties such as `security-token` to your custom `FileIO`, you can directly configure it by `gravitino.iceberg-rest.security-token`. `security-token` will be included in the properties when the initialize method of `FileIO` is invoked. @@ -206,10 +206,11 @@ You must download the corresponding JDBC driver to the `iceberg-rest-server/libs ::: #### Custom backend configuration -| Configuration item | Description | Default value | Required | Since Version | -|------------------------------------------------|---------------------------------------------------------------------------------------------------------------------|------------------|----------|---------------| -| `gravitino.iceberg-rest.catalog-backend` | The Catalog backend of the Gravitino Iceberg REST catalog service. Use the value **`custom`** for a Custom catalog. 
| `memory` | Yes | 0.2.0 | -| `gravitino.iceberg-rest.catalog-backend-impl` | The fully-qualified class name of a custom catalog implementation, only worked if `catalog-backend` is `custom`. | (none) | No | 0.7.0 | + +| Configuration item | Description | Default value | Required | Since Version | +|-----------------------------------------------|---------------------------------------------------------------------------------------------------------------------|---------------|----------|------------------| +| `gravitino.iceberg-rest.catalog-backend` | The Catalog backend of the Gravitino Iceberg REST catalog service. Use the value **`custom`** for a Custom catalog. | `memory` | Yes | 0.2.0 | +| `gravitino.iceberg-rest.catalog-backend-impl` | The fully-qualified class name of a custom catalog implementation, only worked if `catalog-backend` is `custom`. | (none) | No | 0.7.0-incubating | If you want to use a custom Iceberg Catalog as `catalog-backend`, you can add a corresponding jar file to the classpath and load a custom Iceberg Catalog implementation by specifying the `catalog-backend-impl` property. @@ -217,18 +218,17 @@ If you want to use a custom Iceberg Catalog as `catalog-backend`, you can add a You could access the view interface if using JDBC backend and enable `jdbc.schema-version` property. -| Configuration item | Description | Default value | Required | Since Version | -|-------------------------------------------------|--------------------------------------------------------------------------------------------|---------------|----------|---------------| -| `gravitino.iceberg-rest.jdbc.schema-version` | The schema version of JDBC catalog backend, setting to `V1` if supporting view operations. | (none) | NO | 0.7.0 | - +| Configuration item | Description | Default value | Required | Since Version | +|----------------------------------------------|--------------------------------------------------------------------------------------------|---------------|----------|------------------| +| `gravitino.iceberg-rest.jdbc.schema-version` | The schema version of JDBC catalog backend, setting to `V1` if supporting view operations. | (none) | NO | 0.7.0-incubating | #### Multi catalog support The Gravitino Iceberg REST server supports multiple catalogs and offers a configuration-based catalog management system. -| Configuration item | Description | Default value | Required | Since Version | -|----------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------|----------|---------------| -| `gravitino.iceberg-rest.catalog-provider` | Catalog provider class name, you can develop a class that implements `IcebergTableOpsProvider` and add the corresponding jar file to the Iceberg REST service classpath directory. 
| `config-based-provider` | No | 0.7.0 | +| Configuration item | Description | Default value | Required | Since Version | +|-------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------|----------|------------------| +| `gravitino.iceberg-rest.catalog-provider` | Catalog provider class name, you can develop a class that implements `IcebergTableOpsProvider` and add the corresponding jar file to the Iceberg REST service classpath directory. | `config-based-provider` | No | 0.7.0-incubating | ##### Configuration based catalog provider @@ -273,11 +273,11 @@ You can access different catalogs by setting the `prefix` to the specific catalo When using a Gravitino server based catalog provider, you can leverage Gravitino to support dynamic catalog management for the Iceberg REST server. -| Configuration item | Description | Default value | Required | Since Version | -|--------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|---------------| -| `gravitino.iceberg-rest.gravitino-uri` | The uri of Gravitino server address, only worked if `catalog-provider` is `gravitino-based-provider`. | (none) | No | 0.7.0 | -| `gravitino.iceberg-rest.gravitino-metalake` | The metalake name that `gravitino-based-provider` used to request to Gravitino, only worked if `catalog-provider` is `gravitino-based-provider`. | (none) | No | 0.7.0 | -| `gravitino.iceberg-rest.catalog-cache-eviction-interval-ms` | Catalog cache eviction interval. | 3600000 | No | 0.7.0 | +| Configuration item | Description | Default value | Required | Since Version | +|-------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|------------------| +| `gravitino.iceberg-rest.gravitino-uri` | The uri of Gravitino server address, only worked if `catalog-provider` is `gravitino-based-provider`. | (none) | No | 0.7.0-incubating | +| `gravitino.iceberg-rest.gravitino-metalake` | The metalake name that `gravitino-based-provider` used to request to Gravitino, only worked if `catalog-provider` is `gravitino-based-provider`. | (none) | No | 0.7.0-incubating | +| `gravitino.iceberg-rest.catalog-cache-eviction-interval-ms` | Catalog cache eviction interval. | 3600000 | No | 0.7.0-incubating | ```text gravitino.iceberg-rest.catalog-cache-eviction-interval-ms = 300000 @@ -311,10 +311,9 @@ Gravitino provides a pluggable metrics store interface to store and delete Icebe ### Misc configurations -| Configuration item | Description | Default value | Required | Since Version | -|---------------------------------------------|--------------------------------------------------------------|---------------|----------|---------------| -| `gravitino.iceberg-rest.extension-packages` | Comma-separated list of Iceberg REST API packages to expand. 
| (none) | No | 0.7.0 | - +| Configuration item | Description | Default value | Required | Since Version | +|---------------------------------------------|--------------------------------------------------------------|---------------|----------|------------------| +| `gravitino.iceberg-rest.extension-packages` | Comma-separated list of Iceberg REST API packages to expand. | (none) | No | 0.7.0-incubating | ## Starting the Iceberg REST server diff --git a/docs/security/authorization-pushdown.md b/docs/security/authorization-pushdown.md index e521402f6e3..148e76b5f81 100644 --- a/docs/security/authorization-pushdown.md +++ b/docs/security/authorization-pushdown.md @@ -17,14 +17,14 @@ This module translates Gravitino's authorization model into the permission rules In order to use the Authorization Ranger Hive Plugin, you need to configure the following properties and [Apache Hive catalog properties](../apache-hive-catalog.md#catalog-properties): -| Property Name | Description | Default Value | Required | Since Version | -|-------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|---------------| -| `authorization-provider` | Providers to use to implement authorization plugin such as `ranger`. | (none) | No | 0.6.0 | -| `authorization.ranger.admin.url` | The Apache Ranger web URIs. | (none) | No | 0.6.0 | -| `authorization.ranger.auth.type` | The Apache Ranger authentication type `simple` or `kerberos`. | `simple` | No | 0.6.0 | -| `authorization.ranger.username` | The Apache Ranger admin web login username (auth type=simple), or kerberos principal(auth type=kerberos), Need have Ranger administrator permission. | (none) | No | 0.6.0 | -| `authorization.ranger.password` | The Apache Ranger admin web login user password (auth type=simple), or path of the keytab file(auth type=kerberos) | (none) | No | 0.6.0 | -| `authorization.ranger.service.name` | The Apache Ranger service name. | (none) | No | 0.6.0 | +| Property Name | Description | Default Value | Required | Since Version | +|-------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------|------------------| +| `authorization-provider` | Providers to use to implement authorization plugin such as `ranger`. | (none) | No | 0.6.0-incubating | +| `authorization.ranger.admin.url` | The Apache Ranger web URIs. | (none) | No | 0.6.0-incubating | +| `authorization.ranger.auth.type` | The Apache Ranger authentication type `simple` or `kerberos`. | `simple` | No | 0.6.0-incubating | +| `authorization.ranger.username` | The Apache Ranger admin web login username (auth type=simple), or kerberos principal(auth type=kerberos), Need have Ranger administrator permission. | (none) | No | 0.6.0-incubating | +| `authorization.ranger.password` | The Apache Ranger admin web login user password (auth type=simple), or path of the keytab file(auth type=kerberos) | (none) | No | 0.6.0-incubating | +| `authorization.ranger.service.name` | The Apache Ranger service name. | (none) | No | 0.6.0-incubating | Once you have used the correct configuration, you can perform authorization operations by calling Gravitino [authorization RESTful API](https://gravitino.apache.org/docs/latest/api/rest/grant-roles-to-a-user). 
diff --git a/docs/security/how-to-authenticate.md b/docs/security/how-to-authenticate.md
index c98676350e4..56a7d20ecb4 100644
--- a/docs/security/how-to-authenticate.md
+++ b/docs/security/how-to-authenticate.md
@@ -88,18 +88,18 @@ The URI must use the hostname of server instead of IP.

### Server configuration

-| Configuration item | Description | Default value | Required | Since version |
-|---------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------|--------------------------------------------|---------------|
-| `gravitino.authenticator` | It is deprecated since Gravitino 0.6.0. Please use `gravitino.authenticators` instead. | `simple` | No | 0.3.0 |
-| `gravitino.authenticators` | The authenticators which Gravitino uses, setting as `simple`,`oauth` or `kerberos`. Multiple authenticators are separated by commas. If a request is supported by multiple authenticators simultaneously, the first authenticator will be used by default. | `simple` | No | 0.6.0 |
-| `gravitino.authenticator.oauth.serviceAudience` | The audience name when Gravitino uses OAuth as the authenticator. | `GravitinoServer` | No | 0.3.0 |
-| `gravitino.authenticator.oauth.allowSkewSecs` | The JWT allows skew seconds when Gravitino uses OAuth as the authenticator. | `0` | No | 0.3.0 |
-| `gravitino.authenticator.oauth.defaultSignKey` | The signing key of JWT when Gravitino uses OAuth as the authenticator. | (none) | Yes if use `oauth` as the authenticator | 0.3.0 |
-| `gravitino.authenticator.oauth.signAlgorithmType` | The signature algorithm when Gravitino uses OAuth as the authenticator. | `RS256` | No | 0.3.0 |
-| `gravitino.authenticator.oauth.serverUri` | The URI of the default OAuth server. | (none) | Yes if use `oauth` as the authenticator | 0.3.0 |
-| `gravitino.authenticator.oauth.tokenPath` | The path for token of the default OAuth server. | (none) | Yes if use `oauth` as the authenticator | 0.3.0 |
-| `gravitino.authenticator.kerberos.principal` | Indicates the Kerberos principal to be used for HTTP endpoint. Principal should start with `HTTP/`. | (none) | Yes if use `kerberos` as the authenticator | 0.4.0 |
-| `gravitino.authenticator.kerberos.keytab` | Location of the keytab file with the credentials for the principal. | (none) | Yes if use `kerberos` as the authenticator | 0.4.0 |
+| Configuration item | Description | Default value | Required | Since version |
+|---------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------|----------------------------------------------|------------------|
+| `gravitino.authenticator` | It is deprecated since Gravitino 0.6.0. Please use `gravitino.authenticators` instead. | `simple` | No | 0.3.0 |
+| `gravitino.authenticators` | The authenticators Gravitino uses, set to `simple`, `oauth`, or `kerberos`. Multiple authenticators are separated by commas. If a request is supported by multiple authenticators simultaneously, the first one is used by default. | `simple` | No | 0.6.0-incubating |
+| `gravitino.authenticator.oauth.serviceAudience` | The audience name when Gravitino uses OAuth as the authenticator. | `GravitinoServer` | No | 0.3.0 |
+| `gravitino.authenticator.oauth.allowSkewSecs` | The allowed JWT clock skew, in seconds, when Gravitino uses OAuth as the authenticator. | `0` | No | 0.3.0 |
+| `gravitino.authenticator.oauth.defaultSignKey` | The signing key of the JWT when Gravitino uses OAuth as the authenticator. | (none) | Yes if using `oauth` as the authenticator | 0.3.0 |
+| `gravitino.authenticator.oauth.signAlgorithmType` | The signature algorithm when Gravitino uses OAuth as the authenticator. | `RS256` | No | 0.3.0 |
+| `gravitino.authenticator.oauth.serverUri` | The URI of the default OAuth server. | (none) | Yes if using `oauth` as the authenticator | 0.3.0 |
+| `gravitino.authenticator.oauth.tokenPath` | The token path of the default OAuth server. | (none) | Yes if using `oauth` as the authenticator | 0.3.0 |
+| `gravitino.authenticator.kerberos.principal` | Indicates the Kerberos principal to be used for the HTTP endpoint. The principal should start with `HTTP/`. | (none) | Yes if using `kerberos` as the authenticator | 0.4.0 |
+| `gravitino.authenticator.kerberos.keytab` | Location of the keytab file with the credentials for the principal. | (none) | Yes if using `kerberos` as the authenticator | 0.4.0 |

The signature algorithms that Gravitino supports follows:
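For orientation, a server configuration that enables the OAuth authenticator and supplies the three properties the table marks as required might look like this sketch; the sign key, server URI, and token path are placeholders:

```text
gravitino.authenticators = oauth
gravitino.authenticator.oauth.serviceAudience = GravitinoServer
gravitino.authenticator.oauth.defaultSignKey = <base64-encoded-sign-key>
gravitino.authenticator.oauth.serverUri = http://oauth-server.example.com:8177
gravitino.authenticator.oauth.tokenPath = /oauth2/token
```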
diff --git a/docs/spark-connector/spark-catalog-iceberg.md b/docs/spark-connector/spark-catalog-iceberg.md
index f0b1f2f6419..dfa50805c6a 100644
--- a/docs/spark-connector/spark-catalog-iceberg.md
+++ b/docs/spark-connector/spark-catalog-iceberg.md
@@ -101,17 +101,17 @@ For more details about `CALL`, please refer to the [Spark Procedures description

Gravitino spark connector will transform below property names which are defined in catalog properties to Spark Iceberg connector configuration.

-| Gravitino catalog property name | Spark Iceberg connector configuration | Description | Since Version |
-|---------------------------------|---------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|
-| `catalog-backend` | `type` | Catalog backend type | 0.5.0 |
-| `uri` | `uri` | Catalog backend uri | 0.5.0 |
-| `warehouse` | `warehouse` | Catalog backend warehouse | 0.5.0 |
-| `jdbc-user` | `jdbc.user` | JDBC user name | 0.5.0 |
-| `jdbc-password` | `jdbc.password` | JDBC password | 0.5.0 |
-| `io-impl` | `io-impl` | The io implementation for `FileIO` in Iceberg. | 0.6.0 |
-| `s3-endpoint` | `s3.endpoint` | An alternative endpoint of the S3 service, This could be used for S3FileIO with any s3-compatible object storage service that has a different endpoint, or access a private S3 endpoint in a virtual private cloud. | 0.6.0 |
-| `s3-region` | `client.region` | The region of the S3 service, like `us-west-2`. | 0.6.0 |
-| `oss-endpoint` | `oss.endpoint` | The endpoint of Aliyun OSS service. | 0.7.0 |
+| Gravitino catalog property name | Spark Iceberg connector configuration | Description | Since Version |
+|---------------------------------|---------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------|
+| `catalog-backend` | `type` | Catalog backend type | 0.5.0 |
+| `uri` | `uri` | Catalog backend uri | 0.5.0 |
+| `warehouse` | `warehouse` | Catalog backend warehouse | 0.5.0 |
+| `jdbc-user` | `jdbc.user` | JDBC user name | 0.5.0 |
+| `jdbc-password` | `jdbc.password` | JDBC password | 0.5.0 |
+| `io-impl` | `io-impl` | The I/O implementation for `FileIO` in Iceberg. | 0.6.0-incubating |
+| `s3-endpoint` | `s3.endpoint` | An alternative endpoint of the S3 service. This could be used for S3FileIO with any S3-compatible object storage service that has a different endpoint, or to access a private S3 endpoint in a virtual private cloud. | 0.6.0-incubating |
+| `s3-region` | `client.region` | The region of the S3 service, like `us-west-2`. | 0.6.0-incubating |
+| `oss-endpoint` | `oss.endpoint` | The endpoint of the Aliyun OSS service. | 0.7.0-incubating |

Gravitino catalog property names with the prefix `spark.bypass.` are passed to Spark Iceberg connector. For example, using `spark.bypass.clients` to pass the `clients` to the Spark Iceberg connector.
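As an illustration of this mapping, a Gravitino Iceberg catalog might be created with properties like the sketch below, which the connector would translate into the corresponding `type`, `uri`, and `warehouse` options for the Spark Iceberg connector; all endpoint and path values are placeholders:

```text
catalog-backend = hive
uri = thrift://hive-metastore.example.com:9083
warehouse = hdfs://namenode.example.com:9000/user/hive/warehouse
io-impl = org.apache.iceberg.aws.s3.S3FileIO
s3-endpoint = http://s3.example.com
spark.bypass.clients = 4
```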
diff --git a/docs/spark-connector/spark-integration-test.md b/docs/spark-connector/spark-integration-test.md
index 35ad27b56ef..ce8bdb2df94 100644
--- a/docs/spark-connector/spark-integration-test.md
+++ b/docs/spark-connector/spark-integration-test.md
@@ -28,15 +28,15 @@ Golden file integration test are mainly to test the correctness of the SQL resul

Please change the Spark version number if you want to test other Spark versions. If you want to change the test behaviour, please modify `spark-connector/spark-common/src/test/resources/spark-test.conf`.

-| Configuration item | Description | Default value | Required | Since Version |
-|--------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------|----------|---------------|
-| `gravitino.spark.test.dir` | The Spark SQL test base dir, include `test-sqls` and `data`. | `spark-connector/spark-common/src/test/resources/` | No | 0.6.0 |
-| `gravitino.spark.test.sqls` | Specify the test SQLs, using directory to specify group of SQLs like `test-sqls/hive`, using file path to specify one SQL like `test-sqls/hive/basic.sql`, use `,` to split multi part | run all SQLs | No | 0.6.0 |
-| `gravitino.spark.test.generateGoldenFiles` | Whether generate golden files which are used to check the correctness of the SQL result | false | No | 0.6.0 |
-| `gravitino.spark.test.metalake` | The metalake name to run the test | `test` | No | 0.6.0 |
-| `gravitino.spark.test.setupEnv` | Whether to setup Gravitino and Hive environment | `false` | No | 0.6.0 |
-| `gravitino.spark.test.uri` | Gravitino uri address, only available when `gravitino.spark.test.setupEnv` is false | http://127.0.0.1:8090 | No | 0.6.0 |
-| `gravitino.spark.test.iceberg.warehouse` | The warehouse location, only available when `gravitino.spark.test.setupEnv` is false | hdfs://127.0.0.1:9000/user/hive/warehouse-spark-test | No | 0.6.0 |
+| Configuration item | Description | Default value | Required | Since Version |
+|--------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------|----------|------------------|
+| `gravitino.spark.test.dir` | The Spark SQL test base directory, including `test-sqls` and `data`. | `spark-connector/spark-common/src/test/resources/` | No | 0.6.0-incubating |
+| `gravitino.spark.test.sqls` | Specify the test SQLs: use a directory to specify a group of SQLs like `test-sqls/hive`, use a file path to specify a single SQL like `test-sqls/hive/basic.sql`, and use `,` to separate multiple parts. | run all SQLs | No | 0.6.0-incubating |
+| `gravitino.spark.test.generateGoldenFiles` | Whether to generate golden files, which are used to check the correctness of the SQL result. | false | No | 0.6.0-incubating |
+| `gravitino.spark.test.metalake` | The metalake name to run the test. | `test` | No | 0.6.0-incubating |
+| `gravitino.spark.test.setupEnv` | Whether to set up the Gravitino and Hive environment. | `false` | No | 0.6.0-incubating |
+| `gravitino.spark.test.uri` | The Gravitino URI address, only available when `gravitino.spark.test.setupEnv` is false. | http://127.0.0.1:8090 | No | 0.6.0-incubating |
+| `gravitino.spark.test.iceberg.warehouse` | The warehouse location, only available when `gravitino.spark.test.setupEnv` is false. | hdfs://127.0.0.1:9000/user/hive/warehouse-spark-test | No | 0.6.0-incubating |

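For example, a `spark-test.conf` that runs only the Hive basic SQL test against an already-running Gravitino server might contain something like the following sketch; the values are illustrative:

```text
gravitino.spark.test.setupEnv = false
gravitino.spark.test.uri = http://127.0.0.1:8090
gravitino.spark.test.sqls = test-sqls/hive/basic.sql
gravitino.spark.test.generateGoldenFiles = false
```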
The test SQL files are located in `spark-connector/spark-common/src/test/resources/` by default. There are three directories:
- `hive`, SQL tests for Hive catalog.