-
Notifications
You must be signed in to change notification settings - Fork 4.8k
HIVE-29233: Iceberg: Validate HiveRESTCatalogClient with external RESTCatalogs like Gravitino #6108
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
-- SORT_QUERY_RESULTS | ||
-- Mask neededVirtualColumns due to non-strict order | ||
--! qt:replace:/(\s+neededVirtualColumns:\s)(.*)/$1#Masked#/ | ||
-- Mask the random uuid in its quoted 'uuid'='<value>' table-property form (as printed by SHOW CREATE TABLE) | ||
--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/ | ||
-- Mask the random uuid in its whitespace-separated "uuid <value>" form (as printed by DESCRIBE FORMATTED) | ||
--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/ | ||
-- Mask a random snapshot id | ||
-- NOTE(review): unlike the other masks, this replacement does not re-emit $2, so the trailing whitespace captured by group 2 is dropped | ||
--! qt:replace:/(\s+current-snapshot-id\s+)\S+(\s*)/$1#Masked#/ | ||
-- Mask added file size | ||
--! qt:replace:/(\S\"added-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/ | ||
-- Mask total file size | ||
--! qt:replace:/(\S\"total-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/ | ||
-- Mask current-snapshot-timestamp-ms | ||
--! qt:replace:/(\s+current-snapshot-timestamp-ms\s+)\S+(\s*)/$1#Masked#$2/ | ||
-- Mask the text between "MAJOR succeeded" / "MAJOR refused" and the following "manual" token | ||
-- NOTE(review): this script issues no compaction statement, so the compaction masks look inherited from another test; confirm they are needed | ||
--! qt:replace:/(MAJOR\s+succeeded\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/ | ||
--! qt:replace:/(MAJOR\s+refused\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/ | ||
-- Mask compaction ids, as they are allocated in parallel threads | ||
-- NOTE(review): the pattern replaces a single leading digit on ANY output line, not only on compaction rows | ||
--! qt:replace:/^[0-9]/#Masked#/ | ||
-- Mask removed file size | ||
--! qt:replace:/(\S\"removed-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/ | ||
-- Mask iceberg version | ||
--! qt:replace:/(\S\"iceberg-version\\\":\\\")(\w+\s\w+\s\d+\.\d+\.\d+\s\(\w+\s\w+\))(\\\")/$1#Masked#$3/ | ||
|
||
-- Session setup for the REST catalog test. | ||
-- Turn off automatic statistics gathering for this session. | ||
set hive.stats.autogather=false; | ||
-- Use HiveRESTCatalogClient as the metastore client implementation. | ||
set metastore.client.impl=org.apache.iceberg.hive.client.HiveRESTCatalogClient; | ||
-- Make 'ice01' the default catalog and declare it as an Iceberg catalog of type 'rest'. | ||
set metastore.catalog.default=ice01; | ||
set iceberg.catalog.ice01.type=rest; | ||
|
||
--! The catalog URI is not set in this script; it is set in the driver setup (see TestIcebergRESTCatalogLlapLocalCliDriver.java): | ||
--! conf.set('iceberg.catalog.ice01.uri', <RESTServer URI>); | ||
|
||
-- Create a dedicated database for this test and make it the current database. | ||
create database ice_rest; | ||
use ice_rest; | ||
|
||
----------------------------------------------------------------------------- | ||
--! Creating a table without a catalog name in table properties | ||
--! (this table is only listed by 'show tables' and dropped later; no data is written to it) | ||
----------------------------------------------------------------------------- | ||
|
||
create table ice_orc1 ( | ||
first_name string, | ||
last_name string, | ||
dept_id bigint, | ||
team_id bigint | ||
) | ||
partitioned by (company_id bigint) | ||
stored by iceberg stored as orc; | ||
|
||
----------------------------------------------------------------------------- | ||
--! Creating table with a valid catalog name in table properties | ||
--! ('iceberg.catalog'='ice01' matches the catalog configured in the session settings) | ||
----------------------------------------------------------------------------- | ||
|
||
create table ice_orc2 ( | ||
first_name string, | ||
last_name string, | ||
dept_id bigint, | ||
team_id bigint | ||
) | ||
partitioned by (company_id bigint) | ||
stored by iceberg stored as orc | ||
TBLPROPERTIES('format-version'='2', 'iceberg.catalog'='ice01'); | ||
|
||
--! Output should contain: 'type' = 'rest' | ||
show create table ice_orc2; | ||
|
||
-- Write three rows into the static partition company_id=100. | ||
insert into ice_orc2 partition (company_id=100) | ||
VALUES ('fn1','ln1', 1, 10), ('fn2','ln2', 2, 20), ('fn3','ln3', 3, 30); | ||
|
||
--! In CI, Testcontainers' .withFileSystemBind() cannot bind the same host path to the same container path. | ||
--! As a workaround, the .metadata.json files from the container are synced manually by a daemon process. | ||
--! The sync can take some time, so wait for it to finish after the insert before reading the table back. | ||
-- NOTE(review): this is a fixed 20-second shell sleep; if the sync takes longer, the reads below may see stale metadata | ||
! sleep 20; | ||
|
||
-- Check the table metadata and the inserted rows after the sync wait. | ||
describe formatted ice_orc2; | ||
select * from ice_orc2; | ||
|
||
----------------------------------------------------------------------------- | ||
--! Cleanup: drop both tables and then the database, listing the contents before and after each step | ||
|
||
show tables; | ||
drop table ice_orc1; | ||
drop table ice_orc2; | ||
show tables; | ||
|
||
show databases; | ||
drop database ice_rest; | ||
show databases; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,231 @@ | ||
PREHOOK: query: create database ice_rest | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I verified the diff is likely expected.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. .q.out files are slightly different: HMS REST Catalog has the |
||
PREHOOK: type: CREATEDATABASE | ||
PREHOOK: Output: database:ice_rest | ||
POSTHOOK: query: create database ice_rest | ||
POSTHOOK: type: CREATEDATABASE | ||
POSTHOOK: Output: database:ice_rest | ||
PREHOOK: query: use ice_rest | ||
PREHOOK: type: SWITCHDATABASE | ||
PREHOOK: Input: database:ice_rest | ||
POSTHOOK: query: use ice_rest | ||
POSTHOOK: type: SWITCHDATABASE | ||
POSTHOOK: Input: database:ice_rest | ||
PREHOOK: query: create table ice_orc1 ( | ||
first_name string, | ||
last_name string, | ||
dept_id bigint, | ||
team_id bigint | ||
) | ||
partitioned by (company_id bigint) | ||
stored by iceberg stored as orc | ||
PREHOOK: type: CREATETABLE | ||
PREHOOK: Output: database:ice_rest | ||
PREHOOK: Output: ice_rest@ice_orc1 | ||
POSTHOOK: query: create table ice_orc1 ( | ||
first_name string, | ||
last_name string, | ||
dept_id bigint, | ||
team_id bigint | ||
) | ||
partitioned by (company_id bigint) | ||
stored by iceberg stored as orc | ||
POSTHOOK: type: CREATETABLE | ||
POSTHOOK: Output: database:ice_rest | ||
POSTHOOK: Output: ice_rest@ice_orc1 | ||
PREHOOK: query: create table ice_orc2 ( | ||
first_name string, | ||
last_name string, | ||
dept_id bigint, | ||
team_id bigint | ||
) | ||
partitioned by (company_id bigint) | ||
stored by iceberg stored as orc | ||
TBLPROPERTIES('format-version'='2', 'iceberg.catalog'='ice01') | ||
PREHOOK: type: CREATETABLE | ||
PREHOOK: Output: database:ice_rest | ||
PREHOOK: Output: ice_rest@ice_orc2 | ||
POSTHOOK: query: create table ice_orc2 ( | ||
first_name string, | ||
last_name string, | ||
dept_id bigint, | ||
team_id bigint | ||
) | ||
partitioned by (company_id bigint) | ||
stored by iceberg stored as orc | ||
TBLPROPERTIES('format-version'='2', 'iceberg.catalog'='ice01') | ||
POSTHOOK: type: CREATETABLE | ||
POSTHOOK: Output: database:ice_rest | ||
POSTHOOK: Output: ice_rest@ice_orc2 | ||
PREHOOK: query: show create table ice_orc2 | ||
PREHOOK: type: SHOW_CREATETABLE | ||
PREHOOK: Input: ice_rest@ice_orc2 | ||
POSTHOOK: query: show create table ice_orc2 | ||
POSTHOOK: type: SHOW_CREATETABLE | ||
POSTHOOK: Input: ice_rest@ice_orc2 | ||
CREATE EXTERNAL TABLE `ice_orc2`( | ||
`first_name` string, | ||
`last_name` string, | ||
`dept_id` bigint, | ||
`team_id` bigint, | ||
`company_id` bigint) | ||
PARTITIONED BY ( | ||
`company_id` bigint COMMENT 'Transform: identity') | ||
PARTITIONED BY SPEC ( | ||
`company_id`) | ||
ROW FORMAT SERDE | ||
'org.apache.iceberg.mr.hive.HiveIcebergSerDe' | ||
STORED BY | ||
'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' | ||
WITH SERDEPROPERTIES ( | ||
'serialization.format'='1') | ||
LOCATION | ||
#### A masked pattern was here #### | ||
TBLPROPERTIES ( | ||
'bucketing_version'='2', | ||
'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"first_name","required":false,"type":"string"},{"id":2,"name":"last_name","required":false,"type":"string"},{"id":3,"name":"dept_id","required":false,"type":"long"},{"id":4,"name":"team_id","required":false,"type":"long"},{"id":5,"name":"company_id","required":false,"type":"long"}]}', | ||
'default-partition-spec'='{"spec-id":0,"fields":[{"name":"company_id","transform":"identity","source-id":5,"field-id":1000}]}', | ||
'format-version'='2', | ||
'iceberg.catalog'='ice01', | ||
'iceberg.orc.files.only'='true', | ||
#### A masked pattern was here #### | ||
'name'='ice_rest.ice_orc2', | ||
'parquet.compression'='zstd', | ||
'serialization.format'='1', | ||
'snapshot-count'='0', | ||
'table_type'='ICEBERG', | ||
'type'='rest', | ||
'uuid'='#Masked#', | ||
'write.delete.mode'='merge-on-read', | ||
'write.format.default'='orc', | ||
'write.merge.mode'='merge-on-read', | ||
'write.update.mode'='merge-on-read') | ||
PREHOOK: query: insert into ice_orc2 partition (company_id=100) | ||
VALUES ('fn1','ln1', 1, 10), ('fn2','ln2', 2, 20), ('fn3','ln3', 3, 30) | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: _dummy_database@_dummy_table | ||
PREHOOK: Output: ice_rest@ice_orc2@company_id=100 | ||
POSTHOOK: query: insert into ice_orc2 partition (company_id=100) | ||
VALUES ('fn1','ln1', 1, 10), ('fn2','ln2', 2, 20), ('fn3','ln3', 3, 30) | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: _dummy_database@_dummy_table | ||
POSTHOOK: Output: ice_rest@ice_orc2@company_id=100 | ||
PREHOOK: query: describe formatted ice_orc2 | ||
PREHOOK: type: DESCTABLE | ||
PREHOOK: Input: ice_rest@ice_orc2 | ||
POSTHOOK: query: describe formatted ice_orc2 | ||
POSTHOOK: type: DESCTABLE | ||
POSTHOOK: Input: ice_rest@ice_orc2 | ||
# col_name data_type comment | ||
first_name string | ||
last_name string | ||
dept_id bigint | ||
team_id bigint | ||
company_id bigint | ||
|
||
# Partition Transform Information | ||
# col_name transform_type | ||
company_id IDENTITY | ||
|
||
# Detailed Table Information | ||
Database: ice_rest | ||
#### A masked pattern was here #### | ||
Retention: 0 | ||
#### A masked pattern was here #### | ||
Table Type: EXTERNAL_TABLE | ||
Table Parameters: | ||
bucketing_version 2 | ||
current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"first_name\",\"required\":false,\"type\":\"string\"},{\"id\":2,\"name\":\"last_name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"dept_id\",\"required\":false,\"type\":\"long\"},{\"id\":4,\"name\":\"team_id\",\"required\":false,\"type\":\"long\"},{\"id\":5,\"name\":\"company_id\",\"required\":false,\"type\":\"long\"}]} | ||
current-snapshot-id #Masked# | ||
current-snapshot-summary {\"added-data-files\":\"1\",\"added-records\":\"3\",\"added-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"3\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"1\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"} | ||
current-snapshot-timestamp-ms #Masked# | ||
default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"company_id\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1000}]} | ||
format-version 2 | ||
iceberg.catalog ice01 | ||
iceberg.orc.files.only true | ||
#### A masked pattern was here #### | ||
name ice_rest.ice_orc2 | ||
numFiles 1 | ||
numRows 3 | ||
parquet.compression zstd | ||
serialization.format 1 | ||
snapshot-count 1 | ||
storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler | ||
table_type ICEBERG | ||
totalSize #Masked# | ||
type rest | ||
uuid #Masked# | ||
write.delete.mode merge-on-read | ||
write.format.default orc | ||
write.merge.mode merge-on-read | ||
write.update.mode merge-on-read | ||
|
||
# Storage Information | ||
SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe | ||
InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat | ||
OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat | ||
Compressed: No | ||
Sort Columns: [] | ||
Storage Desc Params: | ||
serialization.format 1 | ||
PREHOOK: query: select * from ice_orc2 | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: ice_rest@ice_orc2 | ||
#### A masked pattern was here #### | ||
POSTHOOK: query: select * from ice_orc2 | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: ice_rest@ice_orc2 | ||
#### A masked pattern was here #### | ||
fn1 ln1 1 10 100 | ||
fn2 ln2 2 20 100 | ||
fn3 ln3 3 30 100 | ||
PREHOOK: query: show tables | ||
PREHOOK: type: SHOWTABLES | ||
PREHOOK: Input: database:ice_rest | ||
POSTHOOK: query: show tables | ||
POSTHOOK: type: SHOWTABLES | ||
POSTHOOK: Input: database:ice_rest | ||
ice_orc1 | ||
ice_orc2 | ||
PREHOOK: query: drop table ice_orc1 | ||
PREHOOK: type: DROPTABLE | ||
PREHOOK: Input: ice_rest@ice_orc1 | ||
PREHOOK: Output: database:ice_rest | ||
PREHOOK: Output: ice_rest@ice_orc1 | ||
POSTHOOK: query: drop table ice_orc1 | ||
POSTHOOK: type: DROPTABLE | ||
POSTHOOK: Input: ice_rest@ice_orc1 | ||
POSTHOOK: Output: database:ice_rest | ||
POSTHOOK: Output: ice_rest@ice_orc1 | ||
PREHOOK: query: drop table ice_orc2 | ||
PREHOOK: type: DROPTABLE | ||
PREHOOK: Input: ice_rest@ice_orc2 | ||
PREHOOK: Output: database:ice_rest | ||
PREHOOK: Output: ice_rest@ice_orc2 | ||
POSTHOOK: query: drop table ice_orc2 | ||
POSTHOOK: type: DROPTABLE | ||
POSTHOOK: Input: ice_rest@ice_orc2 | ||
POSTHOOK: Output: database:ice_rest | ||
POSTHOOK: Output: ice_rest@ice_orc2 | ||
PREHOOK: query: show tables | ||
PREHOOK: type: SHOWTABLES | ||
PREHOOK: Input: database:ice_rest | ||
POSTHOOK: query: show tables | ||
POSTHOOK: type: SHOWTABLES | ||
POSTHOOK: Input: database:ice_rest | ||
PREHOOK: query: show databases | ||
PREHOOK: type: SHOWDATABASES | ||
POSTHOOK: query: show databases | ||
POSTHOOK: type: SHOWDATABASES | ||
ice_rest | ||
PREHOOK: query: drop database ice_rest | ||
PREHOOK: type: DROPDATABASE | ||
PREHOOK: Input: database:ice_rest | ||
PREHOOK: Output: database:ice_rest | ||
POSTHOOK: query: drop database ice_rest | ||
POSTHOOK: type: DROPDATABASE | ||
POSTHOOK: Input: database:ice_rest | ||
POSTHOOK: Output: database:ice_rest | ||
PREHOOK: query: show databases | ||
PREHOOK: type: SHOWDATABASES | ||
POSTHOOK: query: show databases | ||
POSTHOOK: type: SHOWDATABASES |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -475,6 +475,23 @@ | |
<version>${project.version}</version> | ||
<scope>test</scope> | ||
</dependency> | ||
<!-- NOTE(review): the three httpcomponents artifacts below declare no scope, while testcontainers is test-scoped; confirm whether they are needed outside tests. -->
<dependency> | ||
<groupId>org.apache.httpcomponents.client5</groupId> | ||
<artifactId>httpclient5</artifactId> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.apache.httpcomponents.core5</groupId> | ||
<artifactId>httpcore5</artifactId> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.apache.httpcomponents.core5</groupId> | ||
<artifactId>httpcore5-h2</artifactId> | ||
</dependency> | ||
<!-- Testcontainers: library used to run Docker containers from tests. -->
<dependency> | ||
<groupId>org.testcontainers</groupId> | ||
<artifactId>testcontainers</artifactId> | ||
<scope>test</scope> | ||
</dependency> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need these ones? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the library that allows to run docker containers in tests -
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, I wanted to mention only the httpcomponents family, which is not imported. |
||
</dependencies> | ||
<build> | ||
<plugins> | ||
|
@@ -525,6 +542,30 @@ | |
</additionalClasspathElements> | ||
</configuration> | ||
</plugin> | ||
<!-- Copies the com.h2database:h2 jar (version pinned inline) to ${project.build.directory}/test-dependencies/h2-driver.jar during the process-test-resources phase. -->
<!-- NOTE(review): presumably the jar is handed to the external REST catalog container used by the test; confirm against the test driver. -->
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-dependency-plugin</artifactId> | ||
<executions> | ||
<execution> | ||
<id>copy-h2-driver-for-test</id> | ||
<phase>process-test-resources</phase> | ||
<goals> | ||
<goal>copy</goal> | ||
</goals> | ||
<configuration> | ||
<artifactItems> | ||
<artifactItem> | ||
<groupId>com.h2database</groupId> | ||
<artifactId>h2</artifactId> | ||
<version>2.2.224</version> | ||
<destFileName>h2-driver.jar</destFileName> | ||
</artifactItem> | ||
</artifactItems> | ||
<outputDirectory>${project.build.directory}/test-dependencies</outputDirectory> | ||
</configuration> | ||
</execution> | ||
</executions> | ||
</plugin> | ||
</plugins> | ||
</build> | ||
</project> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I verified the diff is likely expected.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
From #6108 (comment):