86 changes: 86 additions & 0 deletions iceberg/iceberg-handler/src/test/queries/positive/iceberg_rest_catalog_gravitino.q
@@ -0,0 +1,86 @@
-- SORT_QUERY_RESULTS
Contributor

I verified the diff is likely expected.

% diff iceberg/iceberg-handler/src/test/queries/positive/iceberg_rest_catalog_hms.q iceberg/iceberg-handler/src/test/queries/positive/iceberg_rest_catalog_gravitino.q
68a69,73
> --! In CI, Testcontainers' .withFileSystemBind() is not able to bind the same host path to the same container path,
> --! so as a workaround the .metadata.json files from the container are manually synced by a daemon process.
> --! Since the sync can take some time, we need to wait for it to complete after the insert operation.
> ! sleep 20;
>

Contributor Author

From #6108 (comment):

  • .q files are slightly different: the gravitino .q file has a ! sleep command as a workaround to give the manual sync process time to complete after the INSERT and before reading the table.
  • .q.out files are slightly different: the HMS REST Catalog has the default database; Gravitino doesn't.

-- Mask neededVirtualColumns due to non-strict order
--! qt:replace:/(\s+neededVirtualColumns:\s)(.*)/$1#Masked#/
-- Mask random uuid
--! qt:replace:/(\s+'uuid'=')\S+('\s*)/$1#Masked#$2/
-- Mask random uuid
--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
-- Mask a random snapshot id
--! qt:replace:/(\s+current-snapshot-id\s+)\S+(\s*)/$1#Masked#/
-- Mask added file size
--! qt:replace:/(\S\"added-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/
-- Mask total file size
--! qt:replace:/(\S\"total-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/
-- Mask current-snapshot-timestamp-ms
--! qt:replace:/(\s+current-snapshot-timestamp-ms\s+)\S+(\s*)/$1#Masked#$2/
--! qt:replace:/(MAJOR\s+succeeded\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/
--! qt:replace:/(MAJOR\s+refused\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/
-- Mask compaction id as they will be allocated in parallel threads
--! qt:replace:/^[0-9]/#Masked#/
-- Mask removed file size
--! qt:replace:/(\S\"removed-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/
-- Mask iceberg version
--! qt:replace:/(\S\"iceberg-version\\\":\\\")(\w+\s\w+\s\d+\.\d+\.\d+\s\(\w+\s\w+\))(\\\")/$1#Masked#$3/

set hive.stats.autogather=false;
set metastore.client.impl=org.apache.iceberg.hive.client.HiveRESTCatalogClient;
set metastore.catalog.default=ice01;
set iceberg.catalog.ice01.type=rest;

--! This config is set in the driver setup (see TestIcebergRESTCatalogLlapLocalCliDriver.java)
--! conf.set('iceberg.catalog.ice01.uri', <RESTServer URI>);

create database ice_rest;
use ice_rest;

-----------------------------------------------------------------------------
--! Creating a table without a catalog name in table properties
-----------------------------------------------------------------------------

create table ice_orc1 (
first_name string,
last_name string,
dept_id bigint,
team_id bigint
)
partitioned by (company_id bigint)
stored by iceberg stored as orc;

-----------------------------------------------------------------------------
--! Creating table with a valid catalog name in table properties
-----------------------------------------------------------------------------

create table ice_orc2 (
first_name string,
last_name string,
dept_id bigint,
team_id bigint
)
partitioned by (company_id bigint)
stored by iceberg stored as orc
TBLPROPERTIES('format-version'='2', 'iceberg.catalog'='ice01');

--! Output should contain: 'type' = 'rest'
show create table ice_orc2;

insert into ice_orc2 partition (company_id=100)
VALUES ('fn1','ln1', 1, 10), ('fn2','ln2', 2, 20), ('fn3','ln3', 3, 30);

--! In CI, Testcontainers' .withFileSystemBind() is not able to bind the same host path to the same container path,
--! so as a workaround the .metadata.json files from the container are manually synced by a daemon process.
--! Since the sync can take some time, we need to wait for it to complete after the insert operation.
! sleep 20;

describe formatted ice_orc2;
select * from ice_orc2;

-----------------------------------------------------------------------------

show tables;
drop table ice_orc1;
drop table ice_orc2;
show tables;

show databases;
drop database ice_rest;
show databases;
231 changes: 231 additions & 0 deletions iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_rest_catalog_gravitino.q.out
@@ -0,0 +1,231 @@
PREHOOK: query: create database ice_rest
Contributor

I verified the diff is likely expected.

% diff iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_rest_catalog_hms.q.out iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_rest_catalog_gravitino.q.out
219d218
< default
233d231
< default

Contributor Author
@difin difin Oct 2, 2025

The .q.out files are slightly different: the HMS REST Catalog has the default database; Gravitino doesn't.

PREHOOK: type: CREATEDATABASE
PREHOOK: Output: database:ice_rest
POSTHOOK: query: create database ice_rest
POSTHOOK: type: CREATEDATABASE
POSTHOOK: Output: database:ice_rest
PREHOOK: query: use ice_rest
PREHOOK: type: SWITCHDATABASE
PREHOOK: Input: database:ice_rest
POSTHOOK: query: use ice_rest
POSTHOOK: type: SWITCHDATABASE
POSTHOOK: Input: database:ice_rest
PREHOOK: query: create table ice_orc1 (
first_name string,
last_name string,
dept_id bigint,
team_id bigint
)
partitioned by (company_id bigint)
stored by iceberg stored as orc
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:ice_rest
PREHOOK: Output: ice_rest@ice_orc1
POSTHOOK: query: create table ice_orc1 (
first_name string,
last_name string,
dept_id bigint,
team_id bigint
)
partitioned by (company_id bigint)
stored by iceberg stored as orc
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:ice_rest
POSTHOOK: Output: ice_rest@ice_orc1
PREHOOK: query: create table ice_orc2 (
first_name string,
last_name string,
dept_id bigint,
team_id bigint
)
partitioned by (company_id bigint)
stored by iceberg stored as orc
TBLPROPERTIES('format-version'='2', 'iceberg.catalog'='ice01')
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:ice_rest
PREHOOK: Output: ice_rest@ice_orc2
POSTHOOK: query: create table ice_orc2 (
first_name string,
last_name string,
dept_id bigint,
team_id bigint
)
partitioned by (company_id bigint)
stored by iceberg stored as orc
TBLPROPERTIES('format-version'='2', 'iceberg.catalog'='ice01')
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:ice_rest
POSTHOOK: Output: ice_rest@ice_orc2
PREHOOK: query: show create table ice_orc2
PREHOOK: type: SHOW_CREATETABLE
PREHOOK: Input: ice_rest@ice_orc2
POSTHOOK: query: show create table ice_orc2
POSTHOOK: type: SHOW_CREATETABLE
POSTHOOK: Input: ice_rest@ice_orc2
CREATE EXTERNAL TABLE `ice_orc2`(
`first_name` string,
`last_name` string,
`dept_id` bigint,
`team_id` bigint,
`company_id` bigint)
PARTITIONED BY (
`company_id` bigint COMMENT 'Transform: identity')
PARTITIONED BY SPEC (
`company_id`)
ROW FORMAT SERDE
'org.apache.iceberg.mr.hive.HiveIcebergSerDe'
STORED BY
'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
WITH SERDEPROPERTIES (
'serialization.format'='1')
LOCATION
#### A masked pattern was here ####
TBLPROPERTIES (
'bucketing_version'='2',
'current-schema'='{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"first_name","required":false,"type":"string"},{"id":2,"name":"last_name","required":false,"type":"string"},{"id":3,"name":"dept_id","required":false,"type":"long"},{"id":4,"name":"team_id","required":false,"type":"long"},{"id":5,"name":"company_id","required":false,"type":"long"}]}',
'default-partition-spec'='{"spec-id":0,"fields":[{"name":"company_id","transform":"identity","source-id":5,"field-id":1000}]}',
'format-version'='2',
'iceberg.catalog'='ice01',
'iceberg.orc.files.only'='true',
#### A masked pattern was here ####
'name'='ice_rest.ice_orc2',
'parquet.compression'='zstd',
'serialization.format'='1',
'snapshot-count'='0',
'table_type'='ICEBERG',
'type'='rest',
'uuid'='#Masked#',
'write.delete.mode'='merge-on-read',
'write.format.default'='orc',
'write.merge.mode'='merge-on-read',
'write.update.mode'='merge-on-read')
PREHOOK: query: insert into ice_orc2 partition (company_id=100)
VALUES ('fn1','ln1', 1, 10), ('fn2','ln2', 2, 20), ('fn3','ln3', 3, 30)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: ice_rest@ice_orc2@company_id=100
POSTHOOK: query: insert into ice_orc2 partition (company_id=100)
VALUES ('fn1','ln1', 1, 10), ('fn2','ln2', 2, 20), ('fn3','ln3', 3, 30)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: ice_rest@ice_orc2@company_id=100
PREHOOK: query: describe formatted ice_orc2
PREHOOK: type: DESCTABLE
PREHOOK: Input: ice_rest@ice_orc2
POSTHOOK: query: describe formatted ice_orc2
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: ice_rest@ice_orc2
# col_name data_type comment
first_name string
last_name string
dept_id bigint
team_id bigint
company_id bigint

# Partition Transform Information
# col_name transform_type
company_id IDENTITY

# Detailed Table Information
Database: ice_rest
#### A masked pattern was here ####
Retention: 0
#### A masked pattern was here ####
Table Type: EXTERNAL_TABLE
Table Parameters:
bucketing_version 2
current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"first_name\",\"required\":false,\"type\":\"string\"},{\"id\":2,\"name\":\"last_name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"dept_id\",\"required\":false,\"type\":\"long\"},{\"id\":4,\"name\":\"team_id\",\"required\":false,\"type\":\"long\"},{\"id\":5,\"name\":\"company_id\",\"required\":false,\"type\":\"long\"}]}
current-snapshot-id #Masked#
current-snapshot-summary {\"added-data-files\":\"1\",\"added-records\":\"3\",\"added-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"3\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"1\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\",\"iceberg-version\":\"#Masked#\"}
current-snapshot-timestamp-ms #Masked#
default-partition-spec {\"spec-id\":0,\"fields\":[{\"name\":\"company_id\",\"transform\":\"identity\",\"source-id\":5,\"field-id\":1000}]}
format-version 2
iceberg.catalog ice01
iceberg.orc.files.only true
#### A masked pattern was here ####
name ice_rest.ice_orc2
numFiles 1
numRows 3
parquet.compression zstd
serialization.format 1
snapshot-count 1
storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
table_type ICEBERG
totalSize #Masked#
type rest
uuid #Masked#
write.delete.mode merge-on-read
write.format.default orc
write.merge.mode merge-on-read
write.update.mode merge-on-read

# Storage Information
SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe
InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
Compressed: No
Sort Columns: []
Storage Desc Params:
serialization.format 1
PREHOOK: query: select * from ice_orc2
PREHOOK: type: QUERY
PREHOOK: Input: ice_rest@ice_orc2
#### A masked pattern was here ####
POSTHOOK: query: select * from ice_orc2
POSTHOOK: type: QUERY
POSTHOOK: Input: ice_rest@ice_orc2
#### A masked pattern was here ####
fn1 ln1 1 10 100
fn2 ln2 2 20 100
fn3 ln3 3 30 100
PREHOOK: query: show tables
PREHOOK: type: SHOWTABLES
PREHOOK: Input: database:ice_rest
POSTHOOK: query: show tables
POSTHOOK: type: SHOWTABLES
POSTHOOK: Input: database:ice_rest
ice_orc1
ice_orc2
PREHOOK: query: drop table ice_orc1
PREHOOK: type: DROPTABLE
PREHOOK: Input: ice_rest@ice_orc1
PREHOOK: Output: database:ice_rest
PREHOOK: Output: ice_rest@ice_orc1
POSTHOOK: query: drop table ice_orc1
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: ice_rest@ice_orc1
POSTHOOK: Output: database:ice_rest
POSTHOOK: Output: ice_rest@ice_orc1
PREHOOK: query: drop table ice_orc2
PREHOOK: type: DROPTABLE
PREHOOK: Input: ice_rest@ice_orc2
PREHOOK: Output: database:ice_rest
PREHOOK: Output: ice_rest@ice_orc2
POSTHOOK: query: drop table ice_orc2
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: ice_rest@ice_orc2
POSTHOOK: Output: database:ice_rest
POSTHOOK: Output: ice_rest@ice_orc2
PREHOOK: query: show tables
PREHOOK: type: SHOWTABLES
PREHOOK: Input: database:ice_rest
POSTHOOK: query: show tables
POSTHOOK: type: SHOWTABLES
POSTHOOK: Input: database:ice_rest
PREHOOK: query: show databases
PREHOOK: type: SHOWDATABASES
POSTHOOK: query: show databases
POSTHOOK: type: SHOWDATABASES
ice_rest
PREHOOK: query: drop database ice_rest
PREHOOK: type: DROPDATABASE
PREHOOK: Input: database:ice_rest
PREHOOK: Output: database:ice_rest
POSTHOOK: query: drop database ice_rest
POSTHOOK: type: DROPDATABASE
POSTHOOK: Input: database:ice_rest
POSTHOOK: Output: database:ice_rest
PREHOOK: query: show databases
PREHOOK: type: SHOWDATABASES
POSTHOOK: query: show databases
POSTHOOK: type: SHOWDATABASES
41 changes: 41 additions & 0 deletions itests/qtest-iceberg/pom.xml
@@ -475,6 +475,23 @@
      <version>${project.version}</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.httpcomponents.client5</groupId>
      <artifactId>httpclient5</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.httpcomponents.core5</groupId>
      <artifactId>httpcore5</artifactId>
    </dependency>
    <dependency>
      <groupId>org.apache.httpcomponents.core5</groupId>
      <artifactId>httpcore5-h2</artifactId>
    </dependency>
    <dependency>
      <groupId>org.testcontainers</groupId>
      <artifactId>testcontainers</artifactId>
      <scope>test</scope>
    </dependency>
Contributor

Why do we need these ones?

Contributor Author
@difin difin Oct 2, 2025

This is the library that allows running Docker containers in tests - GenericContainer comes from testcontainers:

  private void startGravitinoContainer() {
    gravitinoContainer = new GenericContainer<>(GRAVITINO_IMAGE)
        .withExposedPorts(9001)
        // Update entrypoint to create the warehouse directory before starting the server
        .withCreateContainerCmdModifier(cmd -> cmd.withEntrypoint("bash", "-c",
            String.format("mkdir -p %s && exec %s", warehouseDir.toString(), GRAVITINO_STARTUP_SCRIPT)))
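
As a rough illustration of how the exposed port is typically wired into the iceberg.catalog.ice01.uri setting referenced in the .q file, here is a minimal sketch. It assumes a HiveConf-based driver setup; the method name, the gravitinoContainer field, and the /iceberg/ path are illustrative assumptions, not code from this PR.

  // Hypothetical sketch: start the container, resolve the randomly mapped host port,
  // and point the Iceberg REST catalog at it (the /iceberg/ path is an assumption).
  // GenericContainer is org.testcontainers.containers.GenericContainer,
  // HiveConf is org.apache.hadoop.hive.conf.HiveConf.
  private void configureRestCatalogUri(HiveConf conf) {
    gravitinoContainer.start();
    String restUri = String.format("http://%s:%d/iceberg/",
        gravitinoContainer.getHost(),
        gravitinoContainer.getMappedPort(9001));
    conf.set("iceberg.catalog.ice01.uri", restUri);
  }

Using getHost() and getMappedPort() rather than a hard-coded port keeps the test independent of the random host port Testcontainers assigns.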

Contributor

Sorry, I wanted to mention only the httpcomponents family, which is not imported.

  </dependencies>
  <build>
    <plugins>
@@ -525,6 +542,30 @@
          </additionalClasspathElements>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-dependency-plugin</artifactId>
        <executions>
          <execution>
            <id>copy-h2-driver-for-test</id>
            <phase>process-test-resources</phase>
            <goals>
              <goal>copy</goal>
            </goals>
            <configuration>
              <artifactItems>
                <artifactItem>
                  <groupId>com.h2database</groupId>
                  <artifactId>h2</artifactId>
                  <version>2.2.224</version>
                  <destFileName>h2-driver.jar</destFileName>
                </artifactItem>
              </artifactItems>
              <outputDirectory>${project.build.directory}/test-dependencies</outputDirectory>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>