From a3dcfd19fd1b2a709f7bdf013b83836953d49c6f Mon Sep 17 00:00:00 2001
From: Hongyue/Steve Zhang
Date: Thu, 12 Dec 2024 11:01:44 -0700
Subject: [PATCH] Hive: Optimize tableExists API in hive catalog (#11597)

* Hive: Optimize tableExists API in hive catalog

Skip creation of hive table operation when check existence of iceberg table in hive catalog

* Add a newline after if/else

* Add current thread interrupt

* Handle metadata tables and separate the tests

* Add comment back

* Address feedback

* Add extra comment for EcsCatalog override method

* Move javadoc around

* Added note if hive table with same name exists

* Added note if hive table with same name exists

* Add test with invalid identifier
---
 .../apache/iceberg/BaseMetastoreCatalog.java  |  2 +-
 .../org/apache/iceberg/hive/HiveCatalog.java  | 37 ++++++++++++++
 .../apache/iceberg/hive/HiveTableTest.java    | 49 +++++++++++++++++++
 3 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java b/core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java
index e960fe2b63e0..29068df380a9 100644
--- a/core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java
+++ b/core/src/main/java/org/apache/iceberg/BaseMetastoreCatalog.java
@@ -113,7 +113,7 @@ private Table loadMetadataTable(TableIdentifier identifier) {
     }
   }
 
-  private boolean isValidMetadataIdentifier(TableIdentifier identifier) {
+  protected boolean isValidMetadataIdentifier(TableIdentifier identifier) {
     return MetadataTableType.from(identifier.name()) != null
         && isValidIdentifier(TableIdentifier.of(identifier.namespace().levels()));
   }
diff --git a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java
index 1cf738d736cb..9fd7c6f2eeb0 100644
--- a/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java
+++ b/hive-metastore/src/main/java/org/apache/iceberg/hive/HiveCatalog.java
@@ -412,6 +412,43 @@ private void validateTableIsIcebergTableOrView(
     }
   }
 
+  /**
+   * Check whether table or metadata table exists.
+   *
+   * <p>Note: If a hive table with the same identifier exists in catalog, this method will return
+   * {@code false}.
+   *
+   * @param identifier a table identifier
+   * @return true if the table exists, false otherwise
+   */
+  @Override
+  public boolean tableExists(TableIdentifier identifier) {
+    TableIdentifier baseTableIdentifier = identifier;
+    if (!isValidIdentifier(identifier)) {
+      if (!isValidMetadataIdentifier(identifier)) {
+        return false;
+      } else {
+        baseTableIdentifier = TableIdentifier.of(identifier.namespace().levels());
+      }
+    }
+
+    String database = baseTableIdentifier.namespace().level(0);
+    String tableName = baseTableIdentifier.name();
+    try {
+      Table table = clients.run(client -> client.getTable(database, tableName));
+      HiveOperationsBase.validateTableIsIceberg(table, fullTableName(name, baseTableIdentifier));
+      return true;
+    } catch (NoSuchTableException | NoSuchObjectException e) {
+      return false;
+    } catch (TException e) {
+      throw new RuntimeException("Failed to check table existence of " + baseTableIdentifier, e);
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+      throw new RuntimeException(
+          "Interrupted in call to check table existence of " + baseTableIdentifier, e);
+    }
+  }
+
   @Override
   public void createNamespace(Namespace namespace, Map<String, String> meta) {
     Preconditions.checkArgument(
diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/HiveTableTest.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/HiveTableTest.java
index 5650c4e82458..7f7a56c9a126 100644
--- a/hive-metastore/src/test/java/org/apache/iceberg/hive/HiveTableTest.java
+++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/HiveTableTest.java
@@ -388,6 +388,55 @@ public void testHiveTableAndIcebergTableWithSameName(TableType tableType)
     HIVE_METASTORE_EXTENSION.metastoreClient().dropTable(DB_NAME, hiveTableName);
   }
 
+  @Test
+  public void testTableExists() throws TException, IOException {
+    String testTableName = "test_table_exists";
+    TableIdentifier identifier = TableIdentifier.of(DB_NAME, testTableName);
+    TableIdentifier metadataIdentifier = TableIdentifier.of(DB_NAME, testTableName, "partitions");
+    TableIdentifier invalidIdentifier = TableIdentifier.of(DB_NAME, "invalid", testTableName);
+
+    assertThat(catalog.tableExists(invalidIdentifier))
+        .as("Should return false on invalid identifier")
+        .isFalse();
+    assertThat(catalog.tableExists(identifier))
+        .as("Table should not exist before create")
+        .isFalse();
+    catalog.buildTable(identifier, SCHEMA).create();
+
+    assertThat(catalog.tableExists(identifier)).as("Table should exist after create").isTrue();
+    assertThat(catalog.tableExists(metadataIdentifier))
+        .as("Metadata table should also exist")
+        .isTrue();
+
+    assertThat(catalog.dropTable(identifier)).as("Should drop a table that does exist").isTrue();
+    assertThat(catalog.tableExists(identifier)).as("Table should not exist after drop").isFalse();
+    assertThat(catalog.tableExists(metadataIdentifier))
+        .as("Metadata table should not exist after drop")
+        .isFalse();
+
+    HIVE_METASTORE_EXTENSION
+        .metastoreClient()
+        .createTable(createHiveTable(testTableName, TableType.EXTERNAL_TABLE));
+    assertThat(catalog.tableExists(identifier))
+        .as("Should return false when a hive table with the same name exists")
+        .isFalse();
+    assertThat(catalog.tableExists(metadataIdentifier))
+        .as("Metadata table should not exist")
+        .isFalse();
+    HIVE_METASTORE_EXTENSION.metastoreClient().dropTable(DB_NAME, testTableName);
+
+    catalog
+        .buildView(identifier)
+        .withSchema(SCHEMA)
+        .withDefaultNamespace(identifier.namespace())
+        .withQuery("spark", "select * from ns.tbl")
+        .create();
+    assertThat(catalog.tableExists(identifier))
+        .as("Should return false if identifier refers to a view")
+        .isFalse();
+    catalog.dropView(identifier);
+  }
+
   private org.apache.hadoop.hive.metastore.api.Table createHiveTable(
       String hiveTableName, TableType type) throws IOException {
     Map<String, String> parameters = Maps.newHashMap();