diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 04f1c0dbc05..747b57bbf7b 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -17,6 +17,7 @@ import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.io.FileUtils; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.spark.sql.AnalysisException; import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; @@ -650,6 +651,21 @@ protected void checkDirExists(Path dir) { } } + protected void checkDataFileExists(Path dir) { + Boolean isExists = false; + try { + for (FileStatus fileStatus : hdfs.listStatus(dir)) { + if (fileStatus.isFile()) { + isExists = true; + break; + } + } + Assertions.assertTrue(isExists); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + @Test void testTableOptions() { String tableName = "options_table"; @@ -726,9 +742,17 @@ protected String getExpectedTableData(SparkTableInfo table) { } protected String getCreateSimpleTableString(String tableName) { + return getCreateSimpleTableString(tableName, false); + } + + protected String getCreateSimpleTableString(String tableName, boolean isExternal) { + String external = ""; + if (isExternal) { + external = "EXTERNAL"; + } return String.format( - "CREATE TABLE %s (id INT COMMENT 'id comment', name STRING COMMENT '', age INT)", - tableName); + "CREATE %s TABLE %s (id INT COMMENT 'id comment', name STRING COMMENT '', age INT)", + external, tableName); } protected List getSimpleTableColumn() { diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java index 91bea87a2aa..7a16925b627 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java @@ -206,6 +206,27 @@ void testHiveFormatWithStoredAs() { checkParquetFile(tableInfo); } + @Test + void testHiveFormatWithExternalTable() { + String tableName = "test_hive_format_with_external_table"; + dropTableIfExists(tableName); + String createTableSql = getCreateSimpleTableString(tableName, true); + sql(createTableSql); + SparkTableInfo tableInfo = getTableInfo(tableName); + + SparkTableInfoChecker checker = + SparkTableInfoChecker.create() + .withName(tableName) + .withTableProperties( + ImmutableMap.of(HivePropertiesConstants.SPARK_HIVE_EXTERNAL, "true")); + checker.check(tableInfo); + checkTableReadWrite(tableInfo); + + dropTableIfExists(tableName); + Path tableLocation = new Path(tableInfo.getTableLocation()); + checkDataFileExists(tableLocation); + } + @Test void testHiveFormatWithUsing() { String tableName = "test_hive_format_using_table"; diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/hive/HivePropertiesConstants.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/hive/HivePropertiesConstants.java index c70e038a19a..3384e274fab 100644 --- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/hive/HivePropertiesConstants.java +++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/hive/HivePropertiesConstants.java @@ -5,6 +5,8 @@ package com.datastrato.gravitino.spark.connector.hive; +import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType.EXTERNAL_TABLE; + import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata; import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.StorageFormat; import com.google.common.annotations.VisibleForTesting; @@ -27,11 +29,14 @@ public class HivePropertiesConstants { public static final String GRAVITINO_HIVE_FORMAT_AVRO = StorageFormat.AVRO.toString(); public static final String GRAVITINO_HIVE_FORMAT_JSON = StorageFormat.JSON.toString(); public static final String GRAVITINO_HIVE_FORMAT_CSV = StorageFormat.CSV.toString(); + public static final String GRAVITINO_HIVE_EXTERNAL_TABLE = EXTERNAL_TABLE.name(); + public static final String GRAVITINO_HIVE_TABLE_TYPE = "table-type"; public static final String SPARK_HIVE_STORED_AS = "hive.stored-as"; public static final String SPARK_HIVE_INPUT_FORMAT = "input-format"; public static final String SPARK_HIVE_OUTPUT_FORMAT = "output-format"; public static final String SPARK_HIVE_SERDE_LIB = "serde-lib"; + public static final String SPARK_HIVE_EXTERNAL = "external"; @VisibleForTesting public static final String TEXT_INPUT_FORMAT_CLASS = diff --git a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/hive/HivePropertiesConverter.java b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/hive/HivePropertiesConverter.java index 6958ef89ca4..87bc968080b 100644 --- a/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/hive/HivePropertiesConverter.java +++ b/spark-connector/src/main/java/com/datastrato/gravitino/spark/connector/hive/HivePropertiesConverter.java @@ -5,6 +5,7 @@ package com.datastrato.gravitino.spark.connector.hive; +import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata; import com.datastrato.gravitino.spark.connector.PropertiesConverter; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; @@ -60,6 +61,10 @@ public Map toGravitinoTableProperties(Map proper String provider = gravitinoTableProperties.get(TableCatalog.PROP_PROVIDER); String storeAs = gravitinoTableProperties.get(HivePropertiesConstants.SPARK_HIVE_STORED_AS); String fileFormat = Optional.ofNullable(storeAs).orElse(provider); + String isExternal = + Optional.ofNullable(gravitinoTableProperties.get(TableCatalog.PROP_EXTERNAL)) + .orElse("false"); + if (fileFormat != null) { String gravitinoFormat = fileFormatMap.get(fileFormat.toLowerCase(Locale.ROOT)); if (gravitinoFormat != null) { @@ -70,6 +75,11 @@ public Map toGravitinoTableProperties(Map proper } } + if (isExternal.equalsIgnoreCase("true")) { + gravitinoTableProperties.put( + HivePropertiesConstants.GRAVITINO_HIVE_TABLE_TYPE, + HiveTablePropertiesMetadata.TableType.EXTERNAL_TABLE.name()); + } sparkToGravitinoPropertyMap.forEach( (sparkProperty, gravitinoProperty) -> { if (gravitinoTableProperties.containsKey(sparkProperty)) { @@ -83,7 +93,15 @@ public Map toGravitinoTableProperties(Map proper @Override public Map toSparkTableProperties(Map properties) { - return toOptionProperties(properties); + Map sparkTableProperties = toOptionProperties(properties); + String hiveTableType = + sparkTableProperties.get(HivePropertiesConstants.GRAVITINO_HIVE_TABLE_TYPE); + if (HivePropertiesConstants.GRAVITINO_HIVE_EXTERNAL_TABLE.equalsIgnoreCase(hiveTableType)) { + sparkTableProperties.remove(HivePropertiesConstants.GRAVITINO_HIVE_TABLE_TYPE); + sparkTableProperties.put(HivePropertiesConstants.SPARK_HIVE_EXTERNAL, "true"); + } + + return sparkTableProperties; } @VisibleForTesting diff --git a/spark-connector/src/test/java/com/datastrato/gravitino/spark/connector/hive/TestHivePropertiesConverter.java b/spark-connector/src/test/java/com/datastrato/gravitino/spark/connector/hive/TestHivePropertiesConverter.java index 2a04915d917..1546be9d60c 100644 --- a/spark-connector/src/test/java/com/datastrato/gravitino/spark/connector/hive/TestHivePropertiesConverter.java +++ b/spark-connector/src/test/java/com/datastrato/gravitino/spark/connector/hive/TestHivePropertiesConverter.java @@ -78,6 +78,24 @@ void testTableFormat() { ImmutableMap.of(TableCatalog.OPTION_PREFIX + "a", "a", "b", "b"), hiveProperties); } + @Test + void testExternalTable() { + Map hiveProperties = + hivePropertiesConverter.toGravitinoTableProperties( + ImmutableMap.of(HivePropertiesConstants.SPARK_HIVE_EXTERNAL, "true")); + Assertions.assertEquals( + hiveProperties.get(HivePropertiesConstants.GRAVITINO_HIVE_TABLE_TYPE), + HivePropertiesConstants.GRAVITINO_HIVE_EXTERNAL_TABLE); + + hiveProperties = + hivePropertiesConverter.toSparkTableProperties( + ImmutableMap.of( + HivePropertiesConstants.GRAVITINO_HIVE_TABLE_TYPE, + HivePropertiesConstants.GRAVITINO_HIVE_EXTERNAL_TABLE)); + Assertions.assertEquals( + ImmutableMap.of(HivePropertiesConstants.SPARK_HIVE_EXTERNAL, "true"), hiveProperties); + } + @Test void testOptionProperties() { Map properties =