diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala index 18c9efab39e0..4918a6eadeb8 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala @@ -155,18 +155,12 @@ object VeloxBackendSettings extends BackendSettingsApi { format match { case ParquetReadFormat => - val typeValidator: PartialFunction[StructField, String] = { - // Parquet timestamp is not fully supported yet - case StructField(_, TimestampType, _, _) - if GlutenConfig.get.forceParquetTimestampTypeScanFallbackEnabled => - "TimestampType(force fallback)" - } val parquetOptions = new ParquetOptions(CaseInsensitiveMap(properties), SQLConf.get) if (parquetOptions.mergeSchema) { // https://github.com/apache/incubator-gluten/issues/7174 Some(s"not support when merge schema is true") } else { - validateTypes(typeValidator) + None } case DwrfReadFormat => None case OrcReadFormat => diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index baed98729bc3..f5071d2f3fc4 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -917,8 +917,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("SPARK-35640: read binary as timestamp should throw schema incompatible error") // Exception msg. .exclude("SPARK-35640: int as long should throw schema incompatible error") - // Velox only support read Timestamp with INT96 for now. 
- .exclude("read dictionary and plain encoded timestamp_millis written as INT64") enableSuite[GlutenParquetV1PartitionDiscoverySuite] enableSuite[GlutenParquetV2PartitionDiscoverySuite] enableSuite[GlutenParquetProtobufCompatibilitySuite] @@ -927,9 +925,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("Enabling/disabling ignoreCorruptFiles") // decimal failed ut .exclude("SPARK-34212 Parquet should read decimals correctly") - // Timestamp is read as INT96. - .exclude("Migration from INT96 to TIMESTAMP_MICROS timestamp type") - .exclude("SPARK-10365 timestamp written and read as INT64 - TIMESTAMP_MICROS") // Rewrite because the filter after datasource is not needed. .exclude( "SPARK-26677: negated null-safe equality comparison should not filter matched row groups") @@ -938,9 +933,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("Enabling/disabling ignoreCorruptFiles") // decimal failed ut .exclude("SPARK-34212 Parquet should read decimals correctly") - // Timestamp is read as INT96. - .exclude("Migration from INT96 to TIMESTAMP_MICROS timestamp type") - .exclude("SPARK-10365 timestamp written and read as INT64 - TIMESTAMP_MICROS") // Rewrite because the filter after datasource is not needed. .exclude( "SPARK-26677: negated null-safe equality comparison should not filter matched row groups") diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index d1f8b5b0c492..d3bc3846d80f 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -720,8 +720,7 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("SPARK-35640: read binary as timestamp should throw schema incompatible error") // Exception msg. 
.exclude("SPARK-35640: int as long should throw schema incompatible error") - // Velox only support read Timestamp with INT96 for now. - .exclude("read dictionary and plain encoded timestamp_millis written as INT64") + // Velox parquet reader does not allow offset zero. .exclude("SPARK-40128 read DELTA_LENGTH_BYTE_ARRAY encoded strings") enableSuite[GlutenParquetV1PartitionDiscoverySuite] enableSuite[GlutenParquetV2PartitionDiscoverySuite] @@ -731,10 +730,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("Enabling/disabling ignoreCorruptFiles") // decimal failed ut .exclude("SPARK-34212 Parquet should read decimals correctly") - // Timestamp is read as INT96. - .exclude("Migration from INT96 to TIMESTAMP_MICROS timestamp type") - .exclude("SPARK-10365 timestamp written and read as INT64 - TIMESTAMP_MICROS") - .exclude("SPARK-36182: read TimestampNTZ as TimestampLTZ") // new added in spark-3.3 and need fix later, random failure may caused by memory free .exclude("SPARK-39833: pushed filters with project without filter columns") .exclude("SPARK-39833: pushed filters with count()") @@ -746,10 +741,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("Enabling/disabling ignoreCorruptFiles") // decimal failed ut .exclude("SPARK-34212 Parquet should read decimals correctly") - // Timestamp is read as INT96. - .exclude("Migration from INT96 to TIMESTAMP_MICROS timestamp type") - .exclude("SPARK-10365 timestamp written and read as INT64 - TIMESTAMP_MICROS") - .exclude("SPARK-36182: read TimestampNTZ as TimestampLTZ") // Rewrite because the filter after datasource is not needed. 
.exclude( "SPARK-26677: negated null-safe equality comparison should not filter matched row groups") diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 1de6961192f7..cc9746dcdb53 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -715,9 +715,7 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("SPARK-35640: read binary as timestamp should throw schema incompatible error") // Exception msg. .exclude("SPARK-35640: int as long should throw schema incompatible error") - // Velox only support read Timestamp with INT96 for now. - .exclude("read dictionary and plain encoded timestamp_millis written as INT64") - .exclude("Read TimestampNTZ and TimestampLTZ for various logical TIMESTAMP types") + // Velox parquet reader does not allow offset zero. .exclude("SPARK-40128 read DELTA_LENGTH_BYTE_ARRAY encoded strings") enableSuite[GlutenParquetV1PartitionDiscoverySuite] enableSuite[GlutenParquetV2PartitionDiscoverySuite] @@ -728,10 +726,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("Enabling/disabling ignoreCorruptFiles") // decimal failed ut .exclude("SPARK-34212 Parquet should read decimals correctly") - // Timestamp is read as INT96. 
- .exclude("Migration from INT96 to TIMESTAMP_MICROS timestamp type") - .exclude("SPARK-10365 timestamp written and read as INT64 - TIMESTAMP_MICROS") - .exclude("SPARK-36182: read TimestampNTZ as TimestampLTZ") // new added in spark-3.3 and need fix later, random failure may caused by memory free .exclude("SPARK-39833: pushed filters with project without filter columns") .exclude("SPARK-39833: pushed filters with count()") @@ -744,10 +738,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("Enabling/disabling ignoreCorruptFiles") // decimal failed ut .exclude("SPARK-34212 Parquet should read decimals correctly") - // Timestamp is read as INT96. - .exclude("Migration from INT96 to TIMESTAMP_MICROS timestamp type") - .exclude("SPARK-10365 timestamp written and read as INT64 - TIMESTAMP_MICROS") - .exclude("SPARK-36182: read TimestampNTZ as TimestampLTZ") // Rewrite because the filter after datasource is not needed. .exclude( "SPARK-26677: negated null-safe equality comparison should not filter matched row groups") diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 2cf2f8ad3186..71786c91322b 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -726,9 +726,7 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("SPARK-35640: read binary as timestamp should throw schema incompatible error") // Exception msg. .exclude("SPARK-35640: int as long should throw schema incompatible error") - // Velox only support read Timestamp with INT96 for now. - .exclude("read dictionary and plain encoded timestamp_millis written as INT64") - .exclude("Read TimestampNTZ and TimestampLTZ for various logical TIMESTAMP types") + // Velox parquet reader does not allow offset zero. 
.exclude("SPARK-40128 read DELTA_LENGTH_BYTE_ARRAY encoded strings") enableSuite[GlutenParquetV1PartitionDiscoverySuite] enableSuite[GlutenParquetV2PartitionDiscoverySuite] @@ -739,10 +737,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("Enabling/disabling ignoreCorruptFiles") // decimal failed ut .exclude("SPARK-34212 Parquet should read decimals correctly") - // Timestamp is read as INT96. - .exclude("Migration from INT96 to TIMESTAMP_MICROS timestamp type") - .exclude("SPARK-10365 timestamp written and read as INT64 - TIMESTAMP_MICROS") - .exclude("SPARK-36182: read TimestampNTZ as TimestampLTZ") // new added in spark-3.3 and need fix later, random failure may caused by memory free .exclude("SPARK-39833: pushed filters with project without filter columns") .exclude("SPARK-39833: pushed filters with count()") @@ -755,10 +749,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("Enabling/disabling ignoreCorruptFiles") // decimal failed ut .exclude("SPARK-34212 Parquet should read decimals correctly") - // Timestamp is read as INT96. - .exclude("Migration from INT96 to TIMESTAMP_MICROS timestamp type") - .exclude("SPARK-10365 timestamp written and read as INT64 - TIMESTAMP_MICROS") - .exclude("SPARK-36182: read TimestampNTZ as TimestampLTZ") // Rewrite because the filter after datasource is not needed. 
.exclude( "SPARK-26677: negated null-safe equality comparison should not filter matched row groups") diff --git a/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala index f6ed0327349d..cd01b1a42f8a 100644 --- a/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala +++ b/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala @@ -126,9 +126,6 @@ class GlutenConfig(conf: SQLConf) extends Logging { def forceOrcCharTypeScanFallbackEnabled: Boolean = conf.getConf(VELOX_FORCE_ORC_CHAR_TYPE_SCAN_FALLBACK) - def forceParquetTimestampTypeScanFallbackEnabled: Boolean = - conf.getConf(VELOX_FORCE_PARQUET_TIMESTAMP_TYPE_SCAN_FALLBACK) - def scanFileSchemeValidationEnabled: Boolean = conf.getConf(VELOX_SCAN_FILE_SCHEME_VALIDATION_ENABLED) @@ -2184,13 +2181,6 @@ object GlutenConfig { .booleanConf .createWithDefault(true) - val VELOX_FORCE_PARQUET_TIMESTAMP_TYPE_SCAN_FALLBACK = - buildConf("spark.gluten.sql.parquet.timestampType.scan.fallback.enabled") - .internal() - .doc("Force fallback for parquet timestamp type scan.") - .booleanConf - .createWithDefault(false) - val VELOX_SCAN_FILE_SCHEME_VALIDATION_ENABLED = buildConf("spark.gluten.sql.scan.fileSchemeValidation.enabled") .internal()