From 6a3063f6eb89ff20c10879119fc09709cf2ed713 Mon Sep 17 00:00:00 2001
From: rui-mo <rui.mo@intel.com>
Date: Fri, 9 Feb 2024 19:19:49 +0800
Subject: [PATCH] Add test for decimal partition column

---
 velox/connectors/hive/HiveConnectorSplit.h |  4 +-
 velox/exec/tests/TableScanTest.cpp         | 98 +++++++++++++++++-----
 2 files changed, 82 insertions(+), 20 deletions(-)

diff --git a/velox/connectors/hive/HiveConnectorSplit.h b/velox/connectors/hive/HiveConnectorSplit.h
index 15be998884b0a..60ed26cdcccb6 100644
--- a/velox/connectors/hive/HiveConnectorSplit.h
+++ b/velox/connectors/hive/HiveConnectorSplit.h
@@ -31,7 +31,9 @@ struct HiveConnectorSplit : public connector::ConnectorSplit {
   /// Mapping from partition keys to values. Values are specified as strings
   /// formatted the same way as CAST(x as VARCHAR). Null values are specified as
   /// std::nullopt. Date values must be formatted using ISO 8601 as YYYY-MM-DD.
-  /// All scalar types and date type are supported.
+  /// Decimal values must be formatted using unscaled values, e.g. '123456' for
+  /// '1234.56' of decimal(6, 2) type. All scalar types and date type are
+  /// supported.
   const std::unordered_map<std::string, std::optional<std::string>>
       partitionKeys;
   std::optional<int32_t> tableBucketNumber;
diff --git a/velox/exec/tests/TableScanTest.cpp b/velox/exec/tests/TableScanTest.cpp
index 76485ebb37fa4..866cd18c0647e 100644
--- a/velox/exec/tests/TableScanTest.cpp
+++ b/velox/exec/tests/TableScanTest.cpp
@@ -39,6 +39,14 @@ using namespace facebook::velox::common::test;
 using namespace facebook::velox::exec::test;
 
 namespace {
+std::string makeCastSql(const variant& v, const TypePtr& type) {
+  // Yields "CAST('<v.toJson(type)>' AS <type as written by toTypeSql>)".
+  std::ostringstream sql;
+  sql << "CAST('" << v.toJson(type) << "' AS ";
+  toTypeSql(type, sql);
+  return sql.str() + ")";
+}
+
 void verifyCacheStats(
     const FileHandleCacheStats& cacheStats,
     size_t curSize,
@@ -148,10 +156,22 @@ class TableScanTest : public virtual HiveConnectorTestBase {
   void testPartitionedTableImpl(
       const std::string& filePath,
       const TypePtr& partitionType,
-      const std::optional<std::string>& partitionValue) {
-    auto split = HiveConnectorSplitBuilder(filePath)
-                     .partitionKey("pkey", partitionValue)
-                     .build();
+      const variant& partitionValue) {
+    // Create the partition value of a split.
+    std::optional<std::string> value = std::nullopt;
+    if (!partitionValue.isNull()) {
+      auto type = partitionType;
+      if (partitionType->isDecimal()) {
+        const auto [precision, scale] =
+            getDecimalPrecisionScale(*partitionType);
+        // The partition value of decimal should be formatted with unscaled
+        // value.
+        type = DECIMAL(precision, 0);
+      }
+      value = std::optional<std::string>(partitionValue.toJson(type));
+    }
+    auto split =
+        HiveConnectorSplitBuilder(filePath).partitionKey("pkey", value).build();
     auto outputType =
         ROW({"pkey", "c0", "c1"}, {partitionType, BIGINT(), DOUBLE()});
     ColumnHandleMap assignments = {
@@ -166,8 +186,10 @@ class TableScanTest : public virtual HiveConnectorTestBase {
                   .endTableScan()
                   .planNode();
 
-    std::string partitionValueStr =
-        partitionValue.has_value() ? "'" + *partitionValue + "'" : "null";
+    std::string partitionValueStr = partitionValue.isNull()
+        ? "null"
+        : makeCastSql(partitionValue, partitionType);
+
     assertQuery(
         op, split, fmt::format("SELECT {}, * FROM tmp", partitionValueStr));
 
@@ -210,9 +232,10 @@ class TableScanTest : public virtual HiveConnectorTestBase {
   void testPartitionedTable(
       const std::string& filePath,
       const TypePtr& partitionType,
-      const std::optional<std::string>& partitionValue) {
+      const variant& partitionValue) {
     testPartitionedTableImpl(filePath, partitionType, partitionValue);
-    testPartitionedTableImpl(filePath, partitionType, std::nullopt);
+    const auto nullKey = variant::null(partitionType->kind());
+    testPartitionedTableImpl(filePath, partitionType, nullKey);
   }
 
   RowTypePtr rowType_{
@@ -1442,7 +1465,7 @@ TEST_F(TableScanTest, partitionedTableVarcharKey) {
   writeToFile(filePath->path, vectors);
   createDuckDbTable(vectors);
 
-  testPartitionedTable(filePath->path, VARCHAR(), "2020-11-01");
+  testPartitionedTable(filePath->path, VARCHAR(), variant("2020-11-01"));
 }
 
 TEST_F(TableScanTest, partitionedTableBigIntKey) {
@@ -1451,7 +1474,10 @@ TEST_F(TableScanTest, partitionedTableBigIntKey) {
   auto filePath = TempFilePath::create();
   writeToFile(filePath->path, vectors);
   createDuckDbTable(vectors);
-  testPartitionedTable(filePath->path, BIGINT(), "123456789123456789");
+  testPartitionedTable(
+      filePath->path,
+      BIGINT(),
+      variant::create<TypeKind::BIGINT>(123456789123456789));
 }
 
 TEST_F(TableScanTest, partitionedTableIntegerKey) {
@@ -1460,7 +1486,8 @@ TEST_F(TableScanTest, partitionedTableIntegerKey) {
   auto filePath = TempFilePath::create();
   writeToFile(filePath->path, vectors);
   createDuckDbTable(vectors);
-  testPartitionedTable(filePath->path, INTEGER(), "123456789");
+  testPartitionedTable(
+      filePath->path, INTEGER(), variant::create<TypeKind::INTEGER>(123456789));
 }
 
 TEST_F(TableScanTest, partitionedTableSmallIntKey) {
@@ -1469,7 +1496,8 @@ TEST_F(TableScanTest, partitionedTableSmallIntKey) {
   auto filePath = TempFilePath::create();
   writeToFile(filePath->path, vectors);
   createDuckDbTable(vectors);
-  testPartitionedTable(filePath->path, SMALLINT(), "1");
+  testPartitionedTable(
+      filePath->path, SMALLINT(), variant::create<TypeKind::SMALLINT>(1));
 }
 
 TEST_F(TableScanTest, partitionedTableTinyIntKey) {
@@ -1478,7 +1506,8 @@ TEST_F(TableScanTest, partitionedTableTinyIntKey) {
   auto filePath = TempFilePath::create();
   writeToFile(filePath->path, vectors);
   createDuckDbTable(vectors);
-  testPartitionedTable(filePath->path, TINYINT(), "1");
+  testPartitionedTable(
+      filePath->path, TINYINT(), variant::create<TypeKind::TINYINT>(1));
 }
 
 TEST_F(TableScanTest, partitionedTableBooleanKey) {
@@ -1487,7 +1516,8 @@ TEST_F(TableScanTest, partitionedTableBooleanKey) {
   auto filePath = TempFilePath::create();
   writeToFile(filePath->path, vectors);
   createDuckDbTable(vectors);
-  testPartitionedTable(filePath->path, BOOLEAN(), "0");
+  testPartitionedTable(
+      filePath->path, BOOLEAN(), variant::create<TypeKind::BOOLEAN>(false));
 }
 
 TEST_F(TableScanTest, partitionedTableRealKey) {
@@ -1496,7 +1526,8 @@ TEST_F(TableScanTest, partitionedTableRealKey) {
   auto filePath = TempFilePath::create();
   writeToFile(filePath->path, vectors);
   createDuckDbTable(vectors);
-  testPartitionedTable(filePath->path, REAL(), "3.5");
+  testPartitionedTable(
+      filePath->path, REAL(), variant::create<TypeKind::REAL>(3.5));
 }
 
 TEST_F(TableScanTest, partitionedTableDoubleKey) {
@@ -1505,7 +1536,35 @@ TEST_F(TableScanTest, partitionedTableDoubleKey) {
   auto filePath = TempFilePath::create();
   writeToFile(filePath->path, vectors);
   createDuckDbTable(vectors);
-  testPartitionedTable(filePath->path, DOUBLE(), "3.5");
+  testPartitionedTable(
+      filePath->path, DOUBLE(), variant::create<TypeKind::DOUBLE>(3.5));
+}
+
+TEST_F(TableScanTest, partitionedTableDecimalKey) {
+  auto dataType = ROW({"c0", "c1"}, {BIGINT(), DOUBLE()});
+  auto data = makeVectors(10, 1'000, dataType);
+  auto file = TempFilePath::create();
+  writeToFile(file->path, data);
+  createDuckDbTable(data);
+
+  // Short decimal key: the value is carried by a BIGINT variant.
+  const auto shortDecimal = DECIMAL(12, 3);
+  for (const int64_t unscaled : {123456789123, -123456789123}) {
+    testPartitionedTable(
+        file->path,
+        shortDecimal,
+        variant::create<TypeKind::BIGINT>(unscaled));
+  }
+  // Long decimal key: the value is carried by a HUGEINT variant.
+  const auto longDecimal = DECIMAL(36, 18);
+  for (const char* unscaled :
+       {"123456789123456789123456789123456789",
+        "-123456789123456789123456789123456789"}) {
+    testPartitionedTable(
+        file->path,
+        longDecimal,
+        variant::create<TypeKind::HUGEINT>(HugeInt::parse(unscaled)));
+  }
+}
 
 TEST_F(TableScanTest, partitionedTableDateKey) {
@@ -1514,7 +1573,8 @@ TEST_F(TableScanTest, partitionedTableDateKey) {
   auto filePath = TempFilePath::create();
   writeToFile(filePath->path, vectors);
   createDuckDbTable(vectors);
-  testPartitionedTable(filePath->path, DATE(), "2023-10-27");
+  testPartitionedTable(
+      filePath->path, DATE(), variant::create<TypeKind::INTEGER>(19657));
 }
 
 std::vector<StringView> toStringViews(const std::vector<std::string>& values) {
@@ -3709,7 +3769,7 @@ TEST_F(TableScanTest, readMissingFieldsWithMoreColumns) {
   }
 }
 
-TEST_F(TableScanTest, varbinaryPartitionKey) {
+TEST_F(TableScanTest, partitionedTableVarbinaryKey) {
   auto vectors = makeVectors(1, 1'000);
   auto filePath = TempFilePath::create();
   writeToFile(filePath->path, vectors);
@@ -3734,7 +3794,7 @@ TEST_F(TableScanTest, varbinaryPartitionKey) {
   assertQuery(op, split, "SELECT c0, '2021-12-02' FROM tmp");
 }
 
-TEST_F(TableScanTest, timestampPartitionKey) {
+TEST_F(TableScanTest, partitionedTableTimestampKey) {
   const char* inputs[] = {"2023-10-14 07:00:00.0", "2024-01-06 04:00:00.0"};
   auto expected = makeRowVector(
       {"t"},