fix: Use the correct index when doing comparison in custom view (#11900)

Summary: We need to use the correct index inside the decoded vector when doing comparison. Reviewed By: xiaoxmeng Differential Revision: D67354078
facebookincubator · Dec 18, 2024 · 2956413 · 2956413
1 parent 01e755c
commit 2956413
Show file tree

Hide file tree

Showing 3 changed files with 81 additions and 8 deletions.
diff --git a/velox/expression/ComplexViewTypes.h b/velox/expression/ComplexViewTypes.h
@@ -1030,13 +1030,14 @@ class RowView
       const RowView& other,
       const CompareFlags flags) const {
     if constexpr (Is < sizeof...(T)) {
-      auto result = std::get<Is>(*childReaders_)
-                        ->baseVector()
-                        ->compare(
-                            std::get<Is>(*other.childReaders_)->baseVector(),
-                            offset_,
-                            other.offset_,
-                            flags);
+      auto result =
+          std::get<Is>(*childReaders_)
+              ->baseVector()
+              ->compare(
+                  std::get<Is>(*other.childReaders_)->baseVector(),
+                  std::get<Is>(*childReaders_)->index(offset_),
+                  std::get<Is>(*other.childReaders_)->index(other.offset_),
+                  flags);
       if (!result.has_value()) {
         return std::nullopt;
       }

diff --git a/velox/expression/VectorReaders.h b/velox/expression/VectorReaders.h
@@ -52,6 +52,10 @@ struct VectorReader {
   explicit VectorReader(const VectorReader<T>&) = delete;
   VectorReader<T>& operator=(const VectorReader<T>&) = delete;
 
+  vector_size_t index(vector_size_t idx) const {
+    return decoded_.index(idx);
+  }
+
   exec_in_t operator[](size_t offset) const {
     return decoded_.template valueAt<exec_in_t>(offset);
   }

diff --git a/velox/expression/tests/RowViewTest.cpp b/velox/expression/tests/RowViewTest.cpp
@@ -170,7 +170,8 @@ class RowViewTest : public functions::test::FunctionBaseTest {
       ASSERT_TRUE(reader2.isSet(0));
       auto l = read(reader1, 0);
       auto r = read(reader2, 0);
-      // Default flag for all operators other than `==` is kNullAsIndeterminate
+      // Default flag for all operators other than `==` is
+      // kNullAsIndeterminate
       VELOX_ASSERT_THROW(r < l, "Ordering nulls is not supported");
       VELOX_ASSERT_THROW(r <= l, "Ordering nulls is not supported");
       VELOX_ASSERT_THROW(r > l, "Ordering nulls is not supported");
@@ -205,6 +206,73 @@ class RowViewTest : public functions::test::FunctionBaseTest {
           CompareFlags::NullHandlingMode::kNullAsIndeterminate);
       ASSERT_EQ(l.compare(l, flags), kIndeterminate);
     }
+
+    // Test a layer of indirection with by wrapping the vector in a dictionary
+    {
+      auto field1Vector =
+          makeFlatVector<int32_t>(size, [](vector_size_t row) { return row; });
+      auto field2Vector =
+          makeFlatVector<float>(size, [](vector_size_t row) { return row; });
+      auto baseVector = makeRowVector(
+          {field1Vector, field2Vector}, [](vector_size_t idx) { return idx; });
+      vector_size_t size = baseVector->size();
+      BufferPtr indices =
+          AlignedBuffer::allocate<vector_size_t>(size, execCtx_.pool());
+      auto rawIndices = indices->asMutable<vector_size_t>();
+      for (auto i = 0; i < size; ++i) {
+        rawIndices[i] = i;
+      }
+      auto rowVector = wrapInDictionary(
+          indices, size, wrapInDictionary(indices, size, baseVector));
+      DecodedVector decoded;
+      exec::VectorReader<Row<int32_t, float>> reader(
+          decode(decoded, *rowVector.get()));
+
+      // Test the equals case so that we are traversing all of the tuples and
+      // ensuring correct index accessing.
+      auto l = read(reader, 0);
+      auto r = read(reader, 0);
+      ASSERT_TRUE(l == r);
+    }
+
+    {
+      auto vector1 = makeRowVector(
+          {makeNullableFlatVector<int32_t>({666666666666666, 4, 7}),
+           makeNullableFlatVector<float>({3.0, 1.0, 1.0})});
+      auto vector2 = makeRowVector(
+          {makeNullableFlatVector<int32_t>({3, 123, 5}),
+           makeNullableFlatVector<float>({3.0, 0.5, 0.1})});
+      vector_size_t size = vector1->size();
+      BufferPtr indices =
+          AlignedBuffer::allocate<vector_size_t>(size, execCtx_.pool());
+      auto rawIndices = indices->asMutable<vector_size_t>();
+      for (auto i = 0; i < size; ++i) {
+        rawIndices[i] = size - i - 1;
+      }
+      BufferPtr indices1 =
+          AlignedBuffer::allocate<vector_size_t>(size, execCtx_.pool());
+      auto rawIndices1 = indices1->asMutable<vector_size_t>();
+      for (auto i = 0; i < size; ++i) {
+        rawIndices1[i] = i;
+      }
+
+      DecodedVector decoded1;
+      DecodedVector decoded2;
+      auto wrappedRowVector1 =
+          wrapInDictionary(indices, (vector1->size()), vector1);
+      auto wrappedRowVector2 =
+          wrapInDictionary(indices1, (vector2->size()), vector2);
+      exec::VectorReader<Row<int32_t, float>> reader1(
+          decode(decoded1, *wrappedRowVector1));
+      exec::VectorReader<Row<int32_t, float>> reader2(
+          decode(decoded2, *wrappedRowVector2));
+
+      ASSERT_TRUE(reader1.isSet(0));
+      ASSERT_TRUE(reader2.isSet(0));
+      auto l = read(reader1, 0);
+      auto r = read(reader2, 0);
+      ASSERT_TRUE(l > r);
+    }
   }
 
   void e2eComparisonTest() {