From 483bc7b6d10d62e3bb83c167569cde84e2912744 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Tue, 13 Aug 2024 07:45:11 +0530 Subject: [PATCH] GH-43638: [Java] LargeListViewVector RangeEqualVisitor and TypeEqualVisitor integration (#43642) ### Rationale for this change LargeListViewVector requires `RangeEqualsVisitor` and `TypeEqualsVisitor` support in order to work with the C Data interface. ### What changes are included in this PR? Adding `RangeEqualsVisitor` and `TypeEqualsVisitor` support for `LargeListViewVector`, along with the corresponding test cases. ### Are these changes tested? Yes. ### Are there any user-facing changes? No * GitHub Issue: #43638 Authored-by: Vibhatha Abeykoon Signed-off-by: David Li --- .../vector/compare/RangeEqualsVisitor.java | 57 ++++++++++ .../vector/compare/TypeEqualsVisitor.java | 6 ++ .../arrow/vector/compare/VectorVisitor.java | 6 ++ .../vector/complex/LargeListViewVector.java | 2 +- .../apache/arrow/vector/TestValueVector.java | 95 ++++++++++++++++ .../compare/TestRangeEqualsVisitor.java | 102 ++++++++++++++++++ .../vector/compare/TestTypeEqualsVisitor.java | 17 +++ 7 files changed, 284 insertions(+), 1 deletion(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java index fbc28a3609c07..9aa1bffb8463e 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java @@ -31,11 +31,13 @@ import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.NullVector; import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.complex.BaseLargeRepeatedValueViewVector; import org.apache.arrow.vector.complex.BaseRepeatedValueVector; import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector; import org.apache.arrow.vector.complex.DenseUnionVector; import org.apache.arrow.vector.complex.FixedSizeListVector; 
import org.apache.arrow.vector.complex.LargeListVector; +import org.apache.arrow.vector.complex.LargeListViewVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.NonNullableStructVector; @@ -244,6 +246,14 @@ public Boolean visit(ListViewVector left, Range range) { return compareListViewVectors(range); } + @Override + public Boolean visit(LargeListViewVector left, Range range) { + if (!validate(left)) { + return false; + } + return compareLargeListViewVectors(range); + } + protected RangeEqualsVisitor createInnerVisitor( ValueVector leftInner, ValueVector rightInner, @@ -759,4 +769,51 @@ protected boolean compareListViewVectors(Range range) { } return true; } + + protected boolean compareLargeListViewVectors(Range range) { + LargeListViewVector leftVector = (LargeListViewVector) left; + LargeListViewVector rightVector = (LargeListViewVector) right; + + RangeEqualsVisitor innerVisitor = + createInnerVisitor( + leftVector.getDataVector(), rightVector.getDataVector(), /*type comparator*/ null); + Range innerRange = new Range(); + + for (int i = 0; i < range.getLength(); i++) { + int leftIndex = range.getLeftStart() + i; + int rightIndex = range.getRightStart() + i; + + boolean isNull = leftVector.isNull(leftIndex); + if (isNull != rightVector.isNull(rightIndex)) { + return false; + } + + int offsetWidth = BaseLargeRepeatedValueViewVector.OFFSET_WIDTH; + int sizeWidth = BaseLargeRepeatedValueViewVector.SIZE_WIDTH; + + if (!isNull) { + final int startIndexLeft = + leftVector.getOffsetBuffer().getInt((long) leftIndex * offsetWidth); + final int leftSize = leftVector.getSizeBuffer().getInt((long) leftIndex * sizeWidth); + + final int startIndexRight = + rightVector.getOffsetBuffer().getInt((long) rightIndex * offsetWidth); + final int rightSize = rightVector.getSizeBuffer().getInt((long) rightIndex * sizeWidth); + + if (leftSize != rightSize) { + return false; + } + + 
innerRange = + innerRange + .setRightStart(startIndexRight) + .setLeftStart(startIndexLeft) + .setLength(leftSize); + if (!innerVisitor.rangeEquals(innerRange)) { + return false; + } + } + } + return true; + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java index 6e15d6a83e7d9..ce92b22ef61c9 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java @@ -28,6 +28,7 @@ import org.apache.arrow.vector.complex.DenseUnionVector; import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.LargeListVector; +import org.apache.arrow.vector.complex.LargeListViewVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.NonNullableStructVector; @@ -130,6 +131,11 @@ public Boolean visit(ListViewVector left, Void value) { return compareField(left.getField(), right.getField()); } + @Override + public Boolean visit(LargeListViewVector left, Void value) { + return compareField(left.getField(), right.getField()); + } + private boolean compareField(Field leftField, Field rightField) { if (leftField == rightField) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java index c912359d4af5d..e20f8cd9cfba5 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java @@ -25,6 +25,7 @@ import org.apache.arrow.vector.complex.DenseUnionVector; import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.LargeListVector; +import 
org.apache.arrow.vector.complex.LargeListViewVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.NonNullableStructVector; @@ -65,4 +66,9 @@ public interface VectorVisitor { default OUT visit(ListViewVector left, IN value) { throw new UnsupportedOperationException("VectorVisitor for ListViewVector is not supported."); } + + default OUT visit(LargeListViewVector left, IN value) { + throw new UnsupportedOperationException( + "VectorVisitor for LargeListViewVector is not supported."); + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java index 1bb24a53fc254..17ccdbf0eae39 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java @@ -449,7 +449,7 @@ public int hashCode(int index, ArrowBufHasher hasher) { @Override public OUT accept(VectorVisitor visitor, IN value) { - throw new UnsupportedOperationException(); + return visitor.visit(this, value); } @Override diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 4dd55afdb8b04..83e470ae2581d 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -46,11 +46,13 @@ import org.apache.arrow.vector.compare.VectorEqualsVisitor; import org.apache.arrow.vector.complex.DenseUnionVector; import org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.complex.LargeListViewVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.StructVector; 
import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.complex.impl.NullableStructWriter; +import org.apache.arrow.vector.complex.impl.UnionLargeListViewWriter; import org.apache.arrow.vector.complex.impl.UnionListViewWriter; import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.holders.NullableIntHolder; @@ -2910,6 +2912,35 @@ public void testListViewVectorEqualsWithNull() { } } + @Test + public void testLargeListViewVectorEqualsWithNull() { + try (final LargeListViewVector vector1 = LargeListViewVector.empty("largelistview", allocator); + final LargeListViewVector vector2 = + LargeListViewVector.empty("largelistview", allocator); ) { + + UnionLargeListViewWriter writer1 = vector1.getWriter(); + writer1.allocate(); + + // set some values + writeLargeListViewVector(writer1, new int[] {1, 2}); + writeLargeListViewVector(writer1, new int[] {3, 4}); + writeLargeListViewVector(writer1, new int[] {}); + writer1.setValueCount(3); + + UnionLargeListViewWriter writer2 = vector2.getWriter(); + writer2.allocate(); + + // set some values + writeLargeListViewVector(writer2, new int[] {1, 2}); + writeLargeListViewVector(writer2, new int[] {3, 4}); + writer2.setValueCount(3); + + VectorEqualsVisitor visitor = new VectorEqualsVisitor(); + + assertFalse(visitor.vectorEquals(vector1, vector2)); + } + } + @Test public void testListVectorEquals() { try (final ListVector vector1 = ListVector.empty("list", allocator); @@ -2974,6 +3005,39 @@ public void testListViewVectorEquals() { } } + @Test + public void testLargeListViewVectorEquals() { + try (final LargeListViewVector vector1 = LargeListViewVector.empty("largelistview", allocator); + final LargeListViewVector vector2 = + LargeListViewVector.empty("largelistview", allocator); ) { + + UnionLargeListViewWriter writer1 = vector1.getWriter(); + writer1.allocate(); + + // set some values + writeLargeListViewVector(writer1, new int[] {1, 2}); + 
writeLargeListViewVector(writer1, new int[] {3, 4}); + writeLargeListViewVector(writer1, new int[] {5, 6}); + writer1.setValueCount(3); + + UnionLargeListViewWriter writer2 = vector2.getWriter(); + writer2.allocate(); + + // set some values + writeLargeListViewVector(writer2, new int[] {1, 2}); + writeLargeListViewVector(writer2, new int[] {3, 4}); + writer2.setValueCount(2); + + VectorEqualsVisitor visitor = new VectorEqualsVisitor(); + assertFalse(visitor.vectorEquals(vector1, vector2)); + + writeLargeListViewVector(writer2, new int[] {5, 6}); + writer2.setValueCount(3); + + assertTrue(visitor.vectorEquals(vector1, vector2)); + } + } + @Test public void testListVectorSetNull() { try (final ListVector vector = ListVector.empty("list", allocator)) { @@ -3020,6 +3084,29 @@ public void testListViewVectorSetNull() { } } + @Test + public void testLargeListViewVectorSetNull() { + try (final LargeListViewVector vector = LargeListViewVector.empty("largelistview", allocator)) { + UnionLargeListViewWriter writer = vector.getWriter(); + writer.allocate(); + + writeLargeListViewVector(writer, new int[] {1, 2}); + writeLargeListViewVector(writer, new int[] {3, 4}); + writeLargeListViewVector(writer, new int[] {5, 6}); + vector.setNull(3); + vector.setNull(4); + vector.setNull(5); + writer.setValueCount(6); + + assertEquals(vector.getObject(0), Arrays.asList(1, 2)); + assertEquals(vector.getObject(1), Arrays.asList(3, 4)); + assertEquals(vector.getObject(2), Arrays.asList(5, 6)); + assertTrue(vector.isNull(3)); + assertTrue(vector.isNull(4)); + assertTrue(vector.isNull(5)); + } + } + @Test public void testStructVectorEqualsWithNull() { @@ -3359,6 +3446,14 @@ private void writeListViewVector(UnionListViewWriter writer, int[] values) { writer.endListView(); } + private void writeLargeListViewVector(UnionLargeListViewWriter writer, int[] values) { + writer.startListView(); + for (int v : values) { + writer.integer().writeInt(v); + } + writer.endListView(); + } + @Test public void 
testVariableVectorGetEndOffset() { try (final VarCharVector vector1 = new VarCharVector("v1", allocator); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java index 7e91b76043057..eca5c2d9b2a83 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java @@ -36,12 +36,14 @@ import org.apache.arrow.vector.compare.util.ValueEpsilonEqualizers; import org.apache.arrow.vector.complex.DenseUnionVector; import org.apache.arrow.vector.complex.FixedSizeListVector; +import org.apache.arrow.vector.complex.LargeListViewVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.complex.impl.NullableStructWriter; import org.apache.arrow.vector.complex.impl.UnionFixedSizeListWriter; +import org.apache.arrow.vector.complex.impl.UnionLargeListViewWriter; import org.apache.arrow.vector.complex.impl.UnionListViewWriter; import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.holders.NullableBigIntHolder; @@ -221,6 +223,25 @@ public void testListViewVectorWithDifferentChild() { } } + @Test + public void testLargeListViewVectorWithDifferentChild() { + try (final LargeListViewVector vector1 = LargeListViewVector.empty("largelistview", allocator); + final LargeListViewVector vector2 = + LargeListViewVector.empty("largelistview", allocator); ) { + + vector1.allocateNew(); + vector1.initializeChildrenFromFields( + Arrays.asList(Field.nullable("child", new ArrowType.Int(32, true)))); + + vector2.allocateNew(); + vector2.initializeChildrenFromFields( + Arrays.asList(Field.nullable("child", new 
ArrowType.Int(64, true)))); + + RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); + assertFalse(visitor.rangeEquals(new Range(0, 0, 0))); + } + } + @Test public void testListVectorRangeEquals() { try (final ListVector vector1 = ListVector.empty("list", allocator); @@ -285,6 +306,39 @@ public void testListViewVectorRangeEquals() { } } + @Test + public void testLargeListViewVectorRangeEquals() { + try (final LargeListViewVector vector1 = LargeListViewVector.empty("largelistview", allocator); + final LargeListViewVector vector2 = + LargeListViewVector.empty("largelistview", allocator); ) { + + UnionLargeListViewWriter writer1 = vector1.getWriter(); + writer1.allocate(); + + // set some values + writeLargeListViewVector(writer1, new int[] {1, 2}); + writeLargeListViewVector(writer1, new int[] {3, 4}); + writeLargeListViewVector(writer1, new int[] {5, 6}); + writeLargeListViewVector(writer1, new int[] {7, 8}); + writeLargeListViewVector(writer1, new int[] {9, 10}); + writer1.setValueCount(5); + + UnionLargeListViewWriter writer2 = vector2.getWriter(); + writer2.allocate(); + + // set some values + writeLargeListViewVector(writer2, new int[] {0, 0}); + writeLargeListViewVector(writer2, new int[] {3, 4}); + writeLargeListViewVector(writer2, new int[] {5, 6}); + writeLargeListViewVector(writer2, new int[] {7, 8}); + writeLargeListViewVector(writer2, new int[] {0, 0}); + writer2.setValueCount(5); + + RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); + assertTrue(visitor.rangeEquals(new Range(1, 1, 3))); + } + } + @Test public void testBitVectorRangeEquals() { try (final BitVector vector1 = new BitVector("v1", allocator); @@ -903,6 +957,38 @@ public void testListViewVectorApproxEquals() { } } + @Test + public void testLargeListViewVectorApproxEquals() { + try (final LargeListViewVector right = LargeListViewVector.empty("largelistview", allocator); + final LargeListViewVector left1 = LargeListViewVector.empty("largelistview", 
allocator); + final LargeListViewVector left2 = LargeListViewVector.empty("largelistview", allocator); ) { + + final float epsilon = 1.0E-6f; + + UnionLargeListViewWriter rightWriter = right.getWriter(); + rightWriter.allocate(); + writeLargeListViewVector(rightWriter, new double[] {1, 2}); + writeLargeListViewVector(rightWriter, new double[] {1.01, 2.02}); + rightWriter.setValueCount(2); + + UnionLargeListViewWriter leftWriter1 = left1.getWriter(); + leftWriter1.allocate(); + writeLargeListViewVector(leftWriter1, new double[] {1, 2}); + writeLargeListViewVector(leftWriter1, new double[] {1.01 + epsilon / 2, 2.02 - epsilon / 2}); + leftWriter1.setValueCount(2); + + UnionLargeListViewWriter leftWriter2 = left2.getWriter(); + leftWriter2.allocate(); + writeLargeListViewVector(leftWriter2, new double[] {1, 2}); + writeLargeListViewVector(leftWriter2, new double[] {1.01 + epsilon * 2, 2.02 - epsilon * 2}); + leftWriter2.setValueCount(2); + + Range range = new Range(0, 0, right.getValueCount()); + assertTrue(new ApproxEqualsVisitor(left1, right, epsilon, epsilon).rangeEquals(range)); + assertFalse(new ApproxEqualsVisitor(left2, right, epsilon, epsilon).rangeEquals(range)); + } + } + private void writeStructVector(NullableStructWriter writer, int value1, long value2) { writer.start(); writer.integer("f0").writeInt(value1); @@ -933,6 +1019,14 @@ private void writeListViewVector(UnionListViewWriter writer, int[] values) { writer.endListView(); } + private void writeLargeListViewVector(UnionLargeListViewWriter writer, int[] values) { + writer.startListView(); + for (int v : values) { + writer.integer().writeInt(v); + } + writer.endListView(); + } + private void writeFixedSizeListVector(UnionFixedSizeListWriter writer, int[] values) { writer.startList(); for (int v : values) { @@ -956,4 +1050,12 @@ private void writeListViewVector(UnionListViewWriter writer, double[] values) { } writer.endListView(); } + + private void writeLargeListViewVector(UnionLargeListViewWriter 
writer, double[] values) { + writer.startListView(); + for (double v : values) { + writer.float8().writeFloat8(v); + } + writer.endListView(); + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java index d65096205fd71..ce029493473bb 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java @@ -32,6 +32,7 @@ import org.apache.arrow.vector.ViewVarBinaryVector; import org.apache.arrow.vector.ViewVarCharVector; import org.apache.arrow.vector.complex.DenseUnionVector; +import org.apache.arrow.vector.complex.LargeListViewVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.StructVector; @@ -121,6 +122,22 @@ public void testListViewTypeEquals() { } } + @Test + public void testLargeListViewTypeEquals() { + try (final LargeListViewVector right = LargeListViewVector.empty("largelistview", allocator); + final LargeListViewVector left1 = LargeListViewVector.empty("largelistview", allocator); + final LargeListViewVector left2 = LargeListViewVector.empty("largelistview", allocator)) { + + right.addOrGetVector(FieldType.nullable(new ArrowType.Utf8())); + left1.addOrGetVector(FieldType.nullable(new ArrowType.Utf8())); + left2.addOrGetVector(FieldType.nullable(new ArrowType.FixedSizeBinary(2))); + + TypeEqualsVisitor visitor = new TypeEqualsVisitor(right); + assertTrue(visitor.equals(left1)); + assertFalse(visitor.equals(left2)); + } + } + @Test public void testStructTypeEquals() { try (final StructVector right = StructVector.empty("struct", allocator);