From b89d29390f40e3443582a644f0927b09ad0732fe Mon Sep 17 00:00:00 2001 From: Vibhatha Abeykoon Date: Thu, 1 Aug 2024 17:37:50 +0530 Subject: [PATCH] fix: adding tests for Int, List, LargeList, Utf8, Binary --- .../arrow/c/BufferImportTypeVisitor.java | 39 ++++--- .../org/apache/arrow/c/RoundtripTest.java | 108 ++++++++++++++++-- .../arrow/vector/complex/LargeListVector.java | 13 +++ .../arrow/vector/complex/ListVector.java | 13 +++ 4 files changed, 149 insertions(+), 24 deletions(-) diff --git a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java index 53f81600a225d..f4c41f194c64b 100644 --- a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java +++ b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java @@ -135,18 +135,24 @@ private ArrowBuf importFixedBitsWithOffset(ArrowType type, int index, long bitsP if (startByte != endByte) { return buf.slice(startByte, endByte - startByte); - } // else { - // final ArrowBuf bufCopy = allocator.buffer(buf.capacity()); - // bufCopy.setZero(0, buf.capacity()); - // - // for (int i = (int) arrowArrayOffset; i < bufCopy.capacity(); i++) { - // if (BitVectorHelper.get(buf, i) == 1) { - // BitVectorHelper.setBit(buf, i); - // } - // } - // return bufCopy; - // } - return buf; + } else { + ArrowBuf bufCopy = allocator.buffer(buf.capacity()); + bufCopy.setZero(0, buf.capacity()); + for (int i = 0; i < bufCopy.capacity() * 8; i++) { + int bitIndex = (int) (i + arrowArrayOffset); + if (bitIndex < buf.capacity() * 8) { + if (BitVectorHelper.get(buf, bitIndex) == 1) { + BitVectorHelper.setBit(bufCopy, i); + } else { + BitVectorHelper.unsetBit(bufCopy, i); + } + } else { + BitVectorHelper.unsetBit(bufCopy, i); + } + } + imported.add(bufCopy); + return bufCopy; + } } private ArrowBuf importFixedBytes(ArrowType type, int index, long bytesPerSlot) { @@ -216,13 +222,14 @@ public List visit(ArrowType.Struct type) { @Override public List visit(ArrowType.List type) { - return Arrays.asList(maybeImportBitmap(type), importOffsets(type, ListVector.OFFSET_WIDTH)); + return Arrays.asList( + maybeImportBitmapWithOffset(type), importOffsets(type, ListVector.OFFSET_WIDTH)); } @Override public List visit(ArrowType.LargeList type) { return Arrays.asList( - maybeImportBitmap(type), importOffsets(type, LargeListVector.OFFSET_WIDTH)); + maybeImportBitmapWithOffset(type), importOffsets(type, LargeListVector.OFFSET_WIDTH)); } @Override @@ -283,7 +290,7 @@ public List visit(ArrowType.Utf8 type) { type, start, end); - return Arrays.asList(maybeImportBitmap(type), offsets, importData(type, end)); + return Arrays.asList(maybeImportBitmapWithOffset(type), offsets, importData(type, end)); } private List visitVariableWidthView(ArrowType type) { @@ -348,7 +355,7 @@ public List visit(ArrowType.Binary type) { type, start, end); - return Arrays.asList(maybeImportBitmap(type), offsets, importData(type, end)); + return Arrays.asList(maybeImportBitmapWithOffset(type), offsets, importData(type, end)); } @Override diff --git a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java index 9b3209e7f584c..dff4db8e404ad 100644 --- a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java +++ b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java @@ -986,10 +986,21 @@ private FieldVector getSlicedVector(FieldVector vector, int offset, int length) public void testSliceVariableWidthVector() { try (final VarCharVector vector = new VarCharVector("v", allocator); VarCharVector target = new VarCharVector("v", allocator)) { - setVector(vector, "foo", "bar", "baz1", "baz223", "baz23445", "baz2121", "12312baz"); + setVector( + vector, + "foo", + "bar", + "baz1", + "", + "baz23445", + null, + "12312baz", + "baz11", + "baz22", + "baz33"); // slice information final int startIndex = 2; - final int length = 3; + final int length = 6; // create a sliced vector manually to mimic C++ slice behavior try (VarCharVector slicedVector = (VarCharVector) getSlicedVector(vector, startIndex, length)) { @@ -1000,20 +1011,48 @@ public void testSliceVariableWidthVector() { } } + @Test + public void testSliceVarBinaryVector() { + try (final VarBinaryVector vector = new VarBinaryVector("v", allocator); + VarBinaryVector target = new VarBinaryVector("v", allocator)) { + setVector( + vector, + new byte[] {0x01, 0x02, 0x03}, + new byte[] {0x04, 0x05}, + new byte[] {0x06, 0x07, 0x08, 0x09}, + new byte[] {}, + new byte[] {0x0A, 0x0B, 0x0C, 0x0D, 0x0E}, + null, + new byte[] {0x0F, 0x10, 0x11}, + new byte[] {0x12, 0x13}, + new byte[] {0x14, 0x15, 0x16}, + new byte[] {0x17, 0x18, 0x19, 0x1A}); + // slice information + final int startIndex = 2; + final int length = 6; + // create a sliced vector manually to mimic C++ slice behavior + try (VarBinaryVector slicedVector = + (VarBinaryVector) getSlicedVector(vector, startIndex, length)) { + vector.splitAndTransferTo(startIndex, length, target); + assertTrue(roundtrip(slicedVector, VarBinaryVector.class)); + assertTrue(VectorEqualsVisitor.vectorEquals(target, slicedVector)); + } + } + } + @Test public void testSliceFixedWidthVector() { - // TODO: fix the import visitor to support offset try (final IntVector vector = new IntVector("v", allocator); IntVector target = new IntVector("v", allocator)) { - setVector(vector, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10); + setVector(vector, 1, 2, null, 3, 4, null, 6, 7, 8, 9, 10); // slice information final int startIndex = 2; - final int length = 3; + final int length = 6; // create a sliced vector manually to mimic C++ slice behavior try (IntVector slicedVector = (IntVector) getSlicedVector(vector, startIndex, length)) { vector.splitAndTransferTo(startIndex, length, target); assertTrue(roundtrip(slicedVector, IntVector.class)); - // assertTrue(VectorEqualsVisitor.vectorEquals(target, slicedVector)); + assertTrue(VectorEqualsVisitor.vectorEquals(target, slicedVector)); } } } @@ -1025,12 +1064,65 @@ public void testSliceVariableWidthViewVector() { @Test public void testSliceListVector() { - // TODO: complete this test and function + try (final ListVector vector = ListVector.empty("v", allocator); + ListVector target = ListVector.empty("v", allocator)) { + // Set values in the ListVector + setVector( + vector, + Arrays.asList(1, 2, 3), + Arrays.asList(4, 5), + Arrays.asList(6, 7, 8, 9), + Collections.emptyList(), + Arrays.asList(10, 11, 12, 13, 14), + null, + Arrays.asList(15, 16, 17), + Arrays.asList(18, 19), + Arrays.asList(20, 21, 22), + Arrays.asList(23, 24, 25, 26)); + + // Slice information + final int startIndex = 2; + final int length = 6; + + // Create a sliced vector manually to mimic C++ slice behavior + try (ListVector slicedVector = (ListVector) getSlicedVector(vector, startIndex, length)) { + vector.splitAndTransferTo(startIndex, length, target); + assertTrue(roundtrip(slicedVector, ListVector.class)); + assertTrue(VectorEqualsVisitor.vectorEquals(target, slicedVector)); + } + } } @Test public void testSliceLargeListVector() { - // TODO: complete this test and function + try (final LargeListVector vector = LargeListVector.empty("v", allocator); + LargeListVector target = LargeListVector.empty("v", allocator)) { + // Set values in the LargeListVector + setVector( + vector, + Arrays.asList(1, 2, 3), + Arrays.asList(4, 5), + Arrays.asList(6, 7, 8, 9), + Collections.emptyList(), + Arrays.asList(10, 11, 12, 13, 14), + null, + Arrays.asList(15, 16, 17), + Arrays.asList(18, 19), + Arrays.asList(20, 21, 22), + Arrays.asList(23, 24, 25, 26)); + + // Slice information + final int startIndex = 2; + final int length = 6; + + // Create a sliced vector manually to mimic C++ slice behavior + try (LargeListVector slicedVector = + (LargeListVector) getSlicedVector(vector, startIndex, length)) { + vector.splitAndTransferTo(startIndex, length, target); + assertTrue(roundtrip(slicedVector, LargeListVector.class)); + assertTrue(VectorEqualsVisitor.vectorEquals(target, slicedVector)); + } + } } @Test diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index b5b32c8032dfe..438d1a18945cb 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -1123,4 +1123,17 @@ public long getElementStartIndex(int index) { public long getElementEndIndex(int index) { return offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH); } + + /** + * Slice this vector at desired index and length and transfer the corresponding data to the target + * vector. + * + * @param startIndex start position of the split in source vector. + * @param length length of the split. + * @param target destination vector + */ + public void splitAndTransferTo(int startIndex, int length, LargeListVector target) { + TransferImpl transfer = new TransferImpl(target); + transfer.splitAndTransfer(startIndex, length); + } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index a1e18210fc686..fa59fdb71e7ea 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -648,6 +648,19 @@ public void copyValueSafe(int from, int to) { } } + /** + * Slice this vector at desired index and length and transfer the corresponding data to the target + * vector. + * + * @param startIndex start position of the split in source vector. + * @param length length of the split. + * @param target destination vector + */ + public void splitAndTransferTo(int startIndex, int length, ListVector target) { + TransferImpl transfer = new TransferImpl(target); + transfer.splitAndTransfer(startIndex, length); + } + @Override protected FieldReader getReaderImpl() { return new UnionListReader(this);