Skip to content

Commit

Permalink
GH-39982: [Java] Add RunEndEncodedVector (#43888)
Browse files Browse the repository at this point in the history
### Rationale for this change

### What changes are included in this PR?

### Are these changes tested?

### Are there any user-facing changes?

* GitHub Issue: #39982

Lead-authored-by: ViggoC <[email protected]>
Co-authored-by: chenweiguo.vc <[email protected]>
Co-authored-by: David Li <[email protected]>
Signed-off-by: David Li <[email protected]>
  • Loading branch information
3 people authored Sep 20, 2024
1 parent 192d232 commit 7d96d94
Show file tree
Hide file tree
Showing 17 changed files with 1,128 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,11 @@ public ColumnBinder visit(ArrowType.Union type) {
throw new UnsupportedOperationException("No column binder implemented for type " + type);
}

/**
 * Rejects run-end encoded columns: this visitor has no column binder for them.
 *
 * @param type the run-end encoded Arrow type being visited
 * @return never returns normally
 * @throws UnsupportedOperationException always, with the offending type in the message
 */
@Override
public ColumnBinder visit(ArrowType.RunEndEncoded type) {
  final String message = "No column binder implemented for type " + type;
  throw new UnsupportedOperationException(message);
}

@Override
public ColumnBinder visit(ArrowType.Map type) {
return new MapBinder((MapVector) vector);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,11 @@ public List<ArrowBuf> visit(ArrowType.Union type) {
}
}

/**
 * Rejects run-end encoded types: importing their buffers is not supported by this visitor.
 *
 * @param type the run-end encoded Arrow type being visited
 * @return never returns normally
 * @throws UnsupportedOperationException always, with the offending type in the message
 */
@Override
public List<ArrowBuf> visit(ArrowType.RunEndEncoded type) {
  final String message = "Importing buffers for type: " + type;
  throw new UnsupportedOperationException(message);
}

@Override
public List<ArrowBuf> visit(ArrowType.Map type) {
return Arrays.asList(maybeImportBitmap(type), importOffsets(type, MapVector.OFFSET_WIDTH));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,5 +281,11 @@ public Boolean visit(ArrowType.ListView type) {
public Boolean visit(ArrowType.LargeListView type) {
  // Large list views cannot be bound yet; fail loudly and name the type in the message.
  final String message = "Binding is not yet supported for type " + type;
  throw new UnsupportedOperationException(message);
}

/**
 * Rejects run-end encoded parameters: no Avatica parameter binder exists for them.
 *
 * @param type the run-end encoded Arrow type being visited
 * @return never returns normally
 * @throws UnsupportedOperationException always, with the offending type in the message
 */
@Override
public Boolean visit(ArrowType.RunEndEncoded type) {
  final String message = "No Avatica parameter binder implemented for type " + type;
  throw new UnsupportedOperationException(message);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -284,5 +284,11 @@ public AvaticaParameter visit(ArrowType.LargeListView type) {
throw new UnsupportedOperationException(
"AvaticaParameter not yet supported for type " + type);
}

/**
 * Rejects run-end encoded types: this visitor cannot produce an {@code AvaticaParameter} for
 * them.
 *
 * @param type the run-end encoded Arrow type being visited
 * @return never returns normally
 * @throws UnsupportedOperationException always, with the offending type in the message
 */
@Override
public AvaticaParameter visit(ArrowType.RunEndEncoded type) {
  final String message = "No Avatica parameter binder implemented for type " + type;
  throw new UnsupportedOperationException(message);
}
}
}
5 changes: 5 additions & 0 deletions java/vector/src/main/codegen/data/ArrowTypes.tdd
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,11 @@
name: "LargeListView",
fields: [],
complex: true
},
{
name: "RunEndEncoded",
fields: [],
complex: true
}
]
}
2 changes: 1 addition & 1 deletion java/vector/src/main/codegen/templates/UnionReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
@SuppressWarnings("unused")
public class UnionReader extends AbstractFieldReader {

private static final int NUM_SUPPORTED_TYPES = 50;
private static final int NUM_SUPPORTED_TYPES = 51;

private BaseReader[] readers = new BaseReader[NUM_SUPPORTED_TYPES];
public UnionVector data;
Expand Down
11 changes: 11 additions & 0 deletions java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.apache.arrow.vector.types.pojo.ArrowType.LargeUtf8;
import org.apache.arrow.vector.types.pojo.ArrowType.Map;
import org.apache.arrow.vector.types.pojo.ArrowType.Null;
import org.apache.arrow.vector.types.pojo.ArrowType.RunEndEncoded;
import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
import org.apache.arrow.vector.types.pojo.ArrowType.Time;
import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
Expand Down Expand Up @@ -280,6 +281,11 @@ public TypeLayout visit(Interval type) {
public TypeLayout visit(Duration type) {
  // Durations use the fixed-width layout; the data buffer is declared with 64
  // (presumably the value width in bits -- confirm against BufferLayout.dataBuffer).
  final BufferLayout durationData = BufferLayout.dataBuffer(64);
  return newFixedWidthTypeLayout(durationData);
}

/**
 * Returns the layout for run-end encoded types: a {@link TypeLayout} built from an empty list of
 * {@link BufferLayout}s.
 *
 * @param type the run-end encoded Arrow type being visited
 * @return a layout that declares no buffers
 */
@Override
public TypeLayout visit(RunEndEncoded type) {
// No buffer layouts are declared for this type, so the list handed to TypeLayout is empty.
return new TypeLayout(Collections.<BufferLayout>emptyList());
}
});
return layout;
}
Expand Down Expand Up @@ -444,6 +450,11 @@ public Integer visit(Interval type) {
public Integer visit(Duration type) {
  // Durations report the shared fixed-width buffer count.
  final Integer bufferCount = FIXED_WIDTH_BUFFER_COUNT;
  return bufferCount;
}

/**
 * Reports the buffer count for run-end encoded types, which is zero.
 *
 * @param type the run-end encoded Arrow type being visited
 * @return {@code 0}
 */
@Override
public Integer visit(RunEndEncoded type) {
  return Integer.valueOf(0);
}
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.arrow.vector.BaseVariableWidthViewVector;
import org.apache.arrow.vector.BitVector;
import org.apache.arrow.vector.ExtensionTypeVector;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.NullVector;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.complex.BaseLargeRepeatedValueViewVector;
Expand All @@ -41,11 +42,13 @@
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.ListViewVector;
import org.apache.arrow.vector.complex.NonNullableStructVector;
import org.apache.arrow.vector.complex.RunEndEncodedVector;
import org.apache.arrow.vector.complex.StructVector;
import org.apache.arrow.vector.complex.UnionVector;

/** Visitor to compare a range of values for vectors. */
public class RangeEqualsVisitor implements VectorVisitor<Boolean, Range> {

private ValueVector left;
private ValueVector right;

Expand Down Expand Up @@ -226,6 +229,14 @@ public Boolean visit(NullVector left, Range range) {
return true;
}

/**
 * Compares a range of a run-end encoded vector against the other vector held by this visitor.
 *
 * @param left the run-end encoded vector being visited
 * @param range the range to compare
 * @return {@code false} if {@code validate(left)} fails; otherwise the result of {@link
 *     #compareRunEndEncodedVectors(Range)}
 */
@Override
public Boolean visit(RunEndEncodedVector left, Range range) {
  // Short-circuit: the range comparison only runs once validation has succeeded.
  return validate(left) && compareRunEndEncodedVectors(range);
}

@Override
public Boolean visit(ExtensionTypeVector<?> left, Range range) {
if (!(right instanceof ExtensionTypeVector<?>) || !validate(left)) {
Expand Down Expand Up @@ -255,6 +266,48 @@ public Boolean visit(LargeListViewVector left, Range range) {
return compareLargeListViewVectors(range);
}

/**
 * Compares the logical values of the {@code left} and {@code right} run-end encoded vectors over
 * the given range.
 *
 * <p>Both vectors are walked with a pair of logical cursors that always advance by the same
 * amount. At each step the values backing the two current runs are compared through an inner
 * visitor, and then both cursors skip ahead by the shorter of the two remaining run lengths
 * (each clipped to the end of the range). Because only logical values are compared, two vectors
 * holding the same values are equal even when their runs are split at different positions, for
 * example {@code a x 2, a x 3} versus {@code a x 5}.
 *
 * @param range the logical start positions in both vectors and the number of logical values to
 *     compare
 * @return {@code true} if every logical value in the range is equal in both vectors, {@code
 *     false} otherwise
 */
protected boolean compareRunEndEncodedVectors(Range range) {
  final RunEndEncodedVector leftVector = (RunEndEncodedVector) left;
  final RunEndEncodedVector rightVector = (RunEndEncodedVector) right;

  final int leftRangeEnd = range.getLeftStart() + range.getLength();
  final int rightRangeEnd = range.getRightStart() + range.getLength();

  final FieldVector leftValuesVector = leftVector.getValuesVector();
  final FieldVector rightValuesVector = rightVector.getValuesVector();

  final RangeEqualsVisitor innerVisitor =
      createInnerVisitor(leftValuesVector, rightValuesVector, null);

  int leftLogicalIndex = range.getLeftStart();
  int rightLogicalIndex = range.getRightStart();

  while (leftLogicalIndex < leftRangeEnd) {
    // TODO: implement this more efficiently
    // https://github.com/apache/arrow/issues/44157
    final int leftPhysicalIndex = leftVector.getPhysicalIndex(leftLogicalIndex);
    final int rightPhysicalIndex = rightVector.getPhysicalIndex(rightLogicalIndex);

    // Compare the single value that backs the run under each cursor.
    final Range valueRange = new Range(leftPhysicalIndex, rightPhysicalIndex, 1);
    if (!leftValuesVector.accept(innerVisitor, valueRange)) {
      return false;
    }

    // Logical positions left in each current run, clipped to the compared range.
    final int leftRemaining =
        Math.min(leftVector.getRunEnd(leftLogicalIndex), leftRangeEnd) - leftLogicalIndex;
    final int rightRemaining =
        Math.min(rightVector.getRunEnd(rightLogicalIndex), rightRangeEnd) - rightLogicalIndex;

    // Advance both cursors in lockstep by the shorter run. A differing run length alone is not
    // a mismatch: the longer run is simply compared against the next run on the other side.
    final int step = Math.min(leftRemaining, rightRemaining);
    leftLogicalIndex += step;
    rightLogicalIndex += step;
  }

  return true;
}

protected RangeEqualsVisitor createInnerVisitor(
ValueVector leftInner,
ValueVector rightInner,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.ListViewVector;
import org.apache.arrow.vector.complex.NonNullableStructVector;
import org.apache.arrow.vector.complex.RunEndEncodedVector;
import org.apache.arrow.vector.complex.UnionVector;
import org.apache.arrow.vector.types.pojo.Field;

Expand Down Expand Up @@ -136,6 +137,11 @@ public Boolean visit(LargeListViewVector left, Void value) {
return compareField(left.getField(), right.getField());
}

/**
 * Checks type equality for a run-end encoded vector by comparing its field with the field of the
 * other vector held by this visitor.
 *
 * @param left the run-end encoded vector being visited
 * @param value unused
 * @return the result of comparing the two fields
 */
@Override
public Boolean visit(RunEndEncodedVector left, Void value) {
  final Field leftField = left.getField();
  final Field rightField = right.getField();
  return compareField(leftField, rightField);
}

private boolean compareField(Field leftField, Field rightField) {

if (leftField == rightField) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.ListViewVector;
import org.apache.arrow.vector.complex.NonNullableStructVector;
import org.apache.arrow.vector.complex.RunEndEncodedVector;
import org.apache.arrow.vector.complex.UnionVector;

/**
Expand Down Expand Up @@ -71,4 +72,9 @@ default OUT visit(LargeListViewVector left, IN value) {
throw new UnsupportedOperationException(
"VectorVisitor for LargeListViewVector is not supported.");
}

/**
 * Default visit method for {@link RunEndEncodedVector}. Implementations that support run-end
 * encoded vectors must override it.
 *
 * @param left the run-end encoded vector being visited
 * @param value the visitor input
 * @return never returns normally in this default implementation
 * @throws UnsupportedOperationException always, unless overridden
 */
default OUT visit(RunEndEncodedVector left, IN value) {
  // The message names RunEndEncodedVector; it previously named LargeListViewVector by mistake.
  throw new UnsupportedOperationException(
      "VectorVisitor for RunEndEncodedVector is not supported.");
}
}
Loading

0 comments on commit 7d96d94

Please sign in to comment.