From 330e9f3d8ed411f577d1a57938efdd69301c10ac Mon Sep 17 00:00:00 2001 From: Allison Portis Date: Tue, 10 Oct 2023 16:00:54 -0700 Subject: [PATCH 1/3] remove getStruct --- .../io/delta/kernel/data/ColumnVector.java | 15 +- .../actions/DeletionVectorDescriptor.java | 20 +++ .../delta/kernel/internal/actions/Format.java | 11 +- .../kernel/internal/actions/Metadata.java | 28 ++-- .../kernel/internal/actions/Protocol.java | 21 ++- .../internal/data/ChildVectorBasedRow.java | 115 +++++++++++++ .../internal/data/ColumnarBatchRow.java | 86 +--------- .../delta/kernel/internal/data/StructRow.java | 47 ++++++ .../replay/ActiveAddFilesIterator.java | 12 +- .../kernel/internal/replay/LogReplay.java | 6 +- .../internal/types/TableSchemaSerDe.java | 29 ++-- .../java/io/delta/kernel/utils/Utils.java | 9 + .../io/delta/kernel/utils/VectorUtils.java | 7 +- .../internal/types/JsonHandlerTestImpl.java | 154 +++++++++++++++++- .../data/vector/AbstractColumnVector.java | 8 +- .../data/vector/DefaultGenericVector.java | 7 - .../data/vector/DefaultStructVector.java | 106 ------------ .../data/vector/DefaultSubFieldVector.java | 6 - .../data/vector/DefaultViewVector.java | 7 - .../client/TestDefaultJsonHandler.java | 6 - .../parquet/TestParquetBatchReader.java | 21 +-- .../utils/DefaultKernelTestUtils.java | 2 - .../delta/kernel/defaults/utils/TestRow.scala | 5 +- 23 files changed, 422 insertions(+), 306 deletions(-) create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/ChildVectorBasedRow.java create mode 100644 kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/StructRow.java diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/data/ColumnVector.java b/kernel/kernel-api/src/main/java/io/delta/kernel/data/ColumnVector.java index 5d4693071f6..55c639a0161 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/data/ColumnVector.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/data/ColumnVector.java @@ -167,17 +167,6 @@ default MapValue getMap(int rowId) { throw new UnsupportedOperationException("Invalid value request for data type"); } - /** - * Return the row value located at {@code rowId}. The return value is undefined and can be - * anything, if the slot for {@code rowId} is null. - * - * @param rowId - * @return - */ - default Row getStruct(int rowId) { - throw new UnsupportedOperationException("Invalid value request for data type"); - } - /** * Return the array value located at {@code rowId}. Returns null if the slot for {@code rowId} * is null @@ -191,9 +180,9 @@ default ArrayValue getArray(int rowId) { * {@code struct} type columns. * * @param ordinal Ordinal of the child vector to return. - * @return */ default ColumnVector getChild(int ordinal) { - throw new UnsupportedOperationException("Child vectors are not available."); + throw new UnsupportedOperationException( + "Child vectors are not available for vector of type " + getDataType()); } } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/DeletionVectorDescriptor.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/DeletionVectorDescriptor.java index 347c8b5280b..78ff7e147c7 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/DeletionVectorDescriptor.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/DeletionVectorDescriptor.java @@ -22,6 +22,7 @@ import java.util.stream.IntStream; import static java.util.stream.Collectors.toMap; +import io.delta.kernel.data.ColumnVector; import io.delta.kernel.data.Row; import io.delta.kernel.types.IntegerType; import io.delta.kernel.types.LongType; @@ -57,6 +58,25 @@ public static DeletionVectorDescriptor fromRow(Row row) { sizeInBytes, cardinality); } + public static DeletionVectorDescriptor fromColumnVector(ColumnVector vector, int rowId) { + if (vector.isNullAt(rowId)) { + return null; + } + + final String storageType = requireNonNull(vector.getChild(0), rowId, "storageType") + .getString(rowId); + final String pathOrInlineDv = requireNonNull(vector.getChild(1), rowId, "pathOrInlineDv") + .getString(rowId); + final Optional offset = Optional.ofNullable( + vector.getChild(2).isNullAt(rowId) ? null : vector.getChild(2).getInt(rowId)); + final int sizeInBytes = requireNonNull(vector.getChild(3), rowId, "sizeInBytes") + .getInt(rowId); + final long cardinality = requireNonNull(vector.getChild(4), rowId, "cardinality") + .getLong(rowId); + return new DeletionVectorDescriptor(storageType, pathOrInlineDv, offset, + sizeInBytes, cardinality); + } + // Markers to separate different kinds of DV storage. public static final String PATH_DV_MARKER = "p"; public static final String INLINE_DV_MARKER = "i"; diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Format.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Format.java index d6116ddb4d2..629b1d40ded 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Format.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Format.java @@ -15,19 +15,20 @@ */ package io.delta.kernel.internal.actions; -import io.delta.kernel.data.Row; +import io.delta.kernel.data.ColumnVector; import io.delta.kernel.types.MapType; import io.delta.kernel.types.StringType; import io.delta.kernel.types.StructType; import static io.delta.kernel.utils.Utils.requireNonNull; public class Format { - public static Format fromRow(Row row) { - if (row == null) { + + public static Format fromColumnVector(ColumnVector vector, int rowId) { + if (vector.isNullAt(rowId)) { return null; } - - final String provider = requireNonNull(row, 0, "provider").getString(0); + final String provider = requireNonNull(vector.getChild(0), rowId, "provider") + .getString(rowId); return new Format(provider); } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Metadata.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Metadata.java index d5a30e024d8..e83e47a9b70 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Metadata.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Metadata.java @@ -22,8 +22,8 @@ import io.delta.kernel.client.TableClient; import io.delta.kernel.data.ArrayValue; +import io.delta.kernel.data.ColumnVector; import io.delta.kernel.data.MapValue; -import io.delta.kernel.data.Row; import io.delta.kernel.types.ArrayType; import io.delta.kernel.types.LongType; import io.delta.kernel.types.MapType; @@ -36,24 +36,30 @@ import io.delta.kernel.internal.types.TableSchemaSerDe; public class Metadata { - public static Metadata fromRow(Row row, TableClient tableClient) { - if (row == null) { + + public static Metadata fromColumnVector( + ColumnVector vector, int rowId, TableClient tableClient) { + if (vector.isNullAt(rowId)) { return null; } - final String schemaJson = requireNonNull(row, 4, "schemaString").getString(4); + final String schemaJson = requireNonNull(vector.getChild(4), rowId, "schemaString") + .getString(rowId); StructType schema = TableSchemaSerDe.fromJson(tableClient.getJsonHandler(), schemaJson); return new Metadata( - requireNonNull(row, 0, "id").getString(0), - Optional.ofNullable(row.isNullAt(1) ? null : row.getString(1)), - Optional.ofNullable(row.isNullAt(2) ? null : row.getString(2)), - Format.fromRow(requireNonNull(row, 0, "id").getStruct(3)), + requireNonNull(vector.getChild(0), rowId, "id").getString(rowId), + Optional.ofNullable(vector.getChild(1).isNullAt(rowId) ? null : + vector.getChild(1).getString(rowId)), + Optional.ofNullable(vector.getChild(2).isNullAt(rowId) ? null : + vector.getChild(2).getString(rowId)), + Format.fromColumnVector(requireNonNull(vector.getChild(3), rowId, "format"), rowId), schemaJson, schema, - row.getArray(5), - Optional.ofNullable(row.isNullAt(6) ? null : row.getLong(6)), - row.getMap(7) + vector.getChild(5).getArray(rowId), + Optional.ofNullable(vector.getChild(6).isNullAt(rowId) ? null : + vector.getChild(6).getLong(rowId)), + vector.getChild(7).getMap(rowId) ); } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Protocol.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Protocol.java index 84eefb4ab46..4cd58157fbb 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Protocol.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Protocol.java @@ -18,7 +18,7 @@ import java.util.Collections; import java.util.List; -import io.delta.kernel.data.Row; +import io.delta.kernel.data.ColumnVector; import io.delta.kernel.types.ArrayType; import io.delta.kernel.types.IntegerType; import io.delta.kernel.types.StringType; @@ -26,17 +26,20 @@ import io.delta.kernel.utils.VectorUtils; public class Protocol { - public static Protocol fromRow(Row row) { - if (row == null) { + + public static Protocol fromColumnVector(ColumnVector vector, int rowId) { + if (vector.isNullAt(rowId)) { return null; } + return new Protocol( - row.getInt(0), - row.getInt(1), - row.isNullAt(2) ? Collections.emptyList() : - VectorUtils.toJavaList(row.getArray(2)), - row.isNullAt(3) ? Collections.emptyList() : - VectorUtils.toJavaList(row.getArray(3))); + vector.getChild(0).getInt(rowId), + vector.getChild(1).getInt(rowId), + vector.getChild(2).isNullAt(rowId) ? Collections.emptyList() : + VectorUtils.toJavaList(vector.getChild(2).getArray(rowId)), + vector.getChild(3).isNullAt(rowId) ? Collections.emptyList() : + VectorUtils.toJavaList(vector.getChild(3).getArray(rowId)) + ); } public static final StructType READ_SCHEMA = new StructType() diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/ChildVectorBasedRow.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/ChildVectorBasedRow.java new file mode 100644 index 00000000000..ae4793fa479 --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/ChildVectorBasedRow.java @@ -0,0 +1,115 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.kernel.internal.data; + +import java.math.BigDecimal; + +import io.delta.kernel.data.ArrayValue; +import io.delta.kernel.data.ColumnVector; +import io.delta.kernel.data.MapValue; +import io.delta.kernel.data.Row; +import io.delta.kernel.types.StructType; + +/** + * A {@link Row} implementation that wraps a set of child vectors for a specific {@code rowId}. + */ +public abstract class ChildVectorBasedRow implements Row { + + private final int rowId; + private final StructType schema; + + public ChildVectorBasedRow(int rowId, StructType schema) { + this.rowId = rowId; + this.schema = schema; + } + + @Override + public StructType getSchema() { + return schema; + } + + @Override + public boolean isNullAt(int ordinal) { + return getChild(ordinal).isNullAt(rowId); + } + + @Override + public boolean getBoolean(int ordinal) { + return getChild(ordinal).getBoolean(rowId); + } + + @Override + public byte getByte(int ordinal) { + return getChild(ordinal).getByte(rowId); + } + + @Override + public short getShort(int ordinal) { + return getChild(ordinal).getShort(rowId); + } + + @Override + public int getInt(int ordinal) { + return getChild(ordinal).getInt(rowId); + } + + @Override + public long getLong(int ordinal) { + return getChild(ordinal).getLong(rowId); + } + + @Override + public float getFloat(int ordinal) { + return getChild(ordinal).getFloat(rowId); + } + + @Override + public double getDouble(int ordinal) { + return getChild(ordinal).getDouble(rowId); + } + + @Override + public String getString(int ordinal) { + return getChild(ordinal).getString(rowId); + } + + @Override + public BigDecimal getDecimal(int ordinal) { + return getChild(ordinal).getDecimal(rowId); + } + + @Override + public byte[] getBinary(int ordinal) { + return getChild(ordinal).getBinary(rowId); + } + + @Override + public Row getStruct(int ordinal) { + return StructRow.fromStructVector(getChild(ordinal), rowId); + } + + @Override + public ArrayValue getArray(int ordinal) { + return getChild(ordinal).getArray(rowId); + } + + @Override + public MapValue getMap(int ordinal) { + return getChild(ordinal).getMap(rowId); + } + + protected abstract ColumnVector getChild(int ordinal); +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/ColumnarBatchRow.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/ColumnarBatchRow.java index ffe7cc507aa..48b738f5009 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/ColumnarBatchRow.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/ColumnarBatchRow.java @@ -15,101 +15,25 @@ */ package io.delta.kernel.internal.data; -import java.math.BigDecimal; import java.util.Objects; import io.delta.kernel.data.*; -import io.delta.kernel.types.StructType; /** * Row abstraction around a columnar batch and a particular row within the columnar batch. */ public class ColumnarBatchRow - implements Row { + extends ChildVectorBasedRow { + private final ColumnarBatch columnarBatch; - private final int rowId; public ColumnarBatchRow(ColumnarBatch columnarBatch, int rowId) { - this.columnarBatch = Objects.requireNonNull(columnarBatch, "columnarBatch is null"); - this.rowId = rowId; - } - - @Override - public StructType getSchema() { - return columnarBatch.getSchema(); - } - - @Override - public boolean isNullAt(int ordinal) { - return columnVector(ordinal).isNullAt(rowId); - } - - @Override - public boolean getBoolean(int ordinal) { - return columnVector(ordinal).getBoolean(rowId); + super(rowId, Objects.requireNonNull(columnarBatch, "columnarBatch is null").getSchema()); + this.columnarBatch = columnarBatch; } @Override - public byte getByte(int ordinal) { - return columnVector(ordinal).getByte(rowId); - } - - @Override - public short getShort(int ordinal) { - return columnVector(ordinal).getShort(rowId); - } - - @Override - public int getInt(int ordinal) { - return columnVector(ordinal).getInt(rowId); - } - - @Override - public long getLong(int ordinal) { - return columnVector(ordinal).getLong(rowId); - } - - @Override - public float getFloat(int ordinal) { - return columnVector(ordinal).getFloat(rowId); - } - - @Override - public double getDouble(int ordinal) { - return columnVector(ordinal).getDouble(rowId); - } - - @Override - public String getString(int ordinal) { - return columnVector(ordinal).getString(rowId); - } - - @Override - public BigDecimal getDecimal(int ordinal) { - return columnVector(ordinal).getDecimal(rowId); - } - - @Override - public byte[] getBinary(int ordinal) { - return columnVector(ordinal).getBinary(rowId); - } - - @Override - public Row getStruct(int ordinal) { - return columnVector(ordinal).getStruct(rowId); - } - - @Override - public ArrayValue getArray(int ordinal) { - return columnVector(ordinal).getArray(rowId); - } - - @Override - public MapValue getMap(int ordinal) { - return columnVector(ordinal).getMap(rowId); - } - - private ColumnVector columnVector(int ordinal) { + protected ColumnVector getChild(int ordinal) { return columnarBatch.getColumnVector(ordinal); } } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/StructRow.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/StructRow.java new file mode 100644 index 00000000000..28234c9cadd --- /dev/null +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/StructRow.java @@ -0,0 +1,47 @@ +/* + * Copyright (2023) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.delta.kernel.internal.data; + +import io.delta.kernel.data.*; +import io.delta.kernel.types.StructType; +import static io.delta.kernel.internal.util.InternalUtils.checkArgument; + +/** + * A {@link Row} abstraction for a struct type column vector and a specific {@code rowId}. + */ +public class StructRow extends ChildVectorBasedRow { + + public static StructRow fromStructVector(ColumnVector columnVector, int rowId) { + checkArgument(columnVector.getDataType() instanceof StructType); + if (columnVector.isNullAt(rowId)) { + return null; + } else { + return new StructRow(columnVector, rowId, (StructType) columnVector.getDataType()); + } + } + + private final ColumnVector structVector; + + private StructRow(ColumnVector structVector, int rowId, StructType schema) { + super(rowId, schema); + this.structVector = structVector; + } + + @Override + protected ColumnVector getChild(int ordinal) { + return structVector.getChild(ordinal); + } +} diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/replay/ActiveAddFilesIterator.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/replay/ActiveAddFilesIterator.java index 5938a665026..c95e421c6ed 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/replay/ActiveAddFilesIterator.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/replay/ActiveAddFilesIterator.java @@ -253,21 +253,21 @@ private URI pathToUri(String path) { } public static String getAddFilePath(ColumnVector addFileVector, int rowId) { - return addFileVector.getStruct(rowId).getString(ADD_FILE_PATH_ORDINAL); + return addFileVector.getChild(ADD_FILE_PATH_ORDINAL).getString(rowId); } public static DeletionVectorDescriptor getAddFileDV(ColumnVector addFileVector, int rowId) { - return DeletionVectorDescriptor.fromRow( - addFileVector.getStruct(rowId).getStruct(ADD_FILE_DV_ORDINAL)); + return DeletionVectorDescriptor.fromColumnVector( + addFileVector.getChild(ADD_FILE_DV_ORDINAL), rowId); } public static String getRemoveFilePath(ColumnVector removeFileVector, int rowId) { - return removeFileVector.getStruct(rowId).getString(REMOVE_FILE_PATH_ORDINAL); + return removeFileVector.getChild(REMOVE_FILE_PATH_ORDINAL).getString(rowId); } public static DeletionVectorDescriptor getRemoveFileDV( ColumnVector removeFileVector, int rowId) { - return DeletionVectorDescriptor.fromRow( - removeFileVector.getStruct(rowId).getStruct(REMOVE_FILE_DV_ORDINAL)); + return DeletionVectorDescriptor.fromColumnVector( + removeFileVector.getChild(REMOVE_FILE_DV_ORDINAL), rowId); } } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/replay/LogReplay.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/replay/LogReplay.java index 912966270d8..3ef8d5276c4 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/replay/LogReplay.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/replay/LogReplay.java @@ -149,8 +149,7 @@ private Tuple2 loadTableProtocolAndMetadata() { for (int i = 0; i < protocolVector.getSize(); i++) { if (!protocolVector.isNullAt(i)) { - final Row row = protocolVector.getStruct(i); - protocol = Protocol.fromRow(row); + protocol = Protocol.fromColumnVector(protocolVector, i); if (metadata != null) { // Stop since we have found the latest Protocol and Metadata. @@ -167,8 +166,7 @@ private Tuple2 loadTableProtocolAndMetadata() { for (int i = 0; i < metadataVector.getSize(); i++) { if (!metadataVector.isNullAt(i)) { - final Row row = metadataVector.getStruct(i); - metadata = Metadata.fromRow(row, tableClient); + metadata = Metadata.fromColumnVector(metadataVector, i, tableClient); if (protocol != null) { // Stop since we have found the latest Protocol and Metadata. diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/types/TableSchemaSerDe.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/types/TableSchemaSerDe.java index afaa7b4e8f5..b03f70f123a 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/types/TableSchemaSerDe.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/types/TableSchemaSerDe.java @@ -15,16 +15,16 @@ */ package io.delta.kernel.internal.types; +import java.util.ArrayList; import java.util.Collections; -import java.util.List; import java.util.Map; import java.util.Optional; import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.util.stream.Collectors; import io.delta.kernel.client.JsonHandler; +import io.delta.kernel.data.ArrayValue; import io.delta.kernel.data.ColumnVector; import io.delta.kernel.data.ColumnarBatch; import io.delta.kernel.data.Row; @@ -79,11 +79,12 @@ public static StructType fromJson(JsonHandler jsonHandler, String serializedStru private static StructType parseStructType(JsonHandler jsonHandler, String serializedStructType) { Function evalMethod = (row) -> { - final List fields = VectorUtils.toJavaList(row.getArray(0)); - return new StructType( - fields.stream() - .map(field -> parseStructField(jsonHandler, field)) - .collect(Collectors.toList())); + final ArrayValue fieldsArrayValue = row.getArray(0); + ArrayList parsedFields = new ArrayList<>(fieldsArrayValue.getSize()); + for (int i = 0; i < fieldsArrayValue.getSize(); i ++) { + parsedFields.add(parseStructField(jsonHandler, fieldsArrayValue.getElements(), i)); + } + return new StructType(parsedFields); }; return parseAndEvalSingleRow( jsonHandler, serializedStructType, STRUCT_TYPE_SCHEMA, evalMethod); @@ -92,13 +93,15 @@ private static StructType parseStructType(JsonHandler jsonHandler, /** * Utility method to parse a {@link StructField} from the {@link Row} */ - private static StructField parseStructField(JsonHandler jsonHandler, Row row) { - String name = row.getString(0); - String serializedDataType = row.getString(1); + private static StructField parseStructField( + JsonHandler jsonHandler, ColumnVector vector, int rowId) { + + String name = vector.getChild(0).getString(rowId); + String serializedDataType = vector.getChild(1).getString(rowId); DataType type = parseDataType(jsonHandler, serializedDataType); - boolean nullable = row.getBoolean(2); - Map metadata = row.isNullAt(3) ? Collections.emptyMap() : - VectorUtils.toJavaMap(row.getMap(3)); + boolean nullable = vector.getChild(2).getBoolean(rowId); + Map metadata = vector.getChild(3).isNullAt(rowId) ? Collections.emptyMap() : + VectorUtils.toJavaMap(vector.getChild(3).getMap(rowId)); return new StructField(name, type, nullable, metadata); } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java b/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java index 7913b72ce39..c866eeb8f0c 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java @@ -20,6 +20,7 @@ import java.util.Iterator; import io.delta.kernel.annotation.Evolving; +import io.delta.kernel.data.ColumnVector; import io.delta.kernel.data.Row; /** @@ -134,4 +135,12 @@ public static Row requireNonNull(Row row, int ordinal, String columnName) { } return row; } + + public static ColumnVector requireNonNull(ColumnVector vector, int rowId, String columnName) { + if (vector.isNullAt(rowId)) { + throw new IllegalArgumentException( + "Expected a non-null value for column: " + columnName); + } + return vector; + } } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/utils/VectorUtils.java b/kernel/kernel-api/src/main/java/io/delta/kernel/utils/VectorUtils.java index 8a1d812d9b2..27c574c190f 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/utils/VectorUtils.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/utils/VectorUtils.java @@ -23,7 +23,9 @@ import io.delta.kernel.data.ArrayValue; import io.delta.kernel.data.ColumnVector; import io.delta.kernel.data.MapValue; +import io.delta.kernel.data.Row; import io.delta.kernel.types.*; +import io.delta.kernel.internal.data.StructRow; public final class VectorUtils { @@ -71,7 +73,7 @@ public static Map toJavaMap(MapValue mapValue) { /** * Gets the value at {@code rowId} from the column vector. The type of the Object returned * depends on the data type of the column vector. For complex types array and map, returns - * the value as Java list or Java map. + * the value as Java list or Java map. For struct type, returns an {@link Row}. */ private static Object getValueAsObject( ColumnVector columnVector, DataType dataType, int rowId) { @@ -99,7 +101,8 @@ private static Object getValueAsObject( } else if (dataType instanceof BinaryType) { return columnVector.getBinary(rowId); } else if (dataType instanceof StructType) { - return columnVector.getStruct(rowId); + // TODO are we okay with this usage of StructRow? + return StructRow.fromStructVector(columnVector, rowId); } else if (dataType instanceof DecimalType) { return columnVector.getDecimal(rowId); } else if (dataType instanceof ArrayType) { diff --git a/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/JsonHandlerTestImpl.java b/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/JsonHandlerTestImpl.java index d2e87512263..7bbe620b6d4 100644 --- a/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/JsonHandlerTestImpl.java +++ b/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/JsonHandlerTestImpl.java @@ -20,6 +20,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; @@ -41,6 +42,7 @@ import io.delta.kernel.types.StructType; import io.delta.kernel.utils.CloseableIterator; import io.delta.kernel.utils.Utils; +import static io.delta.kernel.internal.util.InternalUtils.checkArgument; /** * Implementation of {@link JsonHandler} for testing Delta Kernel APIs @@ -392,11 +394,6 @@ public byte[] getBinary(int rowId) { throw new UnsupportedOperationException("not yet implemented - test only"); } - @Override - public Row getStruct(int rowId) { - return (Row) values.get(rowId); - } - @Override public ArrayValue getArray(int rowId) { return (ArrayValue) values.get(rowId); @@ -406,5 +403,152 @@ public ArrayValue getArray(int rowId) { public MapValue getMap(int rowId) { return (MapValue) values.get(rowId); } + + + @Override + public ColumnVector getChild(int ordinal) { + checkArgument(dataType instanceof StructType); + StructType structType = (StructType) dataType; + List rows = values.stream() + .map(row -> (Row) row) + .collect(Collectors.toList()); + return new RowBasedVector( + structType.at(ordinal).getDataType(), + rows, + ordinal + ); + } + } + + /** + * Wrapper around list of {@link Row}s to expose the rows as a column vector + */ + private static class RowBasedVector implements ColumnVector { + private final DataType dataType; + private final List rows; + private final int columnOrdinal; + + RowBasedVector(DataType dataType, List rows, int columnOrdinal) { + this.dataType = dataType; + this.rows = rows; + this.columnOrdinal = columnOrdinal; + } + + @Override + public DataType getDataType() { + return dataType; + } + + @Override + public int getSize() { + return rows.size(); + } + + @Override + public void close() { /* nothing to close */ } + + @Override + public boolean isNullAt(int rowId) { + assertValidRowId(rowId); + if (rows.get(rowId) == null) { + return true; + } + return rows.get(rowId).isNullAt(columnOrdinal); + } + + @Override + public boolean getBoolean(int rowId) { + assertValidRowId(rowId); + return rows.get(rowId).getBoolean(columnOrdinal); + } + + @Override + public byte getByte(int rowId) { + assertValidRowId(rowId); + return rows.get(rowId).getByte(columnOrdinal); + } + + @Override + public short getShort(int rowId) { + assertValidRowId(rowId); + return rows.get(rowId).getShort(columnOrdinal); + } + + @Override + public int getInt(int rowId) { + assertValidRowId(rowId); + return rows.get(rowId).getInt(columnOrdinal); + } + + @Override + public long getLong(int rowId) { + assertValidRowId(rowId); + return rows.get(rowId).getLong(columnOrdinal); + } + + @Override + public float getFloat(int rowId) { + assertValidRowId(rowId); + return rows.get(rowId).getFloat(columnOrdinal); + } + + @Override + public double getDouble(int rowId) { + assertValidRowId(rowId); + return rows.get(rowId).getDouble(columnOrdinal); + } + + @Override + public String getString(int rowId) { + assertValidRowId(rowId); + return rows.get(rowId).getString(columnOrdinal); + } + + @Override + public BigDecimal getDecimal(int rowId) { + assertValidRowId(rowId); + return rows.get(rowId).getDecimal(columnOrdinal); + } + + @Override + public byte[] getBinary(int rowId) { + assertValidRowId(rowId); + return rows.get(rowId).getBinary(columnOrdinal); + } + + @Override + public MapValue getMap(int rowId) { + assertValidRowId(rowId); + return rows.get(rowId).getMap(columnOrdinal); + } + + @Override + public ArrayValue getArray(int rowId) { + assertValidRowId(rowId); + return rows.get(rowId).getArray(columnOrdinal); + } + + @Override + public ColumnVector getChild(int ordinal) { + List childRows = rows.stream() + .map(row -> { + if (row == null || row.isNullAt(columnOrdinal)) { + return null; + } else { + return row.getStruct(columnOrdinal); + } + }).collect(Collectors.toList()); + StructType structType = (StructType) dataType; + return new RowBasedVector( + structType.at(ordinal).getDataType(), + childRows, + ordinal + ); + } + + private void assertValidRowId(int rowId) { + checkArgument(rowId < rows.size(), + "Invalid rowId: " + rowId + ", max allowed rowId is: " + (rows.size() - 1)); + } } } diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/AbstractColumnVector.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/AbstractColumnVector.java index 024b33bcf60..23f3ed42855 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/AbstractColumnVector.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/AbstractColumnVector.java @@ -22,7 +22,6 @@ import io.delta.kernel.data.ArrayValue; import io.delta.kernel.data.ColumnVector; import io.delta.kernel.data.MapValue; -import io.delta.kernel.data.Row; import io.delta.kernel.types.DataType; import static io.delta.kernel.defaults.internal.DefaultKernelUtils.checkArgument; @@ -134,16 +133,13 @@ public MapValue getMap(int rowId) { throw unsupportedDataAccessException("map"); } - @Override - public Row getStruct(int rowId) { - throw unsupportedDataAccessException("struct"); - } - @Override public ArrayValue getArray(int rowId) { throw unsupportedDataAccessException("array"); } + // TODO no need to override these here; update default implementations in `ColumnVector` + // to have a more informative exception message protected UnsupportedOperationException unsupportedDataAccessException(String accessType) { String msg = String.format( "Trying to access a `%s` value from vector of type `%s`", diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultGenericVector.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultGenericVector.java index 817757e34e5..d6c6c2c9b17 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultGenericVector.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultGenericVector.java @@ -143,13 +143,6 @@ public byte[] getBinary(int rowId) { return (byte[]) rowIdToValueAccessor.apply(rowId); } - @Override - public Row getStruct(int rowId) { - assertValidRowId(rowId); - throwIfUnsafeAccess(StructType.class, "struct"); - return (Row) rowIdToValueAccessor.apply(rowId); - } - @Override public ArrayValue getArray(int rowId) { assertValidRowId(rowId); diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultStructVector.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultStructVector.java index ce4158e4042..7ac7ecde00a 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultStructVector.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultStructVector.java @@ -15,14 +15,9 @@ */ package io.delta.kernel.defaults.internal.data.vector; -import java.math.BigDecimal; import java.util.Optional; -import static java.util.Objects.requireNonNull; -import io.delta.kernel.data.ArrayValue; import io.delta.kernel.data.ColumnVector; -import io.delta.kernel.data.MapValue; -import io.delta.kernel.data.Row; import io.delta.kernel.types.DataType; import io.delta.kernel.types.StructType; @@ -59,111 +54,10 @@ public DefaultStructVector( this.memberVectors = memberVectors; } - @Override - public Row getStruct(int rowId) { - checkValidRowId(rowId); - if (isNullAt(rowId)) { - return null; - } - return new StructRow(this, rowId); - } - @Override public ColumnVector getChild(int ordinal) { checkArgument( ordinal >= 0 && ordinal < memberVectors.length, "Invalid ordinal " + ordinal); return memberVectors[ordinal]; } - - /** - * Wrapper class to expose one member as a {@link Row} - */ - private static class StructRow - implements Row { - private final DefaultStructVector structVector; - private final int rowId; - - StructRow(DefaultStructVector structVector, int rowId) { - this.structVector = requireNonNull(structVector, "structVector is null"); - checkArgument( - rowId >= 0 && rowId < structVector.getSize(), - "invalid row id: %s", rowId); - this.rowId = rowId; - } - - @Override - public StructType getSchema() { - return (StructType) structVector.getDataType(); - } - - @Override - public boolean isNullAt(int ordinal) { - return structVector.memberVectors[ordinal].isNullAt(rowId); - } - - @Override - public boolean getBoolean(int ordinal) { - return structVector.memberVectors[ordinal].getBoolean(rowId); - } - - @Override - public byte getByte(int ordinal) { - return structVector.memberVectors[ordinal].getByte(rowId); - } - - @Override - public short getShort(int ordinal) { - return structVector.memberVectors[ordinal].getShort(rowId); - } - - @Override - public int getInt(int ordinal) { - return structVector.memberVectors[ordinal].getInt(rowId); - } - - @Override - public long getLong(int ordinal) { - return structVector.memberVectors[ordinal].getLong(rowId); - } - - @Override - public float getFloat(int ordinal) { - return structVector.memberVectors[ordinal].getFloat(rowId); - } - - @Override - public double getDouble(int ordinal) { - return structVector.memberVectors[ordinal].getDouble(rowId); - } - - @Override - public String getString(int ordinal) { - return structVector.memberVectors[ordinal].getString(rowId); - } - - @Override - public BigDecimal getDecimal(int ordinal) { - return structVector.memberVectors[ordinal].getDecimal(rowId); - } - - @Override - public byte[] getBinary(int ordinal) { - return structVector.memberVectors[ordinal].getBinary(rowId); - } - - @Override - public Row getStruct(int ordinal) { - return structVector.memberVectors[ordinal].getStruct(rowId); - } - - @Override - public ArrayValue getArray(int ordinal) { - return structVector.memberVectors[ordinal].getArray(rowId); - } - - @Override - public MapValue getMap(int ordinal) { - return structVector.memberVectors[ordinal].getMap(rowId); - } - } } diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultSubFieldVector.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultSubFieldVector.java index 7d0edbb7c6e..0e5c2187e23 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultSubFieldVector.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultSubFieldVector.java @@ -149,12 +149,6 @@ public MapValue getMap(int rowId) { return rowIdToRowAccessor.apply(rowId).getMap(columnOrdinal); } - @Override - public Row getStruct(int rowId) { - assertValidRowId(rowId); - return rowIdToRowAccessor.apply(rowId).getStruct(columnOrdinal); - } - @Override public ArrayValue getArray(int rowId) { assertValidRowId(rowId); diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultViewVector.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultViewVector.java index 144e9438cb3..4f0b262cfc0 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultViewVector.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultViewVector.java @@ -20,7 +20,6 @@ import io.delta.kernel.data.ArrayValue; import io.delta.kernel.data.ColumnVector; import io.delta.kernel.data.MapValue; -import io.delta.kernel.data.Row; import io.delta.kernel.types.DataType; import static io.delta.kernel.defaults.internal.DefaultKernelUtils.checkArgument; @@ -132,12 +131,6 @@ public MapValue getMap(int rowId) { return underlyingVector.getMap(offset + rowId); } - @Override - public Row getStruct(int rowId) { - checkValidRowId(rowId); - return underlyingVector.getStruct(offset + rowId); - } - @Override public ArrayValue getArray(int rowId) { checkValidRowId(rowId); diff --git a/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/client/TestDefaultJsonHandler.java b/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/client/TestDefaultJsonHandler.java index d736ee98554..777e6dbdca2 100644 --- a/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/client/TestDefaultJsonHandler.java +++ b/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/client/TestDefaultJsonHandler.java @@ -216,12 +216,6 @@ public void parseNestedComplexTypes() throws IOException { assertEquals(exp3, VectorUtils.toJavaMap(result.getMap(3))); ArrayValue arrayOfStruct = result.getArray(4); assertEquals(arrayOfStruct.getSize(), 2); - // check getStruct - assertEquals("foo", arrayOfStruct.getElements().getStruct(0).getString(0)); - assertEquals(3, arrayOfStruct.getElements().getStruct(0).getInt(1)); - assertTrue(arrayOfStruct.getElements().getStruct(1).isNullAt(0)); - assertTrue(arrayOfStruct.getElements().getStruct(1).isNullAt(1)); - // check getChild assertEquals("foo", arrayOfStruct.getElements().getChild(0).getString(0)); assertEquals(3, arrayOfStruct.getElements().getChild(1).getInt(0)); assertTrue(arrayOfStruct.getElements().getChild(0).isNullAt(1)); diff --git a/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/internal/parquet/TestParquetBatchReader.java b/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/internal/parquet/TestParquetBatchReader.java index 4c661913bba..b9ac8c9013d 100644 --- a/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/internal/parquet/TestParquetBatchReader.java +++ b/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/internal/parquet/TestParquetBatchReader.java @@ -364,10 +364,6 @@ private static void verifyRowFromAllTypesFile( assertEquals(2, arrayValue.getSize()); assertEquals(2, elementVector.getSize()); assertTrue(elementVector.getDataType() instanceof StructType); - // check getStruct - Row item0 = elementVector.getStruct(0); - assertEquals(rowId, item0.getLong(0)); - // also check DefaultViewVector implements getChild assertEquals(rowId, elementVector.getChild(0).getLong(0)); assertTrue(elementVector.isNullAt(1)); break; @@ -443,28 +439,21 @@ private static void validateNestedStructColumn( return; } - Row struct = vector.getStruct(batchRowId); boolean expAaValNull = tableRowId % 19 == 0; boolean expAcValNull = tableRowId % 19 == 0 || tableRowId % 23 == 0; final int aaColOrdinal = 0; final int acColOrdinal = 1; - assertEquals(struct.isNullAt(aaColOrdinal), expAaValNull); - assertEquals(struct.isNullAt(acColOrdinal), expAcValNull); + assertEquals(vector.getChild(aaColOrdinal).isNullAt(batchRowId), expAaValNull); + assertEquals(vector.getChild(acColOrdinal).isNullAt(batchRowId), expAcValNull); if (!expAaValNull) { - String aaVal = struct.getString(aaColOrdinal); + String aaVal = vector.getChild(aaColOrdinal).getString(batchRowId); assertEquals(Integer.toString(tableRowId), aaVal); } if (!expAcValNull) { - Row acVal = struct.getStruct(acColOrdinal); - if (expAcValNull) { - assertTrue(struct.isNullAt(1)); - assertNull(acVal); - } else { - int actAcaVal = acVal.getInt(0); - assertEquals(tableRowId, actAcaVal); - } + int actAcaVal = vector.getChild(acColOrdinal).getChild(0).getInt(batchRowId); + assertEquals(tableRowId, actAcaVal); } } diff --git a/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/utils/DefaultKernelTestUtils.java b/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/utils/DefaultKernelTestUtils.java index 91353a7aee7..afa0decd789 100644 --- a/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/utils/DefaultKernelTestUtils.java +++ b/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/utils/DefaultKernelTestUtils.java @@ -91,8 +91,6 @@ public static Object getValueAsObject(ColumnVector vector, int rowId) { return vector.getString(rowId); } else if (dataType instanceof BinaryType) { return vector.getBinary(rowId); - } else if (dataType instanceof StructType) { - return vector.getStruct(rowId); } else if (dataType instanceof DecimalType) { return vector.getDecimal(rowId); } diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/utils/TestRow.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/utils/TestRow.scala index 680f2f60777..228a38ea488 100644 --- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/utils/TestRow.scala +++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/utils/TestRow.scala @@ -126,7 +126,10 @@ object TestRow { case _: DecimalType => vector.getDecimal(rowId) case _: ArrayType => arrayValueToScalaSeq(vector.getArray(rowId)) case _: MapType => mapValueToScalaMap(vector.getMap(rowId)) - case _: StructType => TestRow(vector.getStruct(rowId)) + case dataType: StructType => + TestRow.fromSeq(Seq.range(0, dataType.length()).map { ordinal => + getAsTestObject(vector.getChild(ordinal), rowId) + }) case _ => throw new UnsupportedOperationException("unrecognized data type") } } From f5ed6e984bdafcd63858a9a671cd09d46e0adf4b Mon Sep 17 00:00:00 2001 From: Allison Portis Date: Thu, 12 Oct 2023 16:08:45 -0700 Subject: [PATCH 2/3] respond to comments --- .../actions/DeletionVectorDescriptor.java | 2 +- .../delta/kernel/internal/actions/Format.java | 22 ++++++++++++++++--- .../kernel/internal/actions/Metadata.java | 3 +-- .../kernel/internal/util/InternalUtils.java | 16 ++++++++++++++ .../java/io/delta/kernel/utils/Utils.java | 17 -------------- 5 files changed, 37 insertions(+), 23 deletions(-) diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/DeletionVectorDescriptor.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/DeletionVectorDescriptor.java index 78ff7e147c7..5623553d686 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/DeletionVectorDescriptor.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/DeletionVectorDescriptor.java @@ -28,7 +28,7 @@ import io.delta.kernel.types.LongType; import io.delta.kernel.types.StringType; import io.delta.kernel.types.StructType; -import static io.delta.kernel.utils.Utils.requireNonNull; +import static io.delta.kernel.internal.util.InternalUtils.requireNonNull; import io.delta.kernel.internal.deletionvectors.Base85Codec; import io.delta.kernel.internal.fs.Path; diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Format.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Format.java index 629b1d40ded..5a50514d6ce 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Format.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Format.java @@ -15,11 +15,15 @@ */ package io.delta.kernel.internal.actions; +import java.util.Collections; +import java.util.Map; + import io.delta.kernel.data.ColumnVector; import io.delta.kernel.types.MapType; import io.delta.kernel.types.StringType; import io.delta.kernel.types.StructType; -import static io.delta.kernel.utils.Utils.requireNonNull; +import io.delta.kernel.utils.VectorUtils; +import static io.delta.kernel.internal.util.InternalUtils.requireNonNull; public class Format { @@ -29,7 +33,9 @@ public static Format fromColumnVector(ColumnVector vector, int rowId) { } final String provider = requireNonNull(vector.getChild(0), rowId, "provider") .getString(rowId); - return new Format(provider); + final Map options = vector.getChild(1).isNullAt(rowId) ? + Collections.emptyMap() : VectorUtils.toJavaMap(vector.getChild(1).getMap(rowId)); + return new Format(provider, options); } public static final StructType READ_SCHEMA = new StructType() @@ -40,8 +46,18 @@ public static Format fromColumnVector(ColumnVector vector, int rowId) { ); private final String provider; + private final Map options; - public Format(String provider) { + public Format(String provider, Map options) { this.provider = provider; + this.options = options; + } + + public String getProvider() { + return provider; + } + + public Map getOptions() { + return Collections.unmodifiableMap(options); } } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Metadata.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Metadata.java index e83e47a9b70..68a8933c9fc 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Metadata.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Metadata.java @@ -30,10 +30,9 @@ import io.delta.kernel.types.StringType; import io.delta.kernel.types.StructType; import io.delta.kernel.utils.VectorUtils; -import static io.delta.kernel.utils.Utils.requireNonNull; - import io.delta.kernel.internal.lang.Lazy; import io.delta.kernel.internal.types.TableSchemaSerDe; +import static io.delta.kernel.internal.util.InternalUtils.requireNonNull; public class Metadata { diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/util/InternalUtils.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/util/InternalUtils.java index ffa25daf943..6e2ca300310 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/util/InternalUtils.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/util/InternalUtils.java @@ -165,4 +165,20 @@ public String getString(int rowId) { } }; } + + public static Row requireNonNull(Row row, int ordinal, String columnName) { + if (row.isNullAt(ordinal)) { + throw new IllegalArgumentException( + "Expected a non-null value for column: " + columnName); + } + return row; + } + + public static ColumnVector requireNonNull(ColumnVector vector, int rowId, String columnName) { + if (vector.isNullAt(rowId)) { + throw new IllegalArgumentException( + "Expected a non-null value for column: " + columnName); + } + return vector; + } } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java b/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java index c866eeb8f0c..f0b0926cbf3 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java @@ -20,8 +20,6 @@ import java.util.Iterator; import io.delta.kernel.annotation.Evolving; -import io.delta.kernel.data.ColumnVector; -import io.delta.kernel.data.Row; /** * Various utility methods to help the connectors work with data objects returned by Kernel @@ -128,19 +126,4 @@ public static void closeCloseablesSilently(AutoCloseable... closeables) { } } - public static Row requireNonNull(Row row, int ordinal, String columnName) { - if (row.isNullAt(ordinal)) { - throw new IllegalArgumentException( - "Expected a non-null value for column: " + columnName); - } - return row; - } - - public static ColumnVector requireNonNull(ColumnVector vector, int rowId, String columnName) { - if (vector.isNullAt(rowId)) { - throw new IllegalArgumentException( - "Expected a non-null value for column: " + columnName); - } - return vector; - } } From fd7017d2d1580b085fe17ed7aaf3d2a444dd9188 Mon Sep 17 00:00:00 2001 From: Allison Portis Date: Thu, 12 Oct 2023 16:19:25 -0700 Subject: [PATCH 3/3] fix imports --- .../kernel/internal/actions/DeletionVectorDescriptor.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/DeletionVectorDescriptor.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/DeletionVectorDescriptor.java index 5623553d686..84c125e1597 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/DeletionVectorDescriptor.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/DeletionVectorDescriptor.java @@ -28,11 +28,10 @@ import io.delta.kernel.types.LongType; import io.delta.kernel.types.StringType; import io.delta.kernel.types.StructType; -import static io.delta.kernel.internal.util.InternalUtils.requireNonNull; - import io.delta.kernel.internal.deletionvectors.Base85Codec; import io.delta.kernel.internal.fs.Path; import static io.delta.kernel.internal.util.InternalUtils.checkArgument; +import static io.delta.kernel.internal.util.InternalUtils.requireNonNull; /** * Information about a deletion vector attached to a file action.