From 2117d503c0b05243124512726153870fb6ec9f7c Mon Sep 17 00:00:00 2001 From: Allison Portis Date: Tue, 10 Oct 2023 11:04:06 -0700 Subject: [PATCH] [Kernel] Resolve some TODOs (#2143) ## Description Resolves some miscellaneous TODOs in the Kernel code including - Moves `MixedDataType` to an internal package since it is a temporary type - Change `INSTANCE` to `$TYPENAME` for all simple data types - Move `singletonColumnVector` to internal utilities - Remove logging to stdout for now - Remove miscellaneous TODO comments that don't need to be in the code ## How was this patch tested? Existing tests suffice. --- .../src/main/java/io/delta/kernel/Scan.java | 2 +- .../io/delta/kernel/expressions/Literal.java | 22 +-- .../internal/InternalScanFileUtils.java | 4 +- .../kernel/internal/actions/AddFile.java | 12 +- .../actions/DeletionVectorDescriptor.java | 10 +- .../delta/kernel/internal/actions/Format.java | 4 +- .../kernel/internal/actions/Metadata.java | 14 +- .../kernel/internal/actions/Protocol.java | 8 +- .../checkpoints/CheckpointMetaData.java | 7 +- .../internal/checkpoints/Checkpointer.java | 5 +- .../kernel/internal/data/ScanStateRow.java | 14 +- .../internal/data/SelectionColumnVector.java | 2 +- .../deletionvectors/RoaringBitmapArray.java | 3 - .../io/delta/kernel/internal/fs/Path.java | 2 +- .../replay/ActiveAddFilesIterator.java | 2 +- .../kernel/internal/replay/LogReplay.java | 2 +- .../{ => internal}/types/MixedDataType.java | 6 +- .../internal/types/TableSchemaSerDe.java | 18 +- .../kernel/internal/util/InternalUtils.java | 40 ++++ .../delta/kernel/internal/util/Logging.java | 12 +- .../delta/kernel/types/BasePrimitiveType.java | 22 +-- .../io/delta/kernel/types/BinaryType.java | 2 +- .../io/delta/kernel/types/BooleanType.java | 4 +- .../java/io/delta/kernel/types/ByteType.java | 2 +- .../java/io/delta/kernel/types/DateType.java | 2 +- .../io/delta/kernel/types/DoubleType.java | 2 +- .../java/io/delta/kernel/types/FloatType.java | 2 +- .../io/delta/kernel/types/IntegerType.java | 2 +- .../java/io/delta/kernel/types/LongType.java | 2 +- .../java/io/delta/kernel/types/ShortType.java | 2 +- .../io/delta/kernel/types/StringType.java | 2 +- .../io/delta/kernel/types/StructField.java | 3 +- .../io/delta/kernel/types/TimestampType.java | 2 +- .../java/io/delta/kernel/utils/Utils.java | 41 ---- .../internal/types/JsonHandlerTestImpl.java | 1 - .../internal/types/TestTableSchemaSerDe.java | 10 +- .../kernel/expressions/ExpressionsSuite.scala | 6 +- .../internal/util/PartitionUtilsSuite.scala | 6 +- .../defaults/client/DefaultFileHandler.java | 2 +- .../internal/data/DefaultJsonRow.java | 1 + .../data/vector/DefaultBooleanVector.java | 2 +- .../data/vector/DefaultByteVector.java | 2 +- .../data/vector/DefaultDoubleVector.java | 2 +- .../data/vector/DefaultFloatVector.java | 2 +- .../data/vector/DefaultShortVector.java | 2 +- .../DefaultExpressionEvaluator.java | 14 +- .../DefaultPredicateEvaluator.java | 6 +- .../expressions/ElementAtEvaluator.java | 8 +- .../expressions/PartitionValueEvaluator.java | 6 +- .../internal/parquet/ParquetConverters.java | 2 +- .../internal/parquet/TimestampConverters.java | 6 +- .../client/TestDefaultJsonHandler.java | 33 ++-- .../defaults/integration/BaseIntegration.java | 2 - .../integration/DataBuilderUtils.java | 2 - .../integration/TestDeltaTableReads.java | 6 +- .../parquet/TestParquetBatchReader.java | 90 ++++----- .../utils/DefaultKernelTestUtils.java | 4 - .../defaults/DeltaTableReadsSuite.scala | 2 +- .../kernel/defaults/LogReplaySuite.scala | 2 +- .../defaults/ParquetBatchReaderSuite.scala | 4 +- .../defaults/PartitionPruningSuite.scala | 20 +- .../DefaultExpressionHandlerSuite.scala | 4 +- .../DefaultExpressionEvaluatorSuite.scala | 176 +++++++++--------- .../DefaultPredicateEvaluatorSuite.scala | 4 +- .../expressions/ExpressionSuiteBase.scala | 2 +- .../ImplicitCastExpressionSuite.scala | 42 ++--- .../kernel/defaults/utils/TestUtils.scala | 30 ++- 67 files changed, 380 insertions(+), 400 deletions(-) rename kernel/kernel-api/src/main/java/io/delta/kernel/{ => internal}/types/MixedDataType.java (94%) diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/Scan.java b/kernel/kernel-api/src/main/java/io/delta/kernel/Scan.java index ad8f6d123cd..35f669a0f18 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/Scan.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/Scan.java @@ -86,7 +86,7 @@ public interface Scan { * *
  • diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Literal.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Literal.java index 109aea65dc0..a0df9f33549 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Literal.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Literal.java @@ -49,7 +49,7 @@ public final class Literal implements Expression { * @return a {@link Literal} of type {@link BooleanType} */ public static Literal ofBoolean(boolean value) { - return new Literal(value, BooleanType.INSTANCE); + return new Literal(value, BooleanType.BOOLEAN); } /** @@ -59,7 +59,7 @@ public static Literal ofBoolean(boolean value) { * @return a {@link Literal} of type {@link ByteType} */ public static Literal ofByte(byte value) { - return new Literal(value, ByteType.INSTANCE); + return new Literal(value, ByteType.BYTE); } /** @@ -69,7 +69,7 @@ public static Literal ofByte(byte value) { * @return a {@link Literal} of type {@link ShortType} */ public static Literal ofShort(short value) { - return new Literal(value, ShortType.INSTANCE); + return new Literal(value, ShortType.SHORT); } /** @@ -79,7 +79,7 @@ public static Literal ofShort(short value) { * @return a {@link Literal} of type {@link IntegerType} */ public static Literal ofInt(int value) { - return new Literal(value, IntegerType.INSTANCE); + return new Literal(value, IntegerType.INTEGER); } /** @@ -89,7 +89,7 @@ public static Literal ofInt(int value) { * @return a {@link Literal} of type {@link LongType} */ public static Literal ofLong(long value) { - return new Literal(value, LongType.INSTANCE); + return new Literal(value, LongType.LONG); } /** @@ -99,7 +99,7 @@ public static Literal ofLong(long value) { * @return a {@link Literal} of type {@link FloatType} */ public static Literal ofFloat(float value) { - return new Literal(value, FloatType.INSTANCE); + return new Literal(value, FloatType.FLOAT); } /** @@ -109,7 +109,7 @@ public static Literal ofFloat(float value) { * @return a {@link Literal} of type {@link DoubleType} */ public static Literal ofDouble(double value) { - return new Literal(value, DoubleType.INSTANCE); + return new Literal(value, DoubleType.DOUBLE); } /** @@ -119,7 +119,7 @@ public static Literal ofDouble(double value) { * @return a {@link Literal} of type {@link StringType} */ public static Literal ofString(String value) { - return new Literal(value, StringType.INSTANCE); + return new Literal(value, StringType.STRING); } /** @@ -129,7 +129,7 @@ public static Literal ofString(String value) { * @return a {@link Literal} of type {@link BinaryType} */ public static Literal ofBinary(byte[] value) { - return new Literal(value, BinaryType.INSTANCE); + return new Literal(value, BinaryType.BINARY); } /** @@ -139,7 +139,7 @@ public static Literal ofBinary(byte[] value) { * @return a {@link Literal} of type {@link DateType} */ public static Literal ofDate(int daysSinceEpochUTC) { - return new Literal(daysSinceEpochUTC, DateType.INSTANCE); + return new Literal(daysSinceEpochUTC, DateType.DATE); } /** @@ -149,7 +149,7 @@ public static Literal ofDate(int daysSinceEpochUTC) { * @return a {@link Literal} with data type {@link TimestampType} */ public static Literal ofTimestamp(long microsSinceEpochUTC) { - return new Literal(microsSinceEpochUTC, TimestampType.INSTANCE); + return new Literal(microsSinceEpochUTC, TimestampType.TIMESTAMP); } /** diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/InternalScanFileUtils.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/InternalScanFileUtils.java index 7a3db5bf41d..bc800a4f8ea 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/InternalScanFileUtils.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/InternalScanFileUtils.java @@ -43,7 +43,7 @@ public class InternalScanFileUtils { private InternalScanFileUtils() {} private static final String TABLE_ROOT_COL_NAME = "tableRoot"; - private static final DataType TABLE_ROOT_DATA_TYPE = StringType.INSTANCE; + private static final DataType TABLE_ROOT_DATA_TYPE = StringType.STRING; /** * {@link Column} expression referring to the `partitionValues` in scan `add` file. */ @@ -58,7 +58,7 @@ private InternalScanFileUtils() {} public static final StructType SCAN_FILE_SCHEMA = new StructType() .add("add", AddFile.SCHEMA) - // TODO: table root is temporary, until the path in `add.path` is converted to + // NOTE: table root is temporary, until the path in `add.path` is converted to // an absolute path. https://github.com/delta-io/delta/issues/2089 .add(TABLE_ROOT_COL_NAME, TABLE_ROOT_DATA_TYPE); diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/AddFile.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/AddFile.java index 2519b1cef81..26e236e2f4c 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/AddFile.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/AddFile.java @@ -21,15 +21,15 @@ * Delta log action representing an `AddFile` */ public class AddFile { - // TODO: there are more optional fields in `AddFile` according to the spec. We will be adding + // Note: these are more optional fields in `AddFile` according to the spec. We will be adding // them in read schema as we support the related features. public static final StructType SCHEMA = new StructType() - .add("path", StringType.INSTANCE, false /* nullable */) + .add("path", StringType.STRING, false /* nullable */) .add("partitionValues", - new MapType(StringType.INSTANCE, StringType.INSTANCE, true), + new MapType(StringType.STRING, StringType.STRING, true), false /* nullable*/) - .add("size", LongType.INSTANCE, false /* nullable*/) - .add("modificationTime", LongType.INSTANCE, false /* nullable*/) - .add("dataChange", BooleanType.INSTANCE, false /* nullable*/) + .add("size", LongType.LONG, false /* nullable*/) + .add("modificationTime", LongType.LONG, false /* nullable*/) + .add("dataChange", BooleanType.BOOLEAN, false /* nullable*/) .add("deletionVector", DeletionVectorDescriptor.READ_SCHEMA, true /* nullable */); } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/DeletionVectorDescriptor.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/DeletionVectorDescriptor.java index 6130dbd46d3..347c8b5280b 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/DeletionVectorDescriptor.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/DeletionVectorDescriptor.java @@ -63,11 +63,11 @@ public static DeletionVectorDescriptor fromRow(Row row) { public static final String UUID_DV_MARKER = "u"; public static final StructType READ_SCHEMA = new StructType() - .add("storageType", StringType.INSTANCE, false /* nullable*/) - .add("pathOrInlineDv", StringType.INSTANCE, false /* nullable*/) - .add("offset", IntegerType.INSTANCE, true /* nullable*/) - .add("sizeInBytes", IntegerType.INSTANCE, false /* nullable*/) - .add("cardinality", LongType.INSTANCE, false /* nullable*/); + .add("storageType", StringType.STRING, false /* nullable*/) + .add("pathOrInlineDv", StringType.STRING, false /* nullable*/) + .add("offset", IntegerType.INTEGER, true /* nullable*/) + .add("sizeInBytes", IntegerType.INTEGER, false /* nullable*/) + .add("cardinality", LongType.LONG, false /* nullable*/); private static final Map COL_NAME_TO_ORDINAL = IntStream.range(0, READ_SCHEMA.length()) diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Format.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Format.java index 0b4728cc749..d6116ddb4d2 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Format.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Format.java @@ -32,9 +32,9 @@ public static Format fromRow(Row row) { } public static final StructType READ_SCHEMA = new StructType() - .add("provider", StringType.INSTANCE, false /* nullable */) + .add("provider", StringType.STRING, false /* nullable */) .add("options", - new MapType(StringType.INSTANCE, StringType.INSTANCE, false), + new MapType(StringType.STRING, StringType.STRING, false), true /* nullable */ ); diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Metadata.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Metadata.java index e1227804ba8..d5a30e024d8 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Metadata.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Metadata.java @@ -58,17 +58,17 @@ public static Metadata fromRow(Row row, TableClient tableClient) { } public static final StructType READ_SCHEMA = new StructType() - .add("id", StringType.INSTANCE, false /* nullable */) - .add("name", StringType.INSTANCE, true /* nullable */) - .add("description", StringType.INSTANCE, true /* nullable */) + .add("id", StringType.STRING, false /* nullable */) + .add("name", StringType.STRING, true /* nullable */) + .add("description", StringType.STRING, true /* nullable */) .add("format", Format.READ_SCHEMA, false /* nullable */) - .add("schemaString", StringType.INSTANCE, false /* nullable */) + .add("schemaString", StringType.STRING, false /* nullable */) .add("partitionColumns", - new ArrayType(StringType.INSTANCE, false /* contains null */), + new ArrayType(StringType.STRING, false /* contains null */), false /* nullable */) - .add("createdTime", LongType.INSTANCE, true /* contains null */) + .add("createdTime", LongType.LONG, true /* contains null */) .add("configuration", - new MapType(StringType.INSTANCE, StringType.INSTANCE, false), + new MapType(StringType.STRING, StringType.STRING, false), false /* nullable */); private final String id; diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Protocol.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Protocol.java index efd289831d8..84eefb4ab46 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Protocol.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/actions/Protocol.java @@ -40,10 +40,10 @@ public static Protocol fromRow(Row row) { } public static final StructType READ_SCHEMA = new StructType() - .add("minReaderVersion", IntegerType.INSTANCE, false /* nullable */) - .add("minWriterVersion", IntegerType.INSTANCE, false /* nullable */) - .add("readerFeatures", new ArrayType(StringType.INSTANCE, false /* contains null */)) - .add("writerFeatures", new ArrayType(StringType.INSTANCE, false /* contains null */)); + .add("minReaderVersion", IntegerType.INTEGER, false /* nullable */) + .add("minWriterVersion", IntegerType.INTEGER, false /* nullable */) + .add("readerFeatures", new ArrayType(StringType.STRING, false /* contains null */)) + .add("writerFeatures", new ArrayType(StringType.STRING, false /* contains null */)); private final int minReaderVersion; private final int minWriterVersion; diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/checkpoints/CheckpointMetaData.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/checkpoints/CheckpointMetaData.java index 0233af8825c..1ccf345b378 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/checkpoints/CheckpointMetaData.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/checkpoints/CheckpointMetaData.java @@ -30,11 +30,10 @@ public static CheckpointMetaData fromRow(Row row) { ); } - // TODO: there are more optional fields public static StructType READ_SCHEMA = new StructType() - .add("version", LongType.INSTANCE, false /* nullable */) - .add("size", LongType.INSTANCE, false /* nullable */) - .add("parts", LongType.INSTANCE); + .add("version", LongType.LONG, false /* nullable */) + .add("size", LongType.LONG, false /* nullable */) + .add("parts", LongType.LONG); public final long version; public final long size; diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/checkpoints/Checkpointer.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/checkpoints/Checkpointer.java index ef94b8d52dd..75f30e97264 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/checkpoints/Checkpointer.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/checkpoints/Checkpointer.java @@ -98,9 +98,8 @@ public Optional readLastCheckpointFile(TableClient tableClie */ private Optional loadMetadataFromFile(TableClient tableClient) { try { - // TODO: we have no way to get the file size and modification time within the api - // module. Should we have a client API for that or make use of the - // `FileSystemClient#listFrom`? + // For now we use file size = 0 and modification time = 0, in the future we should use + // listFrom to retrieve the real values see delta-io/delta#2140 FileStatus lastCheckpointFile = FileStatus.of(lastCheckpointFilePath.toString(), 0, 0); JsonHandler jsonHandler = tableClient.getJsonHandler(); try (CloseableIterator fileReadContextIter = diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/ScanStateRow.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/ScanStateRow.java index 0c350d06edf..0153430e41e 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/ScanStateRow.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/ScanStateRow.java @@ -36,13 +36,13 @@ */ public class ScanStateRow extends GenericRow { private static final StructType SCHEMA = new StructType() - .add("configuration", new MapType(StringType.INSTANCE, StringType.INSTANCE, false)) - .add("logicalSchemaString", StringType.INSTANCE) - .add("physicalSchemaString", StringType.INSTANCE) - .add("partitionColumns", new ArrayType(StringType.INSTANCE, false)) - .add("minReaderVersion", IntegerType.INSTANCE) - .add("minWriterVersion", IntegerType.INSTANCE) - .add("tablePath", StringType.INSTANCE); + .add("configuration", new MapType(StringType.STRING, StringType.STRING, false)) + .add("logicalSchemaString", StringType.STRING) + .add("physicalSchemaString", StringType.STRING) + .add("partitionColumns", new ArrayType(StringType.STRING, false)) + .add("minReaderVersion", IntegerType.INTEGER) + .add("minWriterVersion", IntegerType.INTEGER) + .add("tablePath", StringType.STRING); private static final Map COL_NAME_TO_ORDINAL = IntStream.range(0, SCHEMA.length()) diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/SelectionColumnVector.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/SelectionColumnVector.java index a998da0b5b1..718525eaad0 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/SelectionColumnVector.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/data/SelectionColumnVector.java @@ -38,7 +38,7 @@ public SelectionColumnVector(RoaringBitmapArray bitmap, ColumnVector rowIndices) @Override public DataType getDataType() { - return BooleanType.INSTANCE; + return BooleanType.BOOLEAN; } @Override diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/deletionvectors/RoaringBitmapArray.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/deletionvectors/RoaringBitmapArray.java index f2e94c6ab2a..b9f73175593 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/deletionvectors/RoaringBitmapArray.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/deletionvectors/RoaringBitmapArray.java @@ -27,9 +27,6 @@ import static io.delta.kernel.internal.util.InternalUtils.checkArgument; -// TODO: add test suite -// If we implement additional methods (i.e. serialize) we can copy the test suite from delta-spark - /** * A 64-bit extension of [[RoaringBitmap]] that is optimized for cases that usually fit within * a 32-bit bitmap, but may run over by a few bits on occasion. diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/fs/Path.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/fs/Path.java index e45d982d143..d56b34b9fbc 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/fs/Path.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/fs/Path.java @@ -29,8 +29,8 @@ *

    * Taken from https://github.com/apache/hadoop/blob/branch-3.3 * .4/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java - * TODO: remove unused parts */ +// TODO remove this class public class Path implements Comparable, Serializable, ObjectInputValidation { diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/replay/ActiveAddFilesIterator.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/replay/ActiveAddFilesIterator.java index 4bf838f1371..5938a665026 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/replay/ActiveAddFilesIterator.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/replay/ActiveAddFilesIterator.java @@ -220,7 +220,7 @@ private void prepareNext() { .getEvaluator( scanAddFiles.getSchema(), Literal.ofString(tableRoot.toUri().toString()), - StringType.INSTANCE); + StringType.STRING); } ColumnVector tableRootVector = tableRootVectorGenerator.eval(scanAddFiles); scanAddFiles = scanAddFiles.withNewColumn( diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/replay/LogReplay.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/replay/LogReplay.java index b50ef646535..912966270d8 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/replay/LogReplay.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/replay/LogReplay.java @@ -66,7 +66,7 @@ public class LogReplay implements Logging { // the whether stats are needed or not: https://github.com/delta-io/delta/issues/1961 .add("add", AddFile.SCHEMA) .add("remove", new StructType() - .add("path", StringType.INSTANCE, false /* nullable */) + .add("path", StringType.STRING, false /* nullable */) .add("deletionVector", DeletionVectorDescriptor.READ_SCHEMA, true /* nullable */) ); diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/MixedDataType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/types/MixedDataType.java similarity index 94% rename from kernel/kernel-api/src/main/java/io/delta/kernel/types/MixedDataType.java rename to kernel/kernel-api/src/main/java/io/delta/kernel/internal/types/MixedDataType.java index 306fd88b9dd..05151388c83 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/MixedDataType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/types/MixedDataType.java @@ -13,13 +13,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.delta.kernel.types; +package io.delta.kernel.internal.types; import io.delta.kernel.annotation.Evolving; import io.delta.kernel.data.ColumnVector; +import io.delta.kernel.types.DataType; +import io.delta.kernel.types.StructType; /** - * TODO: + * TODO: this is temporary * This is a controversial data type to have, but we have no way to specify the schema * of JSON serialized table schema. In order to use the * {@link io.delta.kernel.client.JsonHandler#parseJson(ColumnVector, StructType)}, the Kernel diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/types/TableSchemaSerDe.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/types/TableSchemaSerDe.java index d583b9c5f98..afaa7b4e8f5 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/types/TableSchemaSerDe.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/types/TableSchemaSerDe.java @@ -34,13 +34,13 @@ import io.delta.kernel.types.DataType; import io.delta.kernel.types.DecimalType; import io.delta.kernel.types.MapType; -import io.delta.kernel.types.MixedDataType; import io.delta.kernel.types.StringType; import io.delta.kernel.types.StructField; import io.delta.kernel.types.StructType; import io.delta.kernel.utils.CloseableIterator; import io.delta.kernel.utils.Utils; import io.delta.kernel.utils.VectorUtils; +import io.delta.kernel.internal.util.InternalUtils; /** * Utility class to serialize and deserialize the table schema which is of type {@link StructType}. @@ -190,7 +190,7 @@ private static R parseAndEvalSingleRow( String jsonString, StructType outputSchema, Function evalFunction) { - ColumnVector columnVector = Utils.singletonColumnVector(jsonString); + ColumnVector columnVector = InternalUtils.singletonStringColumnVector(jsonString); ColumnarBatch result = jsonHandler.parseJson(columnVector, outputSchema); assert result.getSize() == 1; @@ -207,11 +207,11 @@ private static R parseAndEvalSingleRow( * Schema of the one member ({@link StructField}) in {@link StructType}. */ private static final StructType STRUCT_FIELD_SCHEMA = new StructType() - .add("name", StringType.INSTANCE) + .add("name", StringType.STRING) .add("type", MixedDataType.INSTANCE) // Data type can be a string or a object. - .add("nullable", BooleanType.INSTANCE) + .add("nullable", BooleanType.BOOLEAN) .add("metadata", - new MapType(StringType.INSTANCE, StringType.INSTANCE, false /* valueContainsNull */)); + new MapType(StringType.STRING, StringType.STRING, false /* valueContainsNull */)); /** * Schema of the serialized {@link StructType}. @@ -238,9 +238,9 @@ private static R parseAndEvalSingleRow( */ private static StructType ARRAY_TYPE_SCHEMA = new StructType() - .add("type", StringType.INSTANCE) + .add("type", StringType.STRING) .add("elementType", MixedDataType.INSTANCE) - .add("containsNull", BooleanType.INSTANCE); + .add("containsNull", BooleanType.BOOLEAN); /** * Example Map Type in serialized format @@ -253,10 +253,10 @@ private static R parseAndEvalSingleRow( */ private static StructType MAP_TYPE_SCHEMA = new StructType() - .add("type", StringType.INSTANCE) + .add("type", StringType.STRING) .add("keyType", MixedDataType.INSTANCE) .add("valueType", MixedDataType.INSTANCE) - .add("valueContainsNull", BooleanType.INSTANCE); + .add("valueContainsNull", BooleanType.BOOLEAN); private static Pattern DECIMAL_TYPE_PATTERN = Pattern.compile("decimal\\(\\s*(?[0-9]+),\\s*(?[0-9]+)\\s*\\)"); diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/util/InternalUtils.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/util/InternalUtils.java index 26a2e364f37..ffa25daf943 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/util/InternalUtils.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/util/InternalUtils.java @@ -21,8 +21,11 @@ import java.time.temporal.ChronoUnit; import java.util.Optional; +import io.delta.kernel.data.ColumnVector; import io.delta.kernel.data.FileDataReadResult; import io.delta.kernel.data.Row; +import io.delta.kernel.types.DataType; +import io.delta.kernel.types.StringType; import io.delta.kernel.utils.CloseableIterator; public class InternalUtils { @@ -125,4 +128,41 @@ public static int daysSinceEpoch(Date date) { LocalDate localDate = date.toLocalDate(); return (int) ChronoUnit.DAYS.between(EPOCH, localDate); } + + /** + * Utility method to create a singleton string {@link ColumnVector} + * + * @param value the string element to create the vector with + * @return A {@link ColumnVector} with a single element {@code value} + */ + public static ColumnVector singletonStringColumnVector(String value) { + return new ColumnVector() { + @Override + public DataType getDataType() { + return StringType.STRING; + } + + @Override + public int getSize() { + return 1; + } + + @Override + public void close() { + } + + @Override + public boolean isNullAt(int rowId) { + return value == null; + } + + @Override + public String getString(int rowId) { + if (rowId != 0) { + throw new IllegalArgumentException("Invalid row id: " + rowId); + } + return value; + } + }; + } } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/util/Logging.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/util/Logging.java index aaef62ccba4..754918f1c57 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/util/Logging.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/util/Logging.java @@ -17,24 +17,22 @@ import java.util.function.Supplier; -/** - * TODO: Remove the stdout printing before 3.0 release. - */ +// TODO re-enable proper logging public interface Logging { default void logInfo(String msg) { - System.out.println(this.getClass() + " :: " + msg); + // System.out.println(this.getClass() + " :: " + msg); } default void logInfo(Supplier msg) { - System.out.println(this.getClass() + " :: " + msg.get()); + // System.out.println(this.getClass() + " :: " + msg.get()); } default void logDebug(String msg) { - System.out.println(this.getClass() + " :: " + msg); + // System.out.println(this.getClass() + " :: " + msg); } default void logDebug(Supplier msg) { - System.out.println(this.getClass() + " :: " + msg.get()); + // System.out.println(this.getClass() + " :: " + msg.get()); } } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/BasePrimitiveType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/BasePrimitiveType.java index d49bddf649e..46b27f0ae66 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/BasePrimitiveType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/BasePrimitiveType.java @@ -57,17 +57,17 @@ public static List getAllPrimitiveTypes() { private static final Supplier> nameToPrimitiveTypeMap = () -> Collections.unmodifiableMap(new HashMap() { { - put("boolean", BooleanType.INSTANCE); - put("byte", ByteType.INSTANCE); - put("short", ShortType.INSTANCE); - put("integer", IntegerType.INSTANCE); - put("long", LongType.INSTANCE); - put("float", FloatType.INSTANCE); - put("double", DoubleType.INSTANCE); - put("date", DateType.INSTANCE); - put("timestamp", TimestampType.INSTANCE); - put("binary", BinaryType.INSTANCE); - put("string", StringType.INSTANCE); + put("boolean", BooleanType.BOOLEAN); + put("byte", ByteType.BYTE); + put("short", ShortType.SHORT); + put("integer", IntegerType.INTEGER); + put("long", LongType.LONG); + put("float", FloatType.FLOAT); + put("double", DoubleType.DOUBLE); + put("date", DateType.DATE); + put("timestamp", TimestampType.TIMESTAMP); + put("binary", BinaryType.BINARY); + put("string", StringType.STRING); } }); diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/BinaryType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/BinaryType.java index c591fabab6f..cafa64fa9d8 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/BinaryType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/BinaryType.java @@ -24,7 +24,7 @@ */ @Evolving public class BinaryType extends BasePrimitiveType { - public static final BinaryType INSTANCE = new BinaryType(); + public static final BinaryType BINARY = new BinaryType(); private BinaryType() { super("binary"); diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/BooleanType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/BooleanType.java index 83d41d34805..9e07d34e482 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/BooleanType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/BooleanType.java @@ -24,9 +24,7 @@ */ @Evolving public class BooleanType extends BasePrimitiveType { - // TODO: Should remove the `INSTANCE` to `BOOLEAN` so that it can be static imported where - // needed and referred without the `BooleanType.` prefix. Same for other types. - public static final BooleanType INSTANCE = new BooleanType(); + public static final BooleanType BOOLEAN = new BooleanType(); private BooleanType() { super("boolean"); diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/ByteType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/ByteType.java index 469955f2bc9..c31b0dad80d 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/ByteType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/ByteType.java @@ -24,7 +24,7 @@ */ @Evolving public class ByteType extends BasePrimitiveType { - public static final ByteType INSTANCE = new ByteType(); + public static final ByteType BYTE = new ByteType(); private ByteType() { super("byte"); diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/DateType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/DateType.java index ec7ef9501ba..cb4bb6e58b6 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/DateType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/DateType.java @@ -25,7 +25,7 @@ */ @Evolving public class DateType extends BasePrimitiveType { - public static final DateType INSTANCE = new DateType(); + public static final DateType DATE = new DateType(); private DateType() { super("date"); diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/DoubleType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/DoubleType.java index 2f3dbfe6a92..c856ed44790 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/DoubleType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/DoubleType.java @@ -24,7 +24,7 @@ */ @Evolving public class DoubleType extends BasePrimitiveType { - public static final DoubleType INSTANCE = new DoubleType(); + public static final DoubleType DOUBLE = new DoubleType(); private DoubleType() { super("double"); diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/FloatType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/FloatType.java index af7db2b2e67..fea0e1208ff 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/FloatType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/FloatType.java @@ -24,7 +24,7 @@ */ @Evolving public class FloatType extends BasePrimitiveType { - public static final FloatType INSTANCE = new FloatType(); + public static final FloatType FLOAT = new FloatType(); private FloatType() { super("float"); diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/IntegerType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/IntegerType.java index b20ed509dea..80360002c53 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/IntegerType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/IntegerType.java @@ -24,7 +24,7 @@ */ @Evolving public class IntegerType extends BasePrimitiveType { - public static final IntegerType INSTANCE = new IntegerType(); + public static final IntegerType INTEGER = new IntegerType(); private IntegerType() { super("integer"); diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/LongType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/LongType.java index 4acd2074904..bddb583e2db 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/LongType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/LongType.java @@ -24,7 +24,7 @@ */ @Evolving public class LongType extends BasePrimitiveType { - public static final LongType INSTANCE = new LongType(); + public static final LongType LONG = new LongType(); private LongType() { super("long"); diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/ShortType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/ShortType.java index 313fbecfe08..016eff908e8 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/ShortType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/ShortType.java @@ -24,7 +24,7 @@ */ @Evolving public class ShortType extends BasePrimitiveType { - public static final ShortType INSTANCE = new ShortType(); + public static final ShortType SHORT = new ShortType(); private ShortType() { super("short"); diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/StringType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/StringType.java index 33c47940913..8a5ba8c8eed 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/StringType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/StringType.java @@ -24,7 +24,7 @@ */ @Evolving public class StringType extends BasePrimitiveType { - public static final StringType INSTANCE = new StringType(); + public static final StringType STRING = new StringType(); private StringType() { super("string"); diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/StructField.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/StructField.java index 28a503acdb3..231e789bbc6 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/StructField.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/StructField.java @@ -35,7 +35,6 @@ public class StructField { // Static Fields / Methods //////////////////////////////////////////////////////////////////////////////// - // TODO: for now we introduce isMetadataColumn as a field in the column metadata /** * Indicates a metadata column when present in the field metadata and the value is true */ @@ -48,7 +47,7 @@ public class StructField { public static String ROW_INDEX_COLUMN_NAME = "_metadata.row_index"; public static StructField ROW_INDEX_COLUMN = new StructField( ROW_INDEX_COLUMN_NAME, - LongType.INSTANCE, + LongType.LONG, false, Collections.singletonMap(IS_METADATA_COLUMN_KEY, "true")); diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/TimestampType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/TimestampType.java index c1f16d55863..aff5765d2bd 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/TimestampType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/TimestampType.java @@ -28,7 +28,7 @@ */ @Evolving public class TimestampType extends BasePrimitiveType { - public static final TimestampType INSTANCE = new TimestampType(); + public static final TimestampType TIMESTAMP = new TimestampType(); private TimestampType() { super("timestamp"); diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java b/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java index bbf6716a2d3..7913b72ce39 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java @@ -20,10 +20,7 @@ import java.util.Iterator; import io.delta.kernel.annotation.Evolving; -import io.delta.kernel.data.ColumnVector; import io.delta.kernel.data.Row; -import io.delta.kernel.types.DataType; -import io.delta.kernel.types.StringType; /** * Various utility methods to help the connectors work with data objects returned by Kernel @@ -86,44 +83,6 @@ public T next() { }; } - /** - * Utility method to create a singleton string {@link ColumnVector} - * - * @param value the string element to create the vector with - * @return A {@link ColumnVector} with a single element {@code value} - */ - // TODO: add String to method name or make generic? - public static ColumnVector singletonColumnVector(String value) { - return new ColumnVector() { - @Override - public DataType getDataType() { - return StringType.INSTANCE; - } - - @Override - public int getSize() { - return 1; - } - - @Override - public void close() { - } - - @Override - public boolean isNullAt(int rowId) { - return value == null; - } - - @Override - public String getString(int rowId) { - if (rowId != 0) { - throw new IllegalArgumentException("Invalid row id: " + rowId); - } - return value; - } - }; - } - /** * Close the given one or more {@link AutoCloseable}s. {@link AutoCloseable#close()} * will be called on all given non-null closeables. Will throw unchecked diff --git a/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/JsonHandlerTestImpl.java b/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/JsonHandlerTestImpl.java index 2987d8d8ed8..d2e87512263 100644 --- a/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/JsonHandlerTestImpl.java +++ b/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/JsonHandlerTestImpl.java @@ -36,7 +36,6 @@ import io.delta.kernel.types.IntegerType; import io.delta.kernel.types.LongType; import io.delta.kernel.types.MapType; -import io.delta.kernel.types.MixedDataType; import io.delta.kernel.types.StringType; import io.delta.kernel.types.StructField; import io.delta.kernel.types.StructType; diff --git a/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/TestTableSchemaSerDe.java b/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/TestTableSchemaSerDe.java index 633b53d1caa..5001aaab586 100644 --- a/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/TestTableSchemaSerDe.java +++ b/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/TestTableSchemaSerDe.java @@ -63,14 +63,14 @@ public void primitiveTypeRoundTrip() { public void complexTypesRoundTrip() { List fieldList = new ArrayList<>(); - ArrayType arrayType = array(IntegerType.INSTANCE, true); + ArrayType arrayType = array(IntegerType.INTEGER, true); ArrayType arrayArrayType = array(arrayType, false); - MapType mapType = map(FloatType.INSTANCE, BinaryType.INSTANCE, false); - MapType mapMapType = map(mapType, BinaryType.INSTANCE, true); + MapType mapType = map(FloatType.FLOAT, BinaryType.BINARY, false); + MapType mapMapType = map(mapType, BinaryType.BINARY, true); StructType structType = new StructType() - .add("simple", DateType.INSTANCE); + .add("simple", DateType.DATE); StructType structAllType = new StructType() - .add("prim", BooleanType.INSTANCE) + .add("prim", BooleanType.BOOLEAN) .add("arr", arrayType) .add("map", mapType) .add("struct", structType); diff --git a/kernel/kernel-api/src/test/scala/io/delta/kernel/expressions/ExpressionsSuite.scala b/kernel/kernel-api/src/test/scala/io/delta/kernel/expressions/ExpressionsSuite.scala index 4e13301ee9e..b38406e1b1b 100644 --- a/kernel/kernel-api/src/test/scala/io/delta/kernel/expressions/ExpressionsSuite.scala +++ b/kernel/kernel-api/src/test/scala/io/delta/kernel/expressions/ExpressionsSuite.scala @@ -21,17 +21,17 @@ import org.scalatest.funsuite.AnyFunSuite class ExpressionsSuite extends AnyFunSuite { test("expressions: unsupported literal data types") { val ex1 = intercept[IllegalArgumentException] { - Literal.ofNull(new ArrayType(IntegerType.INSTANCE, true)) + Literal.ofNull(new ArrayType(IntegerType.INTEGER, true)) } assert(ex1.getMessage.contains("array[integer] is an invalid data type for Literal.")) val ex2 = intercept[IllegalArgumentException] { - Literal.ofNull(new MapType(IntegerType.INSTANCE, IntegerType.INSTANCE, true)) + Literal.ofNull(new MapType(IntegerType.INTEGER, IntegerType.INTEGER, true)) } assert(ex2.getMessage.contains("map[integer, integer] is an invalid data type for Literal.")) val ex3 = intercept[IllegalArgumentException] { - Literal.ofNull(new StructType().add("s1", BooleanType.INSTANCE)) + Literal.ofNull(new StructType().add("s1", BooleanType.BOOLEAN)) } assert(ex3.getMessage.matches("struct.* is an invalid data type for Literal.")) } diff --git a/kernel/kernel-api/src/test/scala/io/delta/kernel/internal/util/PartitionUtilsSuite.scala b/kernel/kernel-api/src/test/scala/io/delta/kernel/internal/util/PartitionUtilsSuite.scala index a5c84fb3f0b..6e71a1e77a1 100644 --- a/kernel/kernel-api/src/test/scala/io/delta/kernel/internal/util/PartitionUtilsSuite.scala +++ b/kernel/kernel-api/src/test/scala/io/delta/kernel/internal/util/PartitionUtilsSuite.scala @@ -31,9 +31,9 @@ class PartitionUtilsSuite extends AnyFunSuite { // Partition columns: part1: int, part2: date, part3: string private val partitionColsToType = new util.HashMap[String, DataType]() { { - put("part1", IntegerType.INSTANCE) - put("part2", DateType.INSTANCE) - put("part3", StringType.INSTANCE) + put("part1", IntegerType.INTEGER) + put("part2", DateType.DATE) + put("part3", StringType.STRING) } } diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/client/DefaultFileHandler.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/client/DefaultFileHandler.java index 1b497f09ebb..58b4a72e263 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/client/DefaultFileHandler.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/client/DefaultFileHandler.java @@ -32,7 +32,7 @@ public CloseableIterator contextualizeFileReads( CloseableIterator fileIter, Predicate filter) { requireNonNull(fileIter, "fileIter is null"); requireNonNull(filter, "filter is null"); - // TODO: we are not using the filter now, will be used later. + // Note: we are not currently using the filter but it may be used later return fileIter.map(scanFileRow -> new DefaultFileReadContext(scanFileRow)); } } diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/DefaultJsonRow.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/DefaultJsonRow.java index 02544eddbc2..ffedc150339 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/DefaultJsonRow.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/DefaultJsonRow.java @@ -27,6 +27,7 @@ import io.delta.kernel.data.MapValue; import io.delta.kernel.data.Row; import io.delta.kernel.types.*; +import io.delta.kernel.internal.types.MixedDataType; import io.delta.kernel.defaults.internal.data.vector.DefaultGenericVector; diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultBooleanVector.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultBooleanVector.java index 64481c2ff67..a6c7c0a4018 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultBooleanVector.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultBooleanVector.java @@ -38,7 +38,7 @@ public class DefaultBooleanVector * @param values column vector values. */ public DefaultBooleanVector(int size, Optional nullability, boolean[] values) { - super(size, BooleanType.INSTANCE, nullability); + super(size, BooleanType.BOOLEAN, nullability); this.values = requireNonNull(values, "values is null"); checkArgument(values.length >= size, "invalid number of values (%s) for given size (%s)", values.length, size); diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultByteVector.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultByteVector.java index 29b9fac2c63..a3fd44b5626 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultByteVector.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultByteVector.java @@ -38,7 +38,7 @@ public class DefaultByteVector * @param values column vector values. */ public DefaultByteVector(int size, Optional nullability, byte[] values) { - super(size, ByteType.INSTANCE, nullability); + super(size, ByteType.BYTE, nullability); this.values = requireNonNull(values, "values is null"); checkArgument(values.length >= size, "invalid number of values (%s) for given size (%s)", values.length, size); diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultDoubleVector.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultDoubleVector.java index fa1c825d3f6..0da26936a83 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultDoubleVector.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultDoubleVector.java @@ -38,7 +38,7 @@ public class DefaultDoubleVector * @param values column vector values. */ public DefaultDoubleVector(int size, Optional nullability, double[] values) { - super(size, DoubleType.INSTANCE, nullability); + super(size, DoubleType.DOUBLE, nullability); this.values = requireNonNull(values, "values is null"); checkArgument(values.length >= size, "invalid number of values (%s) for given size (%s)", values.length, size); diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultFloatVector.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultFloatVector.java index 31520845136..b0ad6d42b52 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultFloatVector.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultFloatVector.java @@ -38,7 +38,7 @@ public class DefaultFloatVector * @param values column vector values. */ public DefaultFloatVector(int size, Optional nullability, float[] values) { - super(size, FloatType.INSTANCE, nullability); + super(size, FloatType.FLOAT, nullability); this.values = requireNonNull(values, "values is null"); checkArgument(values.length >= size, "invalid number of values (%s) for given size (%s)", values.length, size); diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultShortVector.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultShortVector.java index 9c57bfe8887..c4cfd7dc03e 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultShortVector.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/data/vector/DefaultShortVector.java @@ -38,7 +38,7 @@ public class DefaultShortVector * @param values column vector values. */ public DefaultShortVector(int size, Optional nullability, short[] values) { - super(size, ShortType.INSTANCE, nullability); + super(size, ShortType.SHORT, nullability); this.values = requireNonNull(values, "values is null"); checkArgument(values.length >= size, "invalid number of values (%s) for given size (%s)", values.length, size); diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluator.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluator.java index c7b57dec889..712f2a20e5c 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluator.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluator.java @@ -107,26 +107,26 @@ private static class ExpressionTransformer ExpressionTransformResult visitAnd(And and) { Predicate left = validateIsPredicate(and, visit(and.getLeft())); Predicate right = validateIsPredicate(and, visit(and.getRight())); - return new ExpressionTransformResult(new And(left, right), BooleanType.INSTANCE); + return new ExpressionTransformResult(new And(left, right), BooleanType.BOOLEAN); } @Override ExpressionTransformResult visitOr(Or or) { Predicate left = validateIsPredicate(or, visit(or.getLeft())); Predicate right = validateIsPredicate(or, visit(or.getRight())); - return new ExpressionTransformResult(new Or(left, right), BooleanType.INSTANCE); + return new ExpressionTransformResult(new Or(left, right), BooleanType.BOOLEAN); } @Override ExpressionTransformResult visitAlwaysTrue(AlwaysTrue alwaysTrue) { // nothing to validate or rewrite. - return new ExpressionTransformResult(alwaysTrue, BooleanType.INSTANCE); + return new ExpressionTransformResult(alwaysTrue, BooleanType.BOOLEAN); } @Override ExpressionTransformResult visitAlwaysFalse(AlwaysFalse alwaysFalse) { // nothing to validate or rewrite. - return new ExpressionTransformResult(alwaysFalse, BooleanType.INSTANCE); + return new ExpressionTransformResult(alwaysFalse, BooleanType.BOOLEAN); } @Override @@ -139,7 +139,7 @@ ExpressionTransformResult visitComparator(Predicate predicate) { case "<=": return new ExpressionTransformResult( transformBinaryComparator(predicate), - BooleanType.INSTANCE); + BooleanType.BOOLEAN); default: throw new UnsupportedOperationException( "unsupported expression encountered: " + predicate); @@ -281,12 +281,12 @@ ColumnVector visitOr(Or or) { @Override ColumnVector visitAlwaysTrue(AlwaysTrue alwaysTrue) { - return new DefaultConstantVector(BooleanType.INSTANCE, input.getSize(), true); + return new DefaultConstantVector(BooleanType.BOOLEAN, input.getSize(), true); } @Override ColumnVector visitAlwaysFalse(AlwaysFalse alwaysFalse) { - return new DefaultConstantVector(BooleanType.INSTANCE, input.getSize(), false); + return new DefaultConstantVector(BooleanType.BOOLEAN, input.getSize(), false); } @Override diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultPredicateEvaluator.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultPredicateEvaluator.java index 09ef79278aa..578036351b2 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultPredicateEvaluator.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultPredicateEvaluator.java @@ -39,7 +39,7 @@ public class DefaultPredicateEvaluator implements PredicateEvaluator { private static final String EXISTING_SEL_VECTOR_COL_NAME = "____existing_selection_vector_value____"; private static final StructField EXISTING_SEL_VECTOR_FIELD = - new StructField(EXISTING_SEL_VECTOR_COL_NAME, BooleanType.INSTANCE, false, emptyMap()); + new StructField(EXISTING_SEL_VECTOR_COL_NAME, BooleanType.BOOLEAN, false, emptyMap()); private final ExpressionEvaluator expressionEvaluator; @@ -54,7 +54,7 @@ public DefaultPredicateEvaluator(StructType inputSchema, Predicate predicate) { predicate); StructType rewrittenInputSchema = inputSchema.add(EXISTING_SEL_VECTOR_FIELD); this.expressionEvaluator = new DefaultExpressionEvaluator( - rewrittenInputSchema, rewrittenPredicate, BooleanType.INSTANCE); + rewrittenInputSchema, rewrittenPredicate, BooleanType.BOOLEAN); } @Override @@ -63,7 +63,7 @@ public ColumnVector eval( Optional existingSelectionVector) { try { ColumnVector newVector = existingSelectionVector.orElse( - new DefaultConstantVector(BooleanType.INSTANCE, inputData.getSize(), true)); + new DefaultConstantVector(BooleanType.BOOLEAN, inputData.getSize(), true)); ColumnarBatch withExistingSelVector = inputData.withNewColumn( inputData.getSchema().length(), EXISTING_SEL_VECTOR_FIELD, newVector); diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/ElementAtEvaluator.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/ElementAtEvaluator.java index 329a9c18f5c..5fd5b0972b5 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/ElementAtEvaluator.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/ElementAtEvaluator.java @@ -141,10 +141,10 @@ private static MapType validateSupportedMapType(Expression elementAt, DataType m mapInputType instanceof MapType, "expected a map type input as first argument: " + elementAt); MapType asMapType = (MapType) mapInputType; - // TODO: we may extend type support in future, but currently the need is just a lookup - // in map column of type `map(string -> string)`. - if (asMapType.getKeyType().equivalent(StringType.INSTANCE) && - asMapType.getValueType().equivalent(StringType.INSTANCE)) { + // For now we only need to support lookup in columns of type `map(string -> string)`. + // Additional type support may be added later + if (asMapType.getKeyType().equivalent(StringType.STRING) && + asMapType.getValueType().equivalent(StringType.STRING)) { return asMapType; } throw new UnsupportedOperationException( diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/PartitionValueEvaluator.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/PartitionValueEvaluator.java index 57fcf83130d..7bebe0ea550 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/PartitionValueEvaluator.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/PartitionValueEvaluator.java @@ -73,9 +73,9 @@ public short getShort(int rowId) { @Override public int getInt(int rowId) { - if (partitionType.equivalent(IntegerType.INSTANCE)) { + if (partitionType.equivalent(IntegerType.INTEGER)) { return Integer.parseInt(input.getString(rowId)); - } else if (partitionType.equivalent(DateType.INSTANCE)) { + } else if (partitionType.equivalent(DateType.DATE)) { return InternalUtils.daysSinceEpoch(Date.valueOf(input.getString(rowId))); } throw new UnsupportedOperationException("Invalid value request for data type"); @@ -83,7 +83,7 @@ public int getInt(int rowId) { @Override public long getLong(int rowId) { - if (partitionType.equivalent(LongType.INSTANCE)) { + if (partitionType.equivalent(LongType.LONG)) { return Long.parseLong(input.getString(rowId)); } // TODO: partition value of timestamp type are not yet supported diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/parquet/ParquetConverters.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/parquet/ParquetConverters.java index c2f7b66cd8d..ea3b0e9b54b 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/parquet/ParquetConverters.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/parquet/ParquetConverters.java @@ -468,7 +468,7 @@ public void resizeIfNeeded() { public static class FileRowIndexColumnConverter extends LongColumnConverter { FileRowIndexColumnConverter(int initialBatchSize) { - super(LongType.INSTANCE, initialBatchSize); + super(LongType.LONG, initialBatchSize); } @Override diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/parquet/TimestampConverters.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/parquet/TimestampConverters.java index e40cc3ab5a0..623e173d982 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/parquet/TimestampConverters.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/parquet/TimestampConverters.java @@ -53,7 +53,7 @@ public static Converter createTimestampConverter(int initialBatchSize, Type type if (timestamp.getUnit() == LogicalTypeAnnotation.TimeUnit.MICROS) { return new ParquetConverters.LongColumnConverter( - TimestampType.INSTANCE, initialBatchSize); + TimestampType.TIMESTAMP, initialBatchSize); } else if (timestamp.getUnit() == LogicalTypeAnnotation.TimeUnit.MILLIS) { return new TimestampMillisConverter(initialBatchSize); } else { @@ -71,7 +71,7 @@ public static Converter createTimestampConverter(int initialBatchSize, Type type public static class TimestampMillisConverter extends ParquetConverters.LongColumnConverter { TimestampMillisConverter( int initialBatchSize) { - super(TimestampType.INSTANCE, initialBatchSize); + super(TimestampType.TIMESTAMP, initialBatchSize); } @Override @@ -83,7 +83,7 @@ public void addLong(long value) { public static class TimestampBinaryConverter extends ParquetConverters.LongColumnConverter { TimestampBinaryConverter( int initialBatchSize) { - super(TimestampType.INSTANCE, initialBatchSize); + super(TimestampType.TIMESTAMP, initialBatchSize); } private long binaryToSQLTimestamp(Binary binary) { diff --git a/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/client/TestDefaultJsonHandler.java b/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/client/TestDefaultJsonHandler.java index 03e1d8f53c3..81300a60f69 100644 --- a/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/client/TestDefaultJsonHandler.java +++ b/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/client/TestDefaultJsonHandler.java @@ -33,9 +33,8 @@ import io.delta.kernel.utils.CloseableIterator; import io.delta.kernel.utils.VectorUtils; import static io.delta.kernel.expressions.AlwaysTrue.ALWAYS_TRUE; -import static io.delta.kernel.utils.Utils.singletonColumnVector; - import io.delta.kernel.internal.InternalScanFileUtils; +import static io.delta.kernel.internal.util.InternalUtils.singletonStringColumnVector; import io.delta.kernel.defaults.utils.DefaultKernelTestUtils; @@ -71,9 +70,9 @@ public void readJsonFiles() JSON_HANDLER.readJsonFiles( JSON_HANDLER.contextualizeFileReads(testFiles(), ALWAYS_TRUE), new StructType() - .add("path", StringType.INSTANCE) - .add("size", LongType.INSTANCE) - .add("dataChange", BooleanType.INSTANCE))) { + .add("path", StringType.STRING) + .add("size", LongType.LONG) + .add("dataChange", BooleanType.BOOLEAN))) { List actPaths = new ArrayList<>(); List actSizes = new ArrayList<>(); @@ -121,14 +120,14 @@ public void parseJsonContent() " \"dataChange\":true" + " }"; StructType readSchema = new StructType() - .add("path", StringType.INSTANCE) + .add("path", StringType.STRING) .add("partitionValues", - new MapType(StringType.INSTANCE, StringType.INSTANCE, false)) - .add("size", LongType.INSTANCE) - .add("dataChange", BooleanType.INSTANCE); + new MapType(StringType.STRING, StringType.STRING, false)) + .add("size", LongType.LONG) + .add("dataChange", BooleanType.BOOLEAN); ColumnarBatch batch = - JSON_HANDLER.parseJson(singletonColumnVector(input), readSchema); + JSON_HANDLER.parseJson(singletonStringColumnVector(input), readSchema); assertEquals(1, batch.getSize()); try (CloseableIterator rows = batch.getRows()) { @@ -160,20 +159,20 @@ public void parseNestedComplexTypes() throws IOException { " \"nested_map\": {\"a\": {\"one\": [], \"two\": [1, 2, 3]}, \"b\": {}}\n" + "}"; StructType schema = new StructType() - .add("array", new ArrayType(IntegerType.INSTANCE, true)) - .add("nested_array", new ArrayType(new ArrayType(StringType.INSTANCE, true), true)) - .add("map", new MapType(StringType.INSTANCE, BooleanType.INSTANCE, true)) + .add("array", new ArrayType(IntegerType.INTEGER, true)) + .add("nested_array", new ArrayType(new ArrayType(StringType.STRING, true), true)) + .add("map", new MapType(StringType.STRING, BooleanType.BOOLEAN, true)) .add("nested_map", new MapType( - StringType.INSTANCE, + StringType.STRING, new MapType( - StringType.INSTANCE, - new ArrayType(IntegerType.INSTANCE, true), + StringType.STRING, + new ArrayType(IntegerType.INTEGER, true), true ), true )); - ColumnarBatch batch = JSON_HANDLER.parseJson(singletonColumnVector(json), schema); + ColumnarBatch batch = JSON_HANDLER.parseJson(singletonStringColumnVector(json), schema); try (CloseableIterator rows = batch.getRows()) { Row result = rows.next(); diff --git a/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/integration/BaseIntegration.java b/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/integration/BaseIntegration.java index 9585ec05cc8..b50eee008e2 100644 --- a/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/integration/BaseIntegration.java +++ b/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/integration/BaseIntegration.java @@ -134,12 +134,10 @@ protected void compareEqualUnorderd(ColumnarBatch expDataBatch, break; } } - // TODO: improve the logging info assertTrue("Actual data contain a row that is not expected", matched); } } - // TODO: improve the logging info assertEquals( "An expected row is not present in the actual data output", expDataBatch.getSize(), diff --git a/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/integration/DataBuilderUtils.java b/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/integration/DataBuilderUtils.java index b4a1d07c42e..f4c4930f355 100644 --- a/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/integration/DataBuilderUtils.java +++ b/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/integration/DataBuilderUtils.java @@ -54,8 +54,6 @@ private TestColumnBatchBuilder(StructType schema) { public TestColumnBatchBuilder addRow(Object... values) { checkArgument(values.length == schema.length(), "Invalid columns length"); - // TODO: we could improve this further to check the type of the object based on the - // column data type in the schema, but given this for test it should be fine. rows.add(row(schema, values)); return this; diff --git a/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/integration/TestDeltaTableReads.java b/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/integration/TestDeltaTableReads.java index 13f73113421..6ebac345f8a 100644 --- a/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/integration/TestDeltaTableReads.java +++ b/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/integration/TestDeltaTableReads.java @@ -132,10 +132,10 @@ public void partitionedTableWithColumnMapping() Snapshot snapshot = snapshot(tablePath); StructType readSchema = new StructType() // partition fields - .add("as_int", IntegerType.INSTANCE) - .add("as_double", DoubleType.INSTANCE) + .add("as_int", IntegerType.INTEGER) + .add("as_double", DoubleType.DOUBLE) // data fields - .add("value", StringType.INSTANCE); + .add("value", StringType.STRING); List actualData = readSnapshot(readSchema, snapshot); diff --git a/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/internal/parquet/TestParquetBatchReader.java b/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/internal/parquet/TestParquetBatchReader.java index 2875c69d365..c46a02665b5 100644 --- a/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/internal/parquet/TestParquetBatchReader.java +++ b/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/internal/parquet/TestParquetBatchReader.java @@ -55,36 +55,36 @@ public class TestParquetBatchReader { .get(); private static final StructType ALL_TYPES_FILE_SCHEMA = new StructType() - .add("byteType", ByteType.INSTANCE) - .add("shortType", ShortType.INSTANCE) - .add("integerType", IntegerType.INSTANCE) - .add("longType", LongType.INSTANCE) - .add("floatType", FloatType.INSTANCE) - .add("doubleType", DoubleType.INSTANCE) + .add("byteType", ByteType.BYTE) + .add("shortType", ShortType.SHORT) + .add("integerType", IntegerType.INTEGER) + .add("longType", LongType.LONG) + .add("floatType", FloatType.FLOAT) + .add("doubleType", DoubleType.DOUBLE) .add("decimal", new DecimalType(10, 2)) - .add("booleanType", BooleanType.INSTANCE) - .add("stringType", StringType.INSTANCE) - .add("binaryType", BinaryType.INSTANCE) - .add("dateType", DateType.INSTANCE) - .add("timestampType", TimestampType.INSTANCE) + .add("booleanType", BooleanType.BOOLEAN) + .add("stringType", StringType.STRING) + .add("binaryType", BinaryType.BINARY) + .add("dateType", DateType.DATE) + .add("timestampType", TimestampType.TIMESTAMP) .add("nested_struct", new StructType() - .add("aa", StringType.INSTANCE) - .add("ac", new StructType().add("aca", IntegerType.INSTANCE))) + .add("aa", StringType.STRING) + .add("ac", new StructType().add("aca", IntegerType.INTEGER))) .add("array_of_prims", - new ArrayType(IntegerType.INSTANCE, true)) + new ArrayType(IntegerType.INTEGER, true)) .add("array_of_arrays", - new ArrayType(new ArrayType(IntegerType.INSTANCE, true), true)) + new ArrayType(new ArrayType(IntegerType.INTEGER, true), true)) .add("array_of_structs", - new ArrayType(new StructType().add("ab", LongType.INSTANCE), true)) - .add("map_of_prims", new MapType(IntegerType.INSTANCE, LongType.INSTANCE, true)) + new ArrayType(new StructType().add("ab", LongType.LONG), true)) + .add("map_of_prims", new MapType(IntegerType.INTEGER, LongType.LONG, true)) .add("map_of_rows", new MapType( - IntegerType.INSTANCE, - new StructType().add("ab", LongType.INSTANCE), + IntegerType.INTEGER, + new StructType().add("ab", LongType.LONG), true)) .add("map_of_arrays", new MapType( - LongType.INSTANCE, - new ArrayType(IntegerType.INSTANCE, true), + LongType.LONG, + new ArrayType(IntegerType.INTEGER, true), true)); @Test @@ -97,16 +97,16 @@ public void readAllTypesOfData() public void readSubsetOfColumns() throws Exception { StructType readSchema = new StructType() - .add("byteType", ByteType.INSTANCE) - .add("booleanType", BooleanType.INSTANCE) - .add("stringType", StringType.INSTANCE) - .add("dateType", DateType.INSTANCE) + .add("byteType", ByteType.BYTE) + .add("booleanType", BooleanType.BOOLEAN) + .add("stringType", StringType.STRING) + .add("dateType", DateType.DATE) .add("nested_struct", new StructType() - .add("aa", StringType.INSTANCE) - .add("ac", new StructType().add("aca", IntegerType.INSTANCE))) + .add("aa", StringType.STRING) + .add("ac", new StructType().add("aca", IntegerType.INTEGER))) .add("array_of_prims", - new ArrayType(IntegerType.INSTANCE, true)); + new ArrayType(IntegerType.INTEGER, true)); readAndVerify(readSchema, 73 /* readBatchSize */); } @@ -115,16 +115,16 @@ public void readSubsetOfColumns() public void readSubsetOfColumnsWithMissingColumnsInFile() throws Exception { StructType readSchema = new StructType() - .add("booleanType", BooleanType.INSTANCE) - .add("integerType", IntegerType.INSTANCE) + .add("booleanType", BooleanType.BOOLEAN) + .add("integerType", IntegerType.INTEGER) .add("missing_column_struct", - new StructType().add("ab", IntegerType.INSTANCE)) - .add("longType", LongType.INSTANCE) - .add("missing_column_primitive", DateType.INSTANCE) + new StructType().add("ab", IntegerType.INTEGER)) + .add("longType", LongType.LONG) + .add("missing_column_primitive", DateType.DATE) .add("nested_struct", new StructType() - .add("aa", StringType.INSTANCE) - .add("ac", new StructType().add("aca", IntegerType.INSTANCE)) + .add("aa", StringType.STRING) + .add("ac", new StructType().add("aca", IntegerType.INTEGER)) ); readAndVerify(readSchema, 23 /* readBatchSize */); @@ -140,7 +140,7 @@ public void requestRowIndices() throws IOException { .collect(Collectors.toList()); StructType readSchema = new StructType() - .add("id", LongType.INSTANCE) + .add("id", LongType.LONG) .add(StructField.ROW_INDEX_COLUMN); Configuration conf = new Configuration(); @@ -345,11 +345,11 @@ private static void verifyRowFromAllTypesFile( assertTrue(vector.isNullAt(batchWithIdx._2)); assertNull(vector.getArray(batchWithIdx._2)); } else if (rowId % 29 == 0) { - checkArrayValue(vector.getArray(batchWithIdx._2), IntegerType.INSTANCE, + checkArrayValue(vector.getArray(batchWithIdx._2), IntegerType.INTEGER, Collections.emptyList()); } else { List expArray = Arrays.asList(rowId, null, rowId + 1); - checkArrayValue(vector.getArray(batchWithIdx._2), IntegerType.INSTANCE, + checkArrayValue(vector.getArray(batchWithIdx._2), IntegerType.INTEGER, expArray); } break; @@ -377,8 +377,8 @@ private static void verifyRowFromAllTypesFile( } else if (rowId % 30 == 0) { checkMapValue( vector.getMap(batchWithIdx._2), - IntegerType.INSTANCE, - LongType.INSTANCE, + IntegerType.INTEGER, + LongType.LONG, Collections.emptyMap() ); } else { @@ -391,8 +391,8 @@ private static void verifyRowFromAllTypesFile( }; checkMapValue( vector.getMap(batchWithIdx._2), - IntegerType.INSTANCE, - LongType.INSTANCE, + IntegerType.INTEGER, + LongType.LONG, expValue ); } @@ -503,7 +503,7 @@ private static void validateArrayOfArraysColumn( expArray = Collections.emptyList(); break; } - DataType expDataType = new ArrayType(IntegerType.INSTANCE, true); + DataType expDataType = new ArrayType(IntegerType.INTEGER, true); checkArrayValue(vector.getArray(batchRowId), expDataType, expArray); } @@ -539,8 +539,8 @@ private static void validateMapOfArraysColumn( } checkMapValue( vector.getMap(batchRowId), - LongType.INSTANCE, - new ArrayType(IntegerType.INSTANCE, true), + LongType.LONG, + new ArrayType(IntegerType.INTEGER, true), expMap ); } diff --git a/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/utils/DefaultKernelTestUtils.java b/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/utils/DefaultKernelTestUtils.java index 03ef0bd2371..91353a7aee7 100644 --- a/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/utils/DefaultKernelTestUtils.java +++ b/kernel/kernel-defaults/src/test/java/io/delta/kernel/defaults/utils/DefaultKernelTestUtils.java @@ -31,8 +31,6 @@ public static String getTestResourceFilePath(String resourcePath) { // This will no longer be needed once all tests have been moved to Scala public static Object getValueAsObject(Row row, int columnOrdinal) { - // TODO: may be it is better to just provide a `getObject` on the `Row` to - // avoid the nested if-else statements. final DataType dataType = row.getSchema().at(columnOrdinal).getDataType(); if (row.isNullAt(columnOrdinal)) { @@ -69,8 +67,6 @@ public static Object getValueAsObject(Row row, int columnOrdinal) { * depends on the data type of the {@code vector}. */ public static Object getValueAsObject(ColumnVector vector, int rowId) { - // TODO: may be it is better to just provide a `getObject` on the `ColumnVector` to - // avoid the nested if-else statements. final DataType dataType = vector.getDataType(); if (vector.isNullAt(rowId)) { diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/DeltaTableReadsSuite.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/DeltaTableReadsSuite.scala index 273627cfc04..bbe4ceb144e 100644 --- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/DeltaTableReadsSuite.scala +++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/DeltaTableReadsSuite.scala @@ -36,7 +36,7 @@ class DeltaTableReadsSuite extends AnyFunSuite with TestUtils { // Timestamp type tests ////////////////////////////////////////////////////////////////////////////////// - // TODO: for now we do not support timestamp partition columns, make sure it's blocked + // For now we do not support timestamp partition columns, make sure it's blocked test("cannot read partition column of timestamp type") { val path = goldenTablePath("kernel-timestamp-TIMESTAMP_MICROS") val snapshot = latestSnapshot(path) diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/LogReplaySuite.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/LogReplaySuite.scala index 513c5dfe3da..3e11d71810a 100644 --- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/LogReplaySuite.scala +++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/LogReplaySuite.scala @@ -75,7 +75,7 @@ class LogReplaySuite extends AnyFunSuite { val schema = snapshot.getSchema(client) val version = snapshot.getVersion(client) - assert(schema === new StructType().add("id", LongType.INSTANCE)) + assert(schema === new StructType().add("id", LongType.LONG)) assert(version === 13) var output = new scala.collection.mutable.ArrayBuffer[Long]() diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/ParquetBatchReaderSuite.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/ParquetBatchReaderSuite.scala index 44312ef0f8f..516f0bfaa4f 100644 --- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/ParquetBatchReaderSuite.scala +++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/ParquetBatchReaderSuite.scala @@ -50,7 +50,7 @@ class ParquetBatchReaderSuite extends AnyFunSuite with TestUtils { } val readSchema = new StructType() - .add("id", IntegerType.INSTANCE) + .add("id", IntegerType.INTEGER) .add("col1", new DecimalType(9, 0)) // INT32: 1 <= precision <= 9 .add("col2", new DecimalType(12, 0)) // INT64: 10 <= precision <= 18 .add("col3", new DecimalType(25, 0)) // FIXED_LEN_BYTE_ARRAY @@ -93,7 +93,7 @@ class ParquetBatchReaderSuite extends AnyFunSuite with TestUtils { } val readSchema = new StructType() - .add("id", IntegerType.INSTANCE) + .add("id", IntegerType.INTEGER) .add("col1", new DecimalType(5, 1)) // INT32: 1 <= precision <= 9 .add("col2", new DecimalType(10, 5)) // INT64: 10 <= precision <= 18 .add("col3", new DecimalType(20, 5)) // FIXED_LEN_BYTE_ARRAY diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/PartitionPruningSuite.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/PartitionPruningSuite.scala index a69ddb55dc7..dcb0abccada 100644 --- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/PartitionPruningSuite.scala +++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/PartitionPruningSuite.scala @@ -35,18 +35,18 @@ class PartitionPruningSuite extends AnyFunSuite with TestUtils { // Test case to verify pruning on each partition column type works. // format: partition column reference -> (nonNullPartitionValues, nullPartitionValue) val testCasesAllTypes = Map( - col("as_boolean") -> (ofBoolean(false), ofNull(BooleanType.INSTANCE)), - col("as_byte") -> (ofByte(1), ofNull(ByteType.INSTANCE)), - col("as_short") -> (ofShort(1), ofNull(ShortType.INSTANCE)), - col("as_int") -> (ofInt(1), ofNull(IntegerType.INSTANCE)), - col("as_long") -> (ofLong(1), ofNull(LongType.INSTANCE)), - col("as_float") -> (ofFloat(1), ofNull(FloatType.INSTANCE)), - col("as_double") -> (ofDouble(1), ofNull(DoubleType.INSTANCE)), + col("as_boolean") -> (ofBoolean(false), ofNull(BooleanType.BOOLEAN)), + col("as_byte") -> (ofByte(1), ofNull(ByteType.BYTE)), + col("as_short") -> (ofShort(1), ofNull(ShortType.SHORT)), + col("as_int") -> (ofInt(1), ofNull(IntegerType.INTEGER)), + col("as_long") -> (ofLong(1), ofNull(LongType.LONG)), + col("as_float") -> (ofFloat(1), ofNull(FloatType.FLOAT)), + col("as_double") -> (ofDouble(1), ofNull(DoubleType.DOUBLE)), // 2021-09-08 in days since epoch 18878 - col("as_date") -> (ofDate(18878 /* daysSinceEpochUTC */), ofNull(DateType.INSTANCE)), - col("as_string") -> (ofString("1"), ofNull(StringType.INSTANCE)), + col("as_date") -> (ofDate(18878 /* daysSinceEpochUTC */), ofNull(DateType.DATE)), + col("as_string") -> (ofString("1"), ofNull(StringType.STRING)), // TODO: timestamp partition column is not yet supported - // col("as_timestamp") -> (ofTimestamp(1), ofNull(TimestampType.INSTANCE)), + // col("as_timestamp") -> (ofTimestamp(1), ofNull(TimestampType.TIMESTAMP)), col("as_big_decimal") -> ( ofDecimal(new BigDecimalJ(1), 1, 0), ofNull(new DecimalType(1, 0)))) diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/client/DefaultExpressionHandlerSuite.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/client/DefaultExpressionHandlerSuite.scala index bdada55734b..3997ba5d420 100644 --- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/client/DefaultExpressionHandlerSuite.scala +++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/client/DefaultExpressionHandlerSuite.scala @@ -23,7 +23,7 @@ class DefaultExpressionHandlerSuite extends AnyFunSuite { test("create selection vector: single value") { Seq(true, false).foreach { testValue => val outputVector = selectionVector(Seq(testValue).toArray, 0, 1) - assert(outputVector.getDataType === BooleanType.INSTANCE) + assert(outputVector.getDataType === BooleanType.BOOLEAN) assert(outputVector.getSize == 1) assert(outputVector.isNullAt(0) == false) assert(outputVector.getBoolean(0) == testValue) @@ -34,7 +34,7 @@ class DefaultExpressionHandlerSuite extends AnyFunSuite { Seq((0, testValues.length), (0, 3), (2, 2), (2, 4), (3, testValues.length)).foreach { pair => val (from, to) = (pair._1, pair._2) val outputVector = selectionVector(testValues, from, to) - assert(outputVector.getDataType === BooleanType.INSTANCE) + assert(outputVector.getDataType === BooleanType.BOOLEAN) assert(outputVector.getSize == (to - from)) Seq.range(from, to).foreach { rowId => assert(outputVector.isNullAt(rowId - from) == false) diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluatorSuite.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluatorSuite.scala index 3fd8ca03d6a..d3cd55827cb 100644 --- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluatorSuite.scala +++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluatorSuite.scala @@ -39,27 +39,27 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa val testLiterals = Seq( Literal.ofBoolean(true), Literal.ofBoolean(false), - Literal.ofNull(BooleanType.INSTANCE), + Literal.ofNull(BooleanType.BOOLEAN), ofByte(24.toByte), - Literal.ofNull(ByteType.INSTANCE), + Literal.ofNull(ByteType.BYTE), Literal.ofShort(876.toShort), - Literal.ofNull(ShortType.INSTANCE), + Literal.ofNull(ShortType.SHORT), Literal.ofInt(2342342), - Literal.ofNull(IntegerType.INSTANCE), + Literal.ofNull(IntegerType.INTEGER), Literal.ofLong(234234223L), - Literal.ofNull(LongType.INSTANCE), + Literal.ofNull(LongType.LONG), Literal.ofFloat(23423.4223f), - Literal.ofNull(FloatType.INSTANCE), + Literal.ofNull(FloatType.FLOAT), Literal.ofDouble(23423.422233d), - Literal.ofNull(DoubleType.INSTANCE), + Literal.ofNull(DoubleType.DOUBLE), Literal.ofString("string_val"), - Literal.ofNull(StringType.INSTANCE), + Literal.ofNull(StringType.STRING), Literal.ofBinary("binary_val".getBytes), - Literal.ofNull(BinaryType.INSTANCE), + Literal.ofNull(BinaryType.BINARY), Literal.ofDate(4234), - Literal.ofNull(DateType.INSTANCE), + Literal.ofNull(DateType.DATE), Literal.ofTimestamp(2342342342232L), - Literal.ofNull(TimestampType.INSTANCE)) + Literal.ofNull(TimestampType.TIMESTAMP)) val inputBatches: Seq[ColumnarBatch] = Seq[ColumnarBatch]( zeroColumnBatch(rowCount = 0), @@ -120,7 +120,7 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa } test("evaluate expression: nested column reference") { - val col3Type = IntegerType.INSTANCE + val col3Type = IntegerType.INTEGER val col2Type = new StructType().add("col3", col3Type) val col1Type = new StructType().add("col2", col2Type) val batchSchema = new StructType().add("col1", col1Type) @@ -176,9 +176,9 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa test("evaluate expression: always true, always false") { Seq(ALWAYS_TRUE, ALWAYS_FALSE).foreach { expr => val batch = zeroColumnBatch(rowCount = 87) - val outputVector = evaluator(batch.getSchema, expr, BooleanType.INSTANCE).eval(batch) + val outputVector = evaluator(batch.getSchema, expr, BooleanType.BOOLEAN).eval(batch) assert(outputVector.getSize === 87) - assert(outputVector.getDataType === BooleanType.INSTANCE) + assert(outputVector.getDataType === BooleanType.BOOLEAN) Seq.range(0, 87).foreach { rowId => assert(!outputVector.isNullAt(rowId)) assert(outputVector.getBoolean(rowId) == (expr == ALWAYS_TRUE)) @@ -197,8 +197,8 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa Seq[BooleanJ](true, true, false, true, null, null, null, null, null)) val schema = new StructType() - .add("left", BooleanType.INSTANCE) - .add("right", BooleanType.INSTANCE) + .add("left", BooleanType.BOOLEAN) + .add("right", BooleanType.BOOLEAN) val batch = new DefaultColumnarBatch(leftColumn.getSize, schema, Array(leftColumn, rightColumn)) val left = comparator("=", new Column("left"), Literal.ofBoolean(true)) @@ -206,12 +206,12 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa // And val andExpression = and(left, right) - val actAndOutputVector = evaluator(schema, andExpression, BooleanType.INSTANCE).eval(batch) + val actAndOutputVector = evaluator(schema, andExpression, BooleanType.BOOLEAN).eval(batch) checkBooleanVectors(actAndOutputVector, expAndOutputVector) // Or val orExpression = or(left, right) - val actOrOutputVector = evaluator(schema, orExpression, BooleanType.INSTANCE).eval(batch) + val actOrOutputVector = evaluator(schema, orExpression, BooleanType.BOOLEAN).eval(batch) checkBooleanVectors(actOrOutputVector, expOrOutputVector) } @@ -219,26 +219,26 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa // Literals for each data type from the data type value range, used as inputs to comparator // (small, big, small, null) val literals = Seq( - (ofByte(1.toByte), ofByte(2.toByte), ofByte(1.toByte), ofNull(ByteType.INSTANCE)), - (ofShort(1.toShort), ofShort(2.toShort), ofShort(1.toShort), ofNull(ShortType.INSTANCE)), - (ofInt(1), ofInt(2), ofInt(1), ofNull(IntegerType.INSTANCE)), - (ofLong(1L), ofLong(2L), ofLong(1L), ofNull(LongType.INSTANCE)), - (ofFloat(1.0F), ofFloat(2.0F), ofFloat(1.0F), ofNull(FloatType.INSTANCE)), - (ofDouble(1.0), ofDouble(2.0), ofDouble(1.0), ofNull(DoubleType.INSTANCE)), - (ofBoolean(false), ofBoolean(true), ofBoolean(false), ofNull(BooleanType.INSTANCE)), + (ofByte(1.toByte), ofByte(2.toByte), ofByte(1.toByte), ofNull(ByteType.BYTE)), + (ofShort(1.toShort), ofShort(2.toShort), ofShort(1.toShort), ofNull(ShortType.SHORT)), + (ofInt(1), ofInt(2), ofInt(1), ofNull(IntegerType.INTEGER)), + (ofLong(1L), ofLong(2L), ofLong(1L), ofNull(LongType.LONG)), + (ofFloat(1.0F), ofFloat(2.0F), ofFloat(1.0F), ofNull(FloatType.FLOAT)), + (ofDouble(1.0), ofDouble(2.0), ofDouble(1.0), ofNull(DoubleType.DOUBLE)), + (ofBoolean(false), ofBoolean(true), ofBoolean(false), ofNull(BooleanType.BOOLEAN)), ( ofTimestamp(343L), ofTimestamp(123212312L), ofTimestamp(343L), - ofNull(TimestampType.INSTANCE) + ofNull(TimestampType.TIMESTAMP) ), - (ofDate(-12123), ofDate(123123), ofDate(-12123), ofNull(DateType.INSTANCE)), - (ofString("apples"), ofString("oranges"), ofString("apples"), ofNull(StringType.INSTANCE)), + (ofDate(-12123), ofDate(123123), ofDate(-12123), ofNull(DateType.DATE)), + (ofString("apples"), ofString("oranges"), ofString("apples"), ofNull(StringType.STRING)), ( ofBinary("apples".getBytes()), ofBinary("oranges".getBytes()), ofBinary("apples".getBytes()), - ofNull(BinaryType.INSTANCE) + ofNull(BinaryType.BINARY) ), ( ofDecimal(BigDecimalJ.valueOf(1.12), 7, 3), @@ -285,7 +285,7 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa ofInt(-234), ofLong(223L), ofFloat(-2423423.9f), - ofNull(DoubleType.INSTANCE) + ofNull(DoubleType.DOUBLE) ) test("evaluate expression: comparators `byte` with other implicit types") { @@ -405,7 +405,7 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa ) val testMapVector = buildMapVector( testMapValues, - new MapType(StringType.INSTANCE, StringType.INSTANCE, true)) + new MapType(StringType.STRING, StringType.STRING, true)) val inputBatch = new DefaultColumnarBatch( testMapVector.getSize, @@ -419,7 +419,7 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa }) val lookupKeyExpr = if (lookupKey == null) { - Literal.ofNull(StringType.INSTANCE) + Literal.ofNull(StringType.STRING) } else { Literal.ofString(lookupKey) } @@ -427,10 +427,10 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa "element_at", util.Arrays.asList(new Column("partitionValues"), lookupKeyExpr)) - val outputVector = evaluator(inputBatch.getSchema, elementAtExpr, StringType.INSTANCE) + val outputVector = evaluator(inputBatch.getSchema, elementAtExpr, StringType.STRING) .eval(inputBatch) assert(outputVector.getSize === testMapValues.size) - assert(outputVector.getDataType === StringType.INSTANCE) + assert(outputVector.getDataType === StringType.STRING) Seq.range(0, testMapValues.size).foreach { rowId => val expNull = expOutput(rowId) == null assert(outputVector.isNullAt(rowId) == expNull) @@ -443,13 +443,13 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa test("evaluate expression: element_at - unsupported map type input") { val inputSchema = new StructType() - .add("as_map", new MapType(IntegerType.INSTANCE, BooleanType.INSTANCE, true)) + .add("as_map", new MapType(IntegerType.INTEGER, BooleanType.BOOLEAN, true)) val elementAtExpr = new ScalarExpression( "element_at", util.Arrays.asList(new Column("as_map"), Literal.ofString("empty"))) val ex = intercept[UnsupportedOperationException] { - evaluator(inputSchema, elementAtExpr, StringType.INSTANCE) + evaluator(inputSchema, elementAtExpr, StringType.STRING) } assert(ex.getMessage.contains( "ELEMENT_AT(column(`as_map`), empty): Supported only on type map(string, string) input data")) @@ -457,13 +457,13 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa test("evaluate expression: element_at - unsupported lookup type input") { val inputSchema = new StructType() - .add("as_map", new MapType(StringType.INSTANCE, StringType.INSTANCE, true)) + .add("as_map", new MapType(StringType.STRING, StringType.STRING, true)) val elementAtExpr = new ScalarExpression( "element_at", util.Arrays.asList(new Column("as_map"), Literal.ofShort(24))) val ex = intercept[UnsupportedOperationException] { - evaluator(inputSchema, elementAtExpr, StringType.INSTANCE) + evaluator(inputSchema, elementAtExpr, StringType.STRING) } assert(ex.getMessage.contains("ELEMENT_AT(column(`as_map`), 24): " + "lookup key type (short) is different from the map key type (string)")) @@ -472,31 +472,31 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa test("evaluate expression: partition_value") { // (serialized partition value, partition col type, expected deserialized partition value) val testCases = Seq( - ("true", BooleanType.INSTANCE, true), - ("false", BooleanType.INSTANCE, false), - (null, BooleanType.INSTANCE, null), - ("24", ByteType.INSTANCE, 24.toByte), - ("null", ByteType.INSTANCE, null), - ("876", ShortType.INSTANCE, 876.toShort), - ("null", ShortType.INSTANCE, null), - ("2342342", IntegerType.INSTANCE, 2342342), - ("null", IntegerType.INSTANCE, null), - ("234234223", LongType.INSTANCE, 234234223L), - ("null", LongType.INSTANCE, null), - ("23423.4223", FloatType.INSTANCE, 23423.4223f), - ("null", FloatType.INSTANCE, null), - ("23423.422233", DoubleType.INSTANCE, 23423.422233d), - ("null", DoubleType.INSTANCE, null), + ("true", BooleanType.BOOLEAN, true), + ("false", BooleanType.BOOLEAN, false), + (null, BooleanType.BOOLEAN, null), + ("24", ByteType.BYTE, 24.toByte), + ("null", ByteType.BYTE, null), + ("876", ShortType.SHORT, 876.toShort), + ("null", ShortType.SHORT, null), + ("2342342", IntegerType.INTEGER, 2342342), + ("null", IntegerType.INTEGER, null), + ("234234223", LongType.LONG, 234234223L), + ("null", LongType.LONG, null), + ("23423.4223", FloatType.FLOAT, 23423.4223f), + ("null", FloatType.FLOAT, null), + ("23423.422233", DoubleType.DOUBLE, 23423.422233d), + ("null", DoubleType.DOUBLE, null), ("234.422233", new DecimalType(10, 6), new BigDecimalJ("234.422233")), - ("null", DoubleType.INSTANCE, null), - ("string_val", StringType.INSTANCE, "string_val"), - ("null", StringType.INSTANCE, null), - ("binary_val", BinaryType.INSTANCE, "binary_val".getBytes()), - ("null", BinaryType.INSTANCE, null), - ("2021-11-18", DateType.INSTANCE, InternalUtils.daysSinceEpoch(Date.valueOf("2021-11-18"))), - ("null", DateType.INSTANCE, null), - ("2021-11-18", DateType.INSTANCE, InternalUtils.daysSinceEpoch(Date.valueOf("2021-11-18"))), - ("null", DateType.INSTANCE, null) + ("null", DoubleType.DOUBLE, null), + ("string_val", StringType.STRING, "string_val"), + ("null", StringType.STRING, null), + ("binary_val", BinaryType.BINARY, "binary_val".getBytes()), + ("null", BinaryType.BINARY, null), + ("2021-11-18", DateType.DATE, InternalUtils.daysSinceEpoch(Date.valueOf("2021-11-18"))), + ("null", DateType.DATE, null), + ("2021-11-18", DateType.DATE, InternalUtils.daysSinceEpoch(Date.valueOf("2021-11-18"))), + ("null", DateType.DATE, null) // TODO: timestamp partition value types are not yet supported in reading ) @@ -504,7 +504,7 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa testCases.foreach { testCase => val (serializedPartVal, partType, deserializedPartVal) = testCase val literalSerializedPartVal = if (serializedPartVal == "null") { - Literal.ofNull(StringType.INSTANCE) + Literal.ofNull(StringType.STRING) } else { Literal.ofString(serializedPartVal) } @@ -521,7 +521,7 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa test("evaluate expression: partition_value - invalid serialize value") { val inputBatch = zeroColumnBatch(rowCount = 1) - val (serializedPartVal, partType) = ("23423sdfsdf", IntegerType.INSTANCE) + val (serializedPartVal, partType) = ("23423sdfsdf", IntegerType.INTEGER) val expr = new PartitionValueExpression(Literal.ofString(serializedPartVal), partType) val ex = intercept[IllegalArgumentException] { val outputVector = evaluator(inputBatch.getSchema, expr, partType).eval(inputBatch) @@ -577,17 +577,17 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa /** Utility method to generate a consistent `isNull` value for given column type and row id */ private def testIsNullValue(dataType: DataType, rowId: Int): Boolean = { dataType match { - case BooleanType.INSTANCE => rowId % 4 == 0 - case ByteType.INSTANCE => rowId % 8 == 0 - case ShortType.INSTANCE => rowId % 12 == 0 - case IntegerType.INSTANCE => rowId % 20 == 0 - case LongType.INSTANCE => rowId % 25 == 0 - case FloatType.INSTANCE => rowId % 5 == 0 - case DoubleType.INSTANCE => rowId % 10 == 0 - case StringType.INSTANCE => rowId % 2 == 0 - case BinaryType.INSTANCE => rowId % 3 == 0 - case DateType.INSTANCE => rowId % 5 == 0 - case TimestampType.INSTANCE => rowId % 3 == 0 + case BooleanType.BOOLEAN => rowId % 4 == 0 + case ByteType.BYTE => rowId % 8 == 0 + case ShortType.SHORT => rowId % 12 == 0 + case IntegerType.INTEGER => rowId % 20 == 0 + case LongType.LONG => rowId % 25 == 0 + case FloatType.FLOAT => rowId % 5 == 0 + case DoubleType.DOUBLE => rowId % 10 == 0 + case StringType.STRING => rowId % 2 == 0 + case BinaryType.BINARY => rowId % 3 == 0 + case DateType.DATE => rowId % 5 == 0 + case TimestampType.TIMESTAMP => rowId % 3 == 0 case _ => if (dataType.isInstanceOf[DecimalType]) rowId % 6 == 0 else throw new UnsupportedOperationException(s"$dataType is not supported") @@ -597,17 +597,17 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa /** Utility method to generate a consistent column value for given column type and row id */ private def testColumnValue(dataType: DataType, rowId: Int): Any = { dataType match { - case BooleanType.INSTANCE => rowId % 7 == 0 - case ByteType.INSTANCE => (rowId * 7 / 17).toByte - case ShortType.INSTANCE => (rowId * 9 / 87).toShort - case IntegerType.INSTANCE => rowId * 2876 / 176 - case LongType.INSTANCE => rowId * 287623L / 91 - case FloatType.INSTANCE => rowId * 7651.2323f / 91 - case DoubleType.INSTANCE => rowId * 23423.23d / 17 - case StringType.INSTANCE => (rowId % 19).toString - case BinaryType.INSTANCE => Array[Byte]((rowId % 21).toByte, (rowId % 7 - 1).toByte) - case DateType.INSTANCE => (rowId * 28234) % 2876 - case TimestampType.INSTANCE => (rowId * 2342342L) % 23 + case BooleanType.BOOLEAN => rowId % 7 == 0 + case ByteType.BYTE => (rowId * 7 / 17).toByte + case ShortType.SHORT => (rowId * 9 / 87).toShort + case IntegerType.INTEGER => rowId * 2876 / 176 + case LongType.LONG => rowId * 287623L / 91 + case FloatType.FLOAT => rowId * 7651.2323f / 91 + case DoubleType.DOUBLE => rowId * 23423.23d / 17 + case StringType.STRING => (rowId % 19).toString + case BinaryType.BINARY => Array[Byte]((rowId % 21).toByte, (rowId % 7 - 1).toByte) + case DateType.DATE => (rowId * 28234) % 2876 + case TimestampType.TIMESTAMP => (rowId * 2342342L) % 23 case _ => if (dataType.isInstanceOf[DecimalType]) new BigDecimalJ(rowId * 22342.23) else throw new UnsupportedOperationException(s"$dataType is not supported") @@ -623,10 +623,10 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa comparator: String, left: Expression, right: Expression, expResult: BooleanJ): Unit = { val expression = new Predicate(comparator, util.Arrays.asList(left, right)) val batch = zeroColumnBatch(rowCount = 1) - val outputVector = evaluator(batch.getSchema, expression, BooleanType.INSTANCE).eval(batch) + val outputVector = evaluator(batch.getSchema, expression, BooleanType.BOOLEAN).eval(batch) assert(outputVector.getSize === 1) - assert(outputVector.getDataType === BooleanType.INSTANCE) + assert(outputVector.getDataType === BooleanType.BOOLEAN) assert( outputVector.isNullAt(0) === (expResult == null), s"Unexpected null value: $comparator($left, $right)") diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultPredicateEvaluatorSuite.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultPredicateEvaluatorSuite.scala index f9c14dde045..ed57be19bb1 100644 --- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultPredicateEvaluatorSuite.scala +++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultPredicateEvaluatorSuite.scala @@ -37,8 +37,8 @@ class DefaultPredicateEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBas Seq[BooleanJ](true, false, false, true, false, false, true, null, false, null, null)) private val testSchema = new StructType() - .add("left", BooleanType.INSTANCE) - .add("right", BooleanType.INSTANCE) + .add("left", BooleanType.BOOLEAN) + .add("right", BooleanType.BOOLEAN) private val batch = new DefaultColumnarBatch( testLeftCol.getSize, testSchema, Array(testLeftCol, testRightCol)) diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/ExpressionSuiteBase.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/ExpressionSuiteBase.scala index ddf3b5ce4e9..b4ff9d5c6c9 100644 --- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/ExpressionSuiteBase.scala +++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/ExpressionSuiteBase.scala @@ -58,7 +58,7 @@ trait ExpressionSuiteBase extends TestUtils { protected def booleanVector(values: Seq[BooleanJ]): ColumnVector = { new ColumnVector { - override def getDataType: DataType = BooleanType.INSTANCE + override def getDataType: DataType = BooleanType.BOOLEAN override def getSize: Int = values.length diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/ImplicitCastExpressionSuite.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/ImplicitCastExpressionSuite.scala index 1d9bb9b6e3c..4e27ec3a79e 100644 --- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/ImplicitCastExpressionSuite.scala +++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/ImplicitCastExpressionSuite.scala @@ -26,24 +26,24 @@ import io.delta.kernel.types._ class ImplicitCastExpressionSuite extends AnyFunSuite with TestUtils { private val allowedCasts: Set[(DataType, DataType)] = Set( - (ByteType.INSTANCE, ShortType.INSTANCE), - (ByteType.INSTANCE, IntegerType.INSTANCE), - (ByteType.INSTANCE, LongType.INSTANCE), - (ByteType.INSTANCE, FloatType.INSTANCE), - (ByteType.INSTANCE, DoubleType.INSTANCE), + (ByteType.BYTE, ShortType.SHORT), + (ByteType.BYTE, IntegerType.INTEGER), + (ByteType.BYTE, LongType.LONG), + (ByteType.BYTE, FloatType.FLOAT), + (ByteType.BYTE, DoubleType.DOUBLE), - (ShortType.INSTANCE, IntegerType.INSTANCE), - (ShortType.INSTANCE, LongType.INSTANCE), - (ShortType.INSTANCE, FloatType.INSTANCE), - (ShortType.INSTANCE, DoubleType.INSTANCE), + (ShortType.SHORT, IntegerType.INTEGER), + (ShortType.SHORT, LongType.LONG), + (ShortType.SHORT, FloatType.FLOAT), + (ShortType.SHORT, DoubleType.DOUBLE), - (IntegerType.INSTANCE, LongType.INSTANCE), - (IntegerType.INSTANCE, FloatType.INSTANCE), - (IntegerType.INSTANCE, DoubleType.INSTANCE), + (IntegerType.INTEGER, LongType.LONG), + (IntegerType.INTEGER, FloatType.FLOAT), + (IntegerType.INTEGER, DoubleType.DOUBLE), - (LongType.INSTANCE, FloatType.INSTANCE), - (LongType.INSTANCE, DoubleType.INSTANCE), - (FloatType.INSTANCE, DoubleType.INSTANCE)) + (LongType.LONG, FloatType.FLOAT), + (LongType.LONG, DoubleType.DOUBLE), + (FloatType.FLOAT, DoubleType.DOUBLE)) test("can cast to") { Seq.range(0, ALL_TYPES.length).foreach { fromTypeIdx => @@ -75,32 +75,32 @@ class ImplicitCastExpressionSuite extends AnyFunSuite with TestUtils { override def isNullAt(rowId: Int): Boolean = nullability(rowId) override def getByte(rowId: Int): Byte = { - assert(dataType === ByteType.INSTANCE) + assert(dataType === ByteType.BYTE) generateValue(rowId).toByte } override def getShort(rowId: Int): Short = { - assert(dataType === ShortType.INSTANCE) + assert(dataType === ShortType.SHORT) generateValue(rowId).toShort } override def getInt(rowId: Int): Int = { - assert(dataType === IntegerType.INSTANCE) + assert(dataType === IntegerType.INTEGER) generateValue(rowId).toInt } override def getLong(rowId: Int): Long = { - assert(dataType === LongType.INSTANCE) + assert(dataType === LongType.LONG) generateValue(rowId).toLong } override def getFloat(rowId: Int): Float = { - assert(dataType === FloatType.INSTANCE) + assert(dataType === FloatType.FLOAT) generateValue(rowId).toFloat } override def getDouble(rowId: Int): Double = { - assert(dataType === DoubleType.INSTANCE) + assert(dataType === DoubleType.DOUBLE) generateValue(rowId) } } diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/utils/TestUtils.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/utils/TestUtils.scala index fc78fdecc4c..c8e2b1044d2 100644 --- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/utils/TestUtils.scala +++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/utils/TestUtils.scala @@ -107,10 +107,8 @@ trait TestUtils extends Assertions { val scanState = scan.getScanState(tableClient); val fileIter = scan.getScanFiles(tableClient) - // TODO serialize scan state and scan rows fileIter.forEach { fileColumnarBatch => - // TODO deserialize scan state and scan rows val dataBatches = Scan.readData( tableClient, scanState, @@ -158,25 +156,25 @@ trait TestUtils extends Assertions { /** All simple data type used in parameterized tests where type is one of the test dimensions. */ val SIMPLE_TYPES = Seq( - BooleanType.INSTANCE, - ByteType.INSTANCE, - ShortType.INSTANCE, - IntegerType.INSTANCE, - LongType.INSTANCE, - FloatType.INSTANCE, - DoubleType.INSTANCE, - DateType.INSTANCE, - TimestampType.INSTANCE, - StringType.INSTANCE, - BinaryType.INSTANCE, + BooleanType.BOOLEAN, + ByteType.BYTE, + ShortType.SHORT, + IntegerType.INTEGER, + LongType.LONG, + FloatType.FLOAT, + DoubleType.DOUBLE, + DateType.DATE, + TimestampType.TIMESTAMP, + StringType.STRING, + BinaryType.BINARY, new DecimalType(10, 5) ) /** All types. Used in parameterized tests where type is one of the test dimensions. */ val ALL_TYPES = SIMPLE_TYPES ++ Seq( - new ArrayType(BooleanType.INSTANCE, true), - new MapType(IntegerType.INSTANCE, LongType.INSTANCE, true), - new StructType().add("s1", BooleanType.INSTANCE).add("s2", IntegerType.INSTANCE) + new ArrayType(BooleanType.BOOLEAN, true), + new MapType(IntegerType.INTEGER, LongType.LONG, true), + new StructType().add("s1", BooleanType.BOOLEAN).add("s2", IntegerType.INTEGER) ) /**