From f5672fbb5b55bebddb177fd19ddb1ea92deca003 Mon Sep 17 00:00:00 2001 From: Ryan Blue <blue@apache.org> Date: Mon, 28 Oct 2024 15:53:14 -0700 Subject: [PATCH 01/12] Core: Add Variant implementation to read serialized objects. --- .../org/apache/iceberg/util/RandomUtil.java | 9 +- .../java/org/apache/iceberg/VariantArray.java | 91 ++++ .../org/apache/iceberg/VariantMetadata.java | 117 +++++ .../org/apache/iceberg/VariantObject.java | 114 +++++ .../org/apache/iceberg/VariantPrimitive.java | 126 ++++++ .../apache/iceberg/VariantShortString.java | 72 +++ .../java/org/apache/iceberg/VariantUtil.java | 147 ++++++ .../java/org/apache/iceberg/Variants.java | 207 +++++++++ .../org/apache/iceberg/TestVariantArray.java | 235 ++++++++++ .../apache/iceberg/TestVariantMetadata.java | 225 ++++++++++ .../org/apache/iceberg/TestVariantObject.java | 263 +++++++++++ .../apache/iceberg/TestVariantPrimitives.java | 421 ++++++++++++++++++ .../org/apache/iceberg/TestVariantUtil.java | 46 ++ .../org/apache/iceberg/VariantTestUtil.java | 229 ++++++++++ 14 files changed, 2301 insertions(+), 1 deletion(-) create mode 100644 core/src/main/java/org/apache/iceberg/VariantArray.java create mode 100644 core/src/main/java/org/apache/iceberg/VariantMetadata.java create mode 100644 core/src/main/java/org/apache/iceberg/VariantObject.java create mode 100644 core/src/main/java/org/apache/iceberg/VariantPrimitive.java create mode 100644 core/src/main/java/org/apache/iceberg/VariantShortString.java create mode 100644 core/src/main/java/org/apache/iceberg/VariantUtil.java create mode 100644 core/src/main/java/org/apache/iceberg/Variants.java create mode 100644 core/src/test/java/org/apache/iceberg/TestVariantArray.java create mode 100644 core/src/test/java/org/apache/iceberg/TestVariantMetadata.java create mode 100644 core/src/test/java/org/apache/iceberg/TestVariantObject.java create mode 100644 core/src/test/java/org/apache/iceberg/TestVariantPrimitives.java create mode 100644 
core/src/test/java/org/apache/iceberg/TestVariantUtil.java create mode 100644 core/src/test/java/org/apache/iceberg/VariantTestUtil.java diff --git a/api/src/test/java/org/apache/iceberg/util/RandomUtil.java b/api/src/test/java/org/apache/iceberg/util/RandomUtil.java index 9131e6166133..bf281835a237 100644 --- a/api/src/test/java/org/apache/iceberg/util/RandomUtil.java +++ b/api/src/test/java/org/apache/iceberg/util/RandomUtil.java @@ -29,6 +29,10 @@ public class RandomUtil { private RandomUtil() {} + public static String generateString(int length, Random random) { + return randomString(length, random); + } + private static boolean negate(int num) { return num % 2 == 1; } @@ -200,7 +204,10 @@ public static Object generateDictionaryEncodablePrimitive( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-.!?"; private static String randomString(Random random) { - int length = random.nextInt(50); + return randomString(random.nextInt(50), random); + } + + private static String randomString(int length, Random random) { byte[] buffer = new byte[length]; for (int i = 0; i < length; i += 1) { diff --git a/core/src/main/java/org/apache/iceberg/VariantArray.java b/core/src/main/java/org/apache/iceberg/VariantArray.java new file mode 100644 index 000000000000..185b35a5ed44 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/VariantArray.java @@ -0,0 +1,91 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. 
You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +class VariantArray implements Variants.Array, Variants.Serialized { + private static final int OFFSET_SIZE_MASK = 0b1100; + private static final int OFFSET_SIZE_SHIFT = 2; + private static final int IS_LARGE = 0b10000; + + @VisibleForTesting + static VariantArray from(VariantMetadata metadata, byte[] bytes) { + return from(metadata, ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN), bytes[0]); + } + + static VariantArray from(VariantMetadata metadata, ByteBuffer value, int header) { + Preconditions.checkArgument( + value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); + int basicType = header & Variants.BASIC_TYPE_MASK; + Preconditions.checkArgument( + basicType == Variants.BASIC_TYPE_ARRAY, "Invalid array, basic type != 3: " + basicType); + return new VariantArray(metadata, value, header); + } + + private final VariantMetadata metadata; + private final ByteBuffer value; + private final int offsetSize; + private final int offsetListOffset; + private final int dataOffset; + private final Variants.Value[] array; + + private VariantArray(VariantMetadata metadata, ByteBuffer value, int header) { + this.metadata = metadata; + this.value = value; + this.offsetSize = 1 + ((header & OFFSET_SIZE_MASK) >> OFFSET_SIZE_SHIFT); + int numElementsSize = 
((header & IS_LARGE) == IS_LARGE) ? 4 : 1; + int numElements = + VariantUtil.readLittleEndianUnsigned(value, Variants.HEADER_SIZE, numElementsSize); + this.offsetListOffset = Variants.HEADER_SIZE + numElementsSize; + this.dataOffset = offsetListOffset + ((1 + numElements) * offsetSize); + this.array = new Variants.Value[numElements]; + } + + @VisibleForTesting + int numElements() { + return array.length; + } + + @Override + public Variants.Value get(int index) { + if (null == array[index]) { + int offset = + VariantUtil.readLittleEndianUnsigned( + value, offsetListOffset + (offsetSize * index), offsetSize); + int next = + VariantUtil.readLittleEndianUnsigned( + value, offsetListOffset + (offsetSize * (1 + index)), offsetSize); + array[index] = + Variants.from(metadata, VariantUtil.slice(value, dataOffset + offset, next - offset)); + } + return array[index]; + } + + @Override + public ByteBuffer buffer() { + return value; + } +} diff --git a/core/src/main/java/org/apache/iceberg/VariantMetadata.java b/core/src/main/java/org/apache/iceberg/VariantMetadata.java new file mode 100644 index 000000000000..421a3e66fe48 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/VariantMetadata.java @@ -0,0 +1,117 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. 
See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +class VariantMetadata implements Variants.Metadata, Variants.Serialized { + private static final int SUPPORTED_VERSION = 1; + private static final int VERSION_MASK = 0b1111; + private static final int SORTED_STRINGS = 0b10000; + private static final int RESERVED = 0b100000; + private static final int OFFSET_SIZE_MASK = 0b11000000; + private static final int OFFSET_SIZE_SHIFT = 6; + + static final ByteBuffer EMPTY_V1_BUFFER = + ByteBuffer.wrap(new byte[] {0x01, 0x00}).order(ByteOrder.LITTLE_ENDIAN); + + static VariantMetadata from(byte[] bytes) { + return from(ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN)); + } + + static VariantMetadata from(ByteBuffer metadata) { + Preconditions.checkArgument( + metadata.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); + int header = VariantUtil.readByte(metadata, 0); + int version = header & VERSION_MASK; + Preconditions.checkArgument(SUPPORTED_VERSION == version, "Unsupported version: %s", version); + return new VariantMetadata(metadata, header); + } + + private final ByteBuffer metadata; + private final boolean isSorted; + private final int offsetSize; + private final int offsetListOffset; + private final int dataOffset; + private final String[] dict; + + private VariantMetadata(ByteBuffer metadata, int header) { + this.metadata = metadata; + this.isSorted = (header & SORTED_STRINGS) == SORTED_STRINGS; + this.offsetSize = 1 + ((header & OFFSET_SIZE_MASK) >> OFFSET_SIZE_SHIFT); + int dictSize = VariantUtil.readLittleEndianUnsigned(metadata, Variants.HEADER_SIZE, offsetSize); + this.dict = new String[dictSize]; + 
this.offsetListOffset = Variants.HEADER_SIZE + offsetSize; + this.dataOffset = offsetListOffset + ((1 + dictSize) * offsetSize); + } + + @VisibleForTesting + int dictionarySize() { + return dict.length; + } + + @VisibleForTesting + boolean isSorted() { + return isSorted; + } + + /** Returns the position of the string in the metadata, or -1 if the string is not found. */ + @Override + public int id(String name) { + if (name != null) { + if (isSorted) { + return VariantUtil.find(dict.length, name, this::get); + } else { + for (int id = 0; id < dict.length; id += 1) { + if (name.equals(get(id))) { + return id; + } + } + } + } + + return -1; + } + + /** Returns the string for the given dictionary id. */ + @Override + public String get(int index) { + if (null == dict[index]) { + int offset = + VariantUtil.readLittleEndianUnsigned( + metadata, offsetListOffset + (offsetSize * index), offsetSize); + int next = + VariantUtil.readLittleEndianUnsigned( + metadata, offsetListOffset + (offsetSize * (1 + index)), offsetSize); + dict[index] = VariantUtil.readString(metadata, dataOffset + offset, next - offset); + } + return dict[index]; + } + + @Override + public ByteBuffer buffer() { + return metadata; + } +} diff --git a/core/src/main/java/org/apache/iceberg/VariantObject.java b/core/src/main/java/org/apache/iceberg/VariantObject.java new file mode 100644 index 000000000000..1961371a3919 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/VariantObject.java @@ -0,0 +1,114 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. 
You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +class VariantObject implements Variants.Object, Variants.Serialized { + private static final int OFFSET_SIZE_MASK = 0b1100; + private static final int OFFSET_SIZE_SHIFT = 2; + private static final int FIELD_ID_SIZE_MASK = 0b110000; + private static final int FIELD_ID_SIZE_SHIFT = 4; + private static final int IS_LARGE = 0b1000000; + + static VariantObject from(VariantMetadata metadata, byte[] bytes) { + return from(metadata, ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN), bytes[0]); + } + + static VariantObject from(VariantMetadata metadata, ByteBuffer value, int header) { + Preconditions.checkArgument( + value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); + int basicType = header & Variants.BASIC_TYPE_MASK; + Preconditions.checkArgument( + basicType == Variants.BASIC_TYPE_OBJECT, "Invalid object, basic type != 2: " + basicType); + return new VariantObject(metadata, value, header); + } + + private final VariantMetadata metadata; + private final ByteBuffer value; + private final int fieldIdSize; + private final int fieldIdListOffset; + private final int[] fieldIds; + private final int offsetSize; + private final int offsetListOffset; + private final int dataOffset; + private final Variants.Value[] values; + + private 
VariantObject(VariantMetadata metadata, ByteBuffer value, int header) { + this.metadata = metadata; + this.value = value; + this.offsetSize = 1 + ((header & OFFSET_SIZE_MASK) >> OFFSET_SIZE_SHIFT); + this.fieldIdSize = 1 + ((header & FIELD_ID_SIZE_MASK) >> FIELD_ID_SIZE_SHIFT); + int numElementsSize = ((header & IS_LARGE) == IS_LARGE) ? 4 : 1; + int numElements = + VariantUtil.readLittleEndianUnsigned(value, Variants.HEADER_SIZE, numElementsSize); + this.fieldIdListOffset = Variants.HEADER_SIZE + numElementsSize; + this.fieldIds = new int[numElements]; + this.offsetListOffset = fieldIdListOffset + (numElements * fieldIdSize); + this.dataOffset = offsetListOffset + ((1 + numElements) * offsetSize); + this.values = new Variants.Value[numElements]; + } + + @VisibleForTesting + int numElements() { + return fieldIds.length; + } + + // keys are ordered lexicographically by the name + @Override + public Variants.Value get(String name) { + int index = + VariantUtil.find( + fieldIds.length, + name, + pos -> { + int id = + VariantUtil.readLittleEndianUnsigned( + value, fieldIdListOffset + (pos * fieldIdSize), fieldIdSize); + return metadata.get(id); + }); + + if (index < 0) { + return null; + } + + if (null == values[index]) { + int offset = + VariantUtil.readLittleEndianUnsigned( + value, offsetListOffset + (index * offsetSize), offsetSize); + int next = + VariantUtil.readLittleEndianUnsigned( + value, offsetListOffset + ((1 + index) * offsetSize), offsetSize); + values[index] = Variants.from(metadata, VariantUtil.slice(value, dataOffset + offset, next - offset)); + } + + return values[index]; + } + + @Override + public ByteBuffer buffer() { + return value; + } +} diff --git a/core/src/main/java/org/apache/iceberg/VariantPrimitive.java b/core/src/main/java/org/apache/iceberg/VariantPrimitive.java new file mode 100644 index 000000000000..bd469bb2865a --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/VariantPrimitive.java @@ -0,0 +1,126 @@ +/* + * + * * Licensed to 
the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg; + +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +class VariantPrimitive implements Variants.Primitive<Object>, Variants.Serialized { + private static final int PRIMITIVE_TYPE_SHIFT = 2; + private static final int PRIMITIVE_OFFSET = Variants.HEADER_SIZE; + + static VariantPrimitive from(byte[] bytes) { + return from(ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN), bytes[0]); + } + + static VariantPrimitive from(ByteBuffer value, int header) { + Preconditions.checkArgument( + value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); + int basicType = header & Variants.BASIC_TYPE_MASK; + Preconditions.checkArgument( + basicType == Variants.BASIC_TYPE_PRIMITIVE, + "Invalid primitive, basic type != 0: " + basicType); + return new VariantPrimitive(value, header); + } + + private final ByteBuffer value; + private final Variants.PhysicalType type; + private Object primitive = null; + + private VariantPrimitive(ByteBuffer value, int header) { + 
this.value = value; + this.type = Variants.PhysicalType.from(header >> PRIMITIVE_TYPE_SHIFT); + } + + private Object read() { + switch (type) { + case NULL: + return null; + case BOOLEAN_TRUE: + return true; + case BOOLEAN_FALSE: + return false; + case INT8: + return VariantUtil.readLittleEndianInt8(value, PRIMITIVE_OFFSET); + case INT16: + return VariantUtil.readLittleEndianInt16(value, PRIMITIVE_OFFSET); + case INT32: + case DATE: + return VariantUtil.readLittleEndianInt32(value, PRIMITIVE_OFFSET); + case INT64: + case TIMESTAMPTZ: + case TIMESTAMPNTZ: + return VariantUtil.readLittleEndianInt64(value, PRIMITIVE_OFFSET); + case FLOAT: + return VariantUtil.readFloat(value, PRIMITIVE_OFFSET); + case DOUBLE: + return VariantUtil.readDouble(value, PRIMITIVE_OFFSET); + case DECIMAL4: + { + int scale = VariantUtil.readByte(value, PRIMITIVE_OFFSET); + int unscaled = VariantUtil.readLittleEndianInt32(value, PRIMITIVE_OFFSET + 1); + return new BigDecimal(BigInteger.valueOf(unscaled), scale); + } + case DECIMAL8: + { + int scale = VariantUtil.readByte(value, PRIMITIVE_OFFSET); + long unscaled = VariantUtil.readLittleEndianInt64(value, PRIMITIVE_OFFSET + 1); + return new BigDecimal(BigInteger.valueOf(unscaled), scale); + } + case DECIMAL16: + throw new UnsupportedOperationException("unsupported"); + case BINARY: + { + int size = VariantUtil.readLittleEndianInt32(value, PRIMITIVE_OFFSET); + return VariantUtil.slice(value, PRIMITIVE_OFFSET + 4, size); + } + case STRING: + { + int size = VariantUtil.readLittleEndianInt32(value, PRIMITIVE_OFFSET); + return VariantUtil.readString(value, PRIMITIVE_OFFSET + 4, size); + } + } + + throw new UnsupportedOperationException("Unsupported primitive type: " + type); + } + + @Override + public Variants.PhysicalType type() { + return type; + } + + @Override + public Object get() { + if (null == primitive) { + this.primitive = read(); + } + return primitive; + } + + @Override + public ByteBuffer buffer() { + return value; + } +} diff --git 
a/core/src/main/java/org/apache/iceberg/VariantShortString.java b/core/src/main/java/org/apache/iceberg/VariantShortString.java new file mode 100644 index 000000000000..89f264b983cb --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/VariantShortString.java @@ -0,0 +1,72 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. 
+ * + */ + +package org.apache.iceberg; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +class VariantShortString implements Variants.Primitive<String>, Variants.Serialized { + private static final int LENGTH_MASK = 0b11111100; + private static final int LENGTH_SHIFT = 2; + + static VariantShortString from(byte[] bytes) { + return from(ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN), bytes[0]); + } + + static VariantShortString from(ByteBuffer value, int header) { + Preconditions.checkArgument( + value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); + int basicType = header & Variants.BASIC_TYPE_MASK; + Preconditions.checkArgument( + basicType == Variants.BASIC_TYPE_SHORT_STRING, + "Invalid short string, basic type != 1: " + basicType); + return new VariantShortString(value, header); + } + + private final ByteBuffer value; + private final int length; + private String string = null; + + private VariantShortString(ByteBuffer value, int header) { + this.value = value; + this.length = ((header & LENGTH_MASK) >> LENGTH_SHIFT); + } + + @Override + public Variants.PhysicalType type() { + return Variants.PhysicalType.STRING; + } + + @Override + public String get() { + if (null == string) { + this.string = VariantUtil.readString(value, Variants.HEADER_SIZE, length); + } + return string; + } + + @Override + public ByteBuffer buffer() { + return value; + } +} diff --git a/core/src/main/java/org/apache/iceberg/VariantUtil.java b/core/src/main/java/org/apache/iceberg/VariantUtil.java new file mode 100644 index 000000000000..dd48ca411fd4 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/VariantUtil.java @@ -0,0 +1,147 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. 
/**
 * Static helpers for reading and writing serialized variant bytes.
 *
 * <p>Every offset is relative to the buffer's current position, and no method moves the position
 * of the buffer it is given. Multi-byte accessors use the buffer's configured byte order, so
 * callers are expected to pass little-endian buffers.
 */
class VariantUtil {
  private VariantUtil() {}

  /**
   * Writes the low {@code size} bytes of {@code value} at {@code offset}.
   *
   * @throws IllegalArgumentException if {@code size} is not between 1 and 4
   */
  static void writeLittleEndianUnsigned(ByteBuffer buffer, int value, int offset, int size) {
    int start = buffer.position() + offset;
    if (size == 4) {
      buffer.putInt(start, value);
    } else if (size == 3) {
      // low two bytes as a short, then the third byte on its own
      buffer.putShort(start, (short) (value & 0xFFFF));
      buffer.put(start + 2, (byte) ((value >> 16) & 0xFF));
    } else if (size == 2) {
      buffer.putShort(start, (short) (value & 0xFFFF));
    } else if (size == 1) {
      buffer.put(start, (byte) (value & 0xFF));
    } else {
      throw new IllegalArgumentException("Invalid size: " + size);
    }
  }

  /** Reads one byte at {@code offset} as a signed value. */
  static int readLittleEndianInt8(ByteBuffer buffer, int offset) {
    int index = buffer.position() + offset;
    return buffer.get(index);
  }

  /** Reads two bytes at {@code offset} as a signed value. */
  static int readLittleEndianInt16(ByteBuffer buffer, int offset) {
    int index = buffer.position() + offset;
    return buffer.getShort(index);
  }

  /** Reads one byte at {@code offset} as an unsigned value in the range 0 to 255. */
  static int readByte(ByteBuffer buffer, int offset) {
    int index = buffer.position() + offset;
    return buffer.get(index) & 0xFF;
  }

  /**
   * Reads a {@code size}-byte value at {@code offset}; sizes 1 to 3 are zero-extended and size 4
   * is returned as the raw int.
   *
   * @throws IllegalArgumentException if {@code size} is not between 1 and 4
   */
  static int readLittleEndianUnsigned(ByteBuffer buffer, int offset, int size) {
    int start = buffer.position() + offset;
    if (size == 4) {
      return buffer.getInt(start);
    } else if (size == 3) {
      int lowTwoBytes = ((int) buffer.getShort(start)) & 0xFFFF;
      int thirdByte = buffer.get(start + 2) & 0xFF;
      return lowTwoBytes | (thirdByte << 16);
    } else if (size == 2) {
      return ((int) buffer.getShort(start)) & 0xFFFF;
    } else if (size == 1) {
      return buffer.get(start) & 0xFF;
    }

    throw new IllegalArgumentException("Invalid size: " + size);
  }

  /** Reads a 4-byte int at {@code offset}. */
  static int readLittleEndianInt32(ByteBuffer buffer, int offset) {
    int index = buffer.position() + offset;
    return buffer.getInt(index);
  }

  /** Reads an 8-byte long at {@code offset}. */
  static long readLittleEndianInt64(ByteBuffer buffer, int offset) {
    int index = buffer.position() + offset;
    return buffer.getLong(index);
  }

  /** Reads a 4-byte float at {@code offset}. */
  static float readFloat(ByteBuffer buffer, int offset) {
    int index = buffer.position() + offset;
    return buffer.getFloat(index);
  }

  /** Reads an 8-byte double at {@code offset}. */
  static double readDouble(ByteBuffer buffer, int offset) {
    int index = buffer.position() + offset;
    return buffer.getDouble(index);
  }

  /**
   * Returns a little-endian view of {@code length} bytes starting at {@code offset}. The view
   * shares content with {@code buffer} but has its own position and limit.
   */
  static ByteBuffer slice(ByteBuffer buffer, int offset, int length) {
    int start = buffer.position() + offset;
    ByteBuffer view = buffer.duplicate();
    view.order(ByteOrder.LITTLE_ENDIAN);
    view.position(start);
    view.limit(start + length);
    return view;
  }

  /** Decodes {@code length} bytes starting at {@code offset} as a UTF-8 string. */
  static String readString(ByteBuffer buffer, int offset, int length) {
    if (!buffer.hasArray()) {
      // no accessible backing array: decode through a view of the requested range
      return StandardCharsets.UTF_8.decode(slice(buffer, offset, length)).toString();
    }

    int arrayStart = buffer.arrayOffset() + buffer.position() + offset;
    return new String(buffer.array(), arrayStart, length, StandardCharsets.UTF_8);
  }

  /**
   * Binary searches {@code size} sorted entries for {@code key}.
   *
   * @param size number of entries
   * @param key value to search for
   * @param resolve returns the entry at a given position
   * @return the position of an entry equal to {@code key}, or -1 if there is none
   */
  static <T extends Comparable<T>> int find(int size, T key, Function<Integer, T> resolve) {
    int lo = 0;
    int hi = size - 1;
    while (lo <= hi) {
      int middle = (lo + hi) >>> 1;
      int order = key.compareTo(resolve.apply(middle));
      if (order < 0) {
        hi = middle - 1;
      } else if (order > 0) {
        lo = middle + 1;
      } else {
        return middle;
      }
    }

    return -1;
  }

  /** Returns the number of bytes (1 to 4) used to store values up to {@code maxValue}. */
  static int sizeOf(int maxValue) {
    if (maxValue <= 0xFF) {
      return 1;
    }

    if (maxValue <= 0xFFFF) {
      return 2;
    }

    if (maxValue <= 0xFFFFFF) {
      return 3;
    }

    return 4;
  }
}
b/core/src/main/java/org/apache/iceberg/Variants.java new file mode 100644 index 000000000000..872d7138d00f --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/Variants.java @@ -0,0 +1,207 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. 
+ * + */ + +package org.apache.iceberg; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; + +public class Variants { + enum LogicalType { + NULL, + BOOLEAN, + EXACT_NUMERIC, + FLOAT, + DOUBLE, + DATE, + TIMESTAMPTZ, + TIMESTAMPNTZ, + BINARY, + STRING, + ARRAY, + OBJECT + } + + public enum PhysicalType { + NULL(LogicalType.NULL, Void.class), + BOOLEAN_TRUE(LogicalType.BOOLEAN, Boolean.class), + BOOLEAN_FALSE(LogicalType.BOOLEAN, Boolean.class), + INT8(LogicalType.EXACT_NUMERIC, Integer.class), + INT16(LogicalType.EXACT_NUMERIC, Integer.class), + INT32(LogicalType.EXACT_NUMERIC, Integer.class), + INT64(LogicalType.EXACT_NUMERIC, Long.class), + DOUBLE(LogicalType.DOUBLE, Double.class), + DECIMAL4(LogicalType.EXACT_NUMERIC, BigDecimal.class), + DECIMAL8(LogicalType.EXACT_NUMERIC, BigDecimal.class), + DECIMAL16(LogicalType.EXACT_NUMERIC, BigDecimal.class), + DATE(LogicalType.DATE, Integer.class), + TIMESTAMPTZ(LogicalType.TIMESTAMPTZ, Long.class), + TIMESTAMPNTZ(LogicalType.TIMESTAMPNTZ, Long.class), + FLOAT(LogicalType.FLOAT, Float.class), + BINARY(LogicalType.BINARY, ByteBuffer.class), + STRING(LogicalType.STRING, String.class), + ARRAY(LogicalType.ARRAY, List.class), + OBJECT(LogicalType.OBJECT, Map.class); + + private final LogicalType logicalType; + private final Class<?> javaClass; + + PhysicalType(LogicalType logicalType, Class<?> javaClass) { + this.logicalType = logicalType; + this.javaClass = javaClass; + } + + LogicalType toLogicalType() { + return logicalType; + } + + public Class<?> javaClass() { + return javaClass; + } + + public static PhysicalType from(int primitiveType) { + switch (primitiveType) { + case Primitives.TYPE_NULL: + return NULL; + case Primitives.TYPE_TRUE: + return BOOLEAN_TRUE; + case Primitives.TYPE_FALSE: + return BOOLEAN_FALSE; + case Primitives.TYPE_INT8: + return INT8; + case Primitives.TYPE_INT16: + return INT16; + case Primitives.TYPE_INT32: + return INT32; + case 
Primitives.TYPE_INT64: + return INT64; + case Primitives.TYPE_DATE: + return DATE; + case Primitives.TYPE_TIMESTAMPTZ: + return TIMESTAMPTZ; + case Primitives.TYPE_TIMESTAMPNTZ: + return TIMESTAMPNTZ; + case Primitives.TYPE_FLOAT: + return FLOAT; + case Primitives.TYPE_DOUBLE: + return DOUBLE; + case Primitives.TYPE_DECIMAL4: + return DECIMAL4; + case Primitives.TYPE_DECIMAL8: + return DECIMAL8; + case Primitives.TYPE_DECIMAL16: + return DECIMAL16; + case Primitives.TYPE_BINARY: + return BINARY; + case Primitives.TYPE_STRING: + return STRING; + } + + throw new UnsupportedOperationException("Unknown primitive physical type: " + primitiveType); + } + } + + public interface Serialized { + ByteBuffer buffer(); + } + + public interface Metadata extends Serialized { + int id(String name); + + String get(int id); + } + + public interface Value { + PhysicalType type(); + } + + public interface Primitive<T> extends Value { + T get(); + } + + public interface Object extends Value { + Value get(String field); + + default PhysicalType type() { + return PhysicalType.OBJECT; + } + } + + public interface Array extends Value { + Value get(int index); + + default PhysicalType type() { + return PhysicalType.ARRAY; + } + } + + static class Primitives { + private static final int TYPE_NULL = 0; + private static final int TYPE_TRUE = 1; + private static final int TYPE_FALSE = 2; + private static final int TYPE_INT8 = 3; + private static final int TYPE_INT16 = 4; + private static final int TYPE_INT32 = 5; + private static final int TYPE_INT64 = 6; + private static final int TYPE_DOUBLE = 7; + private static final int TYPE_DECIMAL4 = 8; + private static final int TYPE_DECIMAL8 = 9; + private static final int TYPE_DECIMAL16 = 10; + private static final int TYPE_DATE = 11; + private static final int TYPE_TIMESTAMPTZ = 12; // equivalent to timestamptz + private static final int TYPE_TIMESTAMPNTZ = 13; // equivalent to timestamp + private static final int TYPE_FLOAT = 14; + private static 
final int TYPE_BINARY = 15; + private static final int TYPE_STRING = 16; + + private Primitives() {} + } + + static final int HEADER_SIZE = 1; + static final int BASIC_TYPE_MASK = 0b11; + static final int BASIC_TYPE_PRIMITIVE = 0; + static final int BASIC_TYPE_SHORT_STRING = 1; + static final int BASIC_TYPE_OBJECT = 2; + static final int BASIC_TYPE_ARRAY = 3; + + public static Value from(ByteBuffer metadata, ByteBuffer value) { + return from(VariantMetadata.from(metadata), value); + } + + static Value from(VariantMetadata metadata, ByteBuffer value) { + int header = VariantUtil.readByte(value, 0); + int basicType = header & BASIC_TYPE_MASK; + switch (basicType) { + case BASIC_TYPE_PRIMITIVE: + return VariantPrimitive.from(value, header); + case BASIC_TYPE_SHORT_STRING: + return VariantShortString.from(value, header); + case BASIC_TYPE_OBJECT: + return VariantObject.from(metadata, value, header); + case BASIC_TYPE_ARRAY: + return VariantArray.from(metadata, value, header); + default: + throw new UnsupportedOperationException("Unsupported basic type: %s" + basicType); + } + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestVariantArray.java b/core/src/test/java/org/apache/iceberg/TestVariantArray.java new file mode 100644 index 000000000000..57ebe982b172 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestVariantArray.java @@ -0,0 +1,235 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. 
You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg; + +import java.nio.ByteBuffer; +import java.util.Random; +import org.apache.iceberg.Variants.PhysicalType; +import org.apache.iceberg.Variants.Primitive; +import org.apache.iceberg.util.RandomUtil; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TestVariantArray { + private static final VariantMetadata EMPTY_METADATA = + VariantMetadata.from(VariantMetadata.EMPTY_V1_BUFFER); + private static final VariantPrimitive vNull = VariantPrimitive.from(new byte[] {0x00}); + private static final VariantPrimitive vTrue = VariantPrimitive.from(new byte[] {0b100}); + private static final VariantPrimitive vFalse = VariantPrimitive.from(new byte[] {0b1000}); + private static final VariantShortString str = + VariantShortString.from(new byte[] {0b11101, 'i', 'c', 'e', 'b', 'e', 'r', 'g'}); + private static final VariantShortString a = VariantShortString.from(new byte[] {0b101, 'a'}); + private static final VariantShortString b = VariantShortString.from(new byte[] {0b101, 'b'}); + private static final VariantShortString c = VariantShortString.from(new byte[] {0b101, 'c'}); + private static final VariantShortString d = VariantShortString.from(new byte[] {0b101, 'd'}); + private static final VariantShortString e = VariantShortString.from(new byte[] {0b101, 'e'}); + private static final VariantPrimitive i34 = VariantPrimitive.from(new byte[] {0b1100, 34}); + private static final VariantPrimitive i1234 = + VariantPrimitive.from(new byte[] {0b10000, (byte) 0xD2, 0x04}); 
+ private static final VariantPrimitive date = + VariantPrimitive.from(new byte[] {0b101100, (byte) 0xF4, 0x43, 0x00, 0x00}); + + private final Random random = new Random(374513); + + @Test + public void testEmptyArray() { + VariantArray array = VariantArray.from(EMPTY_METADATA, new byte[] {0b0011, 0x00}); + + Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); + Assertions.assertThat(array.numElements()).isEqualTo(0); + } + + @Test + public void testEmptyLargeArray() { + VariantArray array = + VariantArray.from(EMPTY_METADATA, new byte[] {0b10011, 0x00, 0x00, 0x00, 0x00}); + + Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); + Assertions.assertThat(array.numElements()).isEqualTo(0); + } + + @Test + public void testStringArray() { + ByteBuffer buffer = VariantTestUtil.createArray(a, b, c, d, e); + VariantArray array = VariantArray.from(EMPTY_METADATA, buffer, buffer.get(0)); + + Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); + Assertions.assertThat(array.numElements()).isEqualTo(5); + Assertions.assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(0)).get()).isEqualTo("a"); + Assertions.assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(1)).get()).isEqualTo("b"); + Assertions.assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(2)).get()).isEqualTo("c"); + Assertions.assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(3)).get()).isEqualTo("d"); + Assertions.assertThat(array.get(4).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(4)).get()).isEqualTo("e"); + + Assertions.assertThatThrownBy(() -> array.get(5)) + .isInstanceOf(ArrayIndexOutOfBoundsException.class) + .hasMessage("Index 5 out of bounds for length 5"); + } + + @Test + 
public void testStringDifferentLengths() { + ByteBuffer buffer = VariantTestUtil.createArray(a, b, c, str, d, e); + VariantArray array = VariantArray.from(EMPTY_METADATA, buffer, buffer.get(0)); + + Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); + Assertions.assertThat(array.numElements()).isEqualTo(6); + Assertions.assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(0)).get()).isEqualTo("a"); + Assertions.assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(1)).get()).isEqualTo("b"); + Assertions.assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(2)).get()).isEqualTo("c"); + Assertions.assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(3)).get()).isEqualTo("iceberg"); + Assertions.assertThat(array.get(4).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(4)).get()).isEqualTo("d"); + Assertions.assertThat(array.get(5).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(5)).get()).isEqualTo("e"); + + Assertions.assertThatThrownBy(() -> array.get(6)) + .isInstanceOf(ArrayIndexOutOfBoundsException.class) + .hasMessage("Index 6 out of bounds for length 6"); + } + + @Test + public void testArrayOfMixedTypes() { + ByteBuffer nestedBuffer = VariantTestUtil.createArray(a, c, d); + VariantArray nested = VariantArray.from(EMPTY_METADATA, nestedBuffer, nestedBuffer.get(0)); + ByteBuffer buffer = VariantTestUtil.createArray(date, i34, str, vNull, e, b, vFalse, nested, vTrue, i1234); + VariantArray array = VariantArray.from(EMPTY_METADATA, buffer, buffer.get(0)); + + Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); + Assertions.assertThat(array.numElements()).isEqualTo(10); + 
Assertions.assertThat(array.get(0).type()).isEqualTo(PhysicalType.DATE); + Assertions.assertThat(((Primitive<?>) array.get(0)).get()).isEqualTo(17396); + Assertions.assertThat(array.get(1).type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((Primitive<?>) array.get(1)).get()).isEqualTo(34); + Assertions.assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(2)).get()).isEqualTo("iceberg"); + Assertions.assertThat(array.get(3).type()).isEqualTo(PhysicalType.NULL); + Assertions.assertThat(((Primitive<?>) array.get(3)).get()).isEqualTo(null); + Assertions.assertThat(array.get(4).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(4)).get()).isEqualTo("e"); + Assertions.assertThat(array.get(5).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(5)).get()).isEqualTo("b"); + Assertions.assertThat(array.get(6).type()).isEqualTo(PhysicalType.BOOLEAN_FALSE); + Assertions.assertThat(((Primitive<?>) array.get(6)).get()).isEqualTo(false); + Assertions.assertThat(array.get(8).type()).isEqualTo(PhysicalType.BOOLEAN_TRUE); + Assertions.assertThat(((Primitive<?>) array.get(8)).get()).isEqualTo(true); + Assertions.assertThat(array.get(9).type()).isEqualTo(PhysicalType.INT16); + Assertions.assertThat(((Primitive<?>) array.get(9)).get()).isEqualTo(1234); + + Assertions.assertThatThrownBy(() -> array.get(10)) + .isInstanceOf(ArrayIndexOutOfBoundsException.class) + .hasMessage("Index 10 out of bounds for length 10"); + + Assertions.assertThat(array.get(7).type()).isEqualTo(PhysicalType.ARRAY); + VariantArray actualNested = (VariantArray) array.get(7); + Assertions.assertThat(actualNested.numElements()).isEqualTo(3); + Assertions.assertThat(actualNested.get(0).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) actualNested.get(0)).get()).isEqualTo("a"); + 
Assertions.assertThat(actualNested.get(1).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) actualNested.get(1)).get()).isEqualTo("c"); + Assertions.assertThat(actualNested.get(2).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) actualNested.get(2)).get()).isEqualTo("d"); + + Assertions.assertThatThrownBy(() -> actualNested.get(3)) + .isInstanceOf(ArrayIndexOutOfBoundsException.class) + .hasMessage("Index 3 out of bounds for length 3"); + } + + @Test + public void testTwoByteOffsets() { + // a string larger than 255 bytes to push the value offset size above 1 byte + String randomString = RandomUtil.generateString(300, random); + VariantPrimitive bigString = VariantTestUtil.createString(randomString); + + ByteBuffer buffer = VariantTestUtil.createArray(bigString, a, b, c); + VariantArray array = VariantArray.from(EMPTY_METADATA, buffer, buffer.get(0)); + + Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); + Assertions.assertThat(array.numElements()).isEqualTo(4); + Assertions.assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(0)).get()).isEqualTo(randomString); + Assertions.assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(1)).get()).isEqualTo("a"); + Assertions.assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(2)).get()).isEqualTo("b"); + Assertions.assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(3)).get()).isEqualTo("c"); + + Assertions.assertThatThrownBy(() -> array.get(4)) + .isInstanceOf(ArrayIndexOutOfBoundsException.class) + .hasMessage("Index 4 out of bounds for length 4"); + } + + @Test + public void testThreeByteOffsets() { + // a string larger than 65535 bytes to push the value offset size above 2 bytes + String randomString =
RandomUtil.generateString(70_000, random); + VariantPrimitive reallyBigString = VariantTestUtil.createString(randomString); + + ByteBuffer buffer = VariantTestUtil.createArray(reallyBigString, a, b, c); + VariantArray array = VariantArray.from(EMPTY_METADATA, buffer, buffer.get(0)); + + Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); + Assertions.assertThat(array.numElements()).isEqualTo(4); + Assertions.assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(0)).get()).isEqualTo(randomString); + Assertions.assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(1)).get()).isEqualTo("a"); + Assertions.assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(2)).get()).isEqualTo("b"); + Assertions.assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((Primitive<?>) array.get(3)).get()).isEqualTo("c"); + + Assertions.assertThatThrownBy(() -> array.get(4)) + .isInstanceOf(ArrayIndexOutOfBoundsException.class) + .hasMessage("Index 4 out of bounds for length 4"); + } + + @Test + public void testLargeArraySize() { + VariantArray array = + VariantArray.from( + EMPTY_METADATA, new byte[] {0b10011, (byte) 0xFF, (byte) 0x01, 0x00, 0x00}); + + Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); + Assertions.assertThat(array.numElements()).isEqualTo(511); + } + + @Test + public void testNegativeArraySize() { + Assertions.assertThatThrownBy( + () -> + VariantArray.from( + EMPTY_METADATA, + new byte[] {0b10011, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF})) + .isInstanceOf(NegativeArraySizeException.class) + .hasMessage("-1"); + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestVariantMetadata.java b/core/src/test/java/org/apache/iceberg/TestVariantMetadata.java new file mode 100644 index 000000000000..edb5220628b0 --- 
/dev/null +++ b/core/src/test/java/org/apache/iceberg/TestVariantMetadata.java @@ -0,0 +1,225 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg; + +import java.nio.ByteBuffer; +import java.util.Random; +import java.util.Set; +import org.apache.iceberg.util.RandomUtil; +import org.assertj.core.api.Assertions; +import org.assertj.core.util.Sets; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +public class TestVariantMetadata { + private final Random random = new Random(872591); + + @Test + public void testEmptyVariantMetadata() { + VariantMetadata metadata = VariantMetadata.from(VariantMetadata.EMPTY_V1_BUFFER); + + Assertions.assertThat(metadata.isSorted()).isFalse(); + Assertions.assertThat(metadata.dictionarySize()).isEqualTo(0); + Assertions.assertThatThrownBy(() -> metadata.get(0)) + .isInstanceOf(ArrayIndexOutOfBoundsException.class); + } + + @Test + public void testHeaderSorted() { + VariantMetadata metadata = VariantMetadata.from(new byte[] {0b10001, 0x00}); + + Assertions.assertThat(metadata.isSorted()).isTrue(); + 
Assertions.assertThat(metadata.dictionarySize()).isEqualTo(0); + } + + @Test + public void testHeaderOffsetSize() { + // offset size is 4-byte LE = 1 + Assertions.assertThat( + VariantMetadata.from(new byte[] {(byte) 0b11010001, 0x01, 0x00, 0x00, 0x00}) + .dictionarySize()) + .isEqualTo(1); + + // offset size is 3-byte LE = 1 + Assertions.assertThat( + VariantMetadata.from(new byte[] {(byte) 0b10010001, 0x01, 0x00, 0x00}).dictionarySize()) + .isEqualTo(1); + + // offset size is 2-byte LE = 1 + Assertions.assertThat( + VariantMetadata.from(new byte[] {(byte) 0b01010001, 0x01, 0x00}).dictionarySize()) + .isEqualTo(1); + + // offset size is 1-byte LE = 1 + Assertions.assertThat( + VariantMetadata.from(new byte[] {(byte) 0b00010001, 0x01}).dictionarySize()) + .isEqualTo(1); + } + + @Test + public void testReadString() { + VariantMetadata metadata = + VariantMetadata.from( + new byte[] { + 0b10001, 0x05, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 'a', 'b', 'c', 'd', 'e' + }); + + Assertions.assertThat(metadata.get(0)).isEqualTo("a"); + Assertions.assertThat(metadata.get(1)).isEqualTo("b"); + Assertions.assertThat(metadata.get(2)).isEqualTo("c"); + Assertions.assertThat(metadata.get(3)).isEqualTo("d"); + Assertions.assertThat(metadata.get(4)).isEqualTo("e"); + Assertions.assertThatThrownBy(() -> metadata.get(5)) + .isInstanceOf(ArrayIndexOutOfBoundsException.class); + } + + @Test + public void testMultibyteString() { + VariantMetadata metadata = + VariantMetadata.from( + new byte[] { + 0b10001, 0x05, 0x00, 0x01, 0x02, 0x05, 0x06, 0x07, 'a', 'b', 'x', 'y', 'z', 'd', 'e' + }); + + Assertions.assertThat(metadata.get(0)).isEqualTo("a"); + Assertions.assertThat(metadata.get(1)).isEqualTo("b"); + Assertions.assertThat(metadata.get(2)).isEqualTo("xyz"); + Assertions.assertThat(metadata.get(3)).isEqualTo("d"); + Assertions.assertThat(metadata.get(4)).isEqualTo("e"); + Assertions.assertThatThrownBy(() -> metadata.get(5)) + .isInstanceOf(ArrayIndexOutOfBoundsException.class); + } + + 
@Test + public void testTwoByteOffsets() { + VariantMetadata metadata = + VariantMetadata.from( + new byte[] { + 0b1010001, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x05, 0x00, 0x06, 0x00, + 0x07, 0x00, 'a', 'b', 'x', 'y', 'z', 'd', 'e' + }); + + Assertions.assertThat(metadata.get(0)).isEqualTo("a"); + Assertions.assertThat(metadata.get(1)).isEqualTo("b"); + Assertions.assertThat(metadata.get(2)).isEqualTo("xyz"); + Assertions.assertThat(metadata.get(3)).isEqualTo("d"); + Assertions.assertThat(metadata.get(4)).isEqualTo("e"); + Assertions.assertThatThrownBy(() -> metadata.get(5)) + .isInstanceOf(ArrayIndexOutOfBoundsException.class); + } + + @Test + public void testFindStringSorted() { + VariantMetadata metadata = + VariantMetadata.from( + new byte[] { + 0b10001, 0x05, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 'a', 'b', 'c', 'd', 'e' + }); + Assertions.assertThat(metadata.id("A")).isEqualTo(-1); + Assertions.assertThat(metadata.id("a")).isEqualTo(0); + Assertions.assertThat(metadata.id("aa")).isEqualTo(-1); + Assertions.assertThat(metadata.id("b")).isEqualTo(1); + Assertions.assertThat(metadata.id("bb")).isEqualTo(-1); + Assertions.assertThat(metadata.id("c")).isEqualTo(2); + Assertions.assertThat(metadata.id("cc")).isEqualTo(-1); + Assertions.assertThat(metadata.id("d")).isEqualTo(3); + Assertions.assertThat(metadata.id("dd")).isEqualTo(-1); + Assertions.assertThat(metadata.id("e")).isEqualTo(4); + Assertions.assertThat(metadata.id("ee")).isEqualTo(-1); + } + + @Test + public void testFindStringUnsorted() { + VariantMetadata metadata = + VariantMetadata.from( + new byte[] { + 0b00001, 0x05, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 'e', 'd', 'c', 'b', 'a' + }); + Assertions.assertThat(metadata.id("A")).isEqualTo(-1); + Assertions.assertThat(metadata.id("a")).isEqualTo(4); + Assertions.assertThat(metadata.id("aa")).isEqualTo(-1); + Assertions.assertThat(metadata.id("b")).isEqualTo(3); + Assertions.assertThat(metadata.id("bb")).isEqualTo(-1); + 
Assertions.assertThat(metadata.id("c")).isEqualTo(2); + Assertions.assertThat(metadata.id("cc")).isEqualTo(-1); + Assertions.assertThat(metadata.id("d")).isEqualTo(1); + Assertions.assertThat(metadata.id("dd")).isEqualTo(-1); + Assertions.assertThat(metadata.id("e")).isEqualTo(0); + Assertions.assertThat(metadata.id("ee")).isEqualTo(-1); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testTwoByteFieldIds(boolean sortFieldNames) { + Set<String> keySet = Sets.newHashSet(); + String lastKey = null; + for (int i = 0; i < 10_000; i += 1) { + lastKey = RandomUtil.generateString(10, random); + keySet.add(lastKey); + } + + ByteBuffer buffer = VariantTestUtil.createMetadata(keySet, sortFieldNames); + VariantMetadata metadata = VariantMetadata.from(buffer); + + Assertions.assertThat(metadata.dictionarySize()).isEqualTo(10_000); + Assertions.assertThat(metadata.id(lastKey)).isGreaterThanOrEqualTo(0); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testThreeByteFieldIds(boolean sortFieldNames) { + Set<String> keySet = Sets.newHashSet(); + String lastKey = null; + for (int i = 0; i < 100_000; i += 1) { + lastKey = RandomUtil.generateString(10, random); + keySet.add(lastKey); + } + + ByteBuffer buffer = VariantTestUtil.createMetadata(keySet, sortFieldNames); + VariantMetadata metadata = VariantMetadata.from(buffer); + + Assertions.assertThat(metadata.dictionarySize()).isEqualTo(100_000); + Assertions.assertThat(metadata.id(lastKey)).isGreaterThanOrEqualTo(0); + } + + @Test + public void testInvalidMetadataVersion() { + Assertions.assertThatThrownBy(() -> VariantMetadata.from(new byte[] {0x02, 0x00})) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Unsupported version: 2"); + } + + @Test + public void testMissingLength() { + Assertions.assertThatThrownBy(() -> VariantMetadata.from(new byte[] {0x01})) + .isInstanceOf(IndexOutOfBoundsException.class); + } + + @Test + public void testLengthTooShort() { + // missing the
4th length byte + Assertions.assertThatThrownBy( + () -> VariantMetadata.from(new byte[] {(byte) 0b11010001, 0x00, 0x00, 0x00})) + .isInstanceOf(IndexOutOfBoundsException.class); + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestVariantObject.java b/core/src/test/java/org/apache/iceberg/TestVariantObject.java new file mode 100644 index 000000000000..4a154d97273f --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestVariantObject.java @@ -0,0 +1,263 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. 
+ * + */ + +package org.apache.iceberg; + +import java.nio.ByteBuffer; +import java.util.Map; +import java.util.Random; +import java.util.Set; +import org.apache.iceberg.Variants.PhysicalType; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.util.RandomUtil; +import org.assertj.core.api.Assertions; +import org.assertj.core.util.Sets; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +public class TestVariantObject { + private static final VariantMetadata EMPTY_METADATA = + VariantMetadata.from(VariantMetadata.EMPTY_V1_BUFFER); + private static final VariantPrimitive i1 = VariantPrimitive.from(new byte[] {0b1100, 1}); + private static final VariantPrimitive i2 = VariantPrimitive.from(new byte[] {0b1100, 2}); + private static final VariantPrimitive i3 = VariantPrimitive.from(new byte[] {0b1100, 3}); + private static final VariantPrimitive vNull = VariantPrimitive.from(new byte[] {0x00}); + private static final VariantPrimitive vTrue = VariantPrimitive.from(new byte[] {0b100}); + private static final VariantPrimitive date = + VariantPrimitive.from(new byte[] {0b101100, (byte) 0xF4, 0x43, 0x00, 0x00}); + + private final Random random = new Random(198725); + + @Test + public void testEmptyObject() { + VariantObject object = VariantObject.from(EMPTY_METADATA, new byte[] {0b10, 0x00}); + + Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); + Assertions.assertThat(object.numElements()).isEqualTo(0); + } + + @Test + public void testEmptyLargeObject() { + VariantObject object = + VariantObject.from(EMPTY_METADATA, new byte[] {0b1000010, 0x00, 0x00, 0x00, 0x00}); + + Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); + Assertions.assertThat(object.numElements()).isEqualTo(0); + } + + @Test + public void 
testSimpleObject() { + Map<String, Variants.Serialized> data = ImmutableMap.of("a", i1, "b", i2, "c", i3); + ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), true /* sort names */); + ByteBuffer value = VariantTestUtil.createObject(meta, data); + + VariantMetadata metadata = VariantMetadata.from(meta); + VariantObject object = VariantObject.from(metadata, value, value.get(0)); + + Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); + Assertions.assertThat(object.numElements()).isEqualTo(3); + + Assertions.assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) object.get("a")).get()).isEqualTo(1); + Assertions.assertThat(object.get("b").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) object.get("b")).get()).isEqualTo(2); + Assertions.assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) object.get("c")).get()).isEqualTo(3); + + Assertions.assertThat(object.get("d")).isEqualTo(null); + } + + @Test + public void testOutOfOrderKeys() { + Map<String, Variants.Serialized> data = ImmutableMap.of("b", i2, "a", i1, "c", i3); + ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), false /* sort names */); + ByteBuffer value = VariantTestUtil.createObject(meta, data); + + VariantMetadata metadata = VariantMetadata.from(meta); + VariantObject object = VariantObject.from(metadata, value, value.get(0)); + + Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); + Assertions.assertThat(object.numElements()).isEqualTo(3); + + Assertions.assertThat(object.get("d")).isEqualTo(null); + + Assertions.assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) object.get("c")).get()).isEqualTo(3); + Assertions.assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) 
object.get("a")).get()).isEqualTo(1); + Assertions.assertThat(object.get("b").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) object.get("b")).get()).isEqualTo(2); + } + + @Test + public void testMixedValueTypes() { + ByteBuffer meta = + VariantTestUtil.createMetadata( + ImmutableList.of("a", "b", "c", "d", "e", "f"), true /* sort names */); + VariantMetadata metadata = VariantMetadata.from(meta); + + Map<String, Variants.Serialized> inner = ImmutableMap.of("b", i2, "f", i3); + ByteBuffer innerBuffer = VariantTestUtil.createObject(meta, inner); + VariantObject innerObject = VariantObject.from(metadata, innerBuffer, innerBuffer.get(0)); + Map<String, Variants.Serialized> data = + ImmutableMap.of("a", i1, "b", date, "c", vNull, "d", vTrue, "e", innerObject); + ByteBuffer value = VariantTestUtil.createObject(meta, data); + + VariantObject object = VariantObject.from(metadata, value, value.get(0)); + + Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); + Assertions.assertThat(object.numElements()).isEqualTo(5); + + Assertions.assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) object.get("a")).get()).isEqualTo(1); + Assertions.assertThat(object.get("b").type()).isEqualTo(PhysicalType.DATE); + Assertions.assertThat(((VariantPrimitive) object.get("b")).get()).isEqualTo(17396); + Assertions.assertThat(object.get("c").type()).isEqualTo(PhysicalType.NULL); + Assertions.assertThat(((VariantPrimitive) object.get("c")).get()).isEqualTo(null); + Assertions.assertThat(object.get("d").type()).isEqualTo(PhysicalType.BOOLEAN_TRUE); + Assertions.assertThat(((VariantPrimitive) object.get("d")).get()).isEqualTo(true); + + Assertions.assertThat(object.get("e").type()).isEqualTo(PhysicalType.OBJECT); + VariantObject actualInner = (VariantObject) object.get("e"); + Assertions.assertThat(actualInner.numElements()).isEqualTo(2); + 
Assertions.assertThat(actualInner.get("b").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) actualInner.get("b")).get()).isEqualTo(2); + Assertions.assertThat(actualInner.get("f").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) actualInner.get("f")).get()).isEqualTo(3); + } + + @Test + public void testTwoByteOffsets() { + // a string larger than 255 bytes to push the value offset size above 1 byte + String randomString = RandomUtil.generateString(300, random); + VariantPrimitive bigString = VariantTestUtil.createString(randomString); + + // note that order doesn't matter. fields are sorted by name + Map<String, Variants.Serialized> data = + ImmutableMap.of("big", bigString, "a", i1, "b", i2, "c", i3); + ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), true /* sort names */); + ByteBuffer value = VariantTestUtil.createObject(meta, data); + + VariantMetadata metadata = VariantMetadata.from(meta); + VariantObject object = VariantObject.from(metadata, value, value.get(0)); + + Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); + Assertions.assertThat(object.numElements()).isEqualTo(4); + + Assertions.assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) object.get("a")).get()).isEqualTo(1); + Assertions.assertThat(object.get("b").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) object.get("b")).get()).isEqualTo(2); + Assertions.assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) object.get("c")).get()).isEqualTo(3); + Assertions.assertThat(object.get("big").type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((VariantPrimitive) object.get("big")).get()).isEqualTo(randomString); + } + + @Test + public void testThreeByteOffsets() { + // a string larger than 65535 bytes to push the value offset size above 2 bytes +
String randomString = RandomUtil.generateString(70_000, random); + VariantPrimitive reallyBigString = VariantTestUtil.createString(randomString); + + // note that order doesn't matter. fields are sorted by name + Map<String, Variants.Serialized> data = + ImmutableMap.of("really-big", reallyBigString, "a", i1, "b", i2, "c", i3); + ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), true /* sort names */); + ByteBuffer value = VariantTestUtil.createObject(meta, data); + + VariantMetadata metadata = VariantMetadata.from(meta); + VariantObject object = VariantObject.from(metadata, value, value.get(0)); + + Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); + Assertions.assertThat(object.numElements()).isEqualTo(4); + + Assertions.assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) object.get("a")).get()).isEqualTo(1); + Assertions.assertThat(object.get("b").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) object.get("b")).get()).isEqualTo(2); + Assertions.assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) object.get("c")).get()).isEqualTo(3); + Assertions.assertThat(object.get("really-big").type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(((VariantPrimitive) object.get("really-big")).get()) + .isEqualTo(randomString); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testTwoByteFieldIds(boolean sortFieldNames) { + Set<String> keySet = Sets.newHashSet(); + for (int i = 0; i < 10_000; i += 1) { + keySet.add(RandomUtil.generateString(10, random)); + } + + Map<String, Variants.Serialized> data = ImmutableMap.of("aa", i1, "AA", i2, "ZZ", i3); + + // create metadata from the large key set and the actual keys + keySet.addAll(data.keySet()); + ByteBuffer meta = VariantTestUtil.createMetadata(keySet, sortFieldNames); + ByteBuffer value = 
VariantTestUtil.createObject(meta, data); + + VariantMetadata metadata = VariantMetadata.from(meta); + VariantObject object = VariantObject.from(metadata, value, value.get(0)); + + Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); + Assertions.assertThat(object.numElements()).isEqualTo(3); + + Assertions.assertThat(object.get("aa").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) object.get("aa")).get()).isEqualTo(1); + Assertions.assertThat(object.get("AA").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) object.get("AA")).get()).isEqualTo(2); + Assertions.assertThat(object.get("ZZ").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) object.get("ZZ")).get()).isEqualTo(3); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testThreeByteFieldIds(boolean sortFieldNames) { + Set<String> keySet = Sets.newHashSet(); + for (int i = 0; i < 100_000; i += 1) { + keySet.add(RandomUtil.generateString(10, random)); + } + + Map<String, Variants.Serialized> data = ImmutableMap.of("aa", i1, "AA", i2, "ZZ", i3); + + // create metadata from the large key set and the actual keys + keySet.addAll(data.keySet()); + ByteBuffer meta = VariantTestUtil.createMetadata(keySet, sortFieldNames); + ByteBuffer value = VariantTestUtil.createObject(meta, data); + + VariantMetadata metadata = VariantMetadata.from(meta); + VariantObject object = VariantObject.from(metadata, value, value.get(0)); + + Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); + Assertions.assertThat(object.numElements()).isEqualTo(3); + + Assertions.assertThat(object.get("aa").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) object.get("aa")).get()).isEqualTo(1); + Assertions.assertThat(object.get("AA").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) object.get("AA")).get()).isEqualTo(2); + 
Assertions.assertThat(object.get("ZZ").type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(((VariantPrimitive) object.get("ZZ")).get()).isEqualTo(3); + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestVariantPrimitives.java b/core/src/test/java/org/apache/iceberg/TestVariantPrimitives.java new file mode 100644 index 000000000000..fc64f7f291e2 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestVariantPrimitives.java @@ -0,0 +1,421 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. 
+ * + */ + +package org.apache.iceberg; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import org.apache.iceberg.Variants.PhysicalType; +import org.apache.iceberg.Variants.Primitive; +import org.apache.iceberg.util.DateTimeUtil; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TestVariantPrimitives { + @Test + public void testNull() { + Primitive<?> value = VariantPrimitive.from(new byte[] {primitiveHeader(0)}); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.NULL); + Assertions.assertThat(value.get()).isEqualTo(null); + } + + @Test + public void testTrue() { + Primitive<?> value = VariantPrimitive.from(new byte[] {primitiveHeader(1)}); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.BOOLEAN_TRUE); + Assertions.assertThat(value.get()).isEqualTo(true); + } + + @Test + public void testFalse() { + Primitive<?> value = VariantPrimitive.from(new byte[] {primitiveHeader(2)}); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.BOOLEAN_FALSE); + Assertions.assertThat(value.get()).isEqualTo(false); + } + + @Test + public void testInt8() { + Primitive<?> value = VariantPrimitive.from(new byte[] {primitiveHeader(3), 34}); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(value.get()).isEqualTo(34); + } + + @Test + public void testNegativeInt8() { + Primitive<?> value = VariantPrimitive.from(new byte[] {primitiveHeader(3), (byte) 0xFF}); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT8); + Assertions.assertThat(value.get()).isEqualTo(-1); + } + + @Test + public void testInt16() { + Primitive<?> value = + VariantPrimitive.from(new byte[] {primitiveHeader(4), (byte) 0xD2, 0x04}); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT16); + Assertions.assertThat(value.get()).isEqualTo(1234); + } + + @Test + public void testNegativeInt16() { + Primitive<?> value = + VariantPrimitive.from(new byte[] 
{primitiveHeader(4), (byte) 0xFF, (byte) 0xFF}); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT16); + Assertions.assertThat(value.get()).isEqualTo(-1); + } + + @Test + public void testInt32() { + Primitive<?> value = + VariantPrimitive.from( + new byte[] {primitiveHeader(5), (byte) 0xD2, 0x02, (byte) 0x96, 0x49}); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT32); + Assertions.assertThat(value.get()).isEqualTo(1234567890); + } + + @Test + public void testNegativeInt32() { + Primitive<?> value = + VariantPrimitive.from( + new byte[] {primitiveHeader(5), (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF}); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT32); + Assertions.assertThat(value.get()).isEqualTo(-1); + } + + @Test + public void testInt64() { + Primitive<?> value = + VariantPrimitive.from( + new byte[] { + primitiveHeader(6), + (byte) 0xB1, + 0x1C, + 0x6C, + (byte) 0xB1, + (byte) 0xF4, + 0x10, + 0x22, + 0x11 + }); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT64); + Assertions.assertThat(value.get()).isEqualTo(1234567890987654321L); + } + + @Test + public void testNegativeInt64() { + Primitive<?> value = + VariantPrimitive.from( + new byte[] { + primitiveHeader(6), + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF + }); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT64); + Assertions.assertThat(value.get()).isEqualTo(-1L); + } + + @Test + public void testDouble() { + Primitive<?> value = + VariantPrimitive.from( + new byte[] { + primitiveHeader(7), + (byte) 0xB1, + 0x1C, + 0x6C, + (byte) 0xB1, + (byte) 0xF4, + 0x10, + 0x22, + 0x11 + }); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DOUBLE); + Assertions.assertThat(value.get()).isEqualTo(Double.longBitsToDouble(1234567890987654321L)); + } + + @Test + public void testNegativeDouble() { + Primitive<?> value = + 
VariantPrimitive.from( + new byte[] {primitiveHeader(7), 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, (byte) 0x80}); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DOUBLE); + Assertions.assertThat(value.get()).isEqualTo(-0.0D); + } + + @Test + public void testDecimal4() { + Primitive<?> value = + VariantPrimitive.from( + new byte[] {primitiveHeader(8), 0x04, (byte) 0xD2, 0x02, (byte) 0x96, 0x49}); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL4); + Assertions.assertThat(value.get()).isEqualTo(new BigDecimal("123456.7890")); + } + + @Test + public void testNegativeDecimal4() { + Primitive<?> value = + VariantPrimitive.from( + new byte[] { + primitiveHeader(8), 0x04, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF + }); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL4); + Assertions.assertThat(value.get()).isEqualTo(new BigDecimal("-0.0001")); + } + + @Test + public void testDecimal8() { + Primitive<?> value = + VariantPrimitive.from( + new byte[] { + primitiveHeader(9), + 0x09, // scale=9 + (byte) 0xB1, + 0x1C, + 0x6C, + (byte) 0xB1, + (byte) 0xF4, + 0x10, + 0x22, + 0x11 + }); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL8); + Assertions.assertThat(value.get()).isEqualTo(new BigDecimal("1234567890.987654321")); + } + + @Test + public void testNegativeDecimal8() { + Primitive<?> value = + VariantPrimitive.from( + new byte[] { + primitiveHeader(9), + 0x09, // scale=9 + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF + }); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL8); + Assertions.assertThat(value.get()).isEqualTo(new BigDecimal("-0.000000001")); + } + + @Test + public void testDecimal16() { + Primitive<?> value = + VariantPrimitive.from( + new byte[] { + primitiveHeader(10), 0x09, // scale=9 + }); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL16); + 
Assertions.assertThatThrownBy(value::get).isInstanceOf(UnsupportedOperationException.class); + } + + @Test + public void testDate() { + Primitive<?> value = + VariantPrimitive.from(new byte[] {primitiveHeader(11), (byte) 0xF4, 0x43, 0x00, 0x00}); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DATE); + Assertions.assertThat(DateTimeUtil.daysToIsoDate((int) value.get())).isEqualTo("2017-08-18"); + } + + @Test + public void testNegativeDate() { + Primitive<?> value = + VariantPrimitive.from( + new byte[] {primitiveHeader(11), (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF}); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DATE); + Assertions.assertThat(DateTimeUtil.daysToIsoDate((int) value.get())).isEqualTo("1969-12-31"); + } + + @Test + public void testTimestamptz() { + Primitive<?> value = + VariantPrimitive.from( + new byte[] { + primitiveHeader(12), + 0x18, + (byte) 0xD3, + (byte) 0xB1, + (byte) 0xD6, + 0x07, + 0x57, + 0x05, + 0x00 + }); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.TIMESTAMPTZ); + Assertions.assertThat(DateTimeUtil.microsToIsoTimestamptz((long) value.get())) + .isEqualTo("2017-08-18T14:21:01.919+00:00"); + } + + @Test + public void testNegativeTimestamptz() { + Primitive<?> value = + VariantPrimitive.from( + new byte[] { + primitiveHeader(12), + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF + }); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.TIMESTAMPTZ); + Assertions.assertThat(DateTimeUtil.microsToIsoTimestamptz((long) value.get())) + .isEqualTo("1969-12-31T23:59:59.999999+00:00"); + } + + @Test + public void testTimestampntz() { + Primitive<?> value = + VariantPrimitive.from( + new byte[] { + primitiveHeader(13), + 0x18, + (byte) 0xD3, + (byte) 0xB1, + (byte) 0xD6, + 0x07, + 0x57, + 0x05, + 0x00 + }); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.TIMESTAMPNTZ); + 
Assertions.assertThat(DateTimeUtil.microsToIsoTimestamp((long) value.get())) + .isEqualTo("2017-08-18T14:21:01.919"); + } + + @Test + public void testNegativeTimestampntz() { + Primitive<?> value = + VariantPrimitive.from( + new byte[] { + primitiveHeader(13), + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF + }); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.TIMESTAMPNTZ); + Assertions.assertThat(DateTimeUtil.microsToIsoTimestamp((long) value.get())) + .isEqualTo("1969-12-31T23:59:59.999999"); + } + + @Test + public void testFloat() { + Primitive<?> value = + VariantPrimitive.from( + new byte[] {primitiveHeader(14), (byte) 0xD2, 0x02, (byte) 0x96, 0x49}); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.FLOAT); + Assertions.assertThat(value.get()).isEqualTo(Float.intBitsToFloat(1234567890)); + } + + @Test + public void testNegativeFloat() { + Primitive<?> value = + VariantPrimitive.from(new byte[] {primitiveHeader(14), 0x00, 0x00, 0x00, (byte) 0x80}); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.FLOAT); + Assertions.assertThat(value.get()).isEqualTo(-0.0F); + } + + @Test + public void testBinary() { + Primitive<?> value = + VariantPrimitive.from( + new byte[] {primitiveHeader(15), 0x05, 0x00, 0x00, 0x00, 'a', 'b', 'c', 'd', 'e'}); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.BINARY); + Assertions.assertThat(value.get()) + .isEqualTo(ByteBuffer.wrap(new byte[] {'a', 'b', 'c', 'd', 'e'})); + } + + @Test + public void testString() { + Primitive<?> value = + VariantPrimitive.from( + new byte[] { + primitiveHeader(16), 0x07, 0x00, 0x00, 0x00, 'i', 'c', 'e', 'b', 'e', 'r', 'g' + }); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(value.get()).isEqualTo("iceberg"); + } + + @Test + public void testShortString() { + Primitive<?> value = + VariantShortString.from(new byte[] {0b11101, 'i', 
'c', 'e', 'b', 'e', 'r', 'g'}); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.STRING); + Assertions.assertThat(value.get()).isEqualTo("iceberg"); + } + + @Test + public void testUnsupportedType() { + Assertions.assertThatThrownBy(() -> VariantPrimitive.from(new byte[] {primitiveHeader(17)})) + .isInstanceOf(UnsupportedOperationException.class) + .hasMessage("Unknown primitive physical type: 17"); + } + + private static byte primitiveHeader(int primitiveType) { + return (byte) (primitiveType << 2); + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestVariantUtil.java b/core/src/test/java/org/apache/iceberg/TestVariantUtil.java new file mode 100644 index 000000000000..ac4bacbb1b62 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestVariantUtil.java @@ -0,0 +1,46 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. 
+ * + */ + +package org.apache.iceberg; + +import java.nio.ByteBuffer; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TestVariantUtil { + @Test + public void testReadByteUnsigned() { + ByteBuffer buffer = ByteBuffer.wrap(new byte[] {(byte) 0xFF}); + Assertions.assertThat(VariantUtil.readByte(buffer, 0)).isEqualTo(255); + } + + @Test + public void testRead2ByteUnsigned() { + ByteBuffer buffer = ByteBuffer.wrap(new byte[] {(byte) 0xFF, (byte) 0xFF}); + Assertions.assertThat(VariantUtil.readLittleEndianUnsigned(buffer, 0, 2)).isEqualTo(65535); + } + + @Test + public void testRead3ByteUnsigned() { + ByteBuffer buffer = ByteBuffer.wrap(new byte[] {(byte) 0xFF, (byte) 0xFF, (byte) 0xFF}); + Assertions.assertThat(VariantUtil.readLittleEndianUnsigned(buffer, 0, 3)).isEqualTo(16777215); + } +} diff --git a/core/src/test/java/org/apache/iceberg/VariantTestUtil.java b/core/src/test/java/org/apache/iceberg/VariantTestUtil.java new file mode 100644 index 000000000000..8f1e30d2e394 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/VariantTestUtil.java @@ -0,0 +1,229 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. 
+ * + */ + +package org.apache.iceberg; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; + +public class VariantTestUtil { + private VariantTestUtil() { + } + + private static byte primitiveHeader(int primitiveType) { + return (byte) (primitiveType << 2); + } + + private static byte metadataHeader(boolean isSorted, int offsetSize) { + return (byte) (((offsetSize - 1) << 6) | (isSorted ? 0b10000 : 0) | 0b0001); + } + + private static byte objectHeader(boolean isLarge, int fieldIdSize, int offsetSize) { + return (byte) + ((isLarge ? 0b1000000 : 0) | ((fieldIdSize - 1) << 4) | ((offsetSize - 1) << 2) | 0b10); + } + + private static byte arrayHeader(boolean isLarge, int offsetSize) { + return (byte) ((isLarge ? 0b10000 : 0) | (offsetSize - 1) << 2 | 0b11); + } + + /** A hacky absolute put for ByteBuffer */ + private static int writeBufferAbsolute(ByteBuffer buffer, int offset, ByteBuffer toCopy) { + int originalPosition = buffer.position(); + buffer.position(offset); + ByteBuffer copy = toCopy.duplicate(); + buffer.put(copy); // duplicate so toCopy is not modified + buffer.position(originalPosition); + Preconditions.checkArgument(copy.remaining() <= 0, "Not fully written"); + return toCopy.remaining(); + } + + /** Creates a serialized string primitive from the UTF-8 bytes of the given string */ + static VariantPrimitive createString(String string) { + byte[] utf8 = string.getBytes(StandardCharsets.UTF_8); + ByteBuffer buffer = ByteBuffer.allocate(5 + utf8.length).order(ByteOrder.LITTLE_ENDIAN); + buffer.put(0, primitiveHeader(16)); + buffer.putInt(1, utf8.length); + writeBufferAbsolute(buffer, 5, ByteBuffer.wrap(utf8)); + return VariantPrimitive.from(buffer, buffer.get(0)); + } + + static 
ByteBuffer createMetadata(Collection<String> fieldNames, boolean sortNames) { + if (fieldNames.isEmpty()) { + return VariantMetadata.EMPTY_V1_BUFFER; + } + + int numElements = fieldNames.size(); + Stream<String> names = sortNames ? fieldNames.stream().sorted() : fieldNames.stream(); + ByteBuffer[] nameBuffers = + names + .map(str -> ByteBuffer.wrap(str.getBytes(StandardCharsets.UTF_8))) + .toArray(ByteBuffer[]::new); + + int dataSize = 0; + for (ByteBuffer nameBuffer : nameBuffers) { + dataSize += nameBuffer.remaining(); + } + + int offsetSize = VariantUtil.sizeOf(dataSize); + int offsetListOffset = 1 /* header size */ + offsetSize /* dictionary size */; + int dataOffset = offsetListOffset + ((1 + numElements) * offsetSize); + int totalSize = dataOffset + dataSize; + + byte header = metadataHeader(sortNames, offsetSize); + ByteBuffer buffer = ByteBuffer.allocate(totalSize).order(ByteOrder.LITTLE_ENDIAN); + + buffer.put(0, header); + VariantUtil.writeLittleEndianUnsigned(buffer, numElements, 1, offsetSize); + + // write offsets and strings + int nextOffset = 0; + int index = 0; + for (ByteBuffer nameBuffer : nameBuffers) { + // write the offset and the string + VariantUtil.writeLittleEndianUnsigned( + buffer, nextOffset, offsetListOffset + (index * offsetSize), offsetSize); + int nameSize = + writeBufferAbsolute(buffer, dataOffset + nextOffset, nameBuffer); + // update the offset and index + nextOffset += nameSize; + index += 1; + } + + // write the final size of the data section + VariantUtil.writeLittleEndianUnsigned( + buffer, nextOffset, offsetListOffset + (index * offsetSize), offsetSize); + + return buffer; + } + + static ByteBuffer createObject( + ByteBuffer metadataBuffer, Map<String, Variants.Serialized> data) { + // create the metadata to look up field names + VariantMetadata metadata = VariantMetadata.from(metadataBuffer); + + int numElements = data.size(); + boolean isLarge = numElements > 0xFF; + + int dataSize = 0; + for (Map.Entry<String, 
Variants.Serialized> field : data.entrySet()) { + dataSize += field.getValue().buffer().remaining(); + } + + // field ID size is the size needed to store the largest field ID in the data + int fieldIdSize = VariantUtil.sizeOf(metadata.dictionarySize()); + int fieldIdListOffset = 1 /* header size */ + (isLarge ? 4 : 1) /* num elements size */; + + // offset size is the size needed to store the length of the data section + int offsetSize = VariantUtil.sizeOf(dataSize); + int offsetListOffset = fieldIdListOffset + (numElements * fieldIdSize); + int dataOffset = offsetListOffset + ((1 + numElements) * offsetSize); + int totalSize = dataOffset + dataSize; + + byte header = objectHeader(isLarge, fieldIdSize, offsetSize); + ByteBuffer buffer = ByteBuffer.allocate(totalSize).order(ByteOrder.LITTLE_ENDIAN); + + buffer.put(0, header); + if (isLarge) { + buffer.putInt(1, numElements); + } else { + buffer.put(1, (byte) (numElements & 0xFF)); + } + + // write field IDs, values, and offsets + int nextOffset = 0; + int index = 0; + List<String> sortedFieldNames = data.keySet().stream().sorted().collect(Collectors.toList()); + for (String fieldName : sortedFieldNames) { + int id = metadata.id(fieldName); + VariantUtil.writeLittleEndianUnsigned( + buffer, id, fieldIdListOffset + (index * fieldIdSize), fieldIdSize); + VariantUtil.writeLittleEndianUnsigned( + buffer, nextOffset, offsetListOffset + (index * offsetSize), offsetSize); + int valueSize = + writeBufferAbsolute( + buffer, dataOffset + nextOffset, data.get(fieldName).buffer()); + + // update next offset and index + nextOffset += valueSize; + index += 1; + } + + // write the final size of the data section + VariantUtil.writeLittleEndianUnsigned( + buffer, nextOffset, offsetListOffset + (index * offsetSize), offsetSize); + + return buffer; + } + + static ByteBuffer createArray(Variants.Serialized... 
values) { + int numElements = values.length; + boolean isLarge = numElements > 0xFF; + + int dataSize = 0; + for (Variants.Serialized value : values) { + // TODO: produce size for every variant without serializing + dataSize += value.buffer().remaining(); + } + + // offset size is the size needed to store the length of the data section + int offsetSize = VariantUtil.sizeOf(dataSize); + int offsetListOffset = 1 /* header size */ + (isLarge ? 4 : 1) /* num elements size */; + int dataOffset = offsetListOffset + ((1 + numElements) * offsetSize) /* offset list size */; + int totalSize = dataOffset + dataSize; + + byte header = arrayHeader(isLarge, offsetSize); + ByteBuffer buffer = ByteBuffer.allocate(totalSize).order(ByteOrder.LITTLE_ENDIAN); + + buffer.put(0, header); + if (isLarge) { + buffer.putInt(1, numElements); + } else { + buffer.put(1, (byte) (numElements & 0xFF)); + } + + // write values and offsets + int nextOffset = 0; // the first offset is always 0 + int index = 0; + for (Variants.Serialized value : values) { + // write the offset and value + VariantUtil.writeLittleEndianUnsigned(buffer, nextOffset, offsetListOffset + (index * offsetSize), offsetSize); + // in a real implementation, the buffer should be passed to serialize + ByteBuffer valueBuffer = value.buffer(); + int valueSize = writeBufferAbsolute(buffer, dataOffset + nextOffset, valueBuffer); + // update next offset and index + nextOffset += valueSize; + index += 1; + } + + // write the final size of the data section + VariantUtil.writeLittleEndianUnsigned(buffer, nextOffset, offsetListOffset + (index * offsetSize), offsetSize); + + return buffer; + } +} From 4d17ff389f634cbe9026808a13014dcb49b0ed33 Mon Sep 17 00:00:00 2001 From: Ryan Blue <blue@apache.org> Date: Wed, 27 Nov 2024 17:05:26 -0800 Subject: [PATCH 02/12] Move into variants module and clean up API interfaces. 
--- .../apache/iceberg/io/CloseableIterable.java | 17 ++ .../org/apache/iceberg/VariantObject.java | 114 ------- .../java/org/apache/iceberg/Variants.java | 207 ------------- .../org/apache/iceberg/util/SortedMerge.java | 11 + .../iceberg/variants/PrimitiveWrapper.java | 209 +++++++++++++ .../SerializedArray.java} | 27 +- .../SerializedMetadata.java} | 12 +- .../iceberg/variants/SerializedObject.java | 204 +++++++++++++ .../SerializedPrimitive.java} | 29 +- .../SerializedShortString.java} | 20 +- .../iceberg/variants/ShreddedObject.java | 214 ++++++++++++++ .../org/apache/iceberg/variants/Variant.java | 31 ++ .../apache/iceberg/variants/VariantArray.java | 38 +++ .../iceberg/variants/VariantMetadata.java | 35 +++ .../iceberg/variants/VariantObject.java | 37 +++ .../iceberg/variants/VariantPrimitive.java | 32 ++ .../iceberg/{ => variants}/VariantUtil.java | 57 +++- .../apache/iceberg/variants/VariantValue.java | 66 +++++ .../org/apache/iceberg/variants/Variants.java | 277 ++++++++++++++++++ .../variants/TestPrimitiveWrapper.java | 84 ++++++ .../TestSerializedArray.java} | 133 +++++---- .../TestSerializedMetadata.java} | 46 +-- .../TestSerializedObject.java} | 134 ++++----- .../TestSerializedPrimitives.java} | 174 +++++++---- .../iceberg/variants/TestShreddedObject.java | 259 ++++++++++++++++ .../{ => variants}/TestVariantUtil.java | 2 +- .../{ => variants}/VariantTestUtil.java | 46 ++- 27 files changed, 1902 insertions(+), 613 deletions(-) delete mode 100644 core/src/main/java/org/apache/iceberg/VariantObject.java delete mode 100644 core/src/main/java/org/apache/iceberg/Variants.java create mode 100644 core/src/main/java/org/apache/iceberg/variants/PrimitiveWrapper.java rename core/src/main/java/org/apache/iceberg/{VariantArray.java => variants/SerializedArray.java} (76%) rename core/src/main/java/org/apache/iceberg/{VariantMetadata.java => variants/SerializedMetadata.java} (91%) create mode 100644 core/src/main/java/org/apache/iceberg/variants/SerializedObject.java 
rename core/src/main/java/org/apache/iceberg/{VariantPrimitive.java => variants/SerializedPrimitive.java} (79%) rename core/src/main/java/org/apache/iceberg/{VariantShortString.java => variants/SerializedShortString.java} (75%) create mode 100644 core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java create mode 100644 core/src/main/java/org/apache/iceberg/variants/Variant.java create mode 100644 core/src/main/java/org/apache/iceberg/variants/VariantArray.java create mode 100644 core/src/main/java/org/apache/iceberg/variants/VariantMetadata.java create mode 100644 core/src/main/java/org/apache/iceberg/variants/VariantObject.java create mode 100644 core/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java rename core/src/main/java/org/apache/iceberg/{ => variants}/VariantUtil.java (66%) create mode 100644 core/src/main/java/org/apache/iceberg/variants/VariantValue.java create mode 100644 core/src/main/java/org/apache/iceberg/variants/Variants.java create mode 100644 core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java rename core/src/test/java/org/apache/iceberg/{TestVariantArray.java => variants/TestSerializedArray.java} (59%) rename core/src/test/java/org/apache/iceberg/{TestVariantMetadata.java => variants/TestSerializedMetadata.java} (83%) rename core/src/test/java/org/apache/iceberg/{TestVariantObject.java => variants/TestSerializedObject.java} (60%) rename core/src/test/java/org/apache/iceberg/{TestVariantPrimitives.java => variants/TestSerializedPrimitives.java} (71%) create mode 100644 core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java rename core/src/test/java/org/apache/iceberg/{ => variants}/TestVariantUtil.java (97%) rename core/src/test/java/org/apache/iceberg/{ => variants}/VariantTestUtil.java (83%) diff --git a/api/src/main/java/org/apache/iceberg/io/CloseableIterable.java b/api/src/main/java/org/apache/iceberg/io/CloseableIterable.java index 06323612a178..fde73e8e9f5b 100644 --- 
a/api/src/main/java/org/apache/iceberg/io/CloseableIterable.java +++ b/api/src/main/java/org/apache/iceberg/io/CloseableIterable.java @@ -32,6 +32,23 @@ public interface CloseableIterable<T> extends Iterable<T>, Closeable { + /** + * Adapts an Iterable to CloseableIterable using a no-op close if it is not Closeable. + * + * @param iterable an Iterable + * @return a CloseableIterable that closes Iterable if it is Closeable + */ + static <E> CloseableIterable<E> of(Iterable<E> iterable) { + if (iterable instanceof CloseableIterable) { + return (CloseableIterable<E>) iterable; + } else if (iterable instanceof Closeable) { + Closeable asCloseable = (Closeable) iterable; + return combine(iterable, asCloseable::close); + } else { + return withNoopClose(iterable); + } + } + /** * Returns a closeable iterator over elements of type {@code T}. * diff --git a/core/src/main/java/org/apache/iceberg/VariantObject.java b/core/src/main/java/org/apache/iceberg/VariantObject.java deleted file mode 100644 index 1961371a3919..000000000000 --- a/core/src/main/java/org/apache/iceberg/VariantObject.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. 
- * - */ - -package org.apache.iceberg; - -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; -import org.apache.iceberg.relocated.com.google.common.base.Preconditions; - -class VariantObject implements Variants.Object, Variants.Serialized { - private static final int OFFSET_SIZE_MASK = 0b1100; - private static final int OFFSET_SIZE_SHIFT = 2; - private static final int FIELD_ID_SIZE_MASK = 0b110000; - private static final int FIELD_ID_SIZE_SHIFT = 4; - private static final int IS_LARGE = 0b1000000; - - static VariantObject from(VariantMetadata metadata, byte[] bytes) { - return from(metadata, ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN), bytes[0]); - } - - static VariantObject from(VariantMetadata metadata, ByteBuffer value, int header) { - Preconditions.checkArgument( - value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); - int basicType = header & Variants.BASIC_TYPE_MASK; - Preconditions.checkArgument( - basicType == Variants.BASIC_TYPE_OBJECT, "Invalid object, basic type != 2: " + basicType); - return new VariantObject(metadata, value, header); - } - - private final VariantMetadata metadata; - private final ByteBuffer value; - private final int fieldIdSize; - private final int fieldIdListOffset; - private final int[] fieldIds; - private final int offsetSize; - private final int offsetListOffset; - private final int dataOffset; - private final Variants.Value[] values; - - private VariantObject(VariantMetadata metadata, ByteBuffer value, int header) { - this.metadata = metadata; - this.value = value; - this.offsetSize = 1 + ((header & OFFSET_SIZE_MASK) >> OFFSET_SIZE_SHIFT); - this.fieldIdSize = 1 + ((header & FIELD_ID_SIZE_MASK) >> FIELD_ID_SIZE_SHIFT); - int numElementsSize = ((header & IS_LARGE) == IS_LARGE) ? 
4 : 1; - int numElements = - VariantUtil.readLittleEndianUnsigned(value, Variants.HEADER_SIZE, numElementsSize); - this.fieldIdListOffset = Variants.HEADER_SIZE + numElementsSize; - this.fieldIds = new int[numElements]; - this.offsetListOffset = fieldIdListOffset + (numElements * fieldIdSize); - this.dataOffset = offsetListOffset + ((1 + numElements) * offsetSize); - this.values = new Variants.Value[numElements]; - } - - @VisibleForTesting - int numElements() { - return fieldIds.length; - } - - // keys are ordered lexicographically by the name - @Override - public Variants.Value get(String name) { - int index = - VariantUtil.find( - fieldIds.length, - name, - pos -> { - int id = - VariantUtil.readLittleEndianUnsigned( - value, fieldIdListOffset + (pos * fieldIdSize), fieldIdSize); - return metadata.get(id); - }); - - if (index < 0) { - return null; - } - - if (null == values[index]) { - int offset = - VariantUtil.readLittleEndianUnsigned( - value, offsetListOffset + (index * offsetSize), offsetSize); - int next = - VariantUtil.readLittleEndianUnsigned( - value, offsetListOffset + ((1 + index) * offsetSize), offsetSize); - values[index] = Variants.from(metadata, VariantUtil.slice(value, dataOffset + offset, next - offset)); - } - - return values[index]; - } - - @Override - public ByteBuffer buffer() { - return value; - } -} diff --git a/core/src/main/java/org/apache/iceberg/Variants.java b/core/src/main/java/org/apache/iceberg/Variants.java deleted file mode 100644 index 872d7138d00f..000000000000 --- a/core/src/main/java/org/apache/iceberg/Variants.java +++ /dev/null @@ -1,207 +0,0 @@ -/* - * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. 
The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. - * - */ - -package org.apache.iceberg; - -import java.math.BigDecimal; -import java.nio.ByteBuffer; -import java.util.List; -import java.util.Map; - -public class Variants { - enum LogicalType { - NULL, - BOOLEAN, - EXACT_NUMERIC, - FLOAT, - DOUBLE, - DATE, - TIMESTAMPTZ, - TIMESTAMPNTZ, - BINARY, - STRING, - ARRAY, - OBJECT - } - - public enum PhysicalType { - NULL(LogicalType.NULL, Void.class), - BOOLEAN_TRUE(LogicalType.BOOLEAN, Boolean.class), - BOOLEAN_FALSE(LogicalType.BOOLEAN, Boolean.class), - INT8(LogicalType.EXACT_NUMERIC, Integer.class), - INT16(LogicalType.EXACT_NUMERIC, Integer.class), - INT32(LogicalType.EXACT_NUMERIC, Integer.class), - INT64(LogicalType.EXACT_NUMERIC, Long.class), - DOUBLE(LogicalType.DOUBLE, Double.class), - DECIMAL4(LogicalType.EXACT_NUMERIC, BigDecimal.class), - DECIMAL8(LogicalType.EXACT_NUMERIC, BigDecimal.class), - DECIMAL16(LogicalType.EXACT_NUMERIC, BigDecimal.class), - DATE(LogicalType.DATE, Integer.class), - TIMESTAMPTZ(LogicalType.TIMESTAMPTZ, Long.class), - TIMESTAMPNTZ(LogicalType.TIMESTAMPNTZ, Long.class), - FLOAT(LogicalType.FLOAT, Float.class), - BINARY(LogicalType.BINARY, ByteBuffer.class), - STRING(LogicalType.STRING, String.class), - ARRAY(LogicalType.ARRAY, List.class), - OBJECT(LogicalType.OBJECT, Map.class); - - private final LogicalType logicalType; - private final Class<?> javaClass; - - 
PhysicalType(LogicalType logicalType, Class<?> javaClass) { - this.logicalType = logicalType; - this.javaClass = javaClass; - } - - LogicalType toLogicalType() { - return logicalType; - } - - public Class<?> javaClass() { - return javaClass; - } - - public static PhysicalType from(int primitiveType) { - switch (primitiveType) { - case Primitives.TYPE_NULL: - return NULL; - case Primitives.TYPE_TRUE: - return BOOLEAN_TRUE; - case Primitives.TYPE_FALSE: - return BOOLEAN_FALSE; - case Primitives.TYPE_INT8: - return INT8; - case Primitives.TYPE_INT16: - return INT16; - case Primitives.TYPE_INT32: - return INT32; - case Primitives.TYPE_INT64: - return INT64; - case Primitives.TYPE_DATE: - return DATE; - case Primitives.TYPE_TIMESTAMPTZ: - return TIMESTAMPTZ; - case Primitives.TYPE_TIMESTAMPNTZ: - return TIMESTAMPNTZ; - case Primitives.TYPE_FLOAT: - return FLOAT; - case Primitives.TYPE_DOUBLE: - return DOUBLE; - case Primitives.TYPE_DECIMAL4: - return DECIMAL4; - case Primitives.TYPE_DECIMAL8: - return DECIMAL8; - case Primitives.TYPE_DECIMAL16: - return DECIMAL16; - case Primitives.TYPE_BINARY: - return BINARY; - case Primitives.TYPE_STRING: - return STRING; - } - - throw new UnsupportedOperationException("Unknown primitive physical type: " + primitiveType); - } - } - - public interface Serialized { - ByteBuffer buffer(); - } - - public interface Metadata extends Serialized { - int id(String name); - - String get(int id); - } - - public interface Value { - PhysicalType type(); - } - - public interface Primitive<T> extends Value { - T get(); - } - - public interface Object extends Value { - Value get(String field); - - default PhysicalType type() { - return PhysicalType.OBJECT; - } - } - - public interface Array extends Value { - Value get(int index); - - default PhysicalType type() { - return PhysicalType.ARRAY; - } - } - - static class Primitives { - private static final int TYPE_NULL = 0; - private static final int TYPE_TRUE = 1; - private static final int TYPE_FALSE 
= 2; - private static final int TYPE_INT8 = 3; - private static final int TYPE_INT16 = 4; - private static final int TYPE_INT32 = 5; - private static final int TYPE_INT64 = 6; - private static final int TYPE_DOUBLE = 7; - private static final int TYPE_DECIMAL4 = 8; - private static final int TYPE_DECIMAL8 = 9; - private static final int TYPE_DECIMAL16 = 10; - private static final int TYPE_DATE = 11; - private static final int TYPE_TIMESTAMPTZ = 12; // equivalent to timestamptz - private static final int TYPE_TIMESTAMPNTZ = 13; // equivalent to timestamp - private static final int TYPE_FLOAT = 14; - private static final int TYPE_BINARY = 15; - private static final int TYPE_STRING = 16; - - private Primitives() {} - } - - static final int HEADER_SIZE = 1; - static final int BASIC_TYPE_MASK = 0b11; - static final int BASIC_TYPE_PRIMITIVE = 0; - static final int BASIC_TYPE_SHORT_STRING = 1; - static final int BASIC_TYPE_OBJECT = 2; - static final int BASIC_TYPE_ARRAY = 3; - - public static Value from(ByteBuffer metadata, ByteBuffer value) { - return from(VariantMetadata.from(metadata), value); - } - - static Value from(VariantMetadata metadata, ByteBuffer value) { - int header = VariantUtil.readByte(value, 0); - int basicType = header & BASIC_TYPE_MASK; - switch (basicType) { - case BASIC_TYPE_PRIMITIVE: - return VariantPrimitive.from(value, header); - case BASIC_TYPE_SHORT_STRING: - return VariantShortString.from(value, header); - case BASIC_TYPE_OBJECT: - return VariantObject.from(metadata, value, header); - case BASIC_TYPE_ARRAY: - return VariantArray.from(metadata, value, header); - default: - throw new UnsupportedOperationException("Unsupported basic type: %s" + basicType); - } - } -} diff --git a/core/src/main/java/org/apache/iceberg/util/SortedMerge.java b/core/src/main/java/org/apache/iceberg/util/SortedMerge.java index d93116852eb9..d5fecdabafa0 100644 --- a/core/src/main/java/org/apache/iceberg/util/SortedMerge.java +++ 
b/core/src/main/java/org/apache/iceberg/util/SortedMerge.java @@ -21,6 +21,7 @@ import java.io.Closeable; import java.io.IOException; import java.io.UncheckedIOException; +import java.util.Arrays; import java.util.Comparator; import java.util.Iterator; import java.util.List; @@ -30,6 +31,7 @@ import org.apache.iceberg.io.CloseableGroup; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.CloseableIterator; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; /** * An Iterable that merges the items from other Iterables in order. @@ -39,6 +41,15 @@ * @param <T> the type of objects produced by this Iterable */ public class SortedMerge<T> extends CloseableGroup implements CloseableIterable<T> { + public static <C extends Comparable<C>> CloseableIterable<C> of(Iterable<C> left, Iterable<C> right) { + return of(Arrays.asList(left, right)); + } + + public static <C extends Comparable<C>> CloseableIterable<C> of(List<Iterable<C>> iterables) { + List<CloseableIterable<C>> closeableIterables = Lists.transform(iterables, CloseableIterable::of); + return new SortedMerge<>(Comparator.naturalOrder(), closeableIterables); + } + private final Comparator<T> comparator; private final List<CloseableIterable<T>> iterables; diff --git a/core/src/main/java/org/apache/iceberg/variants/PrimitiveWrapper.java b/core/src/main/java/org/apache/iceberg/variants/PrimitiveWrapper.java new file mode 100644 index 000000000000..6cc3414789aa --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/variants/PrimitiveWrapper.java @@ -0,0 +1,209 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. 
You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg.variants; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.variants.Variants.Primitives; + +class PrimitiveWrapper<T> implements VariantPrimitive<T> { + private static final byte NULL_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_NULL); + private static final byte TRUE_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_TRUE); + private static final byte FALSE_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_FALSE); + private static final byte INT8_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_INT8); + private static final byte INT16_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_INT16); + private static final byte INT32_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_INT32); + private static final byte INT64_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_INT64); + private static final byte FLOAT_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_FLOAT); + private static final byte DOUBLE_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_DOUBLE); + private static final byte DATE_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_DATE); + private static final byte TIMESTAMPTZ_HEADER = + VariantUtil.primitiveHeader(Primitives.TYPE_TIMESTAMPTZ); + private static final byte TIMESTAMPNTZ_HEADER = + VariantUtil.primitiveHeader(Primitives.TYPE_TIMESTAMPNTZ); + private static 
final byte DECIMAL4_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_DECIMAL4); + private static final byte DECIMAL8_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_DECIMAL8); + private static final byte DECIMAL16_HEADER = + VariantUtil.primitiveHeader(Primitives.TYPE_DECIMAL16); + private static final byte BINARY_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_BINARY); + private static final byte STRING_HEADER = VariantUtil.primitiveHeader(Primitives.TYPE_STRING); + + private final Variants.PhysicalType type; + private final T value; + private ByteBuffer buffer = null; + + PrimitiveWrapper(Variants.PhysicalType type, T value) { + this.type = type; + this.value = value; + } + + @Override + public Variants.PhysicalType type() { + return type; + } + + @Override + public T get() { + return value; + } + + @Override + public int sizeInBytes() { + switch (type()) { + case NULL: + case BOOLEAN_TRUE: + case BOOLEAN_FALSE: + return 1; // 1 header only + case INT8: + return 2; // 1 header + 1 value + case INT16: + return 3; // 1 header + 2 value + case INT32: + case DATE: + case FLOAT: + return 5; // 1 header + 4 value + case INT64: + case DOUBLE: + case TIMESTAMPTZ: + case TIMESTAMPNTZ: + return 9; // 1 header + 8 value + case DECIMAL4: + return 6; // 1 header + 1 scale + 4 unscaled value + case DECIMAL8: + return 10; // 1 header + 1 scale + 8 unscaled value + case DECIMAL16: + return 18; // 1 header + 1 scale + 16 unscaled value + case BINARY: + return 5 + ((ByteBuffer) value).remaining(); // 1 header + 4 length + value length + case STRING: + if (null == buffer) { + this.buffer = ByteBuffer.wrap(((String) value).getBytes(StandardCharsets.UTF_8)); + } + + return 5 + buffer.remaining(); // 1 header + 4 length + value length + } + + throw new UnsupportedOperationException("Unsupported primitive type: " + type()); + } + + @Override + public int writeTo(ByteBuffer outBuffer, int offset) { + Preconditions.checkArgument( + outBuffer.order() == 
ByteOrder.LITTLE_ENDIAN, "Invalid byte order: big endian"); + switch (type()) { + case NULL: + outBuffer.put(offset, NULL_HEADER); + return 1; + case BOOLEAN_TRUE: + outBuffer.put(offset, TRUE_HEADER); + return 1; + case BOOLEAN_FALSE: + outBuffer.put(offset, FALSE_HEADER); + return 1; + case INT8: + outBuffer.put(offset, INT8_HEADER); + outBuffer.put(offset + 1, (Byte) value); + return 2; + case INT16: + outBuffer.put(offset, INT16_HEADER); + outBuffer.putShort(offset + 1, (Short) value); + return 3; + case INT32: + outBuffer.put(offset, INT32_HEADER); + outBuffer.putInt(offset + 1, (Integer) value); + return 5; + case INT64: + outBuffer.put(offset, INT64_HEADER); + outBuffer.putLong(offset + 1, (Long) value); + return 9; + case FLOAT: + outBuffer.put(offset, FLOAT_HEADER); + outBuffer.putFloat(offset + 1, (Float) value); + return 5; + case DOUBLE: + outBuffer.put(offset, DOUBLE_HEADER); + outBuffer.putDouble(offset + 1, (Double) value); + return 9; + case DATE: + outBuffer.put(offset, DATE_HEADER); + outBuffer.putInt(offset + 1, (Integer) value); + return 5; + case TIMESTAMPTZ: + outBuffer.put(offset, TIMESTAMPTZ_HEADER); + outBuffer.putLong(offset + 1, (Long) value); + return 9; + case TIMESTAMPNTZ: + outBuffer.put(offset, TIMESTAMPNTZ_HEADER); + outBuffer.putLong(offset + 1, (Long) value); + return 9; + case DECIMAL4: + BigDecimal decimal4 = (BigDecimal) value; + outBuffer.put(offset, DECIMAL4_HEADER); + outBuffer.put(offset + 1, (byte) decimal4.scale()); + outBuffer.putInt(offset + 2, decimal4.unscaledValue().intValueExact()); + return 6; + case DECIMAL8: + BigDecimal decimal8 = (BigDecimal) value; + outBuffer.put(offset, DECIMAL8_HEADER); + outBuffer.put(offset + 1, (byte) decimal8.scale()); + outBuffer.putLong(offset + 2, decimal8.unscaledValue().longValueExact()); + return 10; + case DECIMAL16: + BigDecimal decimal16 = (BigDecimal) value; + byte padding = (byte) (decimal16.signum() < 0 ? 
0xFF : 0x00); + byte[] bytes = decimal16.unscaledValue().toByteArray(); + outBuffer.put(offset, DECIMAL16_HEADER); + outBuffer.put(offset + 1, (byte) decimal16.scale()); + for (int i = 0; i < 16; i += 1) { + if (i < bytes.length) { + // copy the big endian value and convert to little endian + outBuffer.put(offset + 2 + i, bytes[bytes.length - i - 1]); + } else { + // pad with 0x00 or 0xFF depending on the sign + outBuffer.put(offset + 2 + i, padding); + } + } + return 18; + case BINARY: + ByteBuffer binary = (ByteBuffer) value; + outBuffer.put(offset, BINARY_HEADER); + outBuffer.putInt(offset + 1, binary.remaining()); + VariantUtil.writeBufferAbsolute(outBuffer, offset + 5, binary); + return 5 + binary.remaining(); + case STRING: + // TODO: use short string when possible + if (null == buffer) { + this.buffer = ByteBuffer.wrap(((String) value).getBytes(StandardCharsets.UTF_8)); + } + + outBuffer.put(offset, STRING_HEADER); + outBuffer.putInt(offset + 1, buffer.remaining()); + VariantUtil.writeBufferAbsolute(outBuffer, offset + 5, buffer); + return 5 + buffer.remaining(); + } + + throw new UnsupportedOperationException("Unsupported primitive type: " + type()); + } +} diff --git a/core/src/main/java/org/apache/iceberg/VariantArray.java b/core/src/main/java/org/apache/iceberg/variants/SerializedArray.java similarity index 76% rename from core/src/main/java/org/apache/iceberg/VariantArray.java rename to core/src/main/java/org/apache/iceberg/variants/SerializedArray.java index 185b35a5ed44..4d43e576e535 100644 --- a/core/src/main/java/org/apache/iceberg/VariantArray.java +++ b/core/src/main/java/org/apache/iceberg/variants/SerializedArray.java @@ -19,40 +19,43 @@ * */ -package org.apache.iceberg; +package org.apache.iceberg.variants; + +import static org.apache.iceberg.variants.VariantUtil.basicType; import java.nio.ByteBuffer; import java.nio.ByteOrder; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import 
org.apache.iceberg.relocated.com.google.common.base.Preconditions; -class VariantArray implements Variants.Array, Variants.Serialized { +class SerializedArray extends Variants.SerializedValue implements VariantArray { private static final int OFFSET_SIZE_MASK = 0b1100; private static final int OFFSET_SIZE_SHIFT = 2; private static final int IS_LARGE = 0b10000; @VisibleForTesting - static VariantArray from(VariantMetadata metadata, byte[] bytes) { + static SerializedArray from(SerializedMetadata metadata, byte[] bytes) { return from(metadata, ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN), bytes[0]); } - static VariantArray from(VariantMetadata metadata, ByteBuffer value, int header) { + static SerializedArray from(SerializedMetadata metadata, ByteBuffer value, int header) { Preconditions.checkArgument( value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); - int basicType = header & Variants.BASIC_TYPE_MASK; + Variants.BasicType basicType = basicType(header); Preconditions.checkArgument( - basicType == Variants.BASIC_TYPE_ARRAY, "Invalid array, basic type != 3: " + basicType); - return new VariantArray(metadata, value, header); + basicType == Variants.BasicType.ARRAY, + "Invalid array, basic type: " + basicType); + return new SerializedArray(metadata, value, header); } - private final VariantMetadata metadata; + private final SerializedMetadata metadata; private final ByteBuffer value; private final int offsetSize; private final int offsetListOffset; private final int dataOffset; - private final Variants.Value[] array; + private final VariantValue[] array; - private VariantArray(VariantMetadata metadata, ByteBuffer value, int header) { + private SerializedArray(SerializedMetadata metadata, ByteBuffer value, int header) { this.metadata = metadata; this.value = value; this.offsetSize = 1 + ((header & OFFSET_SIZE_MASK) >> OFFSET_SIZE_SHIFT); @@ -61,7 +64,7 @@ private VariantArray(VariantMetadata metadata, ByteBuffer value, int header) 
{ VariantUtil.readLittleEndianUnsigned(value, Variants.HEADER_SIZE, numElementsSize); this.offsetListOffset = Variants.HEADER_SIZE + numElementsSize; this.dataOffset = offsetListOffset + ((1 + numElements) * offsetSize); - this.array = new Variants.Value[numElements]; + this.array = new VariantValue[numElements]; } @VisibleForTesting @@ -70,7 +73,7 @@ int numElements() { } @Override - public Variants.Value get(int index) { + public VariantValue get(int index) { if (null == array[index]) { int offset = VariantUtil.readLittleEndianUnsigned( diff --git a/core/src/main/java/org/apache/iceberg/VariantMetadata.java b/core/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java similarity index 91% rename from core/src/main/java/org/apache/iceberg/VariantMetadata.java rename to core/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java index 421a3e66fe48..398736572022 100644 --- a/core/src/main/java/org/apache/iceberg/VariantMetadata.java +++ b/core/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java @@ -19,14 +19,14 @@ * */ -package org.apache.iceberg; +package org.apache.iceberg.variants; import java.nio.ByteBuffer; import java.nio.ByteOrder; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -class VariantMetadata implements Variants.Metadata, Variants.Serialized { +class SerializedMetadata implements VariantMetadata, Variants.Serialized { private static final int SUPPORTED_VERSION = 1; private static final int VERSION_MASK = 0b1111; private static final int SORTED_STRINGS = 0b10000; @@ -37,17 +37,17 @@ class VariantMetadata implements Variants.Metadata, Variants.Serialized { static final ByteBuffer EMPTY_V1_BUFFER = ByteBuffer.wrap(new byte[] {0x01, 0x00}).order(ByteOrder.LITTLE_ENDIAN); - static VariantMetadata from(byte[] bytes) { + static SerializedMetadata from(byte[] bytes) { return 
from(ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN)); } - static VariantMetadata from(ByteBuffer metadata) { + static SerializedMetadata from(ByteBuffer metadata) { Preconditions.checkArgument( metadata.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); int header = VariantUtil.readByte(metadata, 0); int version = header & VERSION_MASK; Preconditions.checkArgument(SUPPORTED_VERSION == version, "Unsupported version: %s", version); - return new VariantMetadata(metadata, header); + return new SerializedMetadata(metadata, header); } private final ByteBuffer metadata; @@ -57,7 +57,7 @@ static VariantMetadata from(ByteBuffer metadata) { private final int dataOffset; private final String[] dict; - private VariantMetadata(ByteBuffer metadata, int header) { + private SerializedMetadata(ByteBuffer metadata, int header) { this.metadata = metadata; this.isSorted = (header & SORTED_STRINGS) == SORTED_STRINGS; this.offsetSize = 1 + ((header & OFFSET_SIZE_MASK) >> OFFSET_SIZE_SHIFT); diff --git a/core/src/main/java/org/apache/iceberg/variants/SerializedObject.java b/core/src/main/java/org/apache/iceberg/variants/SerializedObject.java new file mode 100644 index 000000000000..7f2e33adee31 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/variants/SerializedObject.java @@ -0,0 +1,204 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. 
You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg.variants; + +import static org.apache.iceberg.variants.VariantUtil.basicType; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Iterator; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.util.Pair; + +class SerializedObject extends Variants.SerializedValue implements VariantObject { + private static final int OFFSET_SIZE_MASK = 0b1100; + private static final int OFFSET_SIZE_SHIFT = 2; + private static final int FIELD_ID_SIZE_MASK = 0b110000; + private static final int FIELD_ID_SIZE_SHIFT = 4; + private static final int IS_LARGE = 0b1000000; + + static SerializedObject from(SerializedMetadata metadata, byte[] bytes) { + return from(metadata, ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN), bytes[0]); + } + + static SerializedObject from(SerializedMetadata metadata, ByteBuffer value, int header) { + Preconditions.checkArgument( + value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); + Variants.BasicType basicType = basicType(header); + Preconditions.checkArgument( + basicType == Variants.BasicType.OBJECT, + "Invalid object, basic type: " + basicType); + return new SerializedObject(metadata, value, header); + } + + private final SerializedMetadata metadata; + private final ByteBuffer value; + private final int fieldIdSize; + private final int fieldIdListOffset; + private final Integer[] fieldIds; + 
private final int offsetSize; + private final int offsetListOffset; + private final int dataOffset; + private final VariantValue[] values; + + private SerializedObject(SerializedMetadata metadata, ByteBuffer value, int header) { + this.metadata = metadata; + this.value = value; + this.offsetSize = 1 + ((header & OFFSET_SIZE_MASK) >> OFFSET_SIZE_SHIFT); + this.fieldIdSize = 1 + ((header & FIELD_ID_SIZE_MASK) >> FIELD_ID_SIZE_SHIFT); + int numElementsSize = ((header & IS_LARGE) == IS_LARGE) ? 4 : 1; + int numElements = + VariantUtil.readLittleEndianUnsigned(value, Variants.HEADER_SIZE, numElementsSize); + this.fieldIdListOffset = Variants.HEADER_SIZE + numElementsSize; + this.fieldIds = new Integer[numElements]; + this.offsetListOffset = fieldIdListOffset + (numElements * fieldIdSize); + this.dataOffset = offsetListOffset + ((1 + numElements) * offsetSize); + this.values = new VariantValue[numElements]; + } + + @VisibleForTesting + int numElements() { + return fieldIds.length; + } + + SerializedMetadata metadata() { + return metadata; + } + + Iterable<Pair<String, Integer>> fields() { + return () -> + new Iterator<>() { + private int index = 0; + + @Override + public boolean hasNext() { + return index < fieldIds.length; + } + + @Override + public Pair<String, Integer> next() { + Pair<String, Integer> next = Pair.of(metadata.get(id(index)), index); + index += 1; + return next; + } + }; + } + + public Iterable<String> fieldNames() { + return () -> + new Iterator<>() { + private int index = 0; + + @Override + public boolean hasNext() { + return index < fieldIds.length; + } + + @Override + public String next() { + int id = id(index); + index += 1; + return metadata.get(id); + } + }; + } + + private int id(int index) { + if (null == fieldIds[index]) { + fieldIds[index] = + VariantUtil.readLittleEndianUnsigned( + value, fieldIdListOffset + (index * fieldIdSize), fieldIdSize); + } + return fieldIds[index]; + } + + @Override + public VariantValue get(String name) { + // keys 
are ordered lexicographically by the name + int index = VariantUtil.find(fieldIds.length, name, pos -> metadata.get(id(pos))); + + if (index < 0) { + return null; + } + + if (null == values[index]) { + int offset = + VariantUtil.readLittleEndianUnsigned( + value, offsetListOffset + (index * offsetSize), offsetSize); + int next = + VariantUtil.readLittleEndianUnsigned( + value, offsetListOffset + ((1 + index) * offsetSize), offsetSize); + values[index] = + Variants.from(metadata, VariantUtil.slice(value, dataOffset + offset, next - offset)); + } + + return values[index]; + } + + /** + * Retrieve a field value as a ByteBuffer. + * + * @param name field name + * @return the field value as a ByteBuffer + */ + ByteBuffer sliceValue(String name) { + int index = VariantUtil.find(fieldIds.length, name, pos -> metadata.get(id(pos))); + + if (index < 0) { + return null; + } + + return sliceValue(index); + } + + /** + * Retrieve a field value as a ByteBuffer. + * + * @param index field index within the object + * @return the field value as a ByteBuffer + */ + ByteBuffer sliceValue(int index) { + if (values[index] != null) { + return ((Variants.Serialized) values[index]).buffer(); + } + + int offset = + VariantUtil.readLittleEndianUnsigned( + value, offsetListOffset + (index * offsetSize), offsetSize); + int next = + VariantUtil.readLittleEndianUnsigned( + value, offsetListOffset + ((1 + index) * offsetSize), offsetSize); + + return VariantUtil.slice(value, dataOffset + offset, next - offset); + } + + @Override + public ByteBuffer buffer() { + return value; + } + + @Override + public int sizeInBytes() { + return value.remaining(); + } +} diff --git a/core/src/main/java/org/apache/iceberg/VariantPrimitive.java b/core/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java similarity index 79% rename from core/src/main/java/org/apache/iceberg/VariantPrimitive.java rename to core/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java index 
bd469bb2865a..381781321f68 100644 --- a/core/src/main/java/org/apache/iceberg/VariantPrimitive.java +++ b/core/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java @@ -19,7 +19,9 @@ * */ -package org.apache.iceberg; +package org.apache.iceberg.variants; + +import static org.apache.iceberg.variants.VariantUtil.basicType; import java.math.BigDecimal; import java.math.BigInteger; @@ -27,29 +29,29 @@ import java.nio.ByteOrder; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -class VariantPrimitive implements Variants.Primitive<Object>, Variants.Serialized { +class SerializedPrimitive extends Variants.SerializedValue implements VariantPrimitive<Object> { private static final int PRIMITIVE_TYPE_SHIFT = 2; private static final int PRIMITIVE_OFFSET = Variants.HEADER_SIZE; - static VariantPrimitive from(byte[] bytes) { + static SerializedPrimitive from(byte[] bytes) { return from(ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN), bytes[0]); } - static VariantPrimitive from(ByteBuffer value, int header) { + static SerializedPrimitive from(ByteBuffer value, int header) { Preconditions.checkArgument( value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); - int basicType = header & Variants.BASIC_TYPE_MASK; + Variants.BasicType basicType = basicType(header); Preconditions.checkArgument( - basicType == Variants.BASIC_TYPE_PRIMITIVE, - "Invalid primitive, basic type != 0: " + basicType); - return new VariantPrimitive(value, header); + basicType == Variants.BasicType.PRIMITIVE, + "Invalid primitive, basic type != PRIMITIVE: " + basicType); + return new SerializedPrimitive(value, header); } private final ByteBuffer value; private final Variants.PhysicalType type; private Object primitive = null; - private VariantPrimitive(ByteBuffer value, int header) { + private SerializedPrimitive(ByteBuffer value, int header) { this.value = value; this.type = Variants.PhysicalType.from(header >> PRIMITIVE_TYPE_SHIFT); } @@ 
-90,7 +92,14 @@ private Object read() { return new BigDecimal(BigInteger.valueOf(unscaled), scale); } case DECIMAL16: - throw new UnsupportedOperationException("unsupported"); + { + int scale = VariantUtil.readByte(value, PRIMITIVE_OFFSET); + byte[] unscaled = new byte[16]; + for (int i = 0; i < 16; i += 1) { + unscaled[i] = (byte) VariantUtil.readByte(value, PRIMITIVE_OFFSET + 16 - i); + } + return new BigDecimal(new BigInteger(unscaled), scale); + } case BINARY: { int size = VariantUtil.readLittleEndianInt32(value, PRIMITIVE_OFFSET); diff --git a/core/src/main/java/org/apache/iceberg/VariantShortString.java b/core/src/main/java/org/apache/iceberg/variants/SerializedShortString.java similarity index 75% rename from core/src/main/java/org/apache/iceberg/VariantShortString.java rename to core/src/main/java/org/apache/iceberg/variants/SerializedShortString.java index 89f264b983cb..d092495d27e5 100644 --- a/core/src/main/java/org/apache/iceberg/VariantShortString.java +++ b/core/src/main/java/org/apache/iceberg/variants/SerializedShortString.java @@ -19,35 +19,37 @@ * */ -package org.apache.iceberg; +package org.apache.iceberg.variants; + +import static org.apache.iceberg.variants.VariantUtil.basicType; import java.nio.ByteBuffer; import java.nio.ByteOrder; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; -class VariantShortString implements Variants.Primitive<String>, Variants.Serialized { +class SerializedShortString extends Variants.SerializedValue implements VariantPrimitive<String> { private static final int LENGTH_MASK = 0b11111100; private static final int LENGTH_SHIFT = 2; - static VariantShortString from(byte[] bytes) { + static SerializedShortString from(byte[] bytes) { return from(ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN), bytes[0]); } - static VariantShortString from(ByteBuffer value, int header) { + static SerializedShortString from(ByteBuffer value, int header) { Preconditions.checkArgument( value.order() == 
ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); - int basicType = header & Variants.BASIC_TYPE_MASK; + Variants.BasicType basicType = basicType(header); Preconditions.checkArgument( - basicType == Variants.BASIC_TYPE_SHORT_STRING, - "Invalid short string, basic type != 1: " + basicType); - return new VariantShortString(value, header); + basicType == Variants.BasicType.SHORT_STRING, + "Invalid short string, basic type: " + basicType); + return new SerializedShortString(value, header); } private final ByteBuffer value; private final int length; private String string = null; - private VariantShortString(ByteBuffer value, int header) { + private SerializedShortString(ByteBuffer value, int header) { this.value = value; this.length = ((header & LENGTH_MASK) >> LENGTH_SHIFT); } diff --git a/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java b/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java new file mode 100644 index 000000000000..ba934d349e64 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java @@ -0,0 +1,214 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. 
+ * + */ + +package org.apache.iceberg.variants; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Map; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.util.Pair; +import org.apache.iceberg.util.SortedMerge; + +/** + * A variant Object that handles full or partial shredding. + * + * <p>Metadata stored for an object must be the same regardless of whether the object is shredded. + * This class assumes that the metadata from the unshredded object can be used for the shredded + * fields. This also does not allow updating or replacing the metadata for the unshredded object, + * which could require recursively rewriting field IDs. + */ +class ShreddedObject implements VariantObject { + private final SerializedMetadata metadata; + private final SerializedObject unshredded; + private final Map<String, VariantValue> shreddedFields = Maps.newHashMap(); + private SerializationState serializationState = null; + + ShreddedObject(SerializedMetadata metadata) { + this.metadata = metadata; + this.unshredded = null; + } + + ShreddedObject(SerializedObject unshredded) { + this.metadata = unshredded.metadata(); + this.unshredded = unshredded; + } + + public void put(String field, VariantValue value) { + Preconditions.checkArgument( + metadata.id(field) >= 0, "Cannot find field name in metadata: %s", field); + + // allow setting fields that are contained in unshredded. this avoids read-time failures and + // simplifies replacing field values. 
+ shreddedFields.put(field, value); + this.serializationState = null; + } + + @Override + public VariantValue get(String field) { + // the shredded value takes precedence if there is a conflict + VariantValue value = shreddedFields.get(field); + if (value != null) { + return value; + } + + if (unshredded != null) { + return unshredded.get(field); + } + + return null; + } + + @Override + public int sizeInBytes() { + if (null == serializationState) { + this.serializationState = new SerializationState(metadata, unshredded, shreddedFields); + } + + return serializationState.size(); + } + + @Override + public int writeTo(ByteBuffer buffer, int offset) { + Preconditions.checkArgument( + buffer.order() == ByteOrder.LITTLE_ENDIAN, "Invalid byte order: big endian"); + + if (null == serializationState) { + this.serializationState = new SerializationState(metadata, unshredded, shreddedFields); + } + + return serializationState.writeTo(buffer, offset); + } + + /** Common state for {@link #size()} and {@link #writeTo(ByteBuffer, int)} */ + private static class SerializationState { + private final SerializedMetadata metadata; + private final Map<String, ByteBuffer> unshreddedFields; + private final Map<String, VariantValue> shreddedFields; + private final int dataSize; + private final int numElements; + private final boolean isLarge; + private final int fieldIdSize; + private final int offsetSize; + + private SerializationState( + SerializedMetadata metadata, + SerializedObject unshredded, + Map<String, VariantValue> shreddedFields) { + this.metadata = metadata; + // field ID size is the size needed to store the largest field ID in the data + this.fieldIdSize = VariantUtil.sizeOf(metadata.dictionarySize()); + this.shreddedFields = shreddedFields; + + int dataSize = 0; + // get the unshredded field names and values as byte buffers + ImmutableMap.Builder<String, ByteBuffer> unshreddedBuilder = ImmutableMap.builder(); + if (unshredded != null) { + for (Pair<String, Integer> field : 
unshredded.fields()) { + // if the value is replaced by a shredded field, don't include it + String name = field.first(); + boolean replaced = shreddedFields.containsKey(name); + if (!replaced) { + ByteBuffer value = unshredded.sliceValue(field.second()); + unshreddedBuilder.put(name, value); + dataSize += value.remaining(); + } + } + } + + this.unshreddedFields = unshreddedBuilder.build(); + // duplicates are suppressed when creating unshreddedFields + this.numElements = unshreddedFields.size() + shreddedFields.size(); + // object is large if the number of elements can't be stored in 1 byte + this.isLarge = numElements > 0xFF; + + for (VariantValue value : shreddedFields.values()) { + dataSize += value.sizeInBytes(); + } + + this.dataSize = dataSize; + // offset size is the size needed to store the length of the data section + this.offsetSize = VariantUtil.sizeOf(dataSize); + } + + private int size() { + return 1 /* header */ + + (isLarge ? 4 : 1) /* num elements size */ + + numElements * fieldIdSize /* field ID list size */ + + (1 + numElements) * offsetSize /* offset list size */ + + dataSize; + } + + private int writeTo(ByteBuffer buffer, int offset) { + int fieldIdListOffset = + offset + 1 /* header size */ + (isLarge ? 4 : 1) /* num elements size */; + int offsetListOffset = fieldIdListOffset + (numElements * fieldIdSize); + int dataOffset = offsetListOffset + ((1 + numElements) * offsetSize); + byte header = VariantUtil.objectHeader(isLarge, fieldIdSize, offsetSize); + + VariantUtil.writeByte(buffer, header, offset); + VariantUtil.writeLittleEndianUnsigned(buffer, numElements, offset + 1, isLarge ? 
4 : 1); + + // neither iterable is closeable, so it is okay to use Iterable + Iterable<String> fields = + SortedMerge.of( + () -> unshreddedFields.keySet().stream().sorted().iterator(), + () -> shreddedFields.keySet().stream().sorted().iterator()); + + int nextValueOffset = 0; + int index = 0; + for (String field : fields) { + // write the field ID from the metadata dictionary + int id = metadata.id(field); + Preconditions.checkState(id >= 0, "Invalid metadata, missing: %s", field); + VariantUtil.writeLittleEndianUnsigned( + buffer, id, fieldIdListOffset + (index * fieldIdSize), fieldIdSize); + // write the data offset + VariantUtil.writeLittleEndianUnsigned( + buffer, nextValueOffset, offsetListOffset + (index * offsetSize), offsetSize); + + // copy or serialize the value into the data section + int valueSize; + VariantValue shreddedValue = shreddedFields.get(field); + if (shreddedValue != null) { + valueSize = shreddedValue.writeTo(buffer, dataOffset + nextValueOffset); + } else { + valueSize = + VariantUtil.writeBufferAbsolute( + buffer, dataOffset + nextValueOffset, unshreddedFields.get(field)); + } + + // update tracking + nextValueOffset += valueSize; + index += 1; + } + + // write the final size of the data section + VariantUtil.writeLittleEndianUnsigned( + buffer, nextValueOffset, offsetListOffset + (index * offsetSize), offsetSize); + + // return the total size + return (dataOffset - offset) + dataSize; + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/variants/Variant.java b/core/src/main/java/org/apache/iceberg/variants/Variant.java new file mode 100644 index 000000000000..09c5d0fb2bc8 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/variants/Variant.java @@ -0,0 +1,31 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. 
The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg.variants; + +/** A variant metadata and value pair. */ +public interface Variant { + /** Returns the metadata for all values in the variant. */ + VariantMetadata metadata(); + + /** Returns the variant value. */ + VariantValue value(); +} diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantArray.java b/core/src/main/java/org/apache/iceberg/variants/VariantArray.java new file mode 100644 index 000000000000..ffd202be2b41 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/variants/VariantArray.java @@ -0,0 +1,38 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. 
See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg.variants; + +/** A variant array value. */ +public interface VariantArray extends VariantValue { + /** Returns the {@link VariantValue} at {@code index} in this array. */ + VariantValue get(int index); + + @Override + default Variants.PhysicalType type() { + return Variants.PhysicalType.ARRAY; + } + + @Override + default VariantArray asArray() { + return this; + } +} diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantMetadata.java b/core/src/main/java/org/apache/iceberg/variants/VariantMetadata.java new file mode 100644 index 000000000000..24d1908a3eb4 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/variants/VariantMetadata.java @@ -0,0 +1,35 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg.variants; + +/** A variant metadata dictionary. */ +public interface VariantMetadata extends Variants.Serialized { + /** Returns the ID for a {@code name} in the dictionary, or -1 if not present. 
*/ + int id(String name); + + /** + * Returns the field name for an ID in metadata. + * + * @throws ArrayIndexOutOfBoundsException if the dictionary does not contain the ID + */ + String get(int id); +} diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantObject.java b/core/src/main/java/org/apache/iceberg/variants/VariantObject.java new file mode 100644 index 000000000000..91ed2bfa3fde --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/variants/VariantObject.java @@ -0,0 +1,37 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg.variants; + +/** A variant object value. */ +public interface VariantObject extends VariantValue { + /** Returns the {@link VariantValue} for the field named {@code name} in this object. 
*/ + VariantValue get(String name); + + default Variants.PhysicalType type() { + return Variants.PhysicalType.OBJECT; + } + + @Override + default VariantObject asObject() { + return this; + } +} diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java b/core/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java new file mode 100644 index 000000000000..c52108a2cfdc --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java @@ -0,0 +1,32 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg.variants; + +/** A primitive variant value. 
*/ +public interface VariantPrimitive<T> extends VariantValue { + T get(); + + @Override + default VariantPrimitive<?> asPrimitive() { + return this; + } +} diff --git a/core/src/main/java/org/apache/iceberg/VariantUtil.java b/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java similarity index 66% rename from core/src/main/java/org/apache/iceberg/VariantUtil.java rename to core/src/main/java/org/apache/iceberg/variants/VariantUtil.java index dd48ca411fd4..6326e4c488f7 100644 --- a/core/src/main/java/org/apache/iceberg/VariantUtil.java +++ b/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java @@ -19,16 +19,38 @@ * */ -package org.apache.iceberg; +package org.apache.iceberg.variants; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; import java.util.function.Function; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; class VariantUtil { + private static final int BASIC_TYPE_MASK = 0b11; + private static final int BASIC_TYPE_PRIMITIVE = 0; + private static final int BASIC_TYPE_SHORT_STRING = 1; + private static final int BASIC_TYPE_OBJECT = 2; + private static final int BASIC_TYPE_ARRAY = 3; + private VariantUtil() {} + /** A hacky absolute put for ByteBuffer */ + static int writeBufferAbsolute(ByteBuffer buffer, int offset, ByteBuffer toCopy) { + int originalPosition = buffer.position(); + buffer.position(offset); + ByteBuffer copy = toCopy.duplicate(); + buffer.put(copy); // duplicate so toCopy is not modified + buffer.position(originalPosition); + Preconditions.checkArgument(copy.remaining() <= 0, "Not fully written"); + return toCopy.remaining(); + } + + static void writeByte(ByteBuffer buffer, int value, int offset) { + buffer.put(buffer.position() + offset, (byte) (value & 0xFF)); + } + static void writeLittleEndianUnsigned(ByteBuffer buffer, int value, int offset, int size) { int base = buffer.position() + offset; switch (size) { @@ -50,11 +72,11 @@ static void 
writeLittleEndianUnsigned(ByteBuffer buffer, int value, int offset, throw new IllegalArgumentException("Invalid size: " + size); } - static int readLittleEndianInt8(ByteBuffer buffer, int offset) { + static byte readLittleEndianInt8(ByteBuffer buffer, int offset) { return buffer.get(buffer.position() + offset); } - static int readLittleEndianInt16(ByteBuffer buffer, int offset) { + static short readLittleEndianInt16(ByteBuffer buffer, int offset) { return buffer.getShort(buffer.position() + offset); } @@ -144,4 +166,33 @@ static int sizeOf(int maxValue) { return 4; } } + + static byte primitiveHeader(int primitiveType) { + return (byte) (primitiveType << Variants.Primitives.PRIMITIVE_TYPE_SHIFT); + } + + static byte objectHeader(boolean isLarge, int fieldIdSize, int offsetSize) { + return (byte) + ((isLarge ? 0b1000000 : 0) | ((fieldIdSize - 1) << 4) | ((offsetSize - 1) << 2) | 0b10); + } + + static byte arrayHeader(boolean isLarge, int offsetSize) { + return (byte) ((isLarge ? 0b10000 : 0) | (offsetSize - 1) << 2 | 0b11); + } + + static Variants.BasicType basicType(int header) { + int basicType = header & BASIC_TYPE_MASK; + switch (basicType) { + case BASIC_TYPE_PRIMITIVE: + return Variants.BasicType.PRIMITIVE; + case BASIC_TYPE_SHORT_STRING: + return Variants.BasicType.SHORT_STRING; + case BASIC_TYPE_OBJECT: + return Variants.BasicType.OBJECT; + case BASIC_TYPE_ARRAY: + return Variants.BasicType.ARRAY; + } + + throw new UnsupportedOperationException("Unsupported basic type: " + basicType); + } } diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantValue.java b/core/src/main/java/org/apache/iceberg/variants/VariantValue.java new file mode 100644 index 000000000000..b0eb49306b2b --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/variants/VariantValue.java @@ -0,0 +1,66 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. 
See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg.variants; + +import java.nio.ByteBuffer; +import org.apache.iceberg.variants.Variants.PhysicalType; + +/** A variant value. */ +public interface VariantValue { + /** Returns the {@link PhysicalType} of this value. */ + PhysicalType type(); + + /** Returns the serialized size in bytes of this value. */ + int sizeInBytes(); + + /** + * Writes this value to the buffer at the given offset, ignoring the buffer's position and limit. + */ + int writeTo(ByteBuffer buffer, int offset); + + /** + * Returns this value as a {@link VariantPrimitive}. + * + * @throws IllegalArgumentException if the value is not a primitive + */ + default VariantPrimitive<?> asPrimitive() { + throw new IllegalArgumentException("Not a primitive: " + this); + } + + /** + * Returns this value as a {@link VariantObject}. + * + * @throws IllegalArgumentException if the value is not an object + */ + default VariantObject asObject() { + throw new IllegalArgumentException("Not an object: " + this); + } + + /** + * Returns this value as a {@link VariantArray}. 
+ * + * @throws IllegalArgumentException if the value is not an array + */ + default VariantArray asArray() { + throw new IllegalArgumentException("Not an array: " + this); + } +} diff --git a/core/src/main/java/org/apache/iceberg/variants/Variants.java b/core/src/main/java/org/apache/iceberg/variants/Variants.java new file mode 100644 index 000000000000..8ae502974bfe --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/variants/Variants.java @@ -0,0 +1,277 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. 
+ * + */ + +package org.apache.iceberg.variants; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.util.List; +import java.util.Map; +import org.apache.iceberg.util.DateTimeUtil; + +public class Variants { + enum LogicalType { + NULL, + BOOLEAN, + EXACT_NUMERIC, + FLOAT, + DOUBLE, + DATE, + TIMESTAMPTZ, + TIMESTAMPNTZ, + BINARY, + STRING, + ARRAY, + OBJECT + } + + public enum PhysicalType { + NULL(LogicalType.NULL, Void.class), + BOOLEAN_TRUE(LogicalType.BOOLEAN, Boolean.class), + BOOLEAN_FALSE(LogicalType.BOOLEAN, Boolean.class), + INT8(LogicalType.EXACT_NUMERIC, Integer.class), + INT16(LogicalType.EXACT_NUMERIC, Integer.class), + INT32(LogicalType.EXACT_NUMERIC, Integer.class), + INT64(LogicalType.EXACT_NUMERIC, Long.class), + DOUBLE(LogicalType.DOUBLE, Double.class), + DECIMAL4(LogicalType.EXACT_NUMERIC, BigDecimal.class), + DECIMAL8(LogicalType.EXACT_NUMERIC, BigDecimal.class), + DECIMAL16(LogicalType.EXACT_NUMERIC, BigDecimal.class), + DATE(LogicalType.DATE, Integer.class), + TIMESTAMPTZ(LogicalType.TIMESTAMPTZ, Long.class), + TIMESTAMPNTZ(LogicalType.TIMESTAMPNTZ, Long.class), + FLOAT(LogicalType.FLOAT, Float.class), + BINARY(LogicalType.BINARY, ByteBuffer.class), + STRING(LogicalType.STRING, String.class), + ARRAY(LogicalType.ARRAY, List.class), + OBJECT(LogicalType.OBJECT, Map.class); + + private final LogicalType logicalType; + private final Class<?> javaClass; + + PhysicalType(LogicalType logicalType, Class<?> javaClass) { + this.logicalType = logicalType; + this.javaClass = javaClass; + } + + LogicalType toLogicalType() { + return logicalType; + } + + public Class<?> javaClass() { + return javaClass; + } + + public static PhysicalType from(int primitiveType) { + switch (primitiveType) { + case Primitives.TYPE_NULL: + return NULL; + case Primitives.TYPE_TRUE: + return BOOLEAN_TRUE; + case Primitives.TYPE_FALSE: + return BOOLEAN_FALSE; + case Primitives.TYPE_INT8: + return INT8; + case Primitives.TYPE_INT16: + return INT16; + case 
Primitives.TYPE_INT32: + return INT32; + case Primitives.TYPE_INT64: + return INT64; + case Primitives.TYPE_DATE: + return DATE; + case Primitives.TYPE_TIMESTAMPTZ: + return TIMESTAMPTZ; + case Primitives.TYPE_TIMESTAMPNTZ: + return TIMESTAMPNTZ; + case Primitives.TYPE_FLOAT: + return FLOAT; + case Primitives.TYPE_DOUBLE: + return DOUBLE; + case Primitives.TYPE_DECIMAL4: + return DECIMAL4; + case Primitives.TYPE_DECIMAL8: + return DECIMAL8; + case Primitives.TYPE_DECIMAL16: + return DECIMAL16; + case Primitives.TYPE_BINARY: + return BINARY; + case Primitives.TYPE_STRING: + return STRING; + } + + throw new UnsupportedOperationException("Unknown primitive physical type: " + primitiveType); + } + } + + interface Serialized { + ByteBuffer buffer(); + } + + abstract static class SerializedValue implements VariantValue, Serialized { + @Override + public int sizeInBytes() { + return buffer().remaining(); + } + + @Override + public int writeTo(ByteBuffer buffer, int offset) { + ByteBuffer value = buffer(); + VariantUtil.writeBufferAbsolute(buffer, offset, value); + return value.remaining(); + } + } + + static class Primitives { + static final int TYPE_NULL = 0; + static final int TYPE_TRUE = 1; + static final int TYPE_FALSE = 2; + static final int TYPE_INT8 = 3; + static final int TYPE_INT16 = 4; + static final int TYPE_INT32 = 5; + static final int TYPE_INT64 = 6; + static final int TYPE_DOUBLE = 7; + static final int TYPE_DECIMAL4 = 8; + static final int TYPE_DECIMAL8 = 9; + static final int TYPE_DECIMAL16 = 10; + static final int TYPE_DATE = 11; + static final int TYPE_TIMESTAMPTZ = 12; // equivalent to timestamptz + static final int TYPE_TIMESTAMPNTZ = 13; // equivalent to timestamp + static final int TYPE_FLOAT = 14; + static final int TYPE_BINARY = 15; + static final int TYPE_STRING = 16; + + static final int PRIMITIVE_TYPE_SHIFT = 2; + + private Primitives() {} + } + + static final int HEADER_SIZE = 1; + + enum BasicType { + PRIMITIVE, + SHORT_STRING, + OBJECT, + 
ARRAY + } + + public static VariantValue from(ByteBuffer metadata, ByteBuffer value) { + return from(SerializedMetadata.from(metadata), value); + } + + static VariantValue from(SerializedMetadata metadata, ByteBuffer value) { + int header = VariantUtil.readByte(value, 0); + BasicType basicType = VariantUtil.basicType(header); + switch (basicType) { + case PRIMITIVE: + return SerializedPrimitive.from(value, header); + case SHORT_STRING: + return SerializedShortString.from(value, header); + case OBJECT: + return SerializedObject.from(metadata, value, header); + case ARRAY: + return SerializedArray.from(metadata, value, header); + } + + throw new UnsupportedOperationException("Unsupported basic type: " + basicType); + } + + static VariantPrimitive<Void> ofNull() { + return new PrimitiveWrapper<>(PhysicalType.NULL, null); + } + + static VariantPrimitive<Boolean> of(boolean value) { + if (value) { + return new PrimitiveWrapper<>(PhysicalType.BOOLEAN_TRUE, true); + } else { + return new PrimitiveWrapper<>(PhysicalType.BOOLEAN_FALSE, false); + } + } + + static VariantPrimitive<Byte> of(byte value) { + return new PrimitiveWrapper<>(PhysicalType.INT8, value); + } + + static VariantPrimitive<Short> of(short value) { + return new PrimitiveWrapper<>(PhysicalType.INT16, value); + } + + static VariantPrimitive<Integer> of(int value) { + return new PrimitiveWrapper<>(PhysicalType.INT32, value); + } + + static VariantPrimitive<Long> of(long value) { + return new PrimitiveWrapper<>(PhysicalType.INT64, value); + } + + static VariantPrimitive<Float> of(float value) { + return new PrimitiveWrapper<>(PhysicalType.FLOAT, value); + } + + static VariantPrimitive<Double> of(double value) { + return new PrimitiveWrapper<>(PhysicalType.DOUBLE, value); + } + + static VariantPrimitive<Integer> ofDate(int value) { + return new PrimitiveWrapper<>(PhysicalType.DATE, value); + } + + static VariantPrimitive<Integer> ofIsoDate(String value) { + return ofDate(DateTimeUtil.isoDateToDays(value)); + } + 
+ static VariantPrimitive<Long> ofTimestamptz(long value) { + return new PrimitiveWrapper<>(PhysicalType.TIMESTAMPTZ, value); + } + + static VariantPrimitive<Long> ofIsoTimestamptz(String value) { + return ofTimestamptz(DateTimeUtil.isoTimestamptzToMicros(value)); + } + + static VariantPrimitive<Long> ofTimestampntz(long value) { + return new PrimitiveWrapper<>(PhysicalType.TIMESTAMPNTZ, value); + } + + static VariantPrimitive<Long> ofIsoTimestampntz(String value) { + return ofTimestampntz(DateTimeUtil.isoTimestampToMicros(value)); + } + + static VariantPrimitive<BigDecimal> of(BigDecimal value) { + int bitLength = value.unscaledValue().bitLength(); + if (bitLength < 32) { + return new PrimitiveWrapper<>(PhysicalType.DECIMAL4, value); + } else if (bitLength < 64) { + return new PrimitiveWrapper<>(PhysicalType.DECIMAL8, value); + } else if (bitLength < 128) { + return new PrimitiveWrapper<>(PhysicalType.DECIMAL16, value); + } + + throw new UnsupportedOperationException("Unsupported decimal precision: " + value.precision()); + } + + static VariantPrimitive<ByteBuffer> of(ByteBuffer value) { + return new PrimitiveWrapper<>(PhysicalType.BINARY, value); + } + + static VariantPrimitive<String> of(String value) { + return new PrimitiveWrapper<>(PhysicalType.STRING, value); + } +} diff --git a/core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java b/core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java new file mode 100644 index 000000000000..d086b8551d62 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java @@ -0,0 +1,84 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. 
The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg.variants; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.FieldSource; + +public class TestPrimitiveWrapper { + private static final VariantPrimitive<?>[] primitives = + new VariantPrimitive[] { + Variants.ofNull(), + Variants.of(true), + Variants.of(false), + Variants.of((byte) 34), + Variants.of((byte) -34), + Variants.of((short) 1234), + Variants.of((short) -1234), + Variants.of(12345), + Variants.of(-12345), + Variants.of(9876543210L), + Variants.of(-9876543210L), + Variants.of(10.11F), + Variants.of(-10.11F), + Variants.of(14.3D), + Variants.of(-14.3D), + Variants.ofIsoDate("2024-11-07"), + Variants.ofIsoDate("1957-11-07"), + Variants.ofIsoTimestamptz("2024-11-07T12:33:54.123456+00:00"), + Variants.ofIsoTimestamptz("1957-11-07T12:33:54.123456+00:00"), + Variants.ofIsoTimestampntz("2024-11-07T12:33:54.123456"), + Variants.ofIsoTimestampntz("1957-11-07T12:33:54.123456"), + Variants.of(new BigDecimal("123456.7890")), // decimal4 + Variants.of(new BigDecimal("-123456.7890")), // decimal4 + Variants.of(new BigDecimal("1234567890.987654321")), // decimal8 + Variants.of(new BigDecimal("-1234567890.987654321")), // decimal8 + Variants.of(new 
BigDecimal("9876543210.123456789")), // decimal16 + Variants.of(new BigDecimal("-9876543210.123456789")), // decimal16 + Variants.of(ByteBuffer.wrap(new byte[] {0x0a, 0x0b, 0x0c, 0x0d})), + Variants.of("iceberg"), + }; + + @ParameterizedTest + @FieldSource("primitives") + public void testPrimitiveValueSerialization(VariantPrimitive<?> primitive) { + // write the value to the middle of a large buffer + int size = primitive.sizeInBytes(); + ByteBuffer buffer = ByteBuffer.allocate(size + 1000).order(ByteOrder.LITTLE_ENDIAN); + primitive.writeTo(buffer, 300); + + // create a copy that is limited to the value range + ByteBuffer readBuffer = buffer.duplicate().order(ByteOrder.LITTLE_ENDIAN); + readBuffer.position(300); + readBuffer.limit(300 + size); + + // read and validate the serialized bytes + VariantValue actual = Variants.from(SerializedMetadata.EMPTY_V1_BUFFER, readBuffer); + Assertions.assertThat(actual.type()).isEqualTo(primitive.type()); + Assertions.assertThat(actual).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(actual.asPrimitive().get()).isEqualTo(primitive.get()); + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestVariantArray.java b/core/src/test/java/org/apache/iceberg/variants/TestSerializedArray.java similarity index 59% rename from core/src/test/java/org/apache/iceberg/TestVariantArray.java rename to core/src/test/java/org/apache/iceberg/variants/TestSerializedArray.java index 57ebe982b172..2e89878d3878 100644 --- a/core/src/test/java/org/apache/iceberg/TestVariantArray.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestSerializedArray.java @@ -19,40 +19,39 @@ * */ -package org.apache.iceberg; +package org.apache.iceberg.variants; import java.nio.ByteBuffer; import java.util.Random; -import org.apache.iceberg.Variants.PhysicalType; -import org.apache.iceberg.Variants.Primitive; +import org.apache.iceberg.variants.Variants.PhysicalType; import org.apache.iceberg.util.RandomUtil; import 
org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; -public class TestVariantArray { - private static final VariantMetadata EMPTY_METADATA = - VariantMetadata.from(VariantMetadata.EMPTY_V1_BUFFER); - private static final VariantPrimitive vNull = VariantPrimitive.from(new byte[] {0x00}); - private static final VariantPrimitive vTrue = VariantPrimitive.from(new byte[] {0b100}); - private static final VariantPrimitive vFalse = VariantPrimitive.from(new byte[] {0b1000}); - private static final VariantShortString str = - VariantShortString.from(new byte[] {0b11101, 'i', 'c', 'e', 'b', 'e', 'r', 'g'}); - private static final VariantShortString a = VariantShortString.from(new byte[] {0b101, 'a'}); - private static final VariantShortString b = VariantShortString.from(new byte[] {0b101, 'b'}); - private static final VariantShortString c = VariantShortString.from(new byte[] {0b101, 'c'}); - private static final VariantShortString d = VariantShortString.from(new byte[] {0b101, 'd'}); - private static final VariantShortString e = VariantShortString.from(new byte[] {0b101, 'e'}); - private static final VariantPrimitive i34 = VariantPrimitive.from(new byte[] {0b1100, 34}); - private static final VariantPrimitive i1234 = - VariantPrimitive.from(new byte[] {0b10000, (byte) 0xD2, 0x04}); - private static final VariantPrimitive date = - VariantPrimitive.from(new byte[] {0b101100, (byte) 0xF4, 0x43, 0x00, 0x00}); +public class TestSerializedArray { + private static final SerializedMetadata EMPTY_METADATA = + SerializedMetadata.from(SerializedMetadata.EMPTY_V1_BUFFER); + private static final SerializedPrimitive vNull = SerializedPrimitive.from(new byte[] {0x00}); + private static final SerializedPrimitive vTrue = SerializedPrimitive.from(new byte[] {0b100}); + private static final SerializedPrimitive vFalse = SerializedPrimitive.from(new byte[] {0b1000}); + private static final SerializedShortString str = + SerializedShortString.from(new byte[] {0b11101, 'i', 'c', 'e', 
'b', 'e', 'r', 'g'}); + private static final SerializedShortString a = SerializedShortString.from(new byte[] {0b101, 'a'}); + private static final SerializedShortString b = SerializedShortString.from(new byte[] {0b101, 'b'}); + private static final SerializedShortString c = SerializedShortString.from(new byte[] {0b101, 'c'}); + private static final SerializedShortString d = SerializedShortString.from(new byte[] {0b101, 'd'}); + private static final SerializedShortString e = SerializedShortString.from(new byte[] {0b101, 'e'}); + private static final SerializedPrimitive i34 = SerializedPrimitive.from(new byte[] {0b1100, 34}); + private static final SerializedPrimitive i1234 = + SerializedPrimitive.from(new byte[] {0b10000, (byte) 0xD2, 0x04}); + private static final SerializedPrimitive date = + SerializedPrimitive.from(new byte[] {0b101100, (byte) 0xF4, 0x43, 0x00, 0x00}); private final Random random = new Random(374513); @Test public void testEmptyArray() { - VariantArray array = VariantArray.from(EMPTY_METADATA, new byte[] {0b0011, 0x00}); + SerializedArray array = SerializedArray.from(EMPTY_METADATA, new byte[] {0b0011, 0x00}); Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); Assertions.assertThat(array.numElements()).isEqualTo(0); @@ -60,8 +59,8 @@ public void testEmptyArray() { @Test public void testEmptyLargeArray() { - VariantArray array = - VariantArray.from(EMPTY_METADATA, new byte[] {0b10011, 0x00, 0x00, 0x00, 0x00}); + SerializedArray array = + SerializedArray.from(EMPTY_METADATA, new byte[] {0b10011, 0x00, 0x00, 0x00, 0x00}); Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); Assertions.assertThat(array.numElements()).isEqualTo(0); @@ -70,20 +69,20 @@ public void testEmptyLargeArray() { @Test public void testStringArray() { ByteBuffer buffer = VariantTestUtil.createArray(a, b, c, d, e); - VariantArray array = VariantArray.from(EMPTY_METADATA, buffer, buffer.get(0)); + SerializedArray array = 
SerializedArray.from(EMPTY_METADATA, buffer, buffer.get(0)); Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); Assertions.assertThat(array.numElements()).isEqualTo(5); Assertions.assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(0)).get()).isEqualTo("a"); + Assertions.assertThat(array.get(0).asPrimitive().get()).isEqualTo("a"); Assertions.assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(1)).get()).isEqualTo("b"); + Assertions.assertThat(array.get(1).asPrimitive().get()).isEqualTo("b"); Assertions.assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(2)).get()).isEqualTo("c"); + Assertions.assertThat(array.get(2).asPrimitive().get()).isEqualTo("c"); Assertions.assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(3)).get()).isEqualTo("d"); + Assertions.assertThat(array.get(3).asPrimitive().get()).isEqualTo("d"); Assertions.assertThat(array.get(4).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(4)).get()).isEqualTo("e"); + Assertions.assertThat(array.get(4).asPrimitive().get()).isEqualTo("e"); Assertions.assertThatThrownBy(() -> array.get(5)) .isInstanceOf(ArrayIndexOutOfBoundsException.class) @@ -93,22 +92,22 @@ public void testStringArray() { @Test public void testStringDifferentLengths() { ByteBuffer buffer = VariantTestUtil.createArray(a, b, c, str, d, e); - VariantArray array = VariantArray.from(EMPTY_METADATA, buffer, buffer.get(0)); + SerializedArray array = SerializedArray.from(EMPTY_METADATA, buffer, buffer.get(0)); Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); Assertions.assertThat(array.numElements()).isEqualTo(6); Assertions.assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) 
array.get(0)).get()).isEqualTo("a"); + Assertions.assertThat(array.get(0).asPrimitive().get()).isEqualTo("a"); Assertions.assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(1)).get()).isEqualTo("b"); + Assertions.assertThat(array.get(1).asPrimitive().get()).isEqualTo("b"); Assertions.assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(2)).get()).isEqualTo("c"); + Assertions.assertThat(array.get(2).asPrimitive().get()).isEqualTo("c"); Assertions.assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(3)).get()).isEqualTo("iceberg"); + Assertions.assertThat(array.get(3).asPrimitive().get()).isEqualTo("iceberg"); Assertions.assertThat(array.get(4).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(4)).get()).isEqualTo("d"); + Assertions.assertThat(array.get(4).asPrimitive().get()).isEqualTo("d"); Assertions.assertThat(array.get(5).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(5)).get()).isEqualTo("e"); + Assertions.assertThat(array.get(5).asPrimitive().get()).isEqualTo("e"); Assertions.assertThatThrownBy(() -> array.get(6)) .isInstanceOf(ArrayIndexOutOfBoundsException.class) @@ -118,44 +117,44 @@ public void testStringDifferentLengths() { @Test public void testArrayOfMixedTypes() { ByteBuffer nestedBuffer = VariantTestUtil.createArray(a, c, d); - VariantArray nested = VariantArray.from(EMPTY_METADATA, nestedBuffer, nestedBuffer.get(0)); + SerializedArray nested = SerializedArray.from(EMPTY_METADATA, nestedBuffer, nestedBuffer.get(0)); ByteBuffer buffer = VariantTestUtil.createArray(date, i34, str, vNull, e, b, vFalse, nested, vTrue, i1234); - VariantArray array = VariantArray.from(EMPTY_METADATA, buffer, buffer.get(0)); + SerializedArray array = SerializedArray.from(EMPTY_METADATA, buffer, buffer.get(0)); 
Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); Assertions.assertThat(array.numElements()).isEqualTo(10); Assertions.assertThat(array.get(0).type()).isEqualTo(PhysicalType.DATE); - Assertions.assertThat(((Primitive<?>) array.get(0)).get()).isEqualTo(17396); + Assertions.assertThat(array.get(0).asPrimitive().get()).isEqualTo(17396); Assertions.assertThat(array.get(1).type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((Primitive<?>) array.get(1)).get()).isEqualTo(34); + Assertions.assertThat(array.get(1).asPrimitive().get()).isEqualTo((byte) 34); Assertions.assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(2)).get()).isEqualTo("iceberg"); + Assertions.assertThat(array.get(2).asPrimitive().get()).isEqualTo("iceberg"); Assertions.assertThat(array.get(3).type()).isEqualTo(PhysicalType.NULL); - Assertions.assertThat(((Primitive<?>) array.get(3)).get()).isEqualTo(null); + Assertions.assertThat(array.get(3).asPrimitive().get()).isEqualTo(null); Assertions.assertThat(array.get(4).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(4)).get()).isEqualTo("e"); + Assertions.assertThat(array.get(4).asPrimitive().get()).isEqualTo("e"); Assertions.assertThat(array.get(5).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(5)).get()).isEqualTo("b"); + Assertions.assertThat(array.get(5).asPrimitive().get()).isEqualTo("b"); Assertions.assertThat(array.get(6).type()).isEqualTo(PhysicalType.BOOLEAN_FALSE); - Assertions.assertThat(((Primitive<?>) array.get(6)).get()).isEqualTo(false); + Assertions.assertThat(array.get(6).asPrimitive().get()).isEqualTo(false); Assertions.assertThat(array.get(8).type()).isEqualTo(PhysicalType.BOOLEAN_TRUE); - Assertions.assertThat(((Primitive<?>) array.get(8)).get()).isEqualTo(true); + Assertions.assertThat(array.get(8).asPrimitive().get()).isEqualTo(true); 
Assertions.assertThat(array.get(9).type()).isEqualTo(PhysicalType.INT16); - Assertions.assertThat(((Primitive<?>) array.get(9)).get()).isEqualTo(1234); + Assertions.assertThat(array.get(9).asPrimitive().get()).isEqualTo((short) 1234); Assertions.assertThatThrownBy(() -> array.get(10)) .isInstanceOf(ArrayIndexOutOfBoundsException.class) .hasMessage("Index 10 out of bounds for length 10"); Assertions.assertThat(array.get(7).type()).isEqualTo(PhysicalType.ARRAY); - VariantArray actualNested = (VariantArray) array.get(7); + SerializedArray actualNested = (SerializedArray) array.get(7); Assertions.assertThat(actualNested.numElements()).isEqualTo(3); Assertions.assertThat(actualNested.get(0).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) actualNested.get(0)).get()).isEqualTo("a"); + Assertions.assertThat(actualNested.get(0).asPrimitive().get()).isEqualTo("a"); Assertions.assertThat(actualNested.get(1).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) actualNested.get(1)).get()).isEqualTo("c"); + Assertions.assertThat(actualNested.get(1).asPrimitive().get()).isEqualTo("c"); Assertions.assertThat(actualNested.get(2).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) actualNested.get(2)).get()).isEqualTo("d"); + Assertions.assertThat(actualNested.get(2).asPrimitive().get()).isEqualTo("d"); Assertions.assertThatThrownBy(() -> actualNested.get(3)) .isInstanceOf(ArrayIndexOutOfBoundsException.class) @@ -166,21 +165,21 @@ public void testArrayOfMixedTypes() { public void testTwoByteOffsets() { // a string larger than 255 bytes to push the value offset size above 1 byte String randomString = RandomUtil.generateString(300, random); - VariantPrimitive bigString = VariantTestUtil.createString(randomString); + SerializedPrimitive bigString = VariantTestUtil.createString(randomString); ByteBuffer buffer = VariantTestUtil.createArray(bigString, a, b, c); - VariantArray array = 
VariantArray.from(EMPTY_METADATA, buffer, buffer.get(0)); + SerializedArray array = SerializedArray.from(EMPTY_METADATA, buffer, buffer.get(0)); Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); Assertions.assertThat(array.numElements()).isEqualTo(4); Assertions.assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(0)).get()).isEqualTo(randomString); + Assertions.assertThat(array.get(0).asPrimitive().get()).isEqualTo(randomString); Assertions.assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(1)).get()).isEqualTo("a"); + Assertions.assertThat(array.get(1).asPrimitive().get()).isEqualTo("a"); Assertions.assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(2)).get()).isEqualTo("b"); + Assertions.assertThat(array.get(2).asPrimitive().get()).isEqualTo("b"); Assertions.assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(3)).get()).isEqualTo("c"); + Assertions.assertThat(array.get(3).asPrimitive().get()).isEqualTo("c"); Assertions.assertThatThrownBy(() -> array.get(4)) .isInstanceOf(ArrayIndexOutOfBoundsException.class) @@ -191,21 +190,21 @@ public void testTwoByteOffsets() { public void testThreeByteOffsets() { // a string larger than 65535 bytes to push the value offset size above 1 byte String randomString = RandomUtil.generateString(70_000, random); - VariantPrimitive reallyBigString = VariantTestUtil.createString(randomString); + SerializedPrimitive reallyBigString = VariantTestUtil.createString(randomString); ByteBuffer buffer = VariantTestUtil.createArray(reallyBigString, a, b, c); - VariantArray array = VariantArray.from(EMPTY_METADATA, buffer, buffer.get(0)); + SerializedArray array = SerializedArray.from(EMPTY_METADATA, buffer, buffer.get(0)); 
Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); Assertions.assertThat(array.numElements()).isEqualTo(4); Assertions.assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(0)).get()).isEqualTo(randomString); + Assertions.assertThat(array.get(0).asPrimitive().get()).isEqualTo(randomString); Assertions.assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(1)).get()).isEqualTo("a"); + Assertions.assertThat(array.get(1).asPrimitive().get()).isEqualTo("a"); Assertions.assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(2)).get()).isEqualTo("b"); + Assertions.assertThat(array.get(2).asPrimitive().get()).isEqualTo("b"); Assertions.assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((Primitive<?>) array.get(3)).get()).isEqualTo("c"); + Assertions.assertThat(array.get(3).asPrimitive().get()).isEqualTo("c"); Assertions.assertThatThrownBy(() -> array.get(4)) .isInstanceOf(ArrayIndexOutOfBoundsException.class) @@ -214,8 +213,8 @@ public void testThreeByteOffsets() { @Test public void testLargeArraySize() { - VariantArray array = - VariantArray.from( + SerializedArray array = + SerializedArray.from( EMPTY_METADATA, new byte[] {0b10011, (byte) 0xFF, (byte) 0x01, 0x00, 0x00}); Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); @@ -226,7 +225,7 @@ public void testLargeArraySize() { public void testNegativeArraySize() { Assertions.assertThatThrownBy( () -> - VariantArray.from( + SerializedArray.from( EMPTY_METADATA, new byte[] {0b10011, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF})) .isInstanceOf(NegativeArraySizeException.class) diff --git a/core/src/test/java/org/apache/iceberg/TestVariantMetadata.java b/core/src/test/java/org/apache/iceberg/variants/TestSerializedMetadata.java similarity index 83% rename from 
core/src/test/java/org/apache/iceberg/TestVariantMetadata.java rename to core/src/test/java/org/apache/iceberg/variants/TestSerializedMetadata.java index edb5220628b0..0aabbfcc0317 100644 --- a/core/src/test/java/org/apache/iceberg/TestVariantMetadata.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestSerializedMetadata.java @@ -19,7 +19,7 @@ * */ -package org.apache.iceberg; +package org.apache.iceberg.variants; import java.nio.ByteBuffer; import java.util.Random; @@ -31,12 +31,12 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; -public class TestVariantMetadata { +public class TestSerializedMetadata { private final Random random = new Random(872591); @Test public void testEmptyVariantMetadata() { - VariantMetadata metadata = VariantMetadata.from(VariantMetadata.EMPTY_V1_BUFFER); + SerializedMetadata metadata = SerializedMetadata.from(SerializedMetadata.EMPTY_V1_BUFFER); Assertions.assertThat(metadata.isSorted()).isFalse(); Assertions.assertThat(metadata.dictionarySize()).isEqualTo(0); @@ -46,7 +46,7 @@ public void testEmptyVariantMetadata() { @Test public void testHeaderSorted() { - VariantMetadata metadata = VariantMetadata.from(new byte[] {0b10001, 0x00}); + SerializedMetadata metadata = SerializedMetadata.from(new byte[] {0b10001, 0x00}); Assertions.assertThat(metadata.isSorted()).isTrue(); Assertions.assertThat(metadata.dictionarySize()).isEqualTo(0); @@ -56,30 +56,30 @@ public void testHeaderSorted() { public void testHeaderOffsetSize() { // offset size is 4-byte LE = 1 Assertions.assertThat( - VariantMetadata.from(new byte[] {(byte) 0b11010001, 0x01, 0x00, 0x00, 0x00}) + SerializedMetadata.from(new byte[] {(byte) 0b11010001, 0x01, 0x00, 0x00, 0x00}) .dictionarySize()) .isEqualTo(1); // offset size is 3-byte LE = 1 Assertions.assertThat( - VariantMetadata.from(new byte[] {(byte) 0b10010001, 0x01, 0x00, 0x00}).dictionarySize()) + SerializedMetadata.from(new byte[] {(byte) 0b10010001, 0x01, 
0x00, 0x00}).dictionarySize()) .isEqualTo(1); // offset size is 2-byte LE = 1 Assertions.assertThat( - VariantMetadata.from(new byte[] {(byte) 0b01010001, 0x01, 0x00}).dictionarySize()) + SerializedMetadata.from(new byte[] {(byte) 0b01010001, 0x01, 0x00}).dictionarySize()) .isEqualTo(1); // offset size is 1-byte LE = 1 Assertions.assertThat( - VariantMetadata.from(new byte[] {(byte) 0b00010001, 0x01}).dictionarySize()) + SerializedMetadata.from(new byte[] {(byte) 0b00010001, 0x01}).dictionarySize()) .isEqualTo(1); } @Test public void testReadString() { - VariantMetadata metadata = - VariantMetadata.from( + SerializedMetadata metadata = + SerializedMetadata.from( new byte[] { 0b10001, 0x05, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 'a', 'b', 'c', 'd', 'e' }); @@ -95,8 +95,8 @@ public void testReadString() { @Test public void testMultibyteString() { - VariantMetadata metadata = - VariantMetadata.from( + SerializedMetadata metadata = + SerializedMetadata.from( new byte[] { 0b10001, 0x05, 0x00, 0x01, 0x02, 0x05, 0x06, 0x07, 'a', 'b', 'x', 'y', 'z', 'd', 'e' }); @@ -112,8 +112,8 @@ public void testMultibyteString() { @Test public void testTwoByteOffsets() { - VariantMetadata metadata = - VariantMetadata.from( + SerializedMetadata metadata = + SerializedMetadata.from( new byte[] { 0b1010001, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 'a', 'b', 'x', 'y', 'z', 'd', 'e' @@ -130,8 +130,8 @@ public void testTwoByteOffsets() { @Test public void testFindStringSorted() { - VariantMetadata metadata = - VariantMetadata.from( + SerializedMetadata metadata = + SerializedMetadata.from( new byte[] { 0b10001, 0x05, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 'a', 'b', 'c', 'd', 'e' }); @@ -150,8 +150,8 @@ public void testFindStringSorted() { @Test public void testFindStringUnsorted() { - VariantMetadata metadata = - VariantMetadata.from( + SerializedMetadata metadata = + SerializedMetadata.from( new byte[] { 0b00001, 0x05, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 
'e', 'd', 'c', 'b', 'a' }); @@ -179,7 +179,7 @@ public void testTwoByteFieldIds(boolean sortFieldNames) { } ByteBuffer buffer = VariantTestUtil.createMetadata(keySet, sortFieldNames); - VariantMetadata metadata = VariantMetadata.from(buffer); + SerializedMetadata metadata = SerializedMetadata.from(buffer); Assertions.assertThat(metadata.dictionarySize()).isEqualTo(10_000); Assertions.assertThat(metadata.id(lastKey)).isGreaterThan(0); @@ -196,7 +196,7 @@ public void testThreeByteFieldIds(boolean sortFieldNames) { } ByteBuffer buffer = VariantTestUtil.createMetadata(keySet, sortFieldNames); - VariantMetadata metadata = VariantMetadata.from(buffer); + SerializedMetadata metadata = SerializedMetadata.from(buffer); Assertions.assertThat(metadata.dictionarySize()).isEqualTo(100_000); Assertions.assertThat(metadata.id(lastKey)).isGreaterThan(0); @@ -204,14 +204,14 @@ public void testThreeByteFieldIds(boolean sortFieldNames) { @Test public void testInvalidMetadataVersion() { - Assertions.assertThatThrownBy(() -> VariantMetadata.from(new byte[] {0x02, 0x00})) + Assertions.assertThatThrownBy(() -> SerializedMetadata.from(new byte[] {0x02, 0x00})) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Unsupported version: 2"); } @Test public void testMissingLength() { - Assertions.assertThatThrownBy(() -> VariantMetadata.from(new byte[] {0x01})) + Assertions.assertThatThrownBy(() -> SerializedMetadata.from(new byte[] {0x01})) .isInstanceOf(IndexOutOfBoundsException.class); } @@ -219,7 +219,7 @@ public void testMissingLength() { public void testLengthTooShort() { // missing the 4th length byte Assertions.assertThatThrownBy( - () -> VariantMetadata.from(new byte[] {(byte) 0b11010001, 0x00, 0x00, 0x00})) + () -> SerializedMetadata.from(new byte[] {(byte) 0b11010001, 0x00, 0x00, 0x00})) .isInstanceOf(IndexOutOfBoundsException.class); } } diff --git a/core/src/test/java/org/apache/iceberg/TestVariantObject.java 
b/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java similarity index 60% rename from core/src/test/java/org/apache/iceberg/TestVariantObject.java rename to core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java index 4a154d97273f..6e407dd5cf0e 100644 --- a/core/src/test/java/org/apache/iceberg/TestVariantObject.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java @@ -19,13 +19,13 @@ * */ -package org.apache.iceberg; +package org.apache.iceberg.variants; import java.nio.ByteBuffer; import java.util.Map; import java.util.Random; import java.util.Set; -import org.apache.iceberg.Variants.PhysicalType; +import org.apache.iceberg.variants.Variants.PhysicalType; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.util.RandomUtil; @@ -35,22 +35,22 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; -public class TestVariantObject { - private static final VariantMetadata EMPTY_METADATA = - VariantMetadata.from(VariantMetadata.EMPTY_V1_BUFFER); - private static final VariantPrimitive i1 = VariantPrimitive.from(new byte[] {0b1100, 1}); - private static final VariantPrimitive i2 = VariantPrimitive.from(new byte[] {0b1100, 2}); - private static final VariantPrimitive i3 = VariantPrimitive.from(new byte[] {0b1100, 3}); - private static final VariantPrimitive vNull = VariantPrimitive.from(new byte[] {0x00}); - private static final VariantPrimitive vTrue = VariantPrimitive.from(new byte[] {0b100}); - private static final VariantPrimitive date = - VariantPrimitive.from(new byte[] {0b101100, (byte) 0xF4, 0x43, 0x00, 0x00}); +public class TestSerializedObject { + private static final SerializedMetadata EMPTY_METADATA = + SerializedMetadata.from(SerializedMetadata.EMPTY_V1_BUFFER); + private static final SerializedPrimitive i1 = 
SerializedPrimitive.from(new byte[] {0b1100, 1}); + private static final SerializedPrimitive i2 = SerializedPrimitive.from(new byte[] {0b1100, 2}); + private static final SerializedPrimitive i3 = SerializedPrimitive.from(new byte[] {0b1100, 3}); + private static final SerializedPrimitive vNull = SerializedPrimitive.from(new byte[] {0x00}); + private static final SerializedPrimitive vTrue = SerializedPrimitive.from(new byte[] {0b100}); + private static final SerializedPrimitive date = + SerializedPrimitive.from(new byte[] {0b101100, (byte) 0xF4, 0x43, 0x00, 0x00}); private final Random random = new Random(198725); @Test public void testEmptyObject() { - VariantObject object = VariantObject.from(EMPTY_METADATA, new byte[] {0b10, 0x00}); + SerializedObject object = SerializedObject.from(EMPTY_METADATA, new byte[] {0b10, 0x00}); Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); Assertions.assertThat(object.numElements()).isEqualTo(0); @@ -58,8 +58,8 @@ public void testEmptyObject() { @Test public void testEmptyLargeObject() { - VariantObject object = - VariantObject.from(EMPTY_METADATA, new byte[] {0b1000010, 0x00, 0x00, 0x00, 0x00}); + SerializedObject object = + SerializedObject.from(EMPTY_METADATA, new byte[] {0b1000010, 0x00, 0x00, 0x00, 0x00}); Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); Assertions.assertThat(object.numElements()).isEqualTo(0); @@ -67,34 +67,34 @@ public void testEmptyLargeObject() { @Test public void testSimpleObject() { - Map<String, Variants.Serialized> data = ImmutableMap.of("a", i1, "b", i2, "c", i3); + Map<String, VariantValue> data = ImmutableMap.of("a", i1, "b", i2, "c", i3); ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), true /* sort names */); ByteBuffer value = VariantTestUtil.createObject(meta, data); - VariantMetadata metadata = VariantMetadata.from(meta); - VariantObject object = VariantObject.from(metadata, value, value.get(0)); + SerializedMetadata metadata = 
SerializedMetadata.from(meta); + SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); Assertions.assertThat(object.numElements()).isEqualTo(3); Assertions.assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) object.get("a")).get()).isEqualTo(1); + Assertions.assertThat(object.get("a").asPrimitive().get()).isEqualTo((byte) 1); Assertions.assertThat(object.get("b").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) object.get("b")).get()).isEqualTo(2); + Assertions.assertThat(object.get("b").asPrimitive().get()).isEqualTo((byte) 2); Assertions.assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) object.get("c")).get()).isEqualTo(3); + Assertions.assertThat(object.get("c").asPrimitive().get()).isEqualTo((byte) 3); Assertions.assertThat(object.get("d")).isEqualTo(null); } @Test public void testOutOfOrderKeys() { - Map<String, Variants.Serialized> data = ImmutableMap.of("b", i2, "a", i1, "c", i3); + Map<String, VariantValue> data = ImmutableMap.of("b", i2, "a", i1, "c", i3); ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), false /* sort names */); ByteBuffer value = VariantTestUtil.createObject(meta, data); - VariantMetadata metadata = VariantMetadata.from(meta); - VariantObject object = VariantObject.from(metadata, value, value.get(0)); + SerializedMetadata metadata = SerializedMetadata.from(meta); + SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); Assertions.assertThat(object.numElements()).isEqualTo(3); @@ -102,11 +102,11 @@ public void testOutOfOrderKeys() { Assertions.assertThat(object.get("d")).isEqualTo(null); Assertions.assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8); - 
Assertions.assertThat(((VariantPrimitive) object.get("c")).get()).isEqualTo(3); + Assertions.assertThat(object.get("c").asPrimitive().get()).isEqualTo((byte) 3); Assertions.assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) object.get("a")).get()).isEqualTo(1); + Assertions.assertThat(object.get("a").asPrimitive().get()).isEqualTo((byte) 1); Assertions.assertThat(object.get("b").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) object.get("b")).get()).isEqualTo(2); + Assertions.assertThat(object.get("b").asPrimitive().get()).isEqualTo((byte) 2); } @Test @@ -114,92 +114,92 @@ public void testMixedValueTypes() { ByteBuffer meta = VariantTestUtil.createMetadata( ImmutableList.of("a", "b", "c", "d", "e", "f"), true /* sort names */); - VariantMetadata metadata = VariantMetadata.from(meta); + SerializedMetadata metadata = SerializedMetadata.from(meta); - Map<String, Variants.Serialized> inner = ImmutableMap.of("b", i2, "f", i3); + Map<String, VariantValue> inner = ImmutableMap.of("b", i2, "f", i3); ByteBuffer innerBuffer = VariantTestUtil.createObject(meta, inner); - VariantObject innerObject = VariantObject.from(metadata, innerBuffer, innerBuffer.get(0)); - Map<String, Variants.Serialized> data = + SerializedObject innerObject = SerializedObject.from(metadata, innerBuffer, innerBuffer.get(0)); + Map<String, VariantValue> data = ImmutableMap.of("a", i1, "b", date, "c", vNull, "d", vTrue, "e", innerObject); ByteBuffer value = VariantTestUtil.createObject(meta, data); - VariantObject object = VariantObject.from(metadata, value, value.get(0)); + SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); Assertions.assertThat(object.numElements()).isEqualTo(5); Assertions.assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) 
object.get("a")).get()).isEqualTo(1); + Assertions.assertThat(object.get("a").asPrimitive().get()).isEqualTo((byte) 1); Assertions.assertThat(object.get("b").type()).isEqualTo(PhysicalType.DATE); - Assertions.assertThat(((VariantPrimitive) object.get("b")).get()).isEqualTo(17396); + Assertions.assertThat(((SerializedPrimitive) object.get("b")).get()).isEqualTo(17396); Assertions.assertThat(object.get("c").type()).isEqualTo(PhysicalType.NULL); - Assertions.assertThat(((VariantPrimitive) object.get("c")).get()).isEqualTo(null); + Assertions.assertThat(((SerializedPrimitive) object.get("c")).get()).isEqualTo(null); Assertions.assertThat(object.get("d").type()).isEqualTo(PhysicalType.BOOLEAN_TRUE); - Assertions.assertThat(((VariantPrimitive) object.get("d")).get()).isEqualTo(true); + Assertions.assertThat(((SerializedPrimitive) object.get("d")).get()).isEqualTo(true); Assertions.assertThat(object.get("e").type()).isEqualTo(PhysicalType.OBJECT); - VariantObject actualInner = (VariantObject) object.get("e"); + SerializedObject actualInner = (SerializedObject) object.get("e").asObject(); Assertions.assertThat(actualInner.numElements()).isEqualTo(2); Assertions.assertThat(actualInner.get("b").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) actualInner.get("b")).get()).isEqualTo(2); + Assertions.assertThat(actualInner.get("b").asPrimitive().get()).isEqualTo((byte) 2); Assertions.assertThat(actualInner.get("f").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) actualInner.get("f")).get()).isEqualTo(3); + Assertions.assertThat(actualInner.get("f").asPrimitive().get()).isEqualTo((byte) 3); } @Test public void testTwoByteOffsets() { // a string larger than 255 bytes to push the value offset size above 1 byte String randomString = RandomUtil.generateString(300, random); - VariantPrimitive bigString = VariantTestUtil.createString(randomString); + SerializedPrimitive bigString = 
VariantTestUtil.createString(randomString); // note that order doesn't matter. fields are sorted by name - Map<String, Variants.Serialized> data = + Map<String, VariantValue> data = ImmutableMap.of("big", bigString, "a", i1, "b", i2, "c", i3); ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), true /* sort names */); ByteBuffer value = VariantTestUtil.createObject(meta, data); - VariantMetadata metadata = VariantMetadata.from(meta); - VariantObject object = VariantObject.from(metadata, value, value.get(0)); + SerializedMetadata metadata = SerializedMetadata.from(meta); + SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); Assertions.assertThat(object.numElements()).isEqualTo(4); Assertions.assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) object.get("a")).get()).isEqualTo(1); + Assertions.assertThat(object.get("a").asPrimitive().get()).isEqualTo((byte) 1); Assertions.assertThat(object.get("b").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) object.get("b")).get()).isEqualTo(2); + Assertions.assertThat(object.get("b").asPrimitive().get()).isEqualTo((byte) 2); Assertions.assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) object.get("c")).get()).isEqualTo(3); + Assertions.assertThat(object.get("c").asPrimitive().get()).isEqualTo((byte) 3); Assertions.assertThat(object.get("big").type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((VariantPrimitive) object.get("big")).get()).isEqualTo(randomString); + Assertions.assertThat(object.get("big").asPrimitive().get()).isEqualTo(randomString); } @Test public void testThreeByteOffsets() { // a string larger than 65535 bytes to push the value offset size above 1 byte String randomString = RandomUtil.generateString(70_000, random); - VariantPrimitive 
reallyBigString = VariantTestUtil.createString(randomString); + SerializedPrimitive reallyBigString = VariantTestUtil.createString(randomString); // note that order doesn't matter. fields are sorted by name - Map<String, Variants.Serialized> data = + Map<String, VariantValue> data = ImmutableMap.of("really-big", reallyBigString, "a", i1, "b", i2, "c", i3); ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), true /* sort names */); ByteBuffer value = VariantTestUtil.createObject(meta, data); - VariantMetadata metadata = VariantMetadata.from(meta); - VariantObject object = VariantObject.from(metadata, value, value.get(0)); + SerializedMetadata metadata = SerializedMetadata.from(meta); + SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); Assertions.assertThat(object.numElements()).isEqualTo(4); Assertions.assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) object.get("a")).get()).isEqualTo(1); + Assertions.assertThat(object.get("a").asPrimitive().get()).isEqualTo((byte) 1); Assertions.assertThat(object.get("b").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) object.get("b")).get()).isEqualTo(2); + Assertions.assertThat(object.get("b").asPrimitive().get()).isEqualTo((byte) 2); Assertions.assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) object.get("c")).get()).isEqualTo(3); + Assertions.assertThat(object.get("c").asPrimitive().get()).isEqualTo((byte) 3); Assertions.assertThat(object.get("really-big").type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(((VariantPrimitive) object.get("really-big")).get()) + Assertions.assertThat(object.get("really-big").asPrimitive().get()) .isEqualTo(randomString); } @@ -211,25 +211,25 @@ public void testTwoByteFieldIds(boolean sortFieldNames) { 
keySet.add(RandomUtil.generateString(10, random)); } - Map<String, Variants.Serialized> data = ImmutableMap.of("aa", i1, "AA", i2, "ZZ", i3); + Map<String, VariantValue> data = ImmutableMap.of("aa", i1, "AA", i2, "ZZ", i3); // create metadata from the large key set and the actual keys keySet.addAll(data.keySet()); ByteBuffer meta = VariantTestUtil.createMetadata(keySet, sortFieldNames); ByteBuffer value = VariantTestUtil.createObject(meta, data); - VariantMetadata metadata = VariantMetadata.from(meta); - VariantObject object = VariantObject.from(metadata, value, value.get(0)); + SerializedMetadata metadata = SerializedMetadata.from(meta); + SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); Assertions.assertThat(object.numElements()).isEqualTo(3); Assertions.assertThat(object.get("aa").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) object.get("aa")).get()).isEqualTo(1); + Assertions.assertThat(object.get("aa").asPrimitive().get()).isEqualTo((byte) 1); Assertions.assertThat(object.get("AA").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) object.get("AA")).get()).isEqualTo(2); + Assertions.assertThat(object.get("AA").asPrimitive().get()).isEqualTo((byte) 2); Assertions.assertThat(object.get("ZZ").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) object.get("ZZ")).get()).isEqualTo(3); + Assertions.assertThat(object.get("ZZ").asPrimitive().get()).isEqualTo((byte) 3); } @ParameterizedTest @@ -240,24 +240,24 @@ public void testThreeByteFieldIds(boolean sortFieldNames) { keySet.add(RandomUtil.generateString(10, random)); } - Map<String, Variants.Serialized> data = ImmutableMap.of("aa", i1, "AA", i2, "ZZ", i3); + Map<String, VariantValue> data = ImmutableMap.of("aa", i1, "AA", i2, "ZZ", i3); // create metadata from the large key set and the actual keys 
keySet.addAll(data.keySet()); ByteBuffer meta = VariantTestUtil.createMetadata(keySet, sortFieldNames); ByteBuffer value = VariantTestUtil.createObject(meta, data); - VariantMetadata metadata = VariantMetadata.from(meta); - VariantObject object = VariantObject.from(metadata, value, value.get(0)); + SerializedMetadata metadata = SerializedMetadata.from(meta); + SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); Assertions.assertThat(object.numElements()).isEqualTo(3); Assertions.assertThat(object.get("aa").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) object.get("aa")).get()).isEqualTo(1); + Assertions.assertThat(object.get("aa").asPrimitive().get()).isEqualTo((byte) 1); Assertions.assertThat(object.get("AA").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) object.get("AA")).get()).isEqualTo(2); + Assertions.assertThat(object.get("AA").asPrimitive().get()).isEqualTo((byte) 2); Assertions.assertThat(object.get("ZZ").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(((VariantPrimitive) object.get("ZZ")).get()).isEqualTo(3); + Assertions.assertThat(object.get("ZZ").asPrimitive().get()).isEqualTo((byte) 3); } } diff --git a/core/src/test/java/org/apache/iceberg/TestVariantPrimitives.java b/core/src/test/java/org/apache/iceberg/variants/TestSerializedPrimitives.java similarity index 71% rename from core/src/test/java/org/apache/iceberg/TestVariantPrimitives.java rename to core/src/test/java/org/apache/iceberg/variants/TestSerializedPrimitives.java index fc64f7f291e2..654fd1431bf9 100644 --- a/core/src/test/java/org/apache/iceberg/TestVariantPrimitives.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestSerializedPrimitives.java @@ -16,23 +16,22 @@ * * KIND, either express or implied. 
See the License for the * * specific language governing permissions and limitations * * under the License. - * + * */ -package org.apache.iceberg; +package org.apache.iceberg.variants; import java.math.BigDecimal; import java.nio.ByteBuffer; -import org.apache.iceberg.Variants.PhysicalType; -import org.apache.iceberg.Variants.Primitive; import org.apache.iceberg.util.DateTimeUtil; +import org.apache.iceberg.variants.Variants.PhysicalType; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; -public class TestVariantPrimitives { +public class TestSerializedPrimitives { @Test public void testNull() { - Primitive<?> value = VariantPrimitive.from(new byte[] {primitiveHeader(0)}); + VariantPrimitive<?> value = SerializedPrimitive.from(new byte[] {primitiveHeader(0)}); Assertions.assertThat(value.type()).isEqualTo(PhysicalType.NULL); Assertions.assertThat(value.get()).isEqualTo(null); @@ -40,7 +39,7 @@ public void testNull() { @Test public void testTrue() { - Primitive<?> value = VariantPrimitive.from(new byte[] {primitiveHeader(1)}); + VariantPrimitive<?> value = SerializedPrimitive.from(new byte[] {primitiveHeader(1)}); Assertions.assertThat(value.type()).isEqualTo(PhysicalType.BOOLEAN_TRUE); Assertions.assertThat(value.get()).isEqualTo(true); @@ -48,7 +47,7 @@ public void testTrue() { @Test public void testFalse() { - Primitive<?> value = VariantPrimitive.from(new byte[] {primitiveHeader(2)}); + VariantPrimitive<?> value = SerializedPrimitive.from(new byte[] {primitiveHeader(2)}); Assertions.assertThat(value.type()).isEqualTo(PhysicalType.BOOLEAN_FALSE); Assertions.assertThat(value.get()).isEqualTo(false); @@ -56,42 +55,41 @@ public void testFalse() { @Test public void testInt8() { - Primitive<?> value = VariantPrimitive.from(new byte[] {primitiveHeader(3), 34}); + VariantPrimitive<?> value = SerializedPrimitive.from(new byte[] {primitiveHeader(3), 34}); Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT8); - 
Assertions.assertThat(value.get()).isEqualTo(34); + Assertions.assertThat(value.get()).isEqualTo((byte) 34); } @Test public void testNegativeInt8() { - Primitive<?> value = VariantPrimitive.from(new byte[] {primitiveHeader(3), (byte) 0xFF}); + VariantPrimitive<?> value = SerializedPrimitive.from(new byte[] {primitiveHeader(3), (byte) 0xFF}); Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(value.get()).isEqualTo(-1); + Assertions.assertThat(value.get()).isEqualTo((byte) -1); } @Test public void testInt16() { - Primitive<?> value = - VariantPrimitive.from(new byte[] {primitiveHeader(4), (byte) 0xD2, 0x04}); + VariantPrimitive<?> value = SerializedPrimitive.from(new byte[] {primitiveHeader(4), (byte) 0xD2, 0x04}); Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT16); - Assertions.assertThat(value.get()).isEqualTo(1234); + Assertions.assertThat(value.get()).isEqualTo((short) 1234); } @Test public void testNegativeInt16() { - Primitive<?> value = - VariantPrimitive.from(new byte[] {primitiveHeader(4), (byte) 0xFF, (byte) 0xFF}); + VariantPrimitive<?> value = + SerializedPrimitive.from(new byte[] {primitiveHeader(4), (byte) 0xFF, (byte) 0xFF}); Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT16); - Assertions.assertThat(value.get()).isEqualTo(-1); + Assertions.assertThat(value.get()).isEqualTo((short) -1); } @Test public void testInt32() { - Primitive<?> value = - VariantPrimitive.from( + VariantPrimitive<?> value = + SerializedPrimitive.from( new byte[] {primitiveHeader(5), (byte) 0xD2, 0x02, (byte) 0x96, 0x49}); Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT32); @@ -100,8 +98,8 @@ public void testInt32() { @Test public void testNegativeInt32() { - Primitive<?> value = - VariantPrimitive.from( + VariantPrimitive<?> value = + SerializedPrimitive.from( new byte[] {primitiveHeader(5), (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF}); 
Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT32); @@ -110,8 +108,8 @@ public void testNegativeInt32() { @Test public void testInt64() { - Primitive<?> value = - VariantPrimitive.from( + VariantPrimitive<?> value = + SerializedPrimitive.from( new byte[] { primitiveHeader(6), (byte) 0xB1, @@ -130,8 +128,8 @@ public void testInt64() { @Test public void testNegativeInt64() { - Primitive<?> value = - VariantPrimitive.from( + VariantPrimitive<?> value = + SerializedPrimitive.from( new byte[] { primitiveHeader(6), (byte) 0xFF, @@ -150,8 +148,8 @@ public void testNegativeInt64() { @Test public void testDouble() { - Primitive<?> value = - VariantPrimitive.from( + VariantPrimitive<?> value = + SerializedPrimitive.from( new byte[] { primitiveHeader(7), (byte) 0xB1, @@ -170,8 +168,8 @@ public void testDouble() { @Test public void testNegativeDouble() { - Primitive<?> value = - VariantPrimitive.from( + VariantPrimitive<?> value = + SerializedPrimitive.from( new byte[] {primitiveHeader(7), 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, (byte) 0x80}); Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DOUBLE); @@ -180,8 +178,8 @@ public void testNegativeDouble() { @Test public void testDecimal4() { - Primitive<?> value = - VariantPrimitive.from( + VariantPrimitive<?> value = + SerializedPrimitive.from( new byte[] {primitiveHeader(8), 0x04, (byte) 0xD2, 0x02, (byte) 0x96, 0x49}); Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL4); @@ -190,8 +188,8 @@ public void testDecimal4() { @Test public void testNegativeDecimal4() { - Primitive<?> value = - VariantPrimitive.from( + VariantPrimitive<?> value = + SerializedPrimitive.from( new byte[] { primitiveHeader(8), 0x04, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF }); @@ -202,8 +200,8 @@ public void testNegativeDecimal4() { @Test public void testDecimal8() { - Primitive<?> value = - VariantPrimitive.from( + VariantPrimitive<?> value = + SerializedPrimitive.from( new byte[] { primitiveHeader(9), 
0x09, // scale=9 @@ -223,8 +221,8 @@ public void testDecimal8() { @Test public void testNegativeDecimal8() { - Primitive<?> value = - VariantPrimitive.from( + VariantPrimitive<?> value = + SerializedPrimitive.from( new byte[] { primitiveHeader(9), 0x09, // scale=9 @@ -244,20 +242,66 @@ public void testNegativeDecimal8() { @Test public void testDecimal16() { - Primitive<?> value = - VariantPrimitive.from( + VariantPrimitive<?> value = + SerializedPrimitive.from( + new byte[] { + primitiveHeader(10), + 0x09, // scale=9 + 0x15, + 0x71, + 0x34, + (byte) 0xB0, + (byte) 0xB8, + (byte) 0x87, + 0x10, + (byte) 0x89, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00, + 0x00 + }); + + Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL16); + Assertions.assertThat(value.get()).isEqualTo(new BigDecimal("9876543210.123456789")); + } + + @Test + public void testNegativeDecimal16() { + VariantPrimitive<?> value = + SerializedPrimitive.from( new byte[] { - primitiveHeader(10), 0x09, // scale=9 + primitiveHeader(10), + 0x09, // scale=9 + (byte) 0xEB, + (byte) 0x8E, + (byte) 0xCB, + 0x4F, + 0x47, + 0x78, + (byte) 0xEF, + 0x76, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, + (byte) 0xFF, }); Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL16); - Assertions.assertThatThrownBy(value::get).isInstanceOf(UnsupportedOperationException.class); + Assertions.assertThat(value.get()).isEqualTo(new BigDecimal("-9876543210.123456789")); } @Test public void testDate() { - Primitive<?> value = - VariantPrimitive.from(new byte[] {primitiveHeader(11), (byte) 0xF4, 0x43, 0x00, 0x00}); + VariantPrimitive<?> value = + SerializedPrimitive.from(new byte[] {primitiveHeader(11), (byte) 0xF4, 0x43, 0x00, 0x00}); Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DATE); Assertions.assertThat(DateTimeUtil.daysToIsoDate((int) value.get())).isEqualTo("2017-08-18"); @@ -265,8 +309,8 @@ public void testDate() { 
@Test public void testNegativeDate() { - Primitive<?> value = - VariantPrimitive.from( + VariantPrimitive<?> value = + SerializedPrimitive.from( new byte[] {primitiveHeader(11), (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF}); Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DATE); @@ -275,8 +319,8 @@ public void testNegativeDate() { @Test public void testTimestamptz() { - Primitive<?> value = - VariantPrimitive.from( + VariantPrimitive<?> value = + SerializedPrimitive.from( new byte[] { primitiveHeader(12), 0x18, @@ -296,8 +340,8 @@ public void testTimestamptz() { @Test public void testNegativeTimestamptz() { - Primitive<?> value = - VariantPrimitive.from( + VariantPrimitive<?> value = + SerializedPrimitive.from( new byte[] { primitiveHeader(12), (byte) 0xFF, @@ -317,8 +361,8 @@ public void testNegativeTimestamptz() { @Test public void testTimestampntz() { - Primitive<?> value = - VariantPrimitive.from( + VariantPrimitive<?> value = + SerializedPrimitive.from( new byte[] { primitiveHeader(13), 0x18, @@ -338,8 +382,8 @@ public void testTimestampntz() { @Test public void testNegativeTimestampntz() { - Primitive<?> value = - VariantPrimitive.from( + VariantPrimitive<?> value = + SerializedPrimitive.from( new byte[] { primitiveHeader(13), (byte) 0xFF, @@ -359,8 +403,8 @@ public void testNegativeTimestampntz() { @Test public void testFloat() { - Primitive<?> value = - VariantPrimitive.from( + VariantPrimitive<?> value = + SerializedPrimitive.from( new byte[] {primitiveHeader(14), (byte) 0xD2, 0x02, (byte) 0x96, 0x49}); Assertions.assertThat(value.type()).isEqualTo(PhysicalType.FLOAT); @@ -369,8 +413,8 @@ public void testFloat() { @Test public void testNegativeFloat() { - Primitive<?> value = - VariantPrimitive.from(new byte[] {primitiveHeader(14), 0x00, 0x00, 0x00, (byte) 0x80}); + VariantPrimitive<?> value = + SerializedPrimitive.from(new byte[] {primitiveHeader(14), 0x00, 0x00, 0x00, (byte) 0x80}); 
Assertions.assertThat(value.type()).isEqualTo(PhysicalType.FLOAT); Assertions.assertThat(value.get()).isEqualTo(-0.0F); @@ -378,8 +422,8 @@ public void testNegativeFloat() { @Test public void testBinary() { - Primitive<?> value = - VariantPrimitive.from( + VariantPrimitive<?> value = + SerializedPrimitive.from( new byte[] {primitiveHeader(15), 0x05, 0x00, 0x00, 0x00, 'a', 'b', 'c', 'd', 'e'}); Assertions.assertThat(value.type()).isEqualTo(PhysicalType.BINARY); @@ -389,8 +433,8 @@ public void testBinary() { @Test public void testString() { - Primitive<?> value = - VariantPrimitive.from( + VariantPrimitive<?> value = + SerializedPrimitive.from( new byte[] { primitiveHeader(16), 0x07, 0x00, 0x00, 0x00, 'i', 'c', 'e', 'b', 'e', 'r', 'g' }); @@ -401,8 +445,8 @@ public void testString() { @Test public void testShortString() { - Primitive<?> value = - VariantShortString.from(new byte[] {0b11101, 'i', 'c', 'e', 'b', 'e', 'r', 'g'}); + VariantPrimitive<?> value = + SerializedShortString.from(new byte[] {0b11101, 'i', 'c', 'e', 'b', 'e', 'r', 'g'}); Assertions.assertThat(value.type()).isEqualTo(PhysicalType.STRING); Assertions.assertThat(value.get()).isEqualTo("iceberg"); @@ -410,7 +454,7 @@ public void testShortString() { @Test public void testUnsupportedType() { - Assertions.assertThatThrownBy(() -> VariantPrimitive.from(new byte[] {primitiveHeader(17)})) + Assertions.assertThatThrownBy(() -> SerializedPrimitive.from(new byte[] {primitiveHeader(17)})) .isInstanceOf(UnsupportedOperationException.class) .hasMessage("Unknown primitive physical type: 17"); } diff --git a/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java new file mode 100644 index 000000000000..b226fc228a64 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java @@ -0,0 +1,259 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license 
agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg.variants; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Map; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.util.DateTimeUtil; +import org.apache.iceberg.util.Pair; +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TestShreddedObject { + private static final Map<String, VariantValue> FIELDS = + ImmutableMap.of( + "a", + Variants.of(34), + "b", + Variants.of("iceberg"), + "c", + Variants.of(new BigDecimal("12.21"))); + + @Test + public void testShreddedFields() { + ShreddedObject object = createShreddedObject(FIELDS).second(); + + Assertions.assertThat(object.get("a")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(object.get("a").asPrimitive().get()).isEqualTo(34); + Assertions.assertThat(object.get("b")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(object.get("b").asPrimitive().get()).isEqualTo("iceberg"); + Assertions.assertThat(object.get("c")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(object.get("c").asPrimitive().get()) + .isEqualTo(new BigDecimal("12.21")); + } + + @Test + public 
void testShreddedSerializationMinimalBuffer() { + Pair<SerializedMetadata, ShreddedObject> pair = createShreddedObject(FIELDS); + SerializedMetadata metadata = pair.first(); + ShreddedObject object = pair.second(); + + ByteBuffer serialized = + ByteBuffer.allocate(object.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN); + object.writeTo(serialized, 0); + SerializedObject actual = SerializedObject.from(metadata, serialized, serialized.get(0)); + + Assertions.assertThat(actual.numElements()).isEqualTo(3); + Assertions.assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); + Assertions.assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); + Assertions.assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(actual.get("c").asPrimitive().get()) + .isEqualTo(new BigDecimal("12.21")); + } + + @Test + public void testShreddedSerializationLargeBuffer() { + Pair<SerializedMetadata, ShreddedObject> pair = createShreddedObject(FIELDS); + SerializedMetadata metadata = pair.first(); + ShreddedObject object = pair.second(); + + ByteBuffer serialized = + ByteBuffer.allocate(1000 + object.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN); + object.writeTo(serialized, 300); + ByteBuffer slice = serialized.duplicate().order(ByteOrder.LITTLE_ENDIAN); + slice.position(300); + slice.limit(300 + object.sizeInBytes()); + + VariantValue value = Variants.from(metadata, slice); + + Assertions.assertThat(value).isInstanceOf(SerializedObject.class); + SerializedObject actual = (SerializedObject) value; + + Assertions.assertThat(actual.numElements()).isEqualTo(3); + Assertions.assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); + 
Assertions.assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); + Assertions.assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(actual.get("c").asPrimitive().get()) + .isEqualTo(new BigDecimal("12.21")); + } + + @Test + public void testUnshreddedObjectSerializationMinimalBuffer() { + SerializedObject unshredded = createUnshreddedObject(FIELDS); + + ByteBuffer serialized = + ByteBuffer.allocate(unshredded.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN); + unshredded.writeTo(serialized, 0); + SerializedObject actual = + SerializedObject.from(unshredded.metadata(), serialized, serialized.get(0)); + + Assertions.assertThat(actual.numElements()).isEqualTo(3); + Assertions.assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); + Assertions.assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); + Assertions.assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(actual.get("c").asPrimitive().get()) + .isEqualTo(new BigDecimal("12.21")); + } + + @Test + public void testUnshreddedObjectSerializationLargeBuffer() { + SerializedObject unshredded = createUnshreddedObject(FIELDS); + + ByteBuffer serialized = + ByteBuffer.allocate(1000 + unshredded.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN); + unshredded.writeTo(serialized, 300); + ByteBuffer slice = serialized.duplicate().order(ByteOrder.LITTLE_ENDIAN); + slice.position(300); + slice.limit(300 + unshredded.sizeInBytes()); + + VariantValue value = Variants.from(unshredded.metadata(), slice); + + Assertions.assertThat(value).isInstanceOf(SerializedObject.class); + SerializedObject actual = (SerializedObject) value; + + Assertions.assertThat(actual.numElements()).isEqualTo(3); + 
Assertions.assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); + Assertions.assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); + Assertions.assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(actual.get("c").asPrimitive().get()) + .isEqualTo(new BigDecimal("12.21")); + } + + @Test + public void testPartiallyShreddedObjectReplacement() { + ShreddedObject partial = new ShreddedObject(createUnshreddedObject(FIELDS)); + + // replace field c with a new value + partial.put("c", Variants.ofIsoDate("2024-10-12")); + + Assertions.assertThat(partial.get("a")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(partial.get("a").asPrimitive().get()).isEqualTo(34); + Assertions.assertThat(partial.get("b")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(partial.get("b").asPrimitive().get()).isEqualTo("iceberg"); + Assertions.assertThat(partial.get("c")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(partial.get("c").type()).isEqualTo(Variants.PhysicalType.DATE); + Assertions.assertThat(partial.get("c").asPrimitive().get()) + .isEqualTo(DateTimeUtil.isoDateToDays("2024-10-12")); + } + + @Test + public void testPartiallyShreddedObjectGetMissingField() { + ShreddedObject partial = new ShreddedObject(createUnshreddedObject(FIELDS)); + + // missing fields are returned as null + Assertions.assertThat(partial.get("d")).isNull(); + } + + @Test + public void testPartiallyShreddedObjectPutMissingFieldFailure() { + ShreddedObject partial = new ShreddedObject(createUnshreddedObject(FIELDS)); + + // d is not defined in the variant metadata and will fail + Assertions.assertThatThrownBy(() -> partial.put("d", Variants.ofIsoDate("2024-10-12"))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot find field name in 
metadata: d"); + } + + @Test + public void testPartiallyShreddedObjectSerializationMinimalBuffer() { + SerializedObject unshredded = createUnshreddedObject(FIELDS); + ShreddedObject partial = new ShreddedObject(unshredded); + + // replace field c with a new value + partial.put("c", Variants.ofIsoDate("2024-10-12")); + + ByteBuffer serialized = + ByteBuffer.allocate(partial.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN); + partial.writeTo(serialized, 0); + SerializedObject actual = + SerializedObject.from(unshredded.metadata(), serialized, serialized.get(0)); + + Assertions.assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); + Assertions.assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); + Assertions.assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(actual.get("c").type()).isEqualTo(Variants.PhysicalType.DATE); + Assertions.assertThat(actual.get("c").asPrimitive().get()) + .isEqualTo(DateTimeUtil.isoDateToDays("2024-10-12")); + } + + @Test + public void testPartiallyShreddedObjectSerializationLargeBuffer() { + SerializedObject unshredded = createUnshreddedObject(FIELDS); + ShreddedObject partial = new ShreddedObject(unshredded); + + // replace field c with a new value + partial.put("c", Variants.ofIsoDate("2024-10-12")); + + ByteBuffer serialized = + ByteBuffer.allocate(1000 + unshredded.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN); + partial.writeTo(serialized, 300); + ByteBuffer slice = serialized.duplicate().order(ByteOrder.LITTLE_ENDIAN); + slice.position(300); + slice.limit(300 + unshredded.sizeInBytes()); + + VariantValue value = Variants.from(unshredded.metadata(), slice); + + Assertions.assertThat(value).isInstanceOf(SerializedObject.class); + SerializedObject actual = (SerializedObject) value; + + 
Assertions.assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); + Assertions.assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); + Assertions.assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); + Assertions.assertThat(actual.get("c").type()).isEqualTo(Variants.PhysicalType.DATE); + Assertions.assertThat(actual.get("c").asPrimitive().get()) + .isEqualTo(DateTimeUtil.isoDateToDays("2024-10-12")); + } + + private Pair<SerializedMetadata, ShreddedObject> createShreddedObject( + Map<String, VariantValue> fields) { + ByteBuffer metadataBuffer = VariantTestUtil.createMetadata(fields.keySet(), false); + SerializedMetadata metadata = SerializedMetadata.from(metadataBuffer); + + ShreddedObject object = new ShreddedObject(metadata); + for (Map.Entry<String, VariantValue> field : fields.entrySet()) { + object.put(field.getKey(), field.getValue()); + } + + return Pair.of(metadata, object); + } + + private SerializedObject createUnshreddedObject(Map<String, VariantValue> fields) { + ByteBuffer metadataBuffer = VariantTestUtil.createMetadata(fields.keySet(), false); + return (SerializedObject) + Variants.from(metadataBuffer, VariantTestUtil.createObject(metadataBuffer, fields)); + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestVariantUtil.java b/core/src/test/java/org/apache/iceberg/variants/TestVariantUtil.java similarity index 97% rename from core/src/test/java/org/apache/iceberg/TestVariantUtil.java rename to core/src/test/java/org/apache/iceberg/variants/TestVariantUtil.java index ac4bacbb1b62..567c5cbde909 100644 --- a/core/src/test/java/org/apache/iceberg/TestVariantUtil.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestVariantUtil.java @@ -19,7 +19,7 @@ * */ -package org.apache.iceberg; +package org.apache.iceberg.variants; import java.nio.ByteBuffer; 
import org.assertj.core.api.Assertions; diff --git a/core/src/test/java/org/apache/iceberg/VariantTestUtil.java b/core/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java similarity index 83% rename from core/src/test/java/org/apache/iceberg/VariantTestUtil.java rename to core/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java index 8f1e30d2e394..0492f7a4067a 100644 --- a/core/src/test/java/org/apache/iceberg/VariantTestUtil.java +++ b/core/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java @@ -19,7 +19,7 @@ * */ -package org.apache.iceberg; +package org.apache.iceberg.variants; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -32,8 +32,7 @@ import org.apache.iceberg.relocated.com.google.common.base.Preconditions; public class VariantTestUtil { - private VariantTestUtil() { - } + private VariantTestUtil() {} private static byte primitiveHeader(int primitiveType) { return (byte) (primitiveType << 2); @@ -43,15 +42,6 @@ private static byte metadataHeader(boolean isSorted, int offsetSize) { return (byte) (((offsetSize - 1) << 6) | (isSorted ? 0b10000 : 0) | 0b0001); } - private static byte objectHeader(boolean isLarge, int fieldIdSize, int offsetSize) { - return (byte) - ((isLarge ? 0x1000000 : 0) | ((fieldIdSize - 1) << 4) | ((offsetSize - 1) << 2) | 0b10); - } - - private static byte arrayHeader(boolean isLarge, int offsetSize) { - return (byte) ((isLarge ? 
0b10000 : 0) | (offsetSize - 1) << 2 | 0b11); - } - /** A hacky absolute put for ByteBuffer */ private static int writeBufferAbsolute(ByteBuffer buffer, int offset, ByteBuffer toCopy) { int originalPosition = buffer.position(); @@ -64,18 +54,18 @@ private static int writeBufferAbsolute(ByteBuffer buffer, int offset, ByteBuffer } /** Creates a random string primitive of the given length for forcing large offset sizes */ - static VariantPrimitive createString(String string) { + static SerializedPrimitive createString(String string) { byte[] utf8 = string.getBytes(StandardCharsets.UTF_8); ByteBuffer buffer = ByteBuffer.allocate(5 + utf8.length).order(ByteOrder.LITTLE_ENDIAN); buffer.put(0, primitiveHeader(16)); buffer.putInt(1, utf8.length); writeBufferAbsolute(buffer, 5, ByteBuffer.wrap(utf8)); - return VariantPrimitive.from(buffer, buffer.get(0)); + return SerializedPrimitive.from(buffer, buffer.get(0)); } static ByteBuffer createMetadata(Collection<String> fieldNames, boolean sortNames) { if (fieldNames.isEmpty()) { - return VariantMetadata.EMPTY_V1_BUFFER; + return SerializedMetadata.EMPTY_V1_BUFFER; } int numElements = fieldNames.size(); @@ -108,8 +98,7 @@ static ByteBuffer createMetadata(Collection<String> fieldNames, boolean sortName // write the offset and the string VariantUtil.writeLittleEndianUnsigned( buffer, nextOffset, offsetListOffset + (index * offsetSize), offsetSize); - int nameSize = - writeBufferAbsolute(buffer, dataOffset + nextOffset, nameBuffer); + int nameSize = writeBufferAbsolute(buffer, dataOffset + nextOffset, nameBuffer); // update the offset and index nextOffset += nameSize; index += 1; @@ -122,17 +111,16 @@ static ByteBuffer createMetadata(Collection<String> fieldNames, boolean sortName return buffer; } - static ByteBuffer createObject( - ByteBuffer metadataBuffer, Map<String, Variants.Serialized> data) { + static ByteBuffer createObject(ByteBuffer metadataBuffer, Map<String, VariantValue> data) { // create the metadata to look up field 
names - VariantMetadata metadata = VariantMetadata.from(metadataBuffer); + SerializedMetadata metadata = SerializedMetadata.from(metadataBuffer); int numElements = data.size(); boolean isLarge = numElements > 0xFF; int dataSize = 0; - for (Map.Entry<String, Variants.Serialized> field : data.entrySet()) { - dataSize += field.getValue().buffer().remaining(); + for (Map.Entry<String, VariantValue> field : data.entrySet()) { + dataSize += field.getValue().sizeInBytes(); } // field ID size is the size needed to store the largest field ID in the data @@ -145,7 +133,7 @@ static ByteBuffer createObject( int dataOffset = offsetListOffset + ((1 + numElements) * offsetSize); int totalSize = dataOffset + dataSize; - byte header = objectHeader(isLarge, fieldIdSize, offsetSize); + byte header = VariantUtil.objectHeader(isLarge, fieldIdSize, offsetSize); ByteBuffer buffer = ByteBuffer.allocate(totalSize).order(ByteOrder.LITTLE_ENDIAN); buffer.put(0, header); @@ -165,9 +153,7 @@ static ByteBuffer createObject( buffer, id, fieldIdListOffset + (index * fieldIdSize), fieldIdSize); VariantUtil.writeLittleEndianUnsigned( buffer, nextOffset, offsetListOffset + (index * offsetSize), offsetSize); - int valueSize = - writeBufferAbsolute( - buffer, dataOffset + nextOffset, data.get(fieldName).buffer()); + int valueSize = data.get(fieldName).writeTo(buffer, dataOffset + nextOffset); // update next offset and index nextOffset += valueSize; @@ -197,7 +183,7 @@ static ByteBuffer createArray(Variants.Serialized... values) { int dataOffset = offsetListOffset + ((1 + numElements) * offsetSize) /* offset list size */; int totalSize = dataOffset + dataSize; - byte header = arrayHeader(isLarge, offsetSize); + byte header = VariantUtil.arrayHeader(isLarge, offsetSize); ByteBuffer buffer = ByteBuffer.allocate(totalSize).order(ByteOrder.LITTLE_ENDIAN); buffer.put(0, header); @@ -212,7 +198,8 @@ static ByteBuffer createArray(Variants.Serialized... 
values) { int index = 0; for (Variants.Serialized value : values) { // write the offset and value - VariantUtil.writeLittleEndianUnsigned(buffer, nextOffset, offsetListOffset + (index * offsetSize), offsetSize); + VariantUtil.writeLittleEndianUnsigned( + buffer, nextOffset, offsetListOffset + (index * offsetSize), offsetSize); // in a real implementation, the buffer should be passed to serialize ByteBuffer valueBuffer = value.buffer(); int valueSize = writeBufferAbsolute(buffer, dataOffset + nextOffset, valueBuffer); @@ -222,7 +209,8 @@ static ByteBuffer createArray(Variants.Serialized... values) { } // write the final size of the data section - VariantUtil.writeLittleEndianUnsigned(buffer, nextOffset, offsetListOffset + (index * offsetSize), offsetSize); + VariantUtil.writeLittleEndianUnsigned( + buffer, nextOffset, offsetListOffset + (index * offsetSize), offsetSize); return buffer; } From c93d1a3c8cd0e299588223f9f448dff8d6d914a2 Mon Sep 17 00:00:00 2001 From: Ryan Blue <blue@apache.org> Date: Wed, 27 Nov 2024 17:11:59 -0800 Subject: [PATCH 03/12] Fix checkstyle warning --- api/src/main/java/org/apache/iceberg/io/CloseableIterable.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/src/main/java/org/apache/iceberg/io/CloseableIterable.java b/api/src/main/java/org/apache/iceberg/io/CloseableIterable.java index fde73e8e9f5b..34c561bc373d 100644 --- a/api/src/main/java/org/apache/iceberg/io/CloseableIterable.java +++ b/api/src/main/java/org/apache/iceberg/io/CloseableIterable.java @@ -43,7 +43,7 @@ static <E> CloseableIterable<E> of(Iterable<E> iterable) { return (CloseableIterable<E>) iterable; } else if (iterable instanceof Closeable) { Closeable asCloseable = (Closeable) iterable; - return combine(iterable, asCloseable::close); + return combine(iterable, asCloseable); } else { return withNoopClose(iterable); } From c7e9f697aa1b1462a664dabc1ebb5d538fac2f39 Mon Sep 17 00:00:00 2001 From: Ryan Blue <blue@apache.org> Date: Wed, 27 Nov 2024 
17:13:39 -0800 Subject: [PATCH 04/12] Apply spotless --- .../org/apache/iceberg/util/SortedMerge.java | 6 ++- .../iceberg/variants/PrimitiveWrapper.java | 31 +++++------ .../iceberg/variants/SerializedArray.java | 34 ++++++------ .../iceberg/variants/SerializedMetadata.java | 31 +++++------ .../iceberg/variants/SerializedObject.java | 34 ++++++------ .../iceberg/variants/SerializedPrimitive.java | 31 +++++------ .../variants/SerializedShortString.java | 31 +++++------ .../iceberg/variants/ShreddedObject.java | 31 +++++------ .../org/apache/iceberg/variants/Variant.java | 31 +++++------ .../apache/iceberg/variants/VariantArray.java | 33 ++++++------ .../iceberg/variants/VariantMetadata.java | 31 +++++------ .../iceberg/variants/VariantObject.java | 31 +++++------ .../iceberg/variants/VariantPrimitive.java | 31 +++++------ .../apache/iceberg/variants/VariantUtil.java | 31 +++++------ .../apache/iceberg/variants/VariantValue.java | 31 +++++------ .../org/apache/iceberg/variants/Variants.java | 31 +++++------ .../variants/TestPrimitiveWrapper.java | 31 +++++------ .../iceberg/variants/TestSerializedArray.java | 54 ++++++++++--------- .../variants/TestSerializedMetadata.java | 34 ++++++------ .../variants/TestSerializedObject.java | 39 ++++++-------- .../variants/TestSerializedPrimitives.java | 39 +++++++------- .../iceberg/variants/TestShreddedObject.java | 46 +++++++--------- .../iceberg/variants/TestVariantUtil.java | 31 +++++------ .../iceberg/variants/VariantTestUtil.java | 31 +++++------ 24 files changed, 359 insertions(+), 425 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/util/SortedMerge.java b/core/src/main/java/org/apache/iceberg/util/SortedMerge.java index d5fecdabafa0..62bc89bae96f 100644 --- a/core/src/main/java/org/apache/iceberg/util/SortedMerge.java +++ b/core/src/main/java/org/apache/iceberg/util/SortedMerge.java @@ -41,12 +41,14 @@ * @param <T> the type of objects produced by this Iterable */ public class SortedMerge<T> extends 
CloseableGroup implements CloseableIterable<T> { - public static <C extends Comparable<C>> CloseableIterable<C> of(Iterable<C> left, Iterable<C> right) { + public static <C extends Comparable<C>> CloseableIterable<C> of( + Iterable<C> left, Iterable<C> right) { return of(Arrays.asList(left, right)); } public static <C extends Comparable<C>> CloseableIterable<C> of(List<Iterable<C>> iterables) { - List<CloseableIterable<C>> closeableIterables = Lists.transform(iterables, CloseableIterable::of); + List<CloseableIterable<C>> closeableIterables = + Lists.transform(iterables, CloseableIterable::of); return new SortedMerge<>(Comparator.naturalOrder(), closeableIterables); } diff --git a/core/src/main/java/org/apache/iceberg/variants/PrimitiveWrapper.java b/core/src/main/java/org/apache/iceberg/variants/PrimitiveWrapper.java index 6cc3414789aa..96d6229cbd27 100644 --- a/core/src/main/java/org/apache/iceberg/variants/PrimitiveWrapper.java +++ b/core/src/main/java/org/apache/iceberg/variants/PrimitiveWrapper.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. 
You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; import java.math.BigDecimal; diff --git a/core/src/main/java/org/apache/iceberg/variants/SerializedArray.java b/core/src/main/java/org/apache/iceberg/variants/SerializedArray.java index 4d43e576e535..641da29e429f 100644 --- a/core/src/main/java/org/apache/iceberg/variants/SerializedArray.java +++ b/core/src/main/java/org/apache/iceberg/variants/SerializedArray.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. 
The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; import static org.apache.iceberg.variants.VariantUtil.basicType; @@ -43,8 +40,7 @@ static SerializedArray from(SerializedMetadata metadata, ByteBuffer value, int h value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); Variants.BasicType basicType = basicType(header); Preconditions.checkArgument( - basicType == Variants.BasicType.ARRAY, - "Invalid array, basic type: " + basicType); + basicType == Variants.BasicType.ARRAY, "Invalid array, basic type: " + basicType); return new SerializedArray(metadata, value, header); } diff --git a/core/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java b/core/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java index 398736572022..8a4e0be1098b 100644 --- a/core/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java +++ b/core/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ - package org.apache.iceberg.variants; import java.nio.ByteBuffer; diff --git a/core/src/main/java/org/apache/iceberg/variants/SerializedObject.java b/core/src/main/java/org/apache/iceberg/variants/SerializedObject.java index 7f2e33adee31..720adc2b4be8 100644 --- a/core/src/main/java/org/apache/iceberg/variants/SerializedObject.java +++ b/core/src/main/java/org/apache/iceberg/variants/SerializedObject.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; import static org.apache.iceberg.variants.VariantUtil.basicType; @@ -46,8 +43,7 @@ static SerializedObject from(SerializedMetadata metadata, ByteBuffer value, int value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); Variants.BasicType basicType = basicType(header); Preconditions.checkArgument( - basicType == Variants.BasicType.OBJECT, - "Invalid object, basic type: " + basicType); + basicType == Variants.BasicType.OBJECT, "Invalid object, basic type: " + basicType); return new SerializedObject(metadata, value, header); } diff --git a/core/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java b/core/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java index 381781321f68..7fefdc7489fe 100644 --- a/core/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java +++ b/core/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. 
You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; import static org.apache.iceberg.variants.VariantUtil.basicType; diff --git a/core/src/main/java/org/apache/iceberg/variants/SerializedShortString.java b/core/src/main/java/org/apache/iceberg/variants/SerializedShortString.java index d092495d27e5..5cec6d0c0e02 100644 --- a/core/src/main/java/org/apache/iceberg/variants/SerializedShortString.java +++ b/core/src/main/java/org/apache/iceberg/variants/SerializedShortString.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. 
The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; import static org.apache.iceberg.variants.VariantUtil.basicType; diff --git a/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java b/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java index ba934d349e64..0832f36cff96 100644 --- a/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java +++ b/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. 
See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; import java.nio.ByteBuffer; diff --git a/core/src/main/java/org/apache/iceberg/variants/Variant.java b/core/src/main/java/org/apache/iceberg/variants/Variant.java index 09c5d0fb2bc8..b5606fa094b6 100644 --- a/core/src/main/java/org/apache/iceberg/variants/Variant.java +++ b/core/src/main/java/org/apache/iceberg/variants/Variant.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; /** A variant metadata and value pair. */ diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantArray.java b/core/src/main/java/org/apache/iceberg/variants/VariantArray.java index ffd202be2b41..55dbc071f15b 100644 --- a/core/src/main/java/org/apache/iceberg/variants/VariantArray.java +++ b/core/src/main/java/org/apache/iceberg/variants/VariantArray.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; /** An variant array value. */ @@ -26,7 +23,7 @@ public interface VariantArray extends VariantValue { /** Returns the {@link VariantValue} at {@code index} in this array. 
*/ VariantValue get(int index); - @Override + @Override default Variants.PhysicalType type() { return Variants.PhysicalType.ARRAY; } diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantMetadata.java b/core/src/main/java/org/apache/iceberg/variants/VariantMetadata.java index 24d1908a3eb4..91dc591c64e7 100644 --- a/core/src/main/java/org/apache/iceberg/variants/VariantMetadata.java +++ b/core/src/main/java/org/apache/iceberg/variants/VariantMetadata.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. 
+ * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; /** A variant metadata dictionary. */ diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantObject.java b/core/src/main/java/org/apache/iceberg/variants/VariantObject.java index 91ed2bfa3fde..6e6b719ba561 100644 --- a/core/src/main/java/org/apache/iceberg/variants/VariantObject.java +++ b/core/src/main/java/org/apache/iceberg/variants/VariantObject.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. 
See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; /** An variant object value. */ diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java b/core/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java index c52108a2cfdc..73efb45ae91b 100644 --- a/core/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java +++ b/core/src/main/java/org/apache/iceberg/variants/VariantPrimitive.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. 
You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; /** A primitive variant value. */ diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java b/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java index 6326e4c488f7..228479cb6be1 100644 --- a/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java +++ b/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. 
The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; import java.nio.ByteBuffer; diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantValue.java b/core/src/main/java/org/apache/iceberg/variants/VariantValue.java index b0eb49306b2b..26a43795f778 100644 --- a/core/src/main/java/org/apache/iceberg/variants/VariantValue.java +++ b/core/src/main/java/org/apache/iceberg/variants/VariantValue.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. 
See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; import java.nio.ByteBuffer; diff --git a/core/src/main/java/org/apache/iceberg/variants/Variants.java b/core/src/main/java/org/apache/iceberg/variants/Variants.java index 8ae502974bfe..0fd39f043468 100644 --- a/core/src/main/java/org/apache/iceberg/variants/Variants.java +++ b/core/src/main/java/org/apache/iceberg/variants/Variants.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; import java.math.BigDecimal; diff --git a/core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java b/core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java index d086b8551d62..052ef7c5192a 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; import java.math.BigDecimal; diff --git a/core/src/test/java/org/apache/iceberg/variants/TestSerializedArray.java b/core/src/test/java/org/apache/iceberg/variants/TestSerializedArray.java index 2e89878d3878..c8b69dbfe544 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestSerializedArray.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestSerializedArray.java @@ -1,30 +1,27 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ - package org.apache.iceberg.variants; import java.nio.ByteBuffer; import java.util.Random; -import org.apache.iceberg.variants.Variants.PhysicalType; import org.apache.iceberg.util.RandomUtil; +import org.apache.iceberg.variants.Variants.PhysicalType; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; @@ -36,11 +33,16 @@ public class TestSerializedArray { private static final SerializedPrimitive vFalse = SerializedPrimitive.from(new byte[] {0b1000}); private static final SerializedShortString str = SerializedShortString.from(new byte[] {0b11101, 'i', 'c', 'e', 'b', 'e', 'r', 'g'}); - private static final SerializedShortString a = SerializedShortString.from(new byte[] {0b101, 'a'}); - private static final SerializedShortString b = SerializedShortString.from(new byte[] {0b101, 'b'}); - private static final SerializedShortString c = SerializedShortString.from(new byte[] {0b101, 'c'}); - private static final SerializedShortString d = SerializedShortString.from(new byte[] {0b101, 'd'}); - private static final SerializedShortString e = SerializedShortString.from(new byte[] {0b101, 'e'}); + private static final SerializedShortString a = + SerializedShortString.from(new byte[] {0b101, 'a'}); + private static final SerializedShortString b = + SerializedShortString.from(new byte[] {0b101, 'b'}); + private static final SerializedShortString c = + SerializedShortString.from(new byte[] {0b101, 'c'}); + private static final SerializedShortString d = + SerializedShortString.from(new byte[] {0b101, 'd'}); + private static final SerializedShortString e = + SerializedShortString.from(new byte[] {0b101, 'e'}); private static final SerializedPrimitive i34 = SerializedPrimitive.from(new byte[] {0b1100, 34}); private static final SerializedPrimitive i1234 = SerializedPrimitive.from(new byte[] {0b10000, (byte) 0xD2, 0x04}); @@ -117,8 +119,10 @@ public void testStringDifferentLengths() { @Test public void testArrayOfMixedTypes() { ByteBuffer nestedBuffer = 
VariantTestUtil.createArray(a, c, d); - SerializedArray nested = SerializedArray.from(EMPTY_METADATA, nestedBuffer, nestedBuffer.get(0)); - ByteBuffer buffer = VariantTestUtil.createArray(date, i34, str, vNull, e, b, vFalse, nested, vTrue, i1234); + SerializedArray nested = + SerializedArray.from(EMPTY_METADATA, nestedBuffer, nestedBuffer.get(0)); + ByteBuffer buffer = + VariantTestUtil.createArray(date, i34, str, vNull, e, b, vFalse, nested, vTrue, i1234); SerializedArray array = SerializedArray.from(EMPTY_METADATA, buffer, buffer.get(0)); Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); diff --git a/core/src/test/java/org/apache/iceberg/variants/TestSerializedMetadata.java b/core/src/test/java/org/apache/iceberg/variants/TestSerializedMetadata.java index 0aabbfcc0317..5555a8bac9c4 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestSerializedMetadata.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestSerializedMetadata.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. 
You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; import java.nio.ByteBuffer; @@ -62,7 +59,8 @@ public void testHeaderOffsetSize() { // offset size is 3-byte LE = 1 Assertions.assertThat( - SerializedMetadata.from(new byte[] {(byte) 0b10010001, 0x01, 0x00, 0x00}).dictionarySize()) + SerializedMetadata.from(new byte[] {(byte) 0b10010001, 0x01, 0x00, 0x00}) + .dictionarySize()) .isEqualTo(1); // offset size is 2-byte LE = 1 diff --git a/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java b/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java index 6e407dd5cf0e..63fc0990af79 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java @@ -1,34 +1,31 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ - package org.apache.iceberg.variants; import java.nio.ByteBuffer; import java.util.Map; import java.util.Random; import java.util.Set; -import org.apache.iceberg.variants.Variants.PhysicalType; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.util.RandomUtil; +import org.apache.iceberg.variants.Variants.PhysicalType; import org.assertj.core.api.Assertions; import org.assertj.core.util.Sets; import org.junit.jupiter.api.Test; @@ -153,8 +150,7 @@ public void testTwoByteOffsets() { SerializedPrimitive bigString = VariantTestUtil.createString(randomString); // note that order doesn't matter. fields are sorted by name - Map<String, VariantValue> data = - ImmutableMap.of("big", bigString, "a", i1, "b", i2, "c", i3); + Map<String, VariantValue> data = ImmutableMap.of("big", bigString, "a", i1, "b", i2, "c", i3); ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), true /* sort names */); ByteBuffer value = VariantTestUtil.createObject(meta, data); @@ -199,8 +195,7 @@ public void testThreeByteOffsets() { Assertions.assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8); Assertions.assertThat(object.get("c").asPrimitive().get()).isEqualTo((byte) 3); Assertions.assertThat(object.get("really-big").type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(object.get("really-big").asPrimitive().get()) - .isEqualTo(randomString); + Assertions.assertThat(object.get("really-big").asPrimitive().get()).isEqualTo(randomString); } @ParameterizedTest diff --git a/core/src/test/java/org/apache/iceberg/variants/TestSerializedPrimitives.java b/core/src/test/java/org/apache/iceberg/variants/TestSerializedPrimitives.java index 654fd1431bf9..7e713508827a 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestSerializedPrimitives.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestSerializedPrimitives.java @@ -1,24 
+1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. - * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ - package org.apache.iceberg.variants; import java.math.BigDecimal; @@ -63,7 +60,8 @@ public void testInt8() { @Test public void testNegativeInt8() { - VariantPrimitive<?> value = SerializedPrimitive.from(new byte[] {primitiveHeader(3), (byte) 0xFF}); + VariantPrimitive<?> value = + SerializedPrimitive.from(new byte[] {primitiveHeader(3), (byte) 0xFF}); Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT8); Assertions.assertThat(value.get()).isEqualTo((byte) -1); @@ -71,7 +69,8 @@ public void testNegativeInt8() { @Test public void testInt16() { - VariantPrimitive<?> value = SerializedPrimitive.from(new byte[] {primitiveHeader(4), (byte) 0xD2, 0x04}); + VariantPrimitive<?> value = + SerializedPrimitive.from(new byte[] {primitiveHeader(4), (byte) 0xD2, 0x04}); Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT16); Assertions.assertThat(value.get()).isEqualTo((short) 1234); diff --git a/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java index b226fc228a64..a5eb461a6e1a 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. 
The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; import java.math.BigDecimal; @@ -50,8 +47,7 @@ public void testShreddedFields() { Assertions.assertThat(object.get("b")).isInstanceOf(VariantPrimitive.class); Assertions.assertThat(object.get("b").asPrimitive().get()).isEqualTo("iceberg"); Assertions.assertThat(object.get("c")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(object.get("c").asPrimitive().get()) - .isEqualTo(new BigDecimal("12.21")); + Assertions.assertThat(object.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); } @Test @@ -71,8 +67,7 @@ public void testShreddedSerializationMinimalBuffer() { Assertions.assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); Assertions.assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); Assertions.assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("c").asPrimitive().get()) - .isEqualTo(new BigDecimal("12.21")); + 
Assertions.assertThat(actual.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); } @Test @@ -99,8 +94,7 @@ public void testShreddedSerializationLargeBuffer() { Assertions.assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); Assertions.assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); Assertions.assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("c").asPrimitive().get()) - .isEqualTo(new BigDecimal("12.21")); + Assertions.assertThat(actual.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); } @Test @@ -119,8 +113,7 @@ public void testUnshreddedObjectSerializationMinimalBuffer() { Assertions.assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); Assertions.assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); Assertions.assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("c").asPrimitive().get()) - .isEqualTo(new BigDecimal("12.21")); + Assertions.assertThat(actual.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); } @Test @@ -145,8 +138,7 @@ public void testUnshreddedObjectSerializationLargeBuffer() { Assertions.assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); Assertions.assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); Assertions.assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("c").asPrimitive().get()) - .isEqualTo(new BigDecimal("12.21")); + Assertions.assertThat(actual.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); } @Test diff --git a/core/src/test/java/org/apache/iceberg/variants/TestVariantUtil.java b/core/src/test/java/org/apache/iceberg/variants/TestVariantUtil.java index 567c5cbde909..ed0a9be61d5f 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestVariantUtil.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestVariantUtil.java @@ 
-1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ - package org.apache.iceberg.variants; import java.nio.ByteBuffer; diff --git a/core/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java b/core/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java index 0492f7a4067a..b6caec63758a 100644 --- a/core/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java +++ b/core/src/test/java/org/apache/iceberg/variants/VariantTestUtil.java @@ -1,24 +1,21 @@ /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at * - * * Licensed to the Apache Software Foundation (ASF) under one - * * or more contributor license agreements. See the NOTICE file - * * distributed with this work for additional information - * * regarding copyright ownership. The ASF licenses this file - * * to you under the Apache License, Version 2.0 (the - * * "License"); you may not use this file except in compliance - * * with the License. You may obtain a copy of the License at - * * - * * http://www.apache.org/licenses/LICENSE-2.0 - * * - * * Unless required by applicable law or agreed to in writing, - * * software distributed under the License is distributed on an - * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * * KIND, either express or implied. See the License for the - * * specific language governing permissions and limitations - * * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. */ - package org.apache.iceberg.variants; import java.nio.ByteBuffer; From dff73ab3ba6873e03465567c8e9c54c9490cf46a Mon Sep 17 00:00:00 2001 From: Ryan Blue <blue@apache.org> Date: Mon, 9 Dec 2024 16:49:28 -0800 Subject: [PATCH 05/12] Fix checkstyle --- .../java/org/apache/iceberg/variants/SerializedArray.java | 4 +--- .../java/org/apache/iceberg/variants/SerializedObject.java | 4 +--- .../java/org/apache/iceberg/variants/SerializedPrimitive.java | 4 +--- .../org/apache/iceberg/variants/SerializedShortString.java | 4 +--- .../org/apache/iceberg/variants/TestPrimitiveWrapper.java | 4 ++-- 5 files changed, 6 insertions(+), 14 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/variants/SerializedArray.java b/core/src/main/java/org/apache/iceberg/variants/SerializedArray.java index 641da29e429f..774553cbb4a3 100644 --- a/core/src/main/java/org/apache/iceberg/variants/SerializedArray.java +++ b/core/src/main/java/org/apache/iceberg/variants/SerializedArray.java @@ -18,8 +18,6 @@ */ package org.apache.iceberg.variants; -import static org.apache.iceberg.variants.VariantUtil.basicType; - import java.nio.ByteBuffer; import java.nio.ByteOrder; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; @@ -38,7 +36,7 @@ static SerializedArray from(SerializedMetadata metadata, byte[] bytes) { static SerializedArray from(SerializedMetadata metadata, ByteBuffer value, int header) { Preconditions.checkArgument( value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); - Variants.BasicType basicType = basicType(header); + Variants.BasicType basicType = VariantUtil.basicType(header); Preconditions.checkArgument( basicType == Variants.BasicType.ARRAY, "Invalid array, basic type: " + basicType); return new SerializedArray(metadata, value, header); diff --git 
a/core/src/main/java/org/apache/iceberg/variants/SerializedObject.java b/core/src/main/java/org/apache/iceberg/variants/SerializedObject.java index 720adc2b4be8..f941b62731d7 100644 --- a/core/src/main/java/org/apache/iceberg/variants/SerializedObject.java +++ b/core/src/main/java/org/apache/iceberg/variants/SerializedObject.java @@ -18,8 +18,6 @@ */ package org.apache.iceberg.variants; -import static org.apache.iceberg.variants.VariantUtil.basicType; - import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Iterator; @@ -41,7 +39,7 @@ static SerializedObject from(SerializedMetadata metadata, byte[] bytes) { static SerializedObject from(SerializedMetadata metadata, ByteBuffer value, int header) { Preconditions.checkArgument( value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); - Variants.BasicType basicType = basicType(header); + Variants.BasicType basicType = VariantUtil.basicType(header); Preconditions.checkArgument( basicType == Variants.BasicType.OBJECT, "Invalid object, basic type: " + basicType); return new SerializedObject(metadata, value, header); diff --git a/core/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java b/core/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java index 7fefdc7489fe..1a6bd37a4ff3 100644 --- a/core/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java +++ b/core/src/main/java/org/apache/iceberg/variants/SerializedPrimitive.java @@ -18,8 +18,6 @@ */ package org.apache.iceberg.variants; -import static org.apache.iceberg.variants.VariantUtil.basicType; - import java.math.BigDecimal; import java.math.BigInteger; import java.nio.ByteBuffer; @@ -37,7 +35,7 @@ static SerializedPrimitive from(byte[] bytes) { static SerializedPrimitive from(ByteBuffer value, int header) { Preconditions.checkArgument( value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); - Variants.BasicType basicType = basicType(header); + Variants.BasicType 
basicType = VariantUtil.basicType(header); Preconditions.checkArgument( basicType == Variants.BasicType.PRIMITIVE, "Invalid primitive, basic type != PRIMITIVE: " + basicType); diff --git a/core/src/main/java/org/apache/iceberg/variants/SerializedShortString.java b/core/src/main/java/org/apache/iceberg/variants/SerializedShortString.java index 5cec6d0c0e02..3004a075def1 100644 --- a/core/src/main/java/org/apache/iceberg/variants/SerializedShortString.java +++ b/core/src/main/java/org/apache/iceberg/variants/SerializedShortString.java @@ -18,8 +18,6 @@ */ package org.apache.iceberg.variants; -import static org.apache.iceberg.variants.VariantUtil.basicType; - import java.nio.ByteBuffer; import java.nio.ByteOrder; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; @@ -35,7 +33,7 @@ static SerializedShortString from(byte[] bytes) { static SerializedShortString from(ByteBuffer value, int header) { Preconditions.checkArgument( value.order() == ByteOrder.LITTLE_ENDIAN, "Unsupported byte order: big endian"); - Variants.BasicType basicType = basicType(header); + Variants.BasicType basicType = VariantUtil.basicType(header); Preconditions.checkArgument( basicType == Variants.BasicType.SHORT_STRING, "Invalid short string, basic type: " + basicType); diff --git a/core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java b/core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java index 052ef7c5192a..331290d2535e 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java @@ -26,7 +26,7 @@ import org.junit.jupiter.params.provider.FieldSource; public class TestPrimitiveWrapper { - private static final VariantPrimitive<?>[] primitives = + private static final VariantPrimitive<?>[] PRIMITIVES = new VariantPrimitive[] { Variants.ofNull(), Variants.of(true), @@ -60,7 +60,7 @@ public class TestPrimitiveWrapper { }; 
@ParameterizedTest - @FieldSource("primitives") + @FieldSource("PRIMITIVES") public void testPrimitiveValueSerialization(VariantPrimitive<?> primitive) { // write the value to the middle of a large buffer int size = primitive.sizeInBytes(); From 496defe6b4983a4d81a5416c3d58e1f19bf55aa0 Mon Sep 17 00:00:00 2001 From: Ryan Blue <blue@apache.org> Date: Mon, 9 Dec 2024 17:02:41 -0800 Subject: [PATCH 06/12] Fix more checkstyle --- .../java/org/apache/iceberg/variants/SerializedMetadata.java | 1 - .../main/java/org/apache/iceberg/variants/VariantObject.java | 1 + core/src/main/java/org/apache/iceberg/variants/VariantUtil.java | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java b/core/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java index 8a4e0be1098b..30f4903db281 100644 --- a/core/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java +++ b/core/src/main/java/org/apache/iceberg/variants/SerializedMetadata.java @@ -27,7 +27,6 @@ class SerializedMetadata implements VariantMetadata, Variants.Serialized { private static final int SUPPORTED_VERSION = 1; private static final int VERSION_MASK = 0b1111; private static final int SORTED_STRINGS = 0b10000; - private static final int RESERVED = 0b100000; private static final int OFFSET_SIZE_MASK = 0b11000000; private static final int OFFSET_SIZE_SHIFT = 6; diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantObject.java b/core/src/main/java/org/apache/iceberg/variants/VariantObject.java index 6e6b719ba561..7bb82f94a467 100644 --- a/core/src/main/java/org/apache/iceberg/variants/VariantObject.java +++ b/core/src/main/java/org/apache/iceberg/variants/VariantObject.java @@ -23,6 +23,7 @@ public interface VariantObject extends VariantValue { /** Returns the {@link VariantValue} for the field named {@code name} in this object. 
*/ VariantValue get(String name); + @Override default Variants.PhysicalType type() { return Variants.PhysicalType.OBJECT; } diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java b/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java index 228479cb6be1..2c9937337278 100644 --- a/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java +++ b/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java @@ -87,7 +87,7 @@ static int readLittleEndianUnsigned(ByteBuffer buffer, int offset, int size) { case 4: return buffer.getInt(base); case 3: - return ((int) buffer.getShort(base)) & 0xFFFF | (buffer.get(base + 2) & 0xFF) << 16; + return (((int) buffer.getShort(base)) & 0xFFFF) | ((buffer.get(base + 2) & 0xFF) << 16); case 2: return ((int) buffer.getShort(base)) & 0xFFFF; case 1: From 4f09a4f83a77eb7c350c3cbf6e6e93dbe528975a Mon Sep 17 00:00:00 2001 From: Ryan Blue <blue@apache.org> Date: Tue, 10 Dec 2024 13:37:45 -0800 Subject: [PATCH 07/12] Add more tests for large objects, id, and offset sizes. --- .../apache/iceberg/variants/VariantUtil.java | 2 +- .../variants/TestSerializedObject.java | 26 ++ .../iceberg/variants/TestShreddedObject.java | 270 ++++++++++++++---- 3 files changed, 244 insertions(+), 54 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java b/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java index 2c9937337278..d6b78fe899e6 100644 --- a/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java +++ b/core/src/main/java/org/apache/iceberg/variants/VariantUtil.java @@ -170,7 +170,7 @@ static byte primitiveHeader(int primitiveType) { static byte objectHeader(boolean isLarge, int fieldIdSize, int offsetSize) { return (byte) - ((isLarge ? 0x1000000 : 0) | ((fieldIdSize - 1) << 4) | ((offsetSize - 1) << 2) | 0b10); + ((isLarge ? 
0b1000000 : 0) | ((fieldIdSize - 1) << 4) | ((offsetSize - 1) << 2) | 0b10); } static byte arrayHeader(boolean isLarge, int offsetSize) { diff --git a/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java b/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java index 63fc0990af79..806ff604d738 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java @@ -24,6 +24,7 @@ import java.util.Set; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.util.RandomUtil; import org.apache.iceberg.variants.Variants.PhysicalType; import org.assertj.core.api.Assertions; @@ -198,6 +199,31 @@ public void testThreeByteOffsets() { Assertions.assertThat(object.get("really-big").asPrimitive().get()).isEqualTo(randomString); } + @ParameterizedTest + @ValueSource(booleans = {true, false}) + @SuppressWarnings({"unchecked", "rawtypes"}) + public void testLargeObject(boolean sortFieldNames) { + Map<String, VariantPrimitive<String>> fields = Maps.newHashMap(); + for (int i = 0; i < 10_000; i += 1) { + fields.put(RandomUtil.generateString(10, random), Variants.of(RandomUtil.generateString(10, random))); + } + + ByteBuffer meta = VariantTestUtil.createMetadata(fields.keySet(), sortFieldNames); + ByteBuffer value = VariantTestUtil.createObject(meta, (Map) fields); + + SerializedMetadata metadata = SerializedMetadata.from(meta); + SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); + + Assertions.assertThat(object.type()).isEqualTo(Variants.PhysicalType.OBJECT); + Assertions.assertThat(object.numElements()).isEqualTo(10_000); + + for (Map.Entry<String, VariantPrimitive<String>> entry : fields.entrySet()) { + VariantValue fieldValue = 
object.get(entry.getKey()); + Assertions.assertThat(fieldValue.type()).isEqualTo(Variants.PhysicalType.STRING); + Assertions.assertThat(fieldValue.asPrimitive().get()).isEqualTo(entry.getValue().get()); + } + } + @ParameterizedTest @ValueSource(booleans = {true, false}) public void testTwoByteFieldIds(boolean sortFieldNames) { diff --git a/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java index a5eb461a6e1a..c299d053cf78 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java @@ -22,11 +22,18 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Map; +import java.util.Random; +import java.util.Set; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.util.DateTimeUtil; import org.apache.iceberg.util.Pair; +import org.apache.iceberg.util.RandomUtil; import org.assertj.core.api.Assertions; +import org.assertj.core.util.Sets; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; public class TestShreddedObject { private static final Map<String, VariantValue> FIELDS = @@ -38,6 +45,8 @@ public class TestShreddedObject { "c", Variants.of(new BigDecimal("12.21"))); + private final Random random = new Random(871925); + @Test public void testShreddedFields() { ShreddedObject object = createShreddedObject(FIELDS).second(); @@ -56,10 +65,10 @@ public void testShreddedSerializationMinimalBuffer() { SerializedMetadata metadata = pair.first(); ShreddedObject object = pair.second(); - ByteBuffer serialized = - ByteBuffer.allocate(object.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN); - object.writeTo(serialized, 0); - SerializedObject actual = 
SerializedObject.from(metadata, serialized, serialized.get(0)); + VariantValue value = roundTripMinimalBuffer(object, metadata); + + Assertions.assertThat(value).isInstanceOf(SerializedObject.class); + SerializedObject actual = (SerializedObject) value; Assertions.assertThat(actual.numElements()).isEqualTo(3); Assertions.assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); @@ -76,14 +85,7 @@ public void testShreddedSerializationLargeBuffer() { SerializedMetadata metadata = pair.first(); ShreddedObject object = pair.second(); - ByteBuffer serialized = - ByteBuffer.allocate(1000 + object.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN); - object.writeTo(serialized, 300); - ByteBuffer slice = serialized.duplicate().order(ByteOrder.LITTLE_ENDIAN); - slice.position(300); - slice.limit(300 + object.sizeInBytes()); - - VariantValue value = Variants.from(metadata, slice); + VariantValue value = roundTripLargeBuffer(object, metadata); Assertions.assertThat(value).isInstanceOf(SerializedObject.class); SerializedObject actual = (SerializedObject) value; @@ -99,13 +101,14 @@ public void testShreddedSerializationLargeBuffer() { @Test public void testUnshreddedObjectSerializationMinimalBuffer() { - SerializedObject unshredded = createUnshreddedObject(FIELDS); + Pair<SerializedMetadata, ShreddedObject> pair = createUnshreddedObject(FIELDS); + SerializedMetadata metadata = pair.first(); + ShreddedObject object = pair.second(); - ByteBuffer serialized = - ByteBuffer.allocate(unshredded.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN); - unshredded.writeTo(serialized, 0); - SerializedObject actual = - SerializedObject.from(unshredded.metadata(), serialized, serialized.get(0)); + VariantValue value = roundTripMinimalBuffer(object, metadata); + + Assertions.assertThat(value).isInstanceOf(SerializedObject.class); + SerializedObject actual = (SerializedObject) value; Assertions.assertThat(actual.numElements()).isEqualTo(3); 
Assertions.assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); @@ -118,16 +121,11 @@ public void testUnshreddedObjectSerializationMinimalBuffer() { @Test public void testUnshreddedObjectSerializationLargeBuffer() { - SerializedObject unshredded = createUnshreddedObject(FIELDS); - - ByteBuffer serialized = - ByteBuffer.allocate(1000 + unshredded.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN); - unshredded.writeTo(serialized, 300); - ByteBuffer slice = serialized.duplicate().order(ByteOrder.LITTLE_ENDIAN); - slice.position(300); - slice.limit(300 + unshredded.sizeInBytes()); + Pair<SerializedMetadata, ShreddedObject> pair = createUnshreddedObject(FIELDS); + SerializedMetadata metadata = pair.first(); + ShreddedObject object = pair.second(); - VariantValue value = Variants.from(unshredded.metadata(), slice); + VariantValue value = roundTripLargeBuffer(object, metadata); Assertions.assertThat(value).isInstanceOf(SerializedObject.class); SerializedObject actual = (SerializedObject) value; @@ -143,7 +141,7 @@ public void testUnshreddedObjectSerializationLargeBuffer() { @Test public void testPartiallyShreddedObjectReplacement() { - ShreddedObject partial = new ShreddedObject(createUnshreddedObject(FIELDS)); + ShreddedObject partial = createUnshreddedObject(FIELDS).second(); // replace field c with a new value partial.put("c", Variants.ofIsoDate("2024-10-12")); @@ -160,7 +158,7 @@ public void testPartiallyShreddedObjectReplacement() { @Test public void testPartiallyShreddedObjectGetMissingField() { - ShreddedObject partial = new ShreddedObject(createUnshreddedObject(FIELDS)); + ShreddedObject partial = createUnshreddedObject(FIELDS).second(); // missing fields are returned as null Assertions.assertThat(partial.get("d")).isNull(); @@ -168,7 +166,7 @@ public void testPartiallyShreddedObjectGetMissingField() { @Test public void testPartiallyShreddedObjectPutMissingFieldFailure() { - ShreddedObject partial = new ShreddedObject(createUnshreddedObject(FIELDS)); + 
ShreddedObject partial = createUnshreddedObject(FIELDS).second(); // d is not defined in the variant metadata and will fail Assertions.assertThatThrownBy(() -> partial.put("d", Variants.ofIsoDate("2024-10-12"))) @@ -178,17 +176,17 @@ public void testPartiallyShreddedObjectPutMissingFieldFailure() { @Test public void testPartiallyShreddedObjectSerializationMinimalBuffer() { - SerializedObject unshredded = createUnshreddedObject(FIELDS); - ShreddedObject partial = new ShreddedObject(unshredded); + Pair<SerializedMetadata, ShreddedObject> pair = createUnshreddedObject(FIELDS); + SerializedMetadata metadata = pair.first(); + ShreddedObject partial = pair.second(); // replace field c with a new value partial.put("c", Variants.ofIsoDate("2024-10-12")); - ByteBuffer serialized = - ByteBuffer.allocate(partial.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN); - partial.writeTo(serialized, 0); - SerializedObject actual = - SerializedObject.from(unshredded.metadata(), serialized, serialized.get(0)); + VariantValue value = roundTripMinimalBuffer(partial, metadata); + + Assertions.assertThat(value).isInstanceOf(SerializedObject.class); + SerializedObject actual = (SerializedObject) value; Assertions.assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); Assertions.assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); @@ -202,20 +200,14 @@ public void testPartiallyShreddedObjectSerializationMinimalBuffer() { @Test public void testPartiallyShreddedObjectSerializationLargeBuffer() { - SerializedObject unshredded = createUnshreddedObject(FIELDS); - ShreddedObject partial = new ShreddedObject(unshredded); + Pair<SerializedMetadata, ShreddedObject> pair = createUnshreddedObject(FIELDS); + SerializedMetadata metadata = pair.first(); + ShreddedObject partial = pair.second(); // replace field c with a new value partial.put("c", Variants.ofIsoDate("2024-10-12")); - ByteBuffer serialized = - ByteBuffer.allocate(1000 + 
unshredded.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN); - partial.writeTo(serialized, 300); - ByteBuffer slice = serialized.duplicate().order(ByteOrder.LITTLE_ENDIAN); - slice.position(300); - slice.limit(300 + unshredded.sizeInBytes()); - - VariantValue value = Variants.from(unshredded.metadata(), slice); + VariantValue value = roundTripLargeBuffer(partial, metadata); Assertions.assertThat(value).isInstanceOf(SerializedObject.class); SerializedObject actual = (SerializedObject) value; @@ -230,20 +222,192 @@ public void testPartiallyShreddedObjectSerializationLargeBuffer() { .isEqualTo(DateTimeUtil.isoDateToDays("2024-10-12")); } - private Pair<SerializedMetadata, ShreddedObject> createShreddedObject( - Map<String, VariantValue> fields) { - ByteBuffer metadataBuffer = VariantTestUtil.createMetadata(fields.keySet(), false); - SerializedMetadata metadata = SerializedMetadata.from(metadataBuffer); + @Test + public void testTwoByteOffsets() { + // a string larger than 255 bytes to push the value offset size above 1 byte + String randomString = RandomUtil.generateString(300, random); + SerializedPrimitive bigString = VariantTestUtil.createString(randomString); + + Map<String, VariantValue> data = Maps.newHashMap(); + data.putAll(FIELDS); + data.put("big", bigString); + + Pair<SerializedMetadata, ShreddedObject> pair = createShreddedObject(data); + VariantValue value = roundTripLargeBuffer(pair.second(), pair.first()); + + Assertions.assertThat(value.type()).isEqualTo(Variants.PhysicalType.OBJECT); + SerializedObject object = (SerializedObject) value; + Assertions.assertThat(object.numElements()).isEqualTo(4); + + Assertions.assertThat(object.get("a").type()).isEqualTo(Variants.PhysicalType.INT32); + Assertions.assertThat(object.get("a").asPrimitive().get()).isEqualTo(34); + Assertions.assertThat(object.get("b").type()).isEqualTo(Variants.PhysicalType.STRING); + Assertions.assertThat(object.get("b").asPrimitive().get()).isEqualTo("iceberg"); + 
Assertions.assertThat(object.get("c").type()).isEqualTo(Variants.PhysicalType.DECIMAL4); + Assertions.assertThat(object.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); + Assertions.assertThat(object.get("big").type()).isEqualTo(Variants.PhysicalType.STRING); + Assertions.assertThat(object.get("big").asPrimitive().get()).isEqualTo(randomString); + } + + @Test + public void testThreeByteOffsets() { + // a string larger than 65535 bytes to push the value offset size above 1 byte + String randomString = RandomUtil.generateString(70_000, random); + SerializedPrimitive reallyBigString = VariantTestUtil.createString(randomString); + + Map<String, VariantValue> data = Maps.newHashMap(); + data.putAll(FIELDS); + data.put("really-big", reallyBigString); + + Pair<SerializedMetadata, ShreddedObject> pair = createShreddedObject(data); + VariantValue value = roundTripLargeBuffer(pair.second(), pair.first()); + + Assertions.assertThat(value.type()).isEqualTo(Variants.PhysicalType.OBJECT); + SerializedObject object = (SerializedObject) value; + Assertions.assertThat(object.numElements()).isEqualTo(4); + Assertions.assertThat(object.get("a").type()).isEqualTo(Variants.PhysicalType.INT32); + Assertions.assertThat(object.get("a").asPrimitive().get()).isEqualTo(34); + Assertions.assertThat(object.get("b").type()).isEqualTo(Variants.PhysicalType.STRING); + Assertions.assertThat(object.get("b").asPrimitive().get()).isEqualTo("iceberg"); + Assertions.assertThat(object.get("c").type()).isEqualTo(Variants.PhysicalType.DECIMAL4); + Assertions.assertThat(object.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); + Assertions.assertThat(object.get("really-big").type()).isEqualTo(Variants.PhysicalType.STRING); + Assertions.assertThat(object.get("really-big").asPrimitive().get()).isEqualTo(randomString); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + @SuppressWarnings({"unchecked", "rawtypes"}) + public void testLargeObject(boolean 
sortFieldNames) { + Map<String, VariantPrimitive<String>> fields = Maps.newHashMap(); + for (int i = 0; i < 10_000; i += 1) { + fields.put(RandomUtil.generateString(10, random), Variants.of(RandomUtil.generateString(10, random))); + } + + SerializedMetadata metadata = + SerializedMetadata.from(VariantTestUtil.createMetadata(fields.keySet(), sortFieldNames)); + + ShreddedObject shredded = createShreddedObject(metadata, (Map) fields); + VariantValue value = roundTripLargeBuffer(shredded, metadata); + + Assertions.assertThat(value.type()).isEqualTo(Variants.PhysicalType.OBJECT); + SerializedObject object = (SerializedObject) value; + Assertions.assertThat(object.numElements()).isEqualTo(10_000); + + for (Map.Entry<String, VariantPrimitive<String>> entry : fields.entrySet()) { + VariantValue fieldValue = object.get(entry.getKey()); + Assertions.assertThat(fieldValue.type()).isEqualTo(Variants.PhysicalType.STRING); + Assertions.assertThat(fieldValue.asPrimitive().get()).isEqualTo(entry.getValue().get()); + } + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testTwoByteFieldIds(boolean sortFieldNames) { + Set<String> keySet = Sets.newHashSet(); + for (int i = 0; i < 10_000; i += 1) { + keySet.add(RandomUtil.generateString(10, random)); + } + + Map<String, VariantValue> data = + ImmutableMap.of("aa", FIELDS.get("a"), "AA", FIELDS.get("b"), "ZZ", FIELDS.get("c")); + + // create metadata from the large key set and the actual keys + keySet.addAll(data.keySet()); + SerializedMetadata metadata = + SerializedMetadata.from(VariantTestUtil.createMetadata(keySet, sortFieldNames)); + + ShreddedObject shredded = createShreddedObject(metadata, data); + VariantValue value = roundTripLargeBuffer(shredded, metadata); + + Assertions.assertThat(value.type()).isEqualTo(Variants.PhysicalType.OBJECT); + SerializedObject object = (SerializedObject) value; + Assertions.assertThat(object.numElements()).isEqualTo(3); + + 
Assertions.assertThat(object.get("aa").type()).isEqualTo(Variants.PhysicalType.INT32); + Assertions.assertThat(object.get("aa").asPrimitive().get()).isEqualTo(34); + Assertions.assertThat(object.get("AA").type()).isEqualTo(Variants.PhysicalType.STRING); + Assertions.assertThat(object.get("AA").asPrimitive().get()).isEqualTo("iceberg"); + Assertions.assertThat(object.get("ZZ").type()).isEqualTo(Variants.PhysicalType.DECIMAL4); + Assertions.assertThat(object.get("ZZ").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); + } + + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testThreeByteFieldIds(boolean sortFieldNames) { + Set<String> keySet = Sets.newHashSet(); + for (int i = 0; i < 100_000; i += 1) { + keySet.add(RandomUtil.generateString(10, random)); + } + + Map<String, VariantValue> data = + ImmutableMap.of("aa", FIELDS.get("a"), "AA", FIELDS.get("b"), "ZZ", FIELDS.get("c")); + + // create metadata from the large key set and the actual keys + keySet.addAll(data.keySet()); + SerializedMetadata metadata = + SerializedMetadata.from(VariantTestUtil.createMetadata(keySet, sortFieldNames)); + + ShreddedObject shredded = createShreddedObject(metadata, data); + VariantValue value = roundTripLargeBuffer(shredded, metadata); + + Assertions.assertThat(value.type()).isEqualTo(Variants.PhysicalType.OBJECT); + SerializedObject object = (SerializedObject) value; + Assertions.assertThat(object.numElements()).isEqualTo(3); + + Assertions.assertThat(object.get("aa").type()).isEqualTo(Variants.PhysicalType.INT32); + Assertions.assertThat(object.get("aa").asPrimitive().get()).isEqualTo(34); + Assertions.assertThat(object.get("AA").type()).isEqualTo(Variants.PhysicalType.STRING); + Assertions.assertThat(object.get("AA").asPrimitive().get()).isEqualTo("iceberg"); + Assertions.assertThat(object.get("ZZ").type()).isEqualTo(Variants.PhysicalType.DECIMAL4); + Assertions.assertThat(object.get("ZZ").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); + } 
+ + static VariantValue roundTripMinimalBuffer(ShreddedObject object, SerializedMetadata metadata) { + ByteBuffer serialized = + ByteBuffer.allocate(object.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN); + object.writeTo(serialized, 0); + + return Variants.from(metadata, serialized); + } + + static VariantValue roundTripLargeBuffer(ShreddedObject object, SerializedMetadata metadata) { + ByteBuffer serialized = + ByteBuffer.allocate(1000 + object.sizeInBytes()).order(ByteOrder.LITTLE_ENDIAN); + object.writeTo(serialized, 300); + + ByteBuffer slice = serialized.duplicate().order(ByteOrder.LITTLE_ENDIAN); + slice.position(300); + slice.limit(300 + object.sizeInBytes()); + + return Variants.from(metadata, slice); + } + + private static ShreddedObject createShreddedObject( + SerializedMetadata metadata, Map<String, VariantValue> fields) { ShreddedObject object = new ShreddedObject(metadata); for (Map.Entry<String, VariantValue> field : fields.entrySet()) { object.put(field.getKey(), field.getValue()); } - return Pair.of(metadata, object); + return object; + } + + private static Pair<SerializedMetadata, ShreddedObject> createShreddedObject( + Map<String, VariantValue> fields) { + ByteBuffer metadataBuffer = VariantTestUtil.createMetadata(fields.keySet(), false); + SerializedMetadata metadata = SerializedMetadata.from(metadataBuffer); + return Pair.of(metadata, createShreddedObject(metadata, fields)); + } + + private static Pair<SerializedMetadata, ShreddedObject> createUnshreddedObject( + Map<String, VariantValue> fields) { + SerializedObject serialized = createSerializedObject(fields); + return Pair.of(serialized.metadata(), new ShreddedObject(serialized)); } - private SerializedObject createUnshreddedObject(Map<String, VariantValue> fields) { + private static SerializedObject createSerializedObject(Map<String, VariantValue> fields) { ByteBuffer metadataBuffer = VariantTestUtil.createMetadata(fields.keySet(), false); return (SerializedObject) 
Variants.from(metadataBuffer, VariantTestUtil.createObject(metadataBuffer, fields)); From 7ba0190113ffd76b98aa7ba7e389687d0a8eeff2 Mon Sep 17 00:00:00 2001 From: Ryan Blue <blue@apache.org> Date: Wed, 11 Dec 2024 16:08:33 -0800 Subject: [PATCH 08/12] Spotless. --- .../org/apache/iceberg/variants/TestSerializedObject.java | 4 +++- .../java/org/apache/iceberg/variants/TestShreddedObject.java | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java b/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java index 806ff604d738..f769530668f0 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java @@ -205,7 +205,9 @@ public void testThreeByteOffsets() { public void testLargeObject(boolean sortFieldNames) { Map<String, VariantPrimitive<String>> fields = Maps.newHashMap(); for (int i = 0; i < 10_000; i += 1) { - fields.put(RandomUtil.generateString(10, random), Variants.of(RandomUtil.generateString(10, random))); + fields.put( + RandomUtil.generateString(10, random), + Variants.of(RandomUtil.generateString(10, random))); } ByteBuffer meta = VariantTestUtil.createMetadata(fields.keySet(), sortFieldNames); diff --git a/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java index c299d053cf78..8b84a2285034 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java @@ -282,7 +282,9 @@ public void testThreeByteOffsets() { public void testLargeObject(boolean sortFieldNames) { Map<String, VariantPrimitive<String>> fields = Maps.newHashMap(); for (int i = 0; i < 10_000; i += 1) { - fields.put(RandomUtil.generateString(10, random), Variants.of(RandomUtil.generateString(10, random))); + 
fields.put( + RandomUtil.generateString(10, random), + Variants.of(RandomUtil.generateString(10, random))); } SerializedMetadata metadata = From c5245e3af4dced8db246bf0660f0322841d90bf8 Mon Sep 17 00:00:00 2001 From: Ryan Blue <blue@apache.org> Date: Wed, 11 Dec 2024 16:28:31 -0800 Subject: [PATCH 09/12] Add another test. --- .../iceberg/variants/TestShreddedObject.java | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java index 8b84a2285034..e640cfb9e32f 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java @@ -251,7 +251,7 @@ public void testTwoByteOffsets() { @Test public void testThreeByteOffsets() { - // a string larger than 65535 bytes to push the value offset size above 1 byte + // a string larger than 65535 bytes to push the value offset size above 2 bytes String randomString = RandomUtil.generateString(70_000, random); SerializedPrimitive reallyBigString = VariantTestUtil.createString(randomString); @@ -276,6 +276,33 @@ public void testThreeByteOffsets() { Assertions.assertThat(object.get("really-big").asPrimitive().get()).isEqualTo(randomString); } + @Test + public void testFourByteOffsets() { + // a string larger than 16777215 bytes to push the value offset size above 3 bytes + String randomString = RandomUtil.generateString(16_777_300, random); + SerializedPrimitive reallyBigString = VariantTestUtil.createString(randomString); + + Map<String, VariantValue> data = Maps.newHashMap(); + data.putAll(FIELDS); + data.put("really-big", reallyBigString); + + Pair<SerializedMetadata, ShreddedObject> pair = createShreddedObject(data); + VariantValue value = roundTripLargeBuffer(pair.second(), pair.first()); + + 
Assertions.assertThat(value.type()).isEqualTo(Variants.PhysicalType.OBJECT); + SerializedObject object = (SerializedObject) value; + Assertions.assertThat(object.numElements()).isEqualTo(4); + + Assertions.assertThat(object.get("a").type()).isEqualTo(Variants.PhysicalType.INT32); + Assertions.assertThat(object.get("a").asPrimitive().get()).isEqualTo(34); + Assertions.assertThat(object.get("b").type()).isEqualTo(Variants.PhysicalType.STRING); + Assertions.assertThat(object.get("b").asPrimitive().get()).isEqualTo("iceberg"); + Assertions.assertThat(object.get("c").type()).isEqualTo(Variants.PhysicalType.DECIMAL4); + Assertions.assertThat(object.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); + Assertions.assertThat(object.get("really-big").type()).isEqualTo(Variants.PhysicalType.STRING); + Assertions.assertThat(object.get("really-big").asPrimitive().get()).isEqualTo(randomString); + } + @ParameterizedTest @ValueSource(booleans = {true, false}) @SuppressWarnings({"unchecked", "rawtypes"}) From e962312be898af2a7d11eee580bd636300a2f35e Mon Sep 17 00:00:00 2001 From: Ryan Blue <blue@apache.org> Date: Wed, 11 Dec 2024 16:36:35 -0800 Subject: [PATCH 10/12] Fix checkstyle in tests. 
--- .../variants/TestPrimitiveWrapper.java | 9 +- .../iceberg/variants/TestSerializedArray.java | 226 ++++++++-------- .../variants/TestSerializedMetadata.java | 124 +++++---- .../variants/TestSerializedObject.java | 193 +++++++------- .../variants/TestSerializedPrimitives.java | 127 ++++----- .../iceberg/variants/TestShreddedObject.java | 242 +++++++++--------- .../iceberg/variants/TestVariantUtil.java | 9 +- 7 files changed, 467 insertions(+), 463 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java b/core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java index 331290d2535e..fd113f9cece3 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestPrimitiveWrapper.java @@ -18,10 +18,11 @@ */ package org.apache.iceberg.variants; +import static org.assertj.core.api.Assertions.assertThat; + import java.math.BigDecimal; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import org.assertj.core.api.Assertions; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.FieldSource; @@ -74,8 +75,8 @@ public void testPrimitiveValueSerialization(VariantPrimitive<?> primitive) { // read and validate the serialized bytes VariantValue actual = Variants.from(SerializedMetadata.EMPTY_V1_BUFFER, readBuffer); - Assertions.assertThat(actual.type()).isEqualTo(primitive.type()); - Assertions.assertThat(actual).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.asPrimitive().get()).isEqualTo(primitive.get()); + assertThat(actual.type()).isEqualTo(primitive.type()); + assertThat(actual).isInstanceOf(VariantPrimitive.class); + assertThat(actual.asPrimitive().get()).isEqualTo(primitive.get()); } } diff --git a/core/src/test/java/org/apache/iceberg/variants/TestSerializedArray.java b/core/src/test/java/org/apache/iceberg/variants/TestSerializedArray.java index c8b69dbfe544..1e052572b85d 
100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestSerializedArray.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestSerializedArray.java @@ -18,35 +18,37 @@ */ package org.apache.iceberg.variants; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + import java.nio.ByteBuffer; import java.util.Random; import org.apache.iceberg.util.RandomUtil; import org.apache.iceberg.variants.Variants.PhysicalType; -import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; public class TestSerializedArray { private static final SerializedMetadata EMPTY_METADATA = SerializedMetadata.from(SerializedMetadata.EMPTY_V1_BUFFER); - private static final SerializedPrimitive vNull = SerializedPrimitive.from(new byte[] {0x00}); - private static final SerializedPrimitive vTrue = SerializedPrimitive.from(new byte[] {0b100}); - private static final SerializedPrimitive vFalse = SerializedPrimitive.from(new byte[] {0b1000}); - private static final SerializedShortString str = + private static final SerializedPrimitive NULL = SerializedPrimitive.from(new byte[] {0x00}); + private static final SerializedPrimitive TRUE = SerializedPrimitive.from(new byte[] {0b100}); + private static final SerializedPrimitive FALSE = SerializedPrimitive.from(new byte[] {0b1000}); + private static final SerializedShortString STR = SerializedShortString.from(new byte[] {0b11101, 'i', 'c', 'e', 'b', 'e', 'r', 'g'}); - private static final SerializedShortString a = + private static final SerializedShortString A = SerializedShortString.from(new byte[] {0b101, 'a'}); - private static final SerializedShortString b = + private static final SerializedShortString B = SerializedShortString.from(new byte[] {0b101, 'b'}); - private static final SerializedShortString c = + private static final SerializedShortString C = SerializedShortString.from(new byte[] {0b101, 'c'}); - private static final SerializedShortString 
d = + private static final SerializedShortString D = SerializedShortString.from(new byte[] {0b101, 'd'}); - private static final SerializedShortString e = + private static final SerializedShortString E = SerializedShortString.from(new byte[] {0b101, 'e'}); - private static final SerializedPrimitive i34 = SerializedPrimitive.from(new byte[] {0b1100, 34}); - private static final SerializedPrimitive i1234 = + private static final SerializedPrimitive I34 = SerializedPrimitive.from(new byte[] {0b1100, 34}); + private static final SerializedPrimitive I1234 = SerializedPrimitive.from(new byte[] {0b10000, (byte) 0xD2, 0x04}); - private static final SerializedPrimitive date = + private static final SerializedPrimitive DATE = SerializedPrimitive.from(new byte[] {0b101100, (byte) 0xF4, 0x43, 0x00, 0x00}); private final Random random = new Random(374513); @@ -55,8 +57,8 @@ public class TestSerializedArray { public void testEmptyArray() { SerializedArray array = SerializedArray.from(EMPTY_METADATA, new byte[] {0b0011, 0x00}); - Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); - Assertions.assertThat(array.numElements()).isEqualTo(0); + assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); + assertThat(array.numElements()).isEqualTo(0); } @Test @@ -64,103 +66,103 @@ public void testEmptyLargeArray() { SerializedArray array = SerializedArray.from(EMPTY_METADATA, new byte[] {0b10011, 0x00, 0x00, 0x00, 0x00}); - Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); - Assertions.assertThat(array.numElements()).isEqualTo(0); + assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); + assertThat(array.numElements()).isEqualTo(0); } @Test public void testStringArray() { - ByteBuffer buffer = VariantTestUtil.createArray(a, b, c, d, e); + ByteBuffer buffer = VariantTestUtil.createArray(A, B, C, D, E); SerializedArray array = SerializedArray.from(EMPTY_METADATA, buffer, buffer.get(0)); - Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); - 
Assertions.assertThat(array.numElements()).isEqualTo(5); - Assertions.assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(0).asPrimitive().get()).isEqualTo("a"); - Assertions.assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(1).asPrimitive().get()).isEqualTo("b"); - Assertions.assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(2).asPrimitive().get()).isEqualTo("c"); - Assertions.assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(3).asPrimitive().get()).isEqualTo("d"); - Assertions.assertThat(array.get(4).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(4).asPrimitive().get()).isEqualTo("e"); - - Assertions.assertThatThrownBy(() -> array.get(5)) + assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); + assertThat(array.numElements()).isEqualTo(5); + assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(0).asPrimitive().get()).isEqualTo("a"); + assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(1).asPrimitive().get()).isEqualTo("b"); + assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(2).asPrimitive().get()).isEqualTo("c"); + assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(3).asPrimitive().get()).isEqualTo("d"); + assertThat(array.get(4).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(4).asPrimitive().get()).isEqualTo("e"); + + assertThatThrownBy(() -> array.get(5)) .isInstanceOf(ArrayIndexOutOfBoundsException.class) .hasMessage("Index 5 out of bounds for length 5"); } @Test public void testStringDifferentLengths() { - ByteBuffer buffer = VariantTestUtil.createArray(a, b, c, str, d, e); + ByteBuffer buffer = VariantTestUtil.createArray(A, B, C, STR, D, E); SerializedArray array = 
SerializedArray.from(EMPTY_METADATA, buffer, buffer.get(0)); - Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); - Assertions.assertThat(array.numElements()).isEqualTo(6); - Assertions.assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(0).asPrimitive().get()).isEqualTo("a"); - Assertions.assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(1).asPrimitive().get()).isEqualTo("b"); - Assertions.assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(2).asPrimitive().get()).isEqualTo("c"); - Assertions.assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(3).asPrimitive().get()).isEqualTo("iceberg"); - Assertions.assertThat(array.get(4).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(4).asPrimitive().get()).isEqualTo("d"); - Assertions.assertThat(array.get(5).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(5).asPrimitive().get()).isEqualTo("e"); - - Assertions.assertThatThrownBy(() -> array.get(6)) + assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); + assertThat(array.numElements()).isEqualTo(6); + assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(0).asPrimitive().get()).isEqualTo("a"); + assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(1).asPrimitive().get()).isEqualTo("b"); + assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(2).asPrimitive().get()).isEqualTo("c"); + assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(3).asPrimitive().get()).isEqualTo("iceberg"); + assertThat(array.get(4).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(4).asPrimitive().get()).isEqualTo("d"); + assertThat(array.get(5).type()).isEqualTo(PhysicalType.STRING); + 
assertThat(array.get(5).asPrimitive().get()).isEqualTo("e"); + + assertThatThrownBy(() -> array.get(6)) .isInstanceOf(ArrayIndexOutOfBoundsException.class) .hasMessage("Index 6 out of bounds for length 6"); } @Test public void testArrayOfMixedTypes() { - ByteBuffer nestedBuffer = VariantTestUtil.createArray(a, c, d); + ByteBuffer nestedBuffer = VariantTestUtil.createArray(A, C, D); SerializedArray nested = SerializedArray.from(EMPTY_METADATA, nestedBuffer, nestedBuffer.get(0)); ByteBuffer buffer = - VariantTestUtil.createArray(date, i34, str, vNull, e, b, vFalse, nested, vTrue, i1234); + VariantTestUtil.createArray(DATE, I34, STR, NULL, E, B, FALSE, nested, TRUE, I1234); SerializedArray array = SerializedArray.from(EMPTY_METADATA, buffer, buffer.get(0)); - Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); - Assertions.assertThat(array.numElements()).isEqualTo(10); - Assertions.assertThat(array.get(0).type()).isEqualTo(PhysicalType.DATE); - Assertions.assertThat(array.get(0).asPrimitive().get()).isEqualTo(17396); - Assertions.assertThat(array.get(1).type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(array.get(1).asPrimitive().get()).isEqualTo((byte) 34); - Assertions.assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(2).asPrimitive().get()).isEqualTo("iceberg"); - Assertions.assertThat(array.get(3).type()).isEqualTo(PhysicalType.NULL); - Assertions.assertThat(array.get(3).asPrimitive().get()).isEqualTo(null); - Assertions.assertThat(array.get(4).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(4).asPrimitive().get()).isEqualTo("e"); - Assertions.assertThat(array.get(5).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(5).asPrimitive().get()).isEqualTo("b"); - Assertions.assertThat(array.get(6).type()).isEqualTo(PhysicalType.BOOLEAN_FALSE); - Assertions.assertThat(array.get(6).asPrimitive().get()).isEqualTo(false); - 
Assertions.assertThat(array.get(8).type()).isEqualTo(PhysicalType.BOOLEAN_TRUE); - Assertions.assertThat(array.get(8).asPrimitive().get()).isEqualTo(true); - Assertions.assertThat(array.get(9).type()).isEqualTo(PhysicalType.INT16); - Assertions.assertThat(array.get(9).asPrimitive().get()).isEqualTo((short) 1234); - - Assertions.assertThatThrownBy(() -> array.get(10)) + assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); + assertThat(array.numElements()).isEqualTo(10); + assertThat(array.get(0).type()).isEqualTo(PhysicalType.DATE); + assertThat(array.get(0).asPrimitive().get()).isEqualTo(17396); + assertThat(array.get(1).type()).isEqualTo(PhysicalType.INT8); + assertThat(array.get(1).asPrimitive().get()).isEqualTo((byte) 34); + assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(2).asPrimitive().get()).isEqualTo("iceberg"); + assertThat(array.get(3).type()).isEqualTo(PhysicalType.NULL); + assertThat(array.get(3).asPrimitive().get()).isEqualTo(null); + assertThat(array.get(4).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(4).asPrimitive().get()).isEqualTo("e"); + assertThat(array.get(5).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(5).asPrimitive().get()).isEqualTo("b"); + assertThat(array.get(6).type()).isEqualTo(PhysicalType.BOOLEAN_FALSE); + assertThat(array.get(6).asPrimitive().get()).isEqualTo(false); + assertThat(array.get(8).type()).isEqualTo(PhysicalType.BOOLEAN_TRUE); + assertThat(array.get(8).asPrimitive().get()).isEqualTo(true); + assertThat(array.get(9).type()).isEqualTo(PhysicalType.INT16); + assertThat(array.get(9).asPrimitive().get()).isEqualTo((short) 1234); + + assertThatThrownBy(() -> array.get(10)) .isInstanceOf(ArrayIndexOutOfBoundsException.class) .hasMessage("Index 10 out of bounds for length 10"); - Assertions.assertThat(array.get(7).type()).isEqualTo(PhysicalType.ARRAY); + assertThat(array.get(7).type()).isEqualTo(PhysicalType.ARRAY); SerializedArray actualNested = 
(SerializedArray) array.get(7); - Assertions.assertThat(actualNested.numElements()).isEqualTo(3); - Assertions.assertThat(actualNested.get(0).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(actualNested.get(0).asPrimitive().get()).isEqualTo("a"); - Assertions.assertThat(actualNested.get(1).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(actualNested.get(1).asPrimitive().get()).isEqualTo("c"); - Assertions.assertThat(actualNested.get(2).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(actualNested.get(2).asPrimitive().get()).isEqualTo("d"); - - Assertions.assertThatThrownBy(() -> actualNested.get(3)) + assertThat(actualNested.numElements()).isEqualTo(3); + assertThat(actualNested.get(0).type()).isEqualTo(PhysicalType.STRING); + assertThat(actualNested.get(0).asPrimitive().get()).isEqualTo("a"); + assertThat(actualNested.get(1).type()).isEqualTo(PhysicalType.STRING); + assertThat(actualNested.get(1).asPrimitive().get()).isEqualTo("c"); + assertThat(actualNested.get(2).type()).isEqualTo(PhysicalType.STRING); + assertThat(actualNested.get(2).asPrimitive().get()).isEqualTo("d"); + + assertThatThrownBy(() -> actualNested.get(3)) .isInstanceOf(ArrayIndexOutOfBoundsException.class) .hasMessage("Index 3 out of bounds for length 3"); } @@ -171,21 +173,21 @@ public void testTwoByteOffsets() { String randomString = RandomUtil.generateString(300, random); SerializedPrimitive bigString = VariantTestUtil.createString(randomString); - ByteBuffer buffer = VariantTestUtil.createArray(bigString, a, b, c); + ByteBuffer buffer = VariantTestUtil.createArray(bigString, A, B, C); SerializedArray array = SerializedArray.from(EMPTY_METADATA, buffer, buffer.get(0)); - Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); - Assertions.assertThat(array.numElements()).isEqualTo(4); - Assertions.assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING); - 
Assertions.assertThat(array.get(0).asPrimitive().get()).isEqualTo(randomString); - Assertions.assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(1).asPrimitive().get()).isEqualTo("a"); - Assertions.assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(2).asPrimitive().get()).isEqualTo("b"); - Assertions.assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(3).asPrimitive().get()).isEqualTo("c"); - - Assertions.assertThatThrownBy(() -> array.get(4)) + assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); + assertThat(array.numElements()).isEqualTo(4); + assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(0).asPrimitive().get()).isEqualTo(randomString); + assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(1).asPrimitive().get()).isEqualTo("a"); + assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(2).asPrimitive().get()).isEqualTo("b"); + assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(3).asPrimitive().get()).isEqualTo("c"); + + assertThatThrownBy(() -> array.get(4)) .isInstanceOf(ArrayIndexOutOfBoundsException.class) .hasMessage("Index 4 out of bounds for length 4"); } @@ -196,21 +198,21 @@ public void testThreeByteOffsets() { String randomString = RandomUtil.generateString(70_000, random); SerializedPrimitive reallyBigString = VariantTestUtil.createString(randomString); - ByteBuffer buffer = VariantTestUtil.createArray(reallyBigString, a, b, c); + ByteBuffer buffer = VariantTestUtil.createArray(reallyBigString, A, B, C); SerializedArray array = SerializedArray.from(EMPTY_METADATA, buffer, buffer.get(0)); - Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); - Assertions.assertThat(array.numElements()).isEqualTo(4); - 
Assertions.assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(0).asPrimitive().get()).isEqualTo(randomString); - Assertions.assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(1).asPrimitive().get()).isEqualTo("a"); - Assertions.assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(2).asPrimitive().get()).isEqualTo("b"); - Assertions.assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(array.get(3).asPrimitive().get()).isEqualTo("c"); - - Assertions.assertThatThrownBy(() -> array.get(4)) + assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); + assertThat(array.numElements()).isEqualTo(4); + assertThat(array.get(0).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(0).asPrimitive().get()).isEqualTo(randomString); + assertThat(array.get(1).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(1).asPrimitive().get()).isEqualTo("a"); + assertThat(array.get(2).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(2).asPrimitive().get()).isEqualTo("b"); + assertThat(array.get(3).type()).isEqualTo(PhysicalType.STRING); + assertThat(array.get(3).asPrimitive().get()).isEqualTo("c"); + + assertThatThrownBy(() -> array.get(4)) .isInstanceOf(ArrayIndexOutOfBoundsException.class) .hasMessage("Index 4 out of bounds for length 4"); } @@ -221,13 +223,13 @@ public void testLargeArraySize() { SerializedArray.from( EMPTY_METADATA, new byte[] {0b10011, (byte) 0xFF, (byte) 0x01, 0x00, 0x00}); - Assertions.assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); - Assertions.assertThat(array.numElements()).isEqualTo(511); + assertThat(array.type()).isEqualTo(PhysicalType.ARRAY); + assertThat(array.numElements()).isEqualTo(511); } @Test public void testNegativeArraySize() { - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> SerializedArray.from( EMPTY_METADATA, diff --git 
a/core/src/test/java/org/apache/iceberg/variants/TestSerializedMetadata.java b/core/src/test/java/org/apache/iceberg/variants/TestSerializedMetadata.java index 5555a8bac9c4..27a4dda3dc3e 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestSerializedMetadata.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestSerializedMetadata.java @@ -18,12 +18,14 @@ */ package org.apache.iceberg.variants; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + import java.nio.ByteBuffer; import java.util.Random; import java.util.Set; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.util.RandomUtil; -import org.assertj.core.api.Assertions; -import org.assertj.core.util.Sets; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; @@ -35,42 +37,39 @@ public class TestSerializedMetadata { public void testEmptyVariantMetadata() { SerializedMetadata metadata = SerializedMetadata.from(SerializedMetadata.EMPTY_V1_BUFFER); - Assertions.assertThat(metadata.isSorted()).isFalse(); - Assertions.assertThat(metadata.dictionarySize()).isEqualTo(0); - Assertions.assertThatThrownBy(() -> metadata.get(0)) - .isInstanceOf(ArrayIndexOutOfBoundsException.class); + assertThat(metadata.isSorted()).isFalse(); + assertThat(metadata.dictionarySize()).isEqualTo(0); + assertThatThrownBy(() -> metadata.get(0)).isInstanceOf(ArrayIndexOutOfBoundsException.class); } @Test public void testHeaderSorted() { SerializedMetadata metadata = SerializedMetadata.from(new byte[] {0b10001, 0x00}); - Assertions.assertThat(metadata.isSorted()).isTrue(); - Assertions.assertThat(metadata.dictionarySize()).isEqualTo(0); + assertThat(metadata.isSorted()).isTrue(); + assertThat(metadata.dictionarySize()).isEqualTo(0); } @Test public void testHeaderOffsetSize() { // offset size is 4-byte LE = 1 - 
Assertions.assertThat( + assertThat( SerializedMetadata.from(new byte[] {(byte) 0b11010001, 0x01, 0x00, 0x00, 0x00}) .dictionarySize()) .isEqualTo(1); // offset size is 3-byte LE = 1 - Assertions.assertThat( + assertThat( SerializedMetadata.from(new byte[] {(byte) 0b10010001, 0x01, 0x00, 0x00}) .dictionarySize()) .isEqualTo(1); // offset size is 2-byte LE = 1 - Assertions.assertThat( - SerializedMetadata.from(new byte[] {(byte) 0b01010001, 0x01, 0x00}).dictionarySize()) + assertThat(SerializedMetadata.from(new byte[] {(byte) 0b01010001, 0x01, 0x00}).dictionarySize()) .isEqualTo(1); // offset size is 1-byte LE = 1 - Assertions.assertThat( - SerializedMetadata.from(new byte[] {(byte) 0b00010001, 0x01}).dictionarySize()) + assertThat(SerializedMetadata.from(new byte[] {(byte) 0b00010001, 0x01}).dictionarySize()) .isEqualTo(1); } @@ -82,13 +81,12 @@ public void testReadString() { 0b10001, 0x05, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 'a', 'b', 'c', 'd', 'e' }); - Assertions.assertThat(metadata.get(0)).isEqualTo("a"); - Assertions.assertThat(metadata.get(1)).isEqualTo("b"); - Assertions.assertThat(metadata.get(2)).isEqualTo("c"); - Assertions.assertThat(metadata.get(3)).isEqualTo("d"); - Assertions.assertThat(metadata.get(4)).isEqualTo("e"); - Assertions.assertThatThrownBy(() -> metadata.get(5)) - .isInstanceOf(ArrayIndexOutOfBoundsException.class); + assertThat(metadata.get(0)).isEqualTo("a"); + assertThat(metadata.get(1)).isEqualTo("b"); + assertThat(metadata.get(2)).isEqualTo("c"); + assertThat(metadata.get(3)).isEqualTo("d"); + assertThat(metadata.get(4)).isEqualTo("e"); + assertThatThrownBy(() -> metadata.get(5)).isInstanceOf(ArrayIndexOutOfBoundsException.class); } @Test @@ -99,13 +97,12 @@ public void testMultibyteString() { 0b10001, 0x05, 0x00, 0x01, 0x02, 0x05, 0x06, 0x07, 'a', 'b', 'x', 'y', 'z', 'd', 'e' }); - Assertions.assertThat(metadata.get(0)).isEqualTo("a"); - Assertions.assertThat(metadata.get(1)).isEqualTo("b"); - 
Assertions.assertThat(metadata.get(2)).isEqualTo("xyz"); - Assertions.assertThat(metadata.get(3)).isEqualTo("d"); - Assertions.assertThat(metadata.get(4)).isEqualTo("e"); - Assertions.assertThatThrownBy(() -> metadata.get(5)) - .isInstanceOf(ArrayIndexOutOfBoundsException.class); + assertThat(metadata.get(0)).isEqualTo("a"); + assertThat(metadata.get(1)).isEqualTo("b"); + assertThat(metadata.get(2)).isEqualTo("xyz"); + assertThat(metadata.get(3)).isEqualTo("d"); + assertThat(metadata.get(4)).isEqualTo("e"); + assertThatThrownBy(() -> metadata.get(5)).isInstanceOf(ArrayIndexOutOfBoundsException.class); } @Test @@ -117,13 +114,12 @@ public void testTwoByteOffsets() { 0x07, 0x00, 'a', 'b', 'x', 'y', 'z', 'd', 'e' }); - Assertions.assertThat(metadata.get(0)).isEqualTo("a"); - Assertions.assertThat(metadata.get(1)).isEqualTo("b"); - Assertions.assertThat(metadata.get(2)).isEqualTo("xyz"); - Assertions.assertThat(metadata.get(3)).isEqualTo("d"); - Assertions.assertThat(metadata.get(4)).isEqualTo("e"); - Assertions.assertThatThrownBy(() -> metadata.get(5)) - .isInstanceOf(ArrayIndexOutOfBoundsException.class); + assertThat(metadata.get(0)).isEqualTo("a"); + assertThat(metadata.get(1)).isEqualTo("b"); + assertThat(metadata.get(2)).isEqualTo("xyz"); + assertThat(metadata.get(3)).isEqualTo("d"); + assertThat(metadata.get(4)).isEqualTo("e"); + assertThatThrownBy(() -> metadata.get(5)).isInstanceOf(ArrayIndexOutOfBoundsException.class); } @Test @@ -133,17 +129,17 @@ public void testFindStringSorted() { new byte[] { 0b10001, 0x05, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 'a', 'b', 'c', 'd', 'e' }); - Assertions.assertThat(metadata.id("A")).isEqualTo(-1); - Assertions.assertThat(metadata.id("a")).isEqualTo(0); - Assertions.assertThat(metadata.id("aa")).isEqualTo(-1); - Assertions.assertThat(metadata.id("b")).isEqualTo(1); - Assertions.assertThat(metadata.id("bb")).isEqualTo(-1); - Assertions.assertThat(metadata.id("c")).isEqualTo(2); - 
Assertions.assertThat(metadata.id("cc")).isEqualTo(-1); - Assertions.assertThat(metadata.id("d")).isEqualTo(3); - Assertions.assertThat(metadata.id("dd")).isEqualTo(-1); - Assertions.assertThat(metadata.id("e")).isEqualTo(4); - Assertions.assertThat(metadata.id("ee")).isEqualTo(-1); + assertThat(metadata.id("A")).isEqualTo(-1); + assertThat(metadata.id("a")).isEqualTo(0); + assertThat(metadata.id("aa")).isEqualTo(-1); + assertThat(metadata.id("b")).isEqualTo(1); + assertThat(metadata.id("bb")).isEqualTo(-1); + assertThat(metadata.id("c")).isEqualTo(2); + assertThat(metadata.id("cc")).isEqualTo(-1); + assertThat(metadata.id("d")).isEqualTo(3); + assertThat(metadata.id("dd")).isEqualTo(-1); + assertThat(metadata.id("e")).isEqualTo(4); + assertThat(metadata.id("ee")).isEqualTo(-1); } @Test @@ -153,17 +149,17 @@ public void testFindStringUnsorted() { new byte[] { 0b00001, 0x05, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 'e', 'd', 'c', 'b', 'a' }); - Assertions.assertThat(metadata.id("A")).isEqualTo(-1); - Assertions.assertThat(metadata.id("a")).isEqualTo(4); - Assertions.assertThat(metadata.id("aa")).isEqualTo(-1); - Assertions.assertThat(metadata.id("b")).isEqualTo(3); - Assertions.assertThat(metadata.id("bb")).isEqualTo(-1); - Assertions.assertThat(metadata.id("c")).isEqualTo(2); - Assertions.assertThat(metadata.id("cc")).isEqualTo(-1); - Assertions.assertThat(metadata.id("d")).isEqualTo(1); - Assertions.assertThat(metadata.id("dd")).isEqualTo(-1); - Assertions.assertThat(metadata.id("e")).isEqualTo(0); - Assertions.assertThat(metadata.id("ee")).isEqualTo(-1); + assertThat(metadata.id("A")).isEqualTo(-1); + assertThat(metadata.id("a")).isEqualTo(4); + assertThat(metadata.id("aa")).isEqualTo(-1); + assertThat(metadata.id("b")).isEqualTo(3); + assertThat(metadata.id("bb")).isEqualTo(-1); + assertThat(metadata.id("c")).isEqualTo(2); + assertThat(metadata.id("cc")).isEqualTo(-1); + assertThat(metadata.id("d")).isEqualTo(1); + assertThat(metadata.id("dd")).isEqualTo(-1); + 
assertThat(metadata.id("e")).isEqualTo(0); + assertThat(metadata.id("ee")).isEqualTo(-1); } @ParameterizedTest @@ -179,8 +175,8 @@ public void testTwoByteFieldIds(boolean sortFieldNames) { ByteBuffer buffer = VariantTestUtil.createMetadata(keySet, sortFieldNames); SerializedMetadata metadata = SerializedMetadata.from(buffer); - Assertions.assertThat(metadata.dictionarySize()).isEqualTo(10_000); - Assertions.assertThat(metadata.id(lastKey)).isGreaterThan(0); + assertThat(metadata.dictionarySize()).isEqualTo(10_000); + assertThat(metadata.id(lastKey)).isGreaterThan(0); } @ParameterizedTest @@ -196,27 +192,27 @@ public void testThreeByteFieldIds(boolean sortFieldNames) { ByteBuffer buffer = VariantTestUtil.createMetadata(keySet, sortFieldNames); SerializedMetadata metadata = SerializedMetadata.from(buffer); - Assertions.assertThat(metadata.dictionarySize()).isEqualTo(100_000); - Assertions.assertThat(metadata.id(lastKey)).isGreaterThan(0); + assertThat(metadata.dictionarySize()).isEqualTo(100_000); + assertThat(metadata.id(lastKey)).isGreaterThan(0); } @Test public void testInvalidMetadataVersion() { - Assertions.assertThatThrownBy(() -> SerializedMetadata.from(new byte[] {0x02, 0x00})) + assertThatThrownBy(() -> SerializedMetadata.from(new byte[] {0x02, 0x00})) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Unsupported version: 2"); } @Test public void testMissingLength() { - Assertions.assertThatThrownBy(() -> SerializedMetadata.from(new byte[] {0x01})) + assertThatThrownBy(() -> SerializedMetadata.from(new byte[] {0x01})) .isInstanceOf(IndexOutOfBoundsException.class); } @Test public void testLengthTooShort() { // missing the 4th length byte - Assertions.assertThatThrownBy( + assertThatThrownBy( () -> SerializedMetadata.from(new byte[] {(byte) 0b11010001, 0x00, 0x00, 0x00})) .isInstanceOf(IndexOutOfBoundsException.class); } diff --git a/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java 
b/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java index f769530668f0..3c5fb808d835 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestSerializedObject.java @@ -18,6 +18,8 @@ */ package org.apache.iceberg.variants; +import static org.assertj.core.api.Assertions.assertThat; + import java.nio.ByteBuffer; import java.util.Map; import java.util.Random; @@ -25,10 +27,9 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.util.RandomUtil; import org.apache.iceberg.variants.Variants.PhysicalType; -import org.assertj.core.api.Assertions; -import org.assertj.core.util.Sets; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; @@ -36,12 +37,12 @@ public class TestSerializedObject { private static final SerializedMetadata EMPTY_METADATA = SerializedMetadata.from(SerializedMetadata.EMPTY_V1_BUFFER); - private static final SerializedPrimitive i1 = SerializedPrimitive.from(new byte[] {0b1100, 1}); - private static final SerializedPrimitive i2 = SerializedPrimitive.from(new byte[] {0b1100, 2}); - private static final SerializedPrimitive i3 = SerializedPrimitive.from(new byte[] {0b1100, 3}); - private static final SerializedPrimitive vNull = SerializedPrimitive.from(new byte[] {0x00}); - private static final SerializedPrimitive vTrue = SerializedPrimitive.from(new byte[] {0b100}); - private static final SerializedPrimitive date = + private static final SerializedPrimitive I1 = SerializedPrimitive.from(new byte[] {0b1100, 1}); + private static final SerializedPrimitive I2 = SerializedPrimitive.from(new byte[] {0b1100, 
2}); + private static final SerializedPrimitive I3 = SerializedPrimitive.from(new byte[] {0b1100, 3}); + private static final SerializedPrimitive NULL = SerializedPrimitive.from(new byte[] {0x00}); + private static final SerializedPrimitive TRUE = SerializedPrimitive.from(new byte[] {0b100}); + private static final SerializedPrimitive DATE = SerializedPrimitive.from(new byte[] {0b101100, (byte) 0xF4, 0x43, 0x00, 0x00}); private final Random random = new Random(198725); @@ -50,8 +51,8 @@ public class TestSerializedObject { public void testEmptyObject() { SerializedObject object = SerializedObject.from(EMPTY_METADATA, new byte[] {0b10, 0x00}); - Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); - Assertions.assertThat(object.numElements()).isEqualTo(0); + assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); + assertThat(object.numElements()).isEqualTo(0); } @Test @@ -59,52 +60,52 @@ public void testEmptyLargeObject() { SerializedObject object = SerializedObject.from(EMPTY_METADATA, new byte[] {0b1000010, 0x00, 0x00, 0x00, 0x00}); - Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); - Assertions.assertThat(object.numElements()).isEqualTo(0); + assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); + assertThat(object.numElements()).isEqualTo(0); } @Test public void testSimpleObject() { - Map<String, VariantValue> data = ImmutableMap.of("a", i1, "b", i2, "c", i3); + Map<String, VariantValue> data = ImmutableMap.of("a", I1, "b", I2, "c", I3); ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), true /* sort names */); ByteBuffer value = VariantTestUtil.createObject(meta, data); SerializedMetadata metadata = SerializedMetadata.from(meta); SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); - Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); - Assertions.assertThat(object.numElements()).isEqualTo(3); + assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); + 
assertThat(object.numElements()).isEqualTo(3); - Assertions.assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(object.get("a").asPrimitive().get()).isEqualTo((byte) 1); - Assertions.assertThat(object.get("b").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(object.get("b").asPrimitive().get()).isEqualTo((byte) 2); - Assertions.assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(object.get("c").asPrimitive().get()).isEqualTo((byte) 3); + assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); + assertThat(object.get("a").asPrimitive().get()).isEqualTo((byte) 1); + assertThat(object.get("b").type()).isEqualTo(PhysicalType.INT8); + assertThat(object.get("b").asPrimitive().get()).isEqualTo((byte) 2); + assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8); + assertThat(object.get("c").asPrimitive().get()).isEqualTo((byte) 3); - Assertions.assertThat(object.get("d")).isEqualTo(null); + assertThat(object.get("d")).isEqualTo(null); } @Test public void testOutOfOrderKeys() { - Map<String, VariantValue> data = ImmutableMap.of("b", i2, "a", i1, "c", i3); + Map<String, VariantValue> data = ImmutableMap.of("b", I2, "a", I1, "c", I3); ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), false /* sort names */); ByteBuffer value = VariantTestUtil.createObject(meta, data); SerializedMetadata metadata = SerializedMetadata.from(meta); SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); - Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); - Assertions.assertThat(object.numElements()).isEqualTo(3); + assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); + assertThat(object.numElements()).isEqualTo(3); - Assertions.assertThat(object.get("d")).isEqualTo(null); + assertThat(object.get("d")).isEqualTo(null); - Assertions.assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8); - 
Assertions.assertThat(object.get("c").asPrimitive().get()).isEqualTo((byte) 3); - Assertions.assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(object.get("a").asPrimitive().get()).isEqualTo((byte) 1); - Assertions.assertThat(object.get("b").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(object.get("b").asPrimitive().get()).isEqualTo((byte) 2); + assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8); + assertThat(object.get("c").asPrimitive().get()).isEqualTo((byte) 3); + assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); + assertThat(object.get("a").asPrimitive().get()).isEqualTo((byte) 1); + assertThat(object.get("b").type()).isEqualTo(PhysicalType.INT8); + assertThat(object.get("b").asPrimitive().get()).isEqualTo((byte) 2); } @Test @@ -114,34 +115,34 @@ public void testMixedValueTypes() { ImmutableList.of("a", "b", "c", "d", "e", "f"), true /* sort names */); SerializedMetadata metadata = SerializedMetadata.from(meta); - Map<String, VariantValue> inner = ImmutableMap.of("b", i2, "f", i3); + Map<String, VariantValue> inner = ImmutableMap.of("b", I2, "f", I3); ByteBuffer innerBuffer = VariantTestUtil.createObject(meta, inner); SerializedObject innerObject = SerializedObject.from(metadata, innerBuffer, innerBuffer.get(0)); Map<String, VariantValue> data = - ImmutableMap.of("a", i1, "b", date, "c", vNull, "d", vTrue, "e", innerObject); + ImmutableMap.of("a", I1, "b", DATE, "c", NULL, "d", TRUE, "e", innerObject); ByteBuffer value = VariantTestUtil.createObject(meta, data); SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); - Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); - Assertions.assertThat(object.numElements()).isEqualTo(5); + assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); + assertThat(object.numElements()).isEqualTo(5); - Assertions.assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); - 
Assertions.assertThat(object.get("a").asPrimitive().get()).isEqualTo((byte) 1); - Assertions.assertThat(object.get("b").type()).isEqualTo(PhysicalType.DATE); - Assertions.assertThat(((SerializedPrimitive) object.get("b")).get()).isEqualTo(17396); - Assertions.assertThat(object.get("c").type()).isEqualTo(PhysicalType.NULL); - Assertions.assertThat(((SerializedPrimitive) object.get("c")).get()).isEqualTo(null); - Assertions.assertThat(object.get("d").type()).isEqualTo(PhysicalType.BOOLEAN_TRUE); - Assertions.assertThat(((SerializedPrimitive) object.get("d")).get()).isEqualTo(true); + assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); + assertThat(object.get("a").asPrimitive().get()).isEqualTo((byte) 1); + assertThat(object.get("b").type()).isEqualTo(PhysicalType.DATE); + assertThat(((SerializedPrimitive) object.get("b")).get()).isEqualTo(17396); + assertThat(object.get("c").type()).isEqualTo(PhysicalType.NULL); + assertThat(((SerializedPrimitive) object.get("c")).get()).isEqualTo(null); + assertThat(object.get("d").type()).isEqualTo(PhysicalType.BOOLEAN_TRUE); + assertThat(((SerializedPrimitive) object.get("d")).get()).isEqualTo(true); - Assertions.assertThat(object.get("e").type()).isEqualTo(PhysicalType.OBJECT); + assertThat(object.get("e").type()).isEqualTo(PhysicalType.OBJECT); SerializedObject actualInner = (SerializedObject) object.get("e").asObject(); - Assertions.assertThat(actualInner.numElements()).isEqualTo(2); - Assertions.assertThat(actualInner.get("b").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(actualInner.get("b").asPrimitive().get()).isEqualTo((byte) 2); - Assertions.assertThat(actualInner.get("f").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(actualInner.get("f").asPrimitive().get()).isEqualTo((byte) 3); + assertThat(actualInner.numElements()).isEqualTo(2); + assertThat(actualInner.get("b").type()).isEqualTo(PhysicalType.INT8); + assertThat(actualInner.get("b").asPrimitive().get()).isEqualTo((byte) 2); 
+ assertThat(actualInner.get("f").type()).isEqualTo(PhysicalType.INT8); + assertThat(actualInner.get("f").asPrimitive().get()).isEqualTo((byte) 3); } @Test @@ -151,24 +152,24 @@ public void testTwoByteOffsets() { SerializedPrimitive bigString = VariantTestUtil.createString(randomString); // note that order doesn't matter. fields are sorted by name - Map<String, VariantValue> data = ImmutableMap.of("big", bigString, "a", i1, "b", i2, "c", i3); + Map<String, VariantValue> data = ImmutableMap.of("big", bigString, "a", I1, "b", I2, "c", I3); ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), true /* sort names */); ByteBuffer value = VariantTestUtil.createObject(meta, data); SerializedMetadata metadata = SerializedMetadata.from(meta); SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); - Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); - Assertions.assertThat(object.numElements()).isEqualTo(4); - - Assertions.assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(object.get("a").asPrimitive().get()).isEqualTo((byte) 1); - Assertions.assertThat(object.get("b").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(object.get("b").asPrimitive().get()).isEqualTo((byte) 2); - Assertions.assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(object.get("c").asPrimitive().get()).isEqualTo((byte) 3); - Assertions.assertThat(object.get("big").type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(object.get("big").asPrimitive().get()).isEqualTo(randomString); + assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); + assertThat(object.numElements()).isEqualTo(4); + + assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); + assertThat(object.get("a").asPrimitive().get()).isEqualTo((byte) 1); + assertThat(object.get("b").type()).isEqualTo(PhysicalType.INT8); + assertThat(object.get("b").asPrimitive().get()).isEqualTo((byte) 
2); + assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8); + assertThat(object.get("c").asPrimitive().get()).isEqualTo((byte) 3); + assertThat(object.get("big").type()).isEqualTo(PhysicalType.STRING); + assertThat(object.get("big").asPrimitive().get()).isEqualTo(randomString); } @Test @@ -179,24 +180,24 @@ public void testThreeByteOffsets() { // note that order doesn't matter. fields are sorted by name Map<String, VariantValue> data = - ImmutableMap.of("really-big", reallyBigString, "a", i1, "b", i2, "c", i3); + ImmutableMap.of("really-big", reallyBigString, "a", I1, "b", I2, "c", I3); ByteBuffer meta = VariantTestUtil.createMetadata(data.keySet(), true /* sort names */); ByteBuffer value = VariantTestUtil.createObject(meta, data); SerializedMetadata metadata = SerializedMetadata.from(meta); SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); - Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); - Assertions.assertThat(object.numElements()).isEqualTo(4); - - Assertions.assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(object.get("a").asPrimitive().get()).isEqualTo((byte) 1); - Assertions.assertThat(object.get("b").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(object.get("b").asPrimitive().get()).isEqualTo((byte) 2); - Assertions.assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(object.get("c").asPrimitive().get()).isEqualTo((byte) 3); - Assertions.assertThat(object.get("really-big").type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(object.get("really-big").asPrimitive().get()).isEqualTo(randomString); + assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); + assertThat(object.numElements()).isEqualTo(4); + + assertThat(object.get("a").type()).isEqualTo(PhysicalType.INT8); + assertThat(object.get("a").asPrimitive().get()).isEqualTo((byte) 1); + 
assertThat(object.get("b").type()).isEqualTo(PhysicalType.INT8); + assertThat(object.get("b").asPrimitive().get()).isEqualTo((byte) 2); + assertThat(object.get("c").type()).isEqualTo(PhysicalType.INT8); + assertThat(object.get("c").asPrimitive().get()).isEqualTo((byte) 3); + assertThat(object.get("really-big").type()).isEqualTo(PhysicalType.STRING); + assertThat(object.get("really-big").asPrimitive().get()).isEqualTo(randomString); } @ParameterizedTest @@ -216,13 +217,13 @@ public void testLargeObject(boolean sortFieldNames) { SerializedMetadata metadata = SerializedMetadata.from(meta); SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); - Assertions.assertThat(object.type()).isEqualTo(Variants.PhysicalType.OBJECT); - Assertions.assertThat(object.numElements()).isEqualTo(10_000); + assertThat(object.type()).isEqualTo(Variants.PhysicalType.OBJECT); + assertThat(object.numElements()).isEqualTo(10_000); for (Map.Entry<String, VariantPrimitive<String>> entry : fields.entrySet()) { VariantValue fieldValue = object.get(entry.getKey()); - Assertions.assertThat(fieldValue.type()).isEqualTo(Variants.PhysicalType.STRING); - Assertions.assertThat(fieldValue.asPrimitive().get()).isEqualTo(entry.getValue().get()); + assertThat(fieldValue.type()).isEqualTo(Variants.PhysicalType.STRING); + assertThat(fieldValue.asPrimitive().get()).isEqualTo(entry.getValue().get()); } } @@ -234,7 +235,7 @@ public void testTwoByteFieldIds(boolean sortFieldNames) { keySet.add(RandomUtil.generateString(10, random)); } - Map<String, VariantValue> data = ImmutableMap.of("aa", i1, "AA", i2, "ZZ", i3); + Map<String, VariantValue> data = ImmutableMap.of("aa", I1, "AA", I2, "ZZ", I3); // create metadata from the large key set and the actual keys keySet.addAll(data.keySet()); @@ -244,15 +245,15 @@ public void testTwoByteFieldIds(boolean sortFieldNames) { SerializedMetadata metadata = SerializedMetadata.from(meta); SerializedObject object = SerializedObject.from(metadata, value, 
value.get(0)); - Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); - Assertions.assertThat(object.numElements()).isEqualTo(3); + assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); + assertThat(object.numElements()).isEqualTo(3); - Assertions.assertThat(object.get("aa").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(object.get("aa").asPrimitive().get()).isEqualTo((byte) 1); - Assertions.assertThat(object.get("AA").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(object.get("AA").asPrimitive().get()).isEqualTo((byte) 2); - Assertions.assertThat(object.get("ZZ").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(object.get("ZZ").asPrimitive().get()).isEqualTo((byte) 3); + assertThat(object.get("aa").type()).isEqualTo(PhysicalType.INT8); + assertThat(object.get("aa").asPrimitive().get()).isEqualTo((byte) 1); + assertThat(object.get("AA").type()).isEqualTo(PhysicalType.INT8); + assertThat(object.get("AA").asPrimitive().get()).isEqualTo((byte) 2); + assertThat(object.get("ZZ").type()).isEqualTo(PhysicalType.INT8); + assertThat(object.get("ZZ").asPrimitive().get()).isEqualTo((byte) 3); } @ParameterizedTest @@ -263,7 +264,7 @@ public void testThreeByteFieldIds(boolean sortFieldNames) { keySet.add(RandomUtil.generateString(10, random)); } - Map<String, VariantValue> data = ImmutableMap.of("aa", i1, "AA", i2, "ZZ", i3); + Map<String, VariantValue> data = ImmutableMap.of("aa", I1, "AA", I2, "ZZ", I3); // create metadata from the large key set and the actual keys keySet.addAll(data.keySet()); @@ -273,14 +274,14 @@ public void testThreeByteFieldIds(boolean sortFieldNames) { SerializedMetadata metadata = SerializedMetadata.from(meta); SerializedObject object = SerializedObject.from(metadata, value, value.get(0)); - Assertions.assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); - Assertions.assertThat(object.numElements()).isEqualTo(3); + assertThat(object.type()).isEqualTo(PhysicalType.OBJECT); + 
assertThat(object.numElements()).isEqualTo(3); - Assertions.assertThat(object.get("aa").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(object.get("aa").asPrimitive().get()).isEqualTo((byte) 1); - Assertions.assertThat(object.get("AA").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(object.get("AA").asPrimitive().get()).isEqualTo((byte) 2); - Assertions.assertThat(object.get("ZZ").type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(object.get("ZZ").asPrimitive().get()).isEqualTo((byte) 3); + assertThat(object.get("aa").type()).isEqualTo(PhysicalType.INT8); + assertThat(object.get("aa").asPrimitive().get()).isEqualTo((byte) 1); + assertThat(object.get("AA").type()).isEqualTo(PhysicalType.INT8); + assertThat(object.get("AA").asPrimitive().get()).isEqualTo((byte) 2); + assertThat(object.get("ZZ").type()).isEqualTo(PhysicalType.INT8); + assertThat(object.get("ZZ").asPrimitive().get()).isEqualTo((byte) 3); } } diff --git a/core/src/test/java/org/apache/iceberg/variants/TestSerializedPrimitives.java b/core/src/test/java/org/apache/iceberg/variants/TestSerializedPrimitives.java index 7e713508827a..b4646c0d13e9 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestSerializedPrimitives.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestSerializedPrimitives.java @@ -18,11 +18,13 @@ */ package org.apache.iceberg.variants; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + import java.math.BigDecimal; import java.nio.ByteBuffer; import org.apache.iceberg.util.DateTimeUtil; import org.apache.iceberg.variants.Variants.PhysicalType; -import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; public class TestSerializedPrimitives { @@ -30,32 +32,32 @@ public class TestSerializedPrimitives { public void testNull() { VariantPrimitive<?> value = SerializedPrimitive.from(new byte[] {primitiveHeader(0)}); - 
Assertions.assertThat(value.type()).isEqualTo(PhysicalType.NULL); - Assertions.assertThat(value.get()).isEqualTo(null); + assertThat(value.type()).isEqualTo(PhysicalType.NULL); + assertThat(value.get()).isEqualTo(null); } @Test public void testTrue() { VariantPrimitive<?> value = SerializedPrimitive.from(new byte[] {primitiveHeader(1)}); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.BOOLEAN_TRUE); - Assertions.assertThat(value.get()).isEqualTo(true); + assertThat(value.type()).isEqualTo(PhysicalType.BOOLEAN_TRUE); + assertThat(value.get()).isEqualTo(true); } @Test public void testFalse() { VariantPrimitive<?> value = SerializedPrimitive.from(new byte[] {primitiveHeader(2)}); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.BOOLEAN_FALSE); - Assertions.assertThat(value.get()).isEqualTo(false); + assertThat(value.type()).isEqualTo(PhysicalType.BOOLEAN_FALSE); + assertThat(value.get()).isEqualTo(false); } @Test public void testInt8() { VariantPrimitive<?> value = SerializedPrimitive.from(new byte[] {primitiveHeader(3), 34}); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(value.get()).isEqualTo((byte) 34); + assertThat(value.type()).isEqualTo(PhysicalType.INT8); + assertThat(value.get()).isEqualTo((byte) 34); } @Test @@ -63,8 +65,8 @@ public void testNegativeInt8() { VariantPrimitive<?> value = SerializedPrimitive.from(new byte[] {primitiveHeader(3), (byte) 0xFF}); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT8); - Assertions.assertThat(value.get()).isEqualTo((byte) -1); + assertThat(value.type()).isEqualTo(PhysicalType.INT8); + assertThat(value.get()).isEqualTo((byte) -1); } @Test @@ -72,8 +74,8 @@ public void testInt16() { VariantPrimitive<?> value = SerializedPrimitive.from(new byte[] {primitiveHeader(4), (byte) 0xD2, 0x04}); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT16); - Assertions.assertThat(value.get()).isEqualTo((short) 1234); + 
assertThat(value.type()).isEqualTo(PhysicalType.INT16); + assertThat(value.get()).isEqualTo((short) 1234); } @Test @@ -81,8 +83,8 @@ public void testNegativeInt16() { VariantPrimitive<?> value = SerializedPrimitive.from(new byte[] {primitiveHeader(4), (byte) 0xFF, (byte) 0xFF}); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT16); - Assertions.assertThat(value.get()).isEqualTo((short) -1); + assertThat(value.type()).isEqualTo(PhysicalType.INT16); + assertThat(value.get()).isEqualTo((short) -1); } @Test @@ -91,8 +93,8 @@ public void testInt32() { SerializedPrimitive.from( new byte[] {primitiveHeader(5), (byte) 0xD2, 0x02, (byte) 0x96, 0x49}); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT32); - Assertions.assertThat(value.get()).isEqualTo(1234567890); + assertThat(value.type()).isEqualTo(PhysicalType.INT32); + assertThat(value.get()).isEqualTo(1234567890); } @Test @@ -101,8 +103,8 @@ public void testNegativeInt32() { SerializedPrimitive.from( new byte[] {primitiveHeader(5), (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF}); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT32); - Assertions.assertThat(value.get()).isEqualTo(-1); + assertThat(value.type()).isEqualTo(PhysicalType.INT32); + assertThat(value.get()).isEqualTo(-1); } @Test @@ -121,8 +123,8 @@ public void testInt64() { 0x11 }); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT64); - Assertions.assertThat(value.get()).isEqualTo(1234567890987654321L); + assertThat(value.type()).isEqualTo(PhysicalType.INT64); + assertThat(value.get()).isEqualTo(1234567890987654321L); } @Test @@ -141,8 +143,8 @@ public void testNegativeInt64() { (byte) 0xFF }); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.INT64); - Assertions.assertThat(value.get()).isEqualTo(-1L); + assertThat(value.type()).isEqualTo(PhysicalType.INT64); + assertThat(value.get()).isEqualTo(-1L); } @Test @@ -161,8 +163,8 @@ public void testDouble() { 0x11 }); - 
Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DOUBLE); - Assertions.assertThat(value.get()).isEqualTo(Double.longBitsToDouble(1234567890987654321L)); + assertThat(value.type()).isEqualTo(PhysicalType.DOUBLE); + assertThat(value.get()).isEqualTo(Double.longBitsToDouble(1234567890987654321L)); } @Test @@ -171,8 +173,8 @@ public void testNegativeDouble() { SerializedPrimitive.from( new byte[] {primitiveHeader(7), 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, (byte) 0x80}); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DOUBLE); - Assertions.assertThat(value.get()).isEqualTo(-0.0D); + assertThat(value.type()).isEqualTo(PhysicalType.DOUBLE); + assertThat(value.get()).isEqualTo(-0.0D); } @Test @@ -181,8 +183,8 @@ public void testDecimal4() { SerializedPrimitive.from( new byte[] {primitiveHeader(8), 0x04, (byte) 0xD2, 0x02, (byte) 0x96, 0x49}); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL4); - Assertions.assertThat(value.get()).isEqualTo(new BigDecimal("123456.7890")); + assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL4); + assertThat(value.get()).isEqualTo(new BigDecimal("123456.7890")); } @Test @@ -193,8 +195,8 @@ public void testNegativeDecimal4() { primitiveHeader(8), 0x04, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF }); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL4); - Assertions.assertThat(value.get()).isEqualTo(new BigDecimal("-0.0001")); + assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL4); + assertThat(value.get()).isEqualTo(new BigDecimal("-0.0001")); } @Test @@ -214,8 +216,8 @@ public void testDecimal8() { 0x11 }); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL8); - Assertions.assertThat(value.get()).isEqualTo(new BigDecimal("1234567890.987654321")); + assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL8); + assertThat(value.get()).isEqualTo(new BigDecimal("1234567890.987654321")); } @Test @@ -235,8 +237,8 @@ public void testNegativeDecimal8() { 
(byte) 0xFF }); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL8); - Assertions.assertThat(value.get()).isEqualTo(new BigDecimal("-0.000000001")); + assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL8); + assertThat(value.get()).isEqualTo(new BigDecimal("-0.000000001")); } @Test @@ -264,8 +266,8 @@ public void testDecimal16() { 0x00 }); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL16); - Assertions.assertThat(value.get()).isEqualTo(new BigDecimal("9876543210.123456789")); + assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL16); + assertThat(value.get()).isEqualTo(new BigDecimal("9876543210.123456789")); } @Test @@ -293,8 +295,8 @@ public void testNegativeDecimal16() { (byte) 0xFF, }); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL16); - Assertions.assertThat(value.get()).isEqualTo(new BigDecimal("-9876543210.123456789")); + assertThat(value.type()).isEqualTo(PhysicalType.DECIMAL16); + assertThat(value.get()).isEqualTo(new BigDecimal("-9876543210.123456789")); } @Test @@ -302,8 +304,8 @@ public void testDate() { VariantPrimitive<?> value = SerializedPrimitive.from(new byte[] {primitiveHeader(11), (byte) 0xF4, 0x43, 0x00, 0x00}); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DATE); - Assertions.assertThat(DateTimeUtil.daysToIsoDate((int) value.get())).isEqualTo("2017-08-18"); + assertThat(value.type()).isEqualTo(PhysicalType.DATE); + assertThat(DateTimeUtil.daysToIsoDate((int) value.get())).isEqualTo("2017-08-18"); } @Test @@ -312,8 +314,8 @@ public void testNegativeDate() { SerializedPrimitive.from( new byte[] {primitiveHeader(11), (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF}); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.DATE); - Assertions.assertThat(DateTimeUtil.daysToIsoDate((int) value.get())).isEqualTo("1969-12-31"); + assertThat(value.type()).isEqualTo(PhysicalType.DATE); + assertThat(DateTimeUtil.daysToIsoDate((int) 
value.get())).isEqualTo("1969-12-31"); } @Test @@ -332,8 +334,8 @@ public void testTimestamptz() { 0x00 }); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.TIMESTAMPTZ); - Assertions.assertThat(DateTimeUtil.microsToIsoTimestamptz((long) value.get())) + assertThat(value.type()).isEqualTo(PhysicalType.TIMESTAMPTZ); + assertThat(DateTimeUtil.microsToIsoTimestamptz((long) value.get())) .isEqualTo("2017-08-18T14:21:01.919+00:00"); } @@ -353,8 +355,8 @@ public void testNegativeTimestamptz() { (byte) 0xFF }); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.TIMESTAMPTZ); - Assertions.assertThat(DateTimeUtil.microsToIsoTimestamptz((long) value.get())) + assertThat(value.type()).isEqualTo(PhysicalType.TIMESTAMPTZ); + assertThat(DateTimeUtil.microsToIsoTimestamptz((long) value.get())) .isEqualTo("1969-12-31T23:59:59.999999+00:00"); } @@ -374,8 +376,8 @@ public void testTimestampntz() { 0x00 }); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.TIMESTAMPNTZ); - Assertions.assertThat(DateTimeUtil.microsToIsoTimestamp((long) value.get())) + assertThat(value.type()).isEqualTo(PhysicalType.TIMESTAMPNTZ); + assertThat(DateTimeUtil.microsToIsoTimestamp((long) value.get())) .isEqualTo("2017-08-18T14:21:01.919"); } @@ -395,8 +397,8 @@ public void testNegativeTimestampntz() { (byte) 0xFF }); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.TIMESTAMPNTZ); - Assertions.assertThat(DateTimeUtil.microsToIsoTimestamp((long) value.get())) + assertThat(value.type()).isEqualTo(PhysicalType.TIMESTAMPNTZ); + assertThat(DateTimeUtil.microsToIsoTimestamp((long) value.get())) .isEqualTo("1969-12-31T23:59:59.999999"); } @@ -406,8 +408,8 @@ public void testFloat() { SerializedPrimitive.from( new byte[] {primitiveHeader(14), (byte) 0xD2, 0x02, (byte) 0x96, 0x49}); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.FLOAT); - Assertions.assertThat(value.get()).isEqualTo(Float.intBitsToFloat(1234567890)); + 
assertThat(value.type()).isEqualTo(PhysicalType.FLOAT); + assertThat(value.get()).isEqualTo(Float.intBitsToFloat(1234567890)); } @Test @@ -415,8 +417,8 @@ public void testNegativeFloat() { VariantPrimitive<?> value = SerializedPrimitive.from(new byte[] {primitiveHeader(14), 0x00, 0x00, 0x00, (byte) 0x80}); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.FLOAT); - Assertions.assertThat(value.get()).isEqualTo(-0.0F); + assertThat(value.type()).isEqualTo(PhysicalType.FLOAT); + assertThat(value.get()).isEqualTo(-0.0F); } @Test @@ -425,9 +427,8 @@ public void testBinary() { SerializedPrimitive.from( new byte[] {primitiveHeader(15), 0x05, 0x00, 0x00, 0x00, 'a', 'b', 'c', 'd', 'e'}); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.BINARY); - Assertions.assertThat(value.get()) - .isEqualTo(ByteBuffer.wrap(new byte[] {'a', 'b', 'c', 'd', 'e'})); + assertThat(value.type()).isEqualTo(PhysicalType.BINARY); + assertThat(value.get()).isEqualTo(ByteBuffer.wrap(new byte[] {'a', 'b', 'c', 'd', 'e'})); } @Test @@ -438,8 +439,8 @@ public void testString() { primitiveHeader(16), 0x07, 0x00, 0x00, 0x00, 'i', 'c', 'e', 'b', 'e', 'r', 'g' }); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(value.get()).isEqualTo("iceberg"); + assertThat(value.type()).isEqualTo(PhysicalType.STRING); + assertThat(value.get()).isEqualTo("iceberg"); } @Test @@ -447,13 +448,13 @@ public void testShortString() { VariantPrimitive<?> value = SerializedShortString.from(new byte[] {0b11101, 'i', 'c', 'e', 'b', 'e', 'r', 'g'}); - Assertions.assertThat(value.type()).isEqualTo(PhysicalType.STRING); - Assertions.assertThat(value.get()).isEqualTo("iceberg"); + assertThat(value.type()).isEqualTo(PhysicalType.STRING); + assertThat(value.get()).isEqualTo("iceberg"); } @Test public void testUnsupportedType() { - Assertions.assertThatThrownBy(() -> SerializedPrimitive.from(new byte[] {primitiveHeader(17)})) + assertThatThrownBy(() -> 
SerializedPrimitive.from(new byte[] {primitiveHeader(17)})) .isInstanceOf(UnsupportedOperationException.class) .hasMessage("Unknown primitive physical type: 17"); } diff --git a/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java index e640cfb9e32f..7ce1ea3383d5 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestShreddedObject.java @@ -18,6 +18,9 @@ */ package org.apache.iceberg.variants; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + import java.math.BigDecimal; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -26,11 +29,10 @@ import java.util.Set; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Maps; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.util.DateTimeUtil; import org.apache.iceberg.util.Pair; import org.apache.iceberg.util.RandomUtil; -import org.assertj.core.api.Assertions; -import org.assertj.core.util.Sets; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; @@ -51,12 +53,12 @@ public class TestShreddedObject { public void testShreddedFields() { ShreddedObject object = createShreddedObject(FIELDS).second(); - Assertions.assertThat(object.get("a")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(object.get("a").asPrimitive().get()).isEqualTo(34); - Assertions.assertThat(object.get("b")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(object.get("b").asPrimitive().get()).isEqualTo("iceberg"); - Assertions.assertThat(object.get("c")).isInstanceOf(VariantPrimitive.class); - 
Assertions.assertThat(object.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); + assertThat(object.get("a")).isInstanceOf(VariantPrimitive.class); + assertThat(object.get("a").asPrimitive().get()).isEqualTo(34); + assertThat(object.get("b")).isInstanceOf(VariantPrimitive.class); + assertThat(object.get("b").asPrimitive().get()).isEqualTo("iceberg"); + assertThat(object.get("c")).isInstanceOf(VariantPrimitive.class); + assertThat(object.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); } @Test @@ -67,16 +69,16 @@ public void testShreddedSerializationMinimalBuffer() { VariantValue value = roundTripMinimalBuffer(object, metadata); - Assertions.assertThat(value).isInstanceOf(SerializedObject.class); + assertThat(value).isInstanceOf(SerializedObject.class); SerializedObject actual = (SerializedObject) value; - Assertions.assertThat(actual.numElements()).isEqualTo(3); - Assertions.assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); - Assertions.assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); - Assertions.assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); + assertThat(actual.numElements()).isEqualTo(3); + assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); + assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); + assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); } @Test @@ -87,16 +89,16 @@ public void testShreddedSerializationLargeBuffer() { VariantValue value = 
roundTripLargeBuffer(object, metadata); - Assertions.assertThat(value).isInstanceOf(SerializedObject.class); + assertThat(value).isInstanceOf(SerializedObject.class); SerializedObject actual = (SerializedObject) value; - Assertions.assertThat(actual.numElements()).isEqualTo(3); - Assertions.assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); - Assertions.assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); - Assertions.assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); + assertThat(actual.numElements()).isEqualTo(3); + assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); + assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); + assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); } @Test @@ -107,16 +109,16 @@ public void testUnshreddedObjectSerializationMinimalBuffer() { VariantValue value = roundTripMinimalBuffer(object, metadata); - Assertions.assertThat(value).isInstanceOf(SerializedObject.class); + assertThat(value).isInstanceOf(SerializedObject.class); SerializedObject actual = (SerializedObject) value; - Assertions.assertThat(actual.numElements()).isEqualTo(3); - Assertions.assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); - Assertions.assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); - 
Assertions.assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); + assertThat(actual.numElements()).isEqualTo(3); + assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); + assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); + assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); } @Test @@ -127,16 +129,16 @@ public void testUnshreddedObjectSerializationLargeBuffer() { VariantValue value = roundTripLargeBuffer(object, metadata); - Assertions.assertThat(value).isInstanceOf(SerializedObject.class); + assertThat(value).isInstanceOf(SerializedObject.class); SerializedObject actual = (SerializedObject) value; - Assertions.assertThat(actual.numElements()).isEqualTo(3); - Assertions.assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); - Assertions.assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); - Assertions.assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); + assertThat(actual.numElements()).isEqualTo(3); + assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); + assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); + assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("c").asPrimitive().get()).isEqualTo(new 
BigDecimal("12.21")); } @Test @@ -146,13 +148,13 @@ public void testPartiallyShreddedObjectReplacement() { // replace field c with a new value partial.put("c", Variants.ofIsoDate("2024-10-12")); - Assertions.assertThat(partial.get("a")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(partial.get("a").asPrimitive().get()).isEqualTo(34); - Assertions.assertThat(partial.get("b")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(partial.get("b").asPrimitive().get()).isEqualTo("iceberg"); - Assertions.assertThat(partial.get("c")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(partial.get("c").type()).isEqualTo(Variants.PhysicalType.DATE); - Assertions.assertThat(partial.get("c").asPrimitive().get()) + assertThat(partial.get("a")).isInstanceOf(VariantPrimitive.class); + assertThat(partial.get("a").asPrimitive().get()).isEqualTo(34); + assertThat(partial.get("b")).isInstanceOf(VariantPrimitive.class); + assertThat(partial.get("b").asPrimitive().get()).isEqualTo("iceberg"); + assertThat(partial.get("c")).isInstanceOf(VariantPrimitive.class); + assertThat(partial.get("c").type()).isEqualTo(Variants.PhysicalType.DATE); + assertThat(partial.get("c").asPrimitive().get()) .isEqualTo(DateTimeUtil.isoDateToDays("2024-10-12")); } @@ -161,7 +163,7 @@ public void testPartiallyShreddedObjectGetMissingField() { ShreddedObject partial = createUnshreddedObject(FIELDS).second(); // missing fields are returned as null - Assertions.assertThat(partial.get("d")).isNull(); + assertThat(partial.get("d")).isNull(); } @Test @@ -169,7 +171,7 @@ public void testPartiallyShreddedObjectPutMissingFieldFailure() { ShreddedObject partial = createUnshreddedObject(FIELDS).second(); // d is not defined in the variant metadata and will fail - Assertions.assertThatThrownBy(() -> partial.put("d", Variants.ofIsoDate("2024-10-12"))) + assertThatThrownBy(() -> partial.put("d", Variants.ofIsoDate("2024-10-12"))) .isInstanceOf(IllegalArgumentException.class) 
.hasMessage("Cannot find field name in metadata: d"); } @@ -185,16 +187,16 @@ public void testPartiallyShreddedObjectSerializationMinimalBuffer() { VariantValue value = roundTripMinimalBuffer(partial, metadata); - Assertions.assertThat(value).isInstanceOf(SerializedObject.class); + assertThat(value).isInstanceOf(SerializedObject.class); SerializedObject actual = (SerializedObject) value; - Assertions.assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); - Assertions.assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); - Assertions.assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("c").type()).isEqualTo(Variants.PhysicalType.DATE); - Assertions.assertThat(actual.get("c").asPrimitive().get()) + assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); + assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); + assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("c").type()).isEqualTo(Variants.PhysicalType.DATE); + assertThat(actual.get("c").asPrimitive().get()) .isEqualTo(DateTimeUtil.isoDateToDays("2024-10-12")); } @@ -209,16 +211,16 @@ public void testPartiallyShreddedObjectSerializationLargeBuffer() { VariantValue value = roundTripLargeBuffer(partial, metadata); - Assertions.assertThat(value).isInstanceOf(SerializedObject.class); + assertThat(value).isInstanceOf(SerializedObject.class); SerializedObject actual = (SerializedObject) value; - Assertions.assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); - 
Assertions.assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); - Assertions.assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); - Assertions.assertThat(actual.get("c").type()).isEqualTo(Variants.PhysicalType.DATE); - Assertions.assertThat(actual.get("c").asPrimitive().get()) + assertThat(actual.get("a")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("a").asPrimitive().get()).isEqualTo(34); + assertThat(actual.get("b")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("b").asPrimitive().get()).isEqualTo("iceberg"); + assertThat(actual.get("c")).isInstanceOf(VariantPrimitive.class); + assertThat(actual.get("c").type()).isEqualTo(Variants.PhysicalType.DATE); + assertThat(actual.get("c").asPrimitive().get()) .isEqualTo(DateTimeUtil.isoDateToDays("2024-10-12")); } @@ -235,18 +237,18 @@ public void testTwoByteOffsets() { Pair<SerializedMetadata, ShreddedObject> pair = createShreddedObject(data); VariantValue value = roundTripLargeBuffer(pair.second(), pair.first()); - Assertions.assertThat(value.type()).isEqualTo(Variants.PhysicalType.OBJECT); + assertThat(value.type()).isEqualTo(Variants.PhysicalType.OBJECT); SerializedObject object = (SerializedObject) value; - Assertions.assertThat(object.numElements()).isEqualTo(4); - - Assertions.assertThat(object.get("a").type()).isEqualTo(Variants.PhysicalType.INT32); - Assertions.assertThat(object.get("a").asPrimitive().get()).isEqualTo(34); - Assertions.assertThat(object.get("b").type()).isEqualTo(Variants.PhysicalType.STRING); - Assertions.assertThat(object.get("b").asPrimitive().get()).isEqualTo("iceberg"); - Assertions.assertThat(object.get("c").type()).isEqualTo(Variants.PhysicalType.DECIMAL4); - Assertions.assertThat(object.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); - Assertions.assertThat(object.get("big").type()).isEqualTo(Variants.PhysicalType.STRING); - 
Assertions.assertThat(object.get("big").asPrimitive().get()).isEqualTo(randomString); + assertThat(object.numElements()).isEqualTo(4); + + assertThat(object.get("a").type()).isEqualTo(Variants.PhysicalType.INT32); + assertThat(object.get("a").asPrimitive().get()).isEqualTo(34); + assertThat(object.get("b").type()).isEqualTo(Variants.PhysicalType.STRING); + assertThat(object.get("b").asPrimitive().get()).isEqualTo("iceberg"); + assertThat(object.get("c").type()).isEqualTo(Variants.PhysicalType.DECIMAL4); + assertThat(object.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); + assertThat(object.get("big").type()).isEqualTo(Variants.PhysicalType.STRING); + assertThat(object.get("big").asPrimitive().get()).isEqualTo(randomString); } @Test @@ -262,18 +264,18 @@ public void testThreeByteOffsets() { Pair<SerializedMetadata, ShreddedObject> pair = createShreddedObject(data); VariantValue value = roundTripLargeBuffer(pair.second(), pair.first()); - Assertions.assertThat(value.type()).isEqualTo(Variants.PhysicalType.OBJECT); + assertThat(value.type()).isEqualTo(Variants.PhysicalType.OBJECT); SerializedObject object = (SerializedObject) value; - Assertions.assertThat(object.numElements()).isEqualTo(4); - - Assertions.assertThat(object.get("a").type()).isEqualTo(Variants.PhysicalType.INT32); - Assertions.assertThat(object.get("a").asPrimitive().get()).isEqualTo(34); - Assertions.assertThat(object.get("b").type()).isEqualTo(Variants.PhysicalType.STRING); - Assertions.assertThat(object.get("b").asPrimitive().get()).isEqualTo("iceberg"); - Assertions.assertThat(object.get("c").type()).isEqualTo(Variants.PhysicalType.DECIMAL4); - Assertions.assertThat(object.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); - Assertions.assertThat(object.get("really-big").type()).isEqualTo(Variants.PhysicalType.STRING); - Assertions.assertThat(object.get("really-big").asPrimitive().get()).isEqualTo(randomString); + assertThat(object.numElements()).isEqualTo(4); + + 
assertThat(object.get("a").type()).isEqualTo(Variants.PhysicalType.INT32); + assertThat(object.get("a").asPrimitive().get()).isEqualTo(34); + assertThat(object.get("b").type()).isEqualTo(Variants.PhysicalType.STRING); + assertThat(object.get("b").asPrimitive().get()).isEqualTo("iceberg"); + assertThat(object.get("c").type()).isEqualTo(Variants.PhysicalType.DECIMAL4); + assertThat(object.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); + assertThat(object.get("really-big").type()).isEqualTo(Variants.PhysicalType.STRING); + assertThat(object.get("really-big").asPrimitive().get()).isEqualTo(randomString); } @Test @@ -289,18 +291,18 @@ public void testFourByteOffsets() { Pair<SerializedMetadata, ShreddedObject> pair = createShreddedObject(data); VariantValue value = roundTripLargeBuffer(pair.second(), pair.first()); - Assertions.assertThat(value.type()).isEqualTo(Variants.PhysicalType.OBJECT); + assertThat(value.type()).isEqualTo(Variants.PhysicalType.OBJECT); SerializedObject object = (SerializedObject) value; - Assertions.assertThat(object.numElements()).isEqualTo(4); - - Assertions.assertThat(object.get("a").type()).isEqualTo(Variants.PhysicalType.INT32); - Assertions.assertThat(object.get("a").asPrimitive().get()).isEqualTo(34); - Assertions.assertThat(object.get("b").type()).isEqualTo(Variants.PhysicalType.STRING); - Assertions.assertThat(object.get("b").asPrimitive().get()).isEqualTo("iceberg"); - Assertions.assertThat(object.get("c").type()).isEqualTo(Variants.PhysicalType.DECIMAL4); - Assertions.assertThat(object.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); - Assertions.assertThat(object.get("really-big").type()).isEqualTo(Variants.PhysicalType.STRING); - Assertions.assertThat(object.get("really-big").asPrimitive().get()).isEqualTo(randomString); + assertThat(object.numElements()).isEqualTo(4); + + assertThat(object.get("a").type()).isEqualTo(Variants.PhysicalType.INT32); + 
assertThat(object.get("a").asPrimitive().get()).isEqualTo(34); + assertThat(object.get("b").type()).isEqualTo(Variants.PhysicalType.STRING); + assertThat(object.get("b").asPrimitive().get()).isEqualTo("iceberg"); + assertThat(object.get("c").type()).isEqualTo(Variants.PhysicalType.DECIMAL4); + assertThat(object.get("c").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); + assertThat(object.get("really-big").type()).isEqualTo(Variants.PhysicalType.STRING); + assertThat(object.get("really-big").asPrimitive().get()).isEqualTo(randomString); } @ParameterizedTest @@ -320,14 +322,14 @@ public void testLargeObject(boolean sortFieldNames) { ShreddedObject shredded = createShreddedObject(metadata, (Map) fields); VariantValue value = roundTripLargeBuffer(shredded, metadata); - Assertions.assertThat(value.type()).isEqualTo(Variants.PhysicalType.OBJECT); + assertThat(value.type()).isEqualTo(Variants.PhysicalType.OBJECT); SerializedObject object = (SerializedObject) value; - Assertions.assertThat(object.numElements()).isEqualTo(10_000); + assertThat(object.numElements()).isEqualTo(10_000); for (Map.Entry<String, VariantPrimitive<String>> entry : fields.entrySet()) { VariantValue fieldValue = object.get(entry.getKey()); - Assertions.assertThat(fieldValue.type()).isEqualTo(Variants.PhysicalType.STRING); - Assertions.assertThat(fieldValue.asPrimitive().get()).isEqualTo(entry.getValue().get()); + assertThat(fieldValue.type()).isEqualTo(Variants.PhysicalType.STRING); + assertThat(fieldValue.asPrimitive().get()).isEqualTo(entry.getValue().get()); } } @@ -350,16 +352,16 @@ public void testTwoByteFieldIds(boolean sortFieldNames) { ShreddedObject shredded = createShreddedObject(metadata, data); VariantValue value = roundTripLargeBuffer(shredded, metadata); - Assertions.assertThat(value.type()).isEqualTo(Variants.PhysicalType.OBJECT); + assertThat(value.type()).isEqualTo(Variants.PhysicalType.OBJECT); SerializedObject object = (SerializedObject) value; - 
Assertions.assertThat(object.numElements()).isEqualTo(3); - - Assertions.assertThat(object.get("aa").type()).isEqualTo(Variants.PhysicalType.INT32); - Assertions.assertThat(object.get("aa").asPrimitive().get()).isEqualTo(34); - Assertions.assertThat(object.get("AA").type()).isEqualTo(Variants.PhysicalType.STRING); - Assertions.assertThat(object.get("AA").asPrimitive().get()).isEqualTo("iceberg"); - Assertions.assertThat(object.get("ZZ").type()).isEqualTo(Variants.PhysicalType.DECIMAL4); - Assertions.assertThat(object.get("ZZ").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); + assertThat(object.numElements()).isEqualTo(3); + + assertThat(object.get("aa").type()).isEqualTo(Variants.PhysicalType.INT32); + assertThat(object.get("aa").asPrimitive().get()).isEqualTo(34); + assertThat(object.get("AA").type()).isEqualTo(Variants.PhysicalType.STRING); + assertThat(object.get("AA").asPrimitive().get()).isEqualTo("iceberg"); + assertThat(object.get("ZZ").type()).isEqualTo(Variants.PhysicalType.DECIMAL4); + assertThat(object.get("ZZ").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); } @ParameterizedTest @@ -381,16 +383,16 @@ public void testThreeByteFieldIds(boolean sortFieldNames) { ShreddedObject shredded = createShreddedObject(metadata, data); VariantValue value = roundTripLargeBuffer(shredded, metadata); - Assertions.assertThat(value.type()).isEqualTo(Variants.PhysicalType.OBJECT); + assertThat(value.type()).isEqualTo(Variants.PhysicalType.OBJECT); SerializedObject object = (SerializedObject) value; - Assertions.assertThat(object.numElements()).isEqualTo(3); - - Assertions.assertThat(object.get("aa").type()).isEqualTo(Variants.PhysicalType.INT32); - Assertions.assertThat(object.get("aa").asPrimitive().get()).isEqualTo(34); - Assertions.assertThat(object.get("AA").type()).isEqualTo(Variants.PhysicalType.STRING); - Assertions.assertThat(object.get("AA").asPrimitive().get()).isEqualTo("iceberg"); - 
Assertions.assertThat(object.get("ZZ").type()).isEqualTo(Variants.PhysicalType.DECIMAL4); - Assertions.assertThat(object.get("ZZ").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); + assertThat(object.numElements()).isEqualTo(3); + + assertThat(object.get("aa").type()).isEqualTo(Variants.PhysicalType.INT32); + assertThat(object.get("aa").asPrimitive().get()).isEqualTo(34); + assertThat(object.get("AA").type()).isEqualTo(Variants.PhysicalType.STRING); + assertThat(object.get("AA").asPrimitive().get()).isEqualTo("iceberg"); + assertThat(object.get("ZZ").type()).isEqualTo(Variants.PhysicalType.DECIMAL4); + assertThat(object.get("ZZ").asPrimitive().get()).isEqualTo(new BigDecimal("12.21")); } static VariantValue roundTripMinimalBuffer(ShreddedObject object, SerializedMetadata metadata) { diff --git a/core/src/test/java/org/apache/iceberg/variants/TestVariantUtil.java b/core/src/test/java/org/apache/iceberg/variants/TestVariantUtil.java index ed0a9be61d5f..9d9536fbf0d6 100644 --- a/core/src/test/java/org/apache/iceberg/variants/TestVariantUtil.java +++ b/core/src/test/java/org/apache/iceberg/variants/TestVariantUtil.java @@ -18,26 +18,27 @@ */ package org.apache.iceberg.variants; +import static org.assertj.core.api.Assertions.assertThat; + import java.nio.ByteBuffer; -import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; public class TestVariantUtil { @Test public void testReadByteUnsigned() { ByteBuffer buffer = ByteBuffer.wrap(new byte[] {(byte) 0xFF}); - Assertions.assertThat(VariantUtil.readByte(buffer, 0)).isEqualTo(255); + assertThat(VariantUtil.readByte(buffer, 0)).isEqualTo(255); } @Test public void testRead2ByteUnsigned() { ByteBuffer buffer = ByteBuffer.wrap(new byte[] {(byte) 0xFF, (byte) 0xFF}); - Assertions.assertThat(VariantUtil.readLittleEndianUnsigned(buffer, 0, 2)).isEqualTo(65535); + assertThat(VariantUtil.readLittleEndianUnsigned(buffer, 0, 2)).isEqualTo(65535); } @Test public void testRead3ByteUnsigned() { ByteBuffer 
buffer = ByteBuffer.wrap(new byte[] {(byte) 0xFF, (byte) 0xFF, (byte) 0xFF}); - Assertions.assertThat(VariantUtil.readLittleEndianUnsigned(buffer, 0, 3)).isEqualTo(16777215); + assertThat(VariantUtil.readLittleEndianUnsigned(buffer, 0, 3)).isEqualTo(16777215); } } From c220d50debe4be52a95d6570ec23337c85a2da17 Mon Sep 17 00:00:00 2001 From: Ryan Blue <blue@apache.org> Date: Wed, 11 Dec 2024 16:38:59 -0800 Subject: [PATCH 11/12] Fix checkstyle for main. --- .../org/apache/iceberg/variants/ShreddedObject.java | 10 +++++----- .../java/org/apache/iceberg/variants/Variants.java | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java b/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java index 0832f36cff96..e9e734fce0dd 100644 --- a/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java +++ b/core/src/main/java/org/apache/iceberg/variants/ShreddedObject.java @@ -117,7 +117,7 @@ private SerializationState( this.fieldIdSize = VariantUtil.sizeOf(metadata.dictionarySize()); this.shreddedFields = shreddedFields; - int dataSize = 0; + int totalDataSize = 0; // get the unshredded field names and values as byte buffers ImmutableMap.Builder<String, ByteBuffer> unshreddedBuilder = ImmutableMap.builder(); if (unshredded != null) { @@ -128,7 +128,7 @@ private SerializationState( if (!replaced) { ByteBuffer value = unshredded.sliceValue(field.second()); unshreddedBuilder.put(name, value); - dataSize += value.remaining(); + totalDataSize += value.remaining(); } } } @@ -140,12 +140,12 @@ private SerializationState( this.isLarge = numElements > 0xFF; for (VariantValue value : shreddedFields.values()) { - dataSize += value.sizeInBytes(); + totalDataSize += value.sizeInBytes(); } - this.dataSize = dataSize; + this.dataSize = totalDataSize; // offset size is the size needed to store the length of the data section - this.offsetSize = VariantUtil.sizeOf(dataSize); + this.offsetSize = 
VariantUtil.sizeOf(totalDataSize); } private int size() { diff --git a/core/src/main/java/org/apache/iceberg/variants/Variants.java b/core/src/main/java/org/apache/iceberg/variants/Variants.java index 0fd39f043468..9c7a89c572f5 100644 --- a/core/src/main/java/org/apache/iceberg/variants/Variants.java +++ b/core/src/main/java/org/apache/iceberg/variants/Variants.java @@ -25,6 +25,8 @@ import org.apache.iceberg.util.DateTimeUtil; public class Variants { + private Variants() {} + enum LogicalType { NULL, BOOLEAN, From 68a535ff00d9015e9156bec0abb45c4d839ce204 Mon Sep 17 00:00:00 2001 From: Ryan Blue <blue@apache.org> Date: Fri, 13 Dec 2024 12:24:24 -0800 Subject: [PATCH 12/12] Fix Java classes in PrimitiveType. --- core/src/main/java/org/apache/iceberg/variants/Variants.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/variants/Variants.java b/core/src/main/java/org/apache/iceberg/variants/Variants.java index 9c7a89c572f5..e8ea3d93ab77 100644 --- a/core/src/main/java/org/apache/iceberg/variants/Variants.java +++ b/core/src/main/java/org/apache/iceberg/variants/Variants.java @@ -46,8 +46,8 @@ public enum PhysicalType { NULL(LogicalType.NULL, Void.class), BOOLEAN_TRUE(LogicalType.BOOLEAN, Boolean.class), BOOLEAN_FALSE(LogicalType.BOOLEAN, Boolean.class), - INT8(LogicalType.EXACT_NUMERIC, Integer.class), - INT16(LogicalType.EXACT_NUMERIC, Integer.class), + INT8(LogicalType.EXACT_NUMERIC, Byte.class), + INT16(LogicalType.EXACT_NUMERIC, Short.class), INT32(LogicalType.EXACT_NUMERIC, Integer.class), INT64(LogicalType.EXACT_NUMERIC, Long.class), DOUBLE(LogicalType.DOUBLE, Double.class),