diff --git a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java index 6f837fe91..5eea1faa4 100644 --- a/xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java +++ b/xtable-core/src/main/java/org/apache/xtable/delta/DeltaValueConverter.java @@ -19,7 +19,8 @@ package org.apache.xtable.delta; import java.math.BigDecimal; -import java.math.BigInteger; +import java.math.MathContext; +import java.math.RoundingMode; import java.nio.charset.StandardCharsets; import java.sql.Date; import java.text.DateFormat; @@ -63,7 +64,7 @@ public static Object convertFromDeltaColumnStatValue(Object value, InternalSchem return null; } if (noConversionForSchema(fieldSchema)) { - return castObjectToInternalType(value, fieldSchema.getDataType()); + return castObjectToInternalType(value, fieldSchema); } // Needs special handling for date and time. InternalType fieldType = fieldSchema.getDataType(); @@ -198,7 +199,8 @@ public static Object convertFromDeltaPartitionValue( } } - private static Object castObjectToInternalType(Object value, InternalType valueType) { + private static Object castObjectToInternalType(Object value, InternalSchema schema) { + InternalType valueType = schema.getDataType(); switch (valueType) { case DOUBLE: if (value instanceof String) @@ -232,7 +234,7 @@ private static Object castObjectToInternalType(Object value, InternalType valueT } break; case DECIMAL: - return numberTypeToBigDecimal(value); + return numberTypeToBigDecimal(value, schema); case LONG: if (value instanceof Integer) { return ((Integer) value).longValue(); @@ -242,18 +244,12 @@ private static Object castObjectToInternalType(Object value, InternalType valueT return value; } - private static BigDecimal numberTypeToBigDecimal(Object value) { - // BigDecimal is parsed as Integer, Long, BigInteger and double if none of the above. - if (value instanceof Integer) { - return BigDecimal.valueOf((Integer) value); - } else if (value instanceof Long) { - return BigDecimal.valueOf((Long) value); - } else if (value instanceof BigInteger) { - return new BigDecimal((BigInteger) value); - } else if (value instanceof Double) { - return BigDecimal.valueOf((Double) value); - } else { - return (BigDecimal) value; - } + private static BigDecimal numberTypeToBigDecimal(Object value, InternalSchema schema) { + // BigDecimal is parsed by converting the value to a string and setting the proper scale and + // precision. + int precision = (int) schema.getMetadata().get(InternalSchema.MetadataKey.DECIMAL_PRECISION); + int scale = (int) schema.getMetadata().get(InternalSchema.MetadataKey.DECIMAL_SCALE); + return new BigDecimal(String.valueOf(value), new MathContext(precision)) + .setScale(scale, RoundingMode.UNNECESSARY); } } diff --git a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java b/xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java index 2ffbad048..e47ef72e0 100644 --- a/xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java +++ b/xtable-core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java @@ -20,6 +20,7 @@ import java.math.BigDecimal; import java.math.BigInteger; +import java.math.RoundingMode; import java.nio.ByteBuffer; import java.sql.Date; import java.util.ArrayList; @@ -200,14 +201,16 @@ private static ColumnStat getColumnStatFromHudiStat( Comparable minValue = HoodieAvroUtils.unwrapAvroValueWrapper(columnStats.getMinValue()); Comparable maxValue = HoodieAvroUtils.unwrapAvroValueWrapper(columnStats.getMaxValue()); if (field.getSchema().getDataType() == InternalType.DECIMAL) { + int scale = + (int) field.getSchema().getMetadata().get(InternalSchema.MetadataKey.DECIMAL_SCALE); minValue = minValue instanceof ByteBuffer - ? convertBytesToBigDecimal((ByteBuffer) minValue, DECIMAL_WRAPPER_SCALE) - : minValue; + ? convertBytesToBigDecimal((ByteBuffer) minValue, scale) + : ((BigDecimal) minValue).setScale(scale, RoundingMode.UNNECESSARY); maxValue = maxValue instanceof ByteBuffer - ? convertBytesToBigDecimal((ByteBuffer) maxValue, DECIMAL_WRAPPER_SCALE) - : maxValue; + ? convertBytesToBigDecimal((ByteBuffer) maxValue, scale) + : ((BigDecimal) maxValue).setScale(scale, RoundingMode.UNNECESSARY); } return getColumnStatFromValues( minValue, @@ -221,7 +224,9 @@ private static ColumnStat getColumnStatFromHudiStat( private static BigDecimal convertBytesToBigDecimal(ByteBuffer value, int scale) { byte[] bytes = new byte[value.remaining()]; value.duplicate().get(bytes); - return new BigDecimal(new BigInteger(bytes), scale); + BigDecimal serializedValue = new BigDecimal(new BigInteger(bytes), DECIMAL_WRAPPER_SCALE); + // set the scale to match the schema + return serializedValue.setScale(scale, RoundingMode.UNNECESSARY); } private static ColumnStat getColumnStatFromColRange( diff --git a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java b/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java index 1cdd33a75..d69021f12 100644 --- a/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java +++ b/xtable-core/src/test/java/org/apache/xtable/delta/TestDeltaValueConverter.java @@ -21,6 +21,9 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import java.math.BigDecimal; +import java.math.MathContext; +import java.math.RoundingMode; import java.text.DateFormat; import java.text.ParseException; import java.text.SimpleDateFormat; @@ -33,6 +36,8 @@ import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; +import com.google.common.collect.ImmutableMap; + import org.apache.xtable.model.schema.InternalSchema; import org.apache.xtable.model.schema.InternalType; import org.apache.xtable.model.schema.PartitionTransformType; @@ -214,4 +219,109 @@ private static Stream nonNumericValuesForColStats() { Arguments.of(Double.NaN, doubleSchema, Double.NaN), Arguments.of(Double.POSITIVE_INFINITY, doubleSchema, Double.POSITIVE_INFINITY)); } + + @ParameterizedTest + @MethodSource("decimalValues") + void parseDecimalValues( + Object deltaValue, InternalSchema fieldSchema, BigDecimal expectedOutput) { + assertEquals( + expectedOutput, + DeltaValueConverter.convertFromDeltaColumnStatValue(deltaValue, fieldSchema)); + } + + private static Stream decimalValues() { + return Stream.of( + Arguments.of( + -8.00, + InternalSchema.builder() + .name("decimal") + .dataType(InternalType.DECIMAL) + .metadata( + ImmutableMap.builder() + .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2) + .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 5) + .build()) + .build(), + new BigDecimal("-8.00", new MathContext(5, RoundingMode.UNNECESSARY)) + .setScale(2, RoundingMode.UNNECESSARY)), + Arguments.of( + -8.00f, + InternalSchema.builder() + .name("decimal") + .dataType(InternalType.DECIMAL) + .metadata( + ImmutableMap.builder() + .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2) + .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 5) + .build()) + .build(), + new BigDecimal("-8.00", new MathContext(5, RoundingMode.UNNECESSARY)) + .setScale(2, RoundingMode.UNNECESSARY)), + Arguments.of( + 1000, + InternalSchema.builder() + .name("decimal") + .dataType(InternalType.DECIMAL) + .metadata( + ImmutableMap.builder() + .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2) + .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 6) + .build()) + .build(), + new BigDecimal("1000.00", new MathContext(6, RoundingMode.UNNECESSARY)) + .setScale(2, RoundingMode.UNNECESSARY)), + Arguments.of( + 1000L, + InternalSchema.builder() + .name("decimal") + .dataType(InternalType.DECIMAL) + .metadata( + ImmutableMap.builder() + .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2) + .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 6) + .build()) + .build(), + new BigDecimal("1000.00", new MathContext(6, RoundingMode.UNNECESSARY)) + .setScale(2, RoundingMode.UNNECESSARY)), + Arguments.of( + "1000", + InternalSchema.builder() + .name("decimal") + .dataType(InternalType.DECIMAL) + .metadata( + ImmutableMap.builder() + .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2) + .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 6) + .build()) + .build(), + new BigDecimal("1000.00", new MathContext(6, RoundingMode.UNNECESSARY)) + .setScale(2, RoundingMode.UNNECESSARY)), + Arguments.of( + 1234.56, + InternalSchema.builder() + .name("decimal") + .dataType(InternalType.DECIMAL) + .metadata( + ImmutableMap.builder() + .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2) + .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 6) + .build()) + .build(), + new BigDecimal("1234.56", new MathContext(6, RoundingMode.UNNECESSARY)) + .setScale(2, RoundingMode.UNNECESSARY)), + Arguments.of( + new BigDecimal("1234.56", new MathContext(6, RoundingMode.UNNECESSARY)) + .setScale(2, RoundingMode.UNNECESSARY), + InternalSchema.builder() + .name("decimal") + .dataType(InternalType.DECIMAL) + .metadata( + ImmutableMap.builder() + .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2) + .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 6) + .build()) + .build(), + new BigDecimal("1234.56", new MathContext(6, RoundingMode.UNNECESSARY)) + .setScale(2, RoundingMode.UNNECESSARY))); + } } diff --git a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java b/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java index 82149c8b6..a18bb743d 100644 --- a/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java +++ b/xtable-core/src/test/java/org/apache/xtable/hudi/TestHudiFileStatsExtractor.java @@ -286,12 +286,8 @@ private void validateOutput(List output) { assertEquals(1, decimalColumnStat.getNumNulls()); assertEquals(2, decimalColumnStat.getNumValues()); assertTrue(decimalColumnStat.getTotalSize() > 0); - assertEquals( - new BigDecimal("1234.56"), - ((BigDecimal) decimalColumnStat.getRange().getMinValue()).setScale(2)); - assertEquals( - new BigDecimal("1234.56"), - ((BigDecimal) decimalColumnStat.getRange().getMaxValue()).setScale(2)); + assertEquals(new BigDecimal("1234.56"), decimalColumnStat.getRange().getMinValue()); + assertEquals(new BigDecimal("1234.56"), decimalColumnStat.getRange().getMaxValue()); } private HoodieRecord buildRecord(GenericRecord record) { diff --git a/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java b/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java index 09b34308d..1703b9169 100644 --- a/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java +++ b/xtable-core/src/test/java/org/apache/xtable/testutil/ColumnStatMapUtil.java @@ -26,6 +26,8 @@ import lombok.AccessLevel; import lombok.NoArgsConstructor; +import com.google.common.collect.ImmutableMap; + import org.apache.xtable.model.schema.InternalField; import org.apache.xtable.model.schema.InternalSchema; import org.apache.xtable.model.schema.InternalType; @@ -174,7 +176,16 @@ public class ColumnStatMapUtil { private static final InternalField DECIMAL_FIELD = InternalField.builder() .name("decimal_field") - .schema(InternalSchema.builder().name("decimal").dataType(InternalType.DECIMAL).build()) + .schema( + InternalSchema.builder() + .name("decimal") + .dataType(InternalType.DECIMAL) + .metadata( + ImmutableMap.builder() + .put(InternalSchema.MetadataKey.DECIMAL_SCALE, 2) + .put(InternalSchema.MetadataKey.DECIMAL_PRECISION, 5) + .build()) + .build()) .build(); private static final InternalField FLOAT_FIELD = @@ -312,7 +323,7 @@ public static List getColumnStats() { ColumnStat.builder() .field(DECIMAL_FIELD) .numNulls(1) - .range(Range.vector(new BigDecimal("1.0"), new BigDecimal("2.0"))) + .range(Range.vector(new BigDecimal("1.00"), new BigDecimal("2.00"))) .numValues(50) .totalSize(123) .build();