From 9473e1f3b396dc2bed121566ba68f6979aafa0d6 Mon Sep 17 00:00:00 2001
From: Aihua Xu
Date: Thu, 21 Nov 2024 17:05:37 -0800
Subject: [PATCH] Add Variant logical type annotation to parquet-java

---
Review notes (this section is ignored by `git am`):
  * Adds a parameterless VARIANT logical type: the enum token, the
    variantType() factory, the VariantLogicalTypeAnnotation singleton and a
    visitor hook whose default implementation returns empty().
  * toOriginalType() returns null; there is no OriginalType for Variant.
  * Generic type arguments (List<String>, Optional<T>, ...) and source
    indentation were missing from the received copy of this patch and have
    been restored; verify them against the target tree before applying.
  * NOTE(review): the author e-mail is absent from the From: header.

 .../parquet/schema/LogicalTypeAnnotation.java | 43 +++++++++++++++++++
 .../parquet/schema/TestTypeBuilders.java      | 21 +++++++++
 .../TestTypeBuildersWithLogicalTypes.java     | 25 +++++++++++
 3 files changed, 89 insertions(+)

diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
index 05629dd388..a4188578c2 100644
--- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
+++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java
@@ -56,6 +56,12 @@ protected LogicalTypeAnnotation fromString(List<String> params) {
         return listType();
       }
     },
+    VARIANT {
+      @Override
+      protected LogicalTypeAnnotation fromString(List<String> params) {
+        return variantType();
+      }
+    },
     STRING {
       @Override
       protected LogicalTypeAnnotation fromString(List<String> params) {
@@ -263,6 +269,13 @@ public static ListLogicalTypeAnnotation listType() {
     return ListLogicalTypeAnnotation.INSTANCE;
   }
 
+  /**
+   * Returns the shared {@link VariantLogicalTypeAnnotation} instance.
+   */
+  public static VariantLogicalTypeAnnotation variantType() {
+    return VariantLogicalTypeAnnotation.INSTANCE;
+  }
+
   public static EnumLogicalTypeAnnotation enumType() {
     return EnumLogicalTypeAnnotation.INSTANCE;
   }
@@ -1091,6 +1104,32 @@ public int hashCode() {
     }
   }
 
+  /**
+   * Logical type annotation for Variant. It takes no parameters and has no
+   * {@link OriginalType} counterpart, so {@link #toOriginalType()} returns {@code null}.
+   */
+  public static class VariantLogicalTypeAnnotation extends LogicalTypeAnnotation {
+    private static final VariantLogicalTypeAnnotation INSTANCE = new VariantLogicalTypeAnnotation();
+
+    private VariantLogicalTypeAnnotation() {}
+
+    @Override
+    public OriginalType toOriginalType() {
+      // No OriginalType for Variant
+      return null;
+    }
+
+    @Override
+    public <T> Optional<T> accept(LogicalTypeAnnotationVisitor<T> logicalTypeAnnotationVisitor) {
+      return logicalTypeAnnotationVisitor.visit(this);
+    }
+
+    @Override
+    LogicalTypeToken getType() {
+      return LogicalTypeToken.VARIANT;
+    }
+  }
+
   /**
    * Implement this interface to visit a logical type annotation in the schema.
    * The default implementation for each logical type specific visitor method is empty.
@@ -1115,6 +1154,10 @@ default Optional<T> visit(ListLogicalTypeAnnotation listLogicalType) {
       return empty();
     }
 
+    default Optional<T> visit(VariantLogicalTypeAnnotation variantLogicalType) {
+      return empty();
+    }
+
     default Optional<T> visit(EnumLogicalTypeAnnotation enumLogicalType) {
       return empty();
     }
diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
index 579077897f..551f0a4859 100644
--- a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
+++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuilders.java
@@ -50,6 +50,7 @@
 import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
 import static org.apache.parquet.schema.Type.Repetition.REPEATED;
 import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
+import static org.junit.Assert.assertEquals;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -1414,6 +1415,26 @@ public void testTimestampLogicalTypeWithUTCParameter() {
     Assert.assertEquals(nonUtcMicrosExpected, nonUtcMicrosActual);
   }
 
+  @Test
+  public void testVariantLogicalType() {
+    String name = "variant_field";
+    GroupType variantExpected = new GroupType(
+        REQUIRED,
+        name,
+        LogicalTypeAnnotation.variantType(),
+        new PrimitiveType(REQUIRED, BINARY, "metadata"),
+        new PrimitiveType(REQUIRED, BINARY, "value"));
+
+    GroupType variantActual = Types.buildGroup(REQUIRED)
+        .addFields(
+            Types.required(BINARY).named("metadata"),
+            Types.required(BINARY).named("value"))
+        .as(LogicalTypeAnnotation.variantType())
+        .named(name);
+
+    assertEquals(variantExpected, variantActual);
+  }
+
   @Test(expected = IllegalArgumentException.class)
   public void testDecimalLogicalTypeWithDeprecatedScaleMismatch() {
     Types.required(BINARY)
diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java
index 54853e8138..d2b12740f3 100644
--- a/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java
+++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestTypeBuildersWithLogicalTypes.java
@@ -41,6 +41,8 @@
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT96;
 import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
 
 import java.util.concurrent.Callable;
 import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
@@ -473,6 +475,29 @@ public void testFloat16LogicalType() {
             .toString());
   }
 
+  @Test
+  public void testVariantLogicalType() {
+    String name = "variant_field";
+    GroupType variant = new GroupType(
+        REQUIRED,
+        name,
+        LogicalTypeAnnotation.variantType(),
+        Types.required(BINARY).named("metadata"),
+        Types.required(BINARY).named("value"));
+
+    assertEquals(
+        "required group variant_field (VARIANT) {\n"
+            + "  required binary metadata;\n"
+            + "  required binary value;\n"
+            + "}",
+        variant.toString());
+
+    LogicalTypeAnnotation annotation = variant.getLogicalTypeAnnotation();
+    assertEquals(LogicalTypeAnnotation.LogicalTypeToken.VARIANT, annotation.getType());
+    assertNull(annotation.toOriginalType());
+    assertTrue(annotation instanceof LogicalTypeAnnotation.VariantLogicalTypeAnnotation);
+  }
+
   /**
    * A convenience method to avoid a large number of @Test(expected=...) tests
    *