From 9692a02911bea029692bc05824968a1681d120cb Mon Sep 17 00:00:00 2001 From: Shivam Malhotra Date: Fri, 15 Nov 2024 20:56:41 +0530 Subject: [PATCH] refactor: Removed some unused Parquet APIs (#6373) --- .../deephaven/parquet/table/MappedSchema.java | 13 +- .../parquet/table/ParquetInstructions.java | 115 +++--------------- 2 files changed, 22 insertions(+), 106 deletions(-) diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/MappedSchema.java b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/MappedSchema.java index d818443bb58..3eedfa05f15 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/MappedSchema.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/MappedSchema.java @@ -26,21 +26,14 @@ static MappedSchema create( final TableDefinition definition, final RowSet rowSet, final Map<String, ? extends ColumnSource<?>> columnSourceMap, - final ParquetInstructions instructions, - final ColumnDefinition<?>... extraColumns) { + final ParquetInstructions instructions) { final MessageTypeBuilder builder = Types.buildMessage(); for (final ColumnDefinition<?> columnDefinition : definition.getColumns()) { - TypeInfos.TypeInfo typeInfo = + final TypeInfos.TypeInfo typeInfo = getTypeInfo(computedCache, columnDefinition, rowSet, columnSourceMap, instructions); - Type schemaType = typeInfo.createSchemaType(columnDefinition, instructions); + final Type schemaType = typeInfo.createSchemaType(columnDefinition, instructions); builder.addField(schemaType); } - - for (final ColumnDefinition<?> extraColumn : extraColumns) { - builder.addField(getTypeInfo(computedCache, extraColumn, rowSet, columnSourceMap, instructions) - .createSchemaType(extraColumn, instructions)); - } - final MessageType schema = builder.named("root"); return new MappedSchema(definition, schema); } diff --git a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetInstructions.java
b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetInstructions.java index a34d6ad2c20..3df5b7a6e4c 100644 --- a/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetInstructions.java +++ b/extensions/parquet/table/src/main/java/io/deephaven/parquet/table/ParquetInstructions.java @@ -33,7 +33,15 @@ */ public abstract class ParquetInstructions implements ColumnToCodecMappings { - private static volatile String defaultCompressionCodecName = CompressionCodecName.SNAPPY.toString(); + public static final String DEFAULT_COMPRESSION_CODEC_NAME = CompressionCodecName.SNAPPY.toString(); + public static final int DEFAULT_MAXIMUM_DICTIONARY_KEYS = 1 << 20; + public static final int DEFAULT_MAXIMUM_DICTIONARY_SIZE = 1 << 20; + + public static final int MIN_TARGET_PAGE_SIZE = Configuration.getInstance().getIntegerWithDefault( + "Parquet.minTargetPageSize", 1 << 11); // 2KB + + public static final int DEFAULT_TARGET_PAGE_SIZE = Configuration.getInstance().getIntegerWithDefault( + "Parquet.defaultTargetPageSize", 1 << 16); // 64KB /** * Throws an exception if {@link ParquetInstructions#getTableDefinition()} is empty. @@ -41,98 +49,13 @@ public abstract class ParquetInstructions implements ColumnToCodecMappings { * @param parquetInstructions the parquet instructions * @throws IllegalArgumentException if there is not a table definition */ - public static TableDefinition ensureDefinition(ParquetInstructions parquetInstructions) { + static TableDefinition ensureDefinition(final ParquetInstructions parquetInstructions) { return parquetInstructions.getTableDefinition() .orElseThrow(() -> new IllegalArgumentException("Table definition must be provided")); } - /** - * Set the default for {@link #getCompressionCodecName()}. - * - * @deprecated Use {@link Builder#setCompressionCodecName(String)} instead. 
- * @param name The new default - */ - @Deprecated - public static void setDefaultCompressionCodecName(final String name) { - defaultCompressionCodecName = name; - } - - /** - * @return The default for {@link #getCompressionCodecName()} - */ - public static String getDefaultCompressionCodecName() { - return defaultCompressionCodecName; - } - - private static volatile int defaultMaximumDictionaryKeys = 1 << 20; - - /** - * Set the default for {@link #getMaximumDictionaryKeys()}. - * - * @param maximumDictionaryKeys The new default - * @see Builder#setMaximumDictionaryKeys(int) - */ - public static void setDefaultMaximumDictionaryKeys(final int maximumDictionaryKeys) { - defaultMaximumDictionaryKeys = Require.geqZero(maximumDictionaryKeys, "maximumDictionaryKeys"); - } - - /** - * @return The default for {@link #getMaximumDictionaryKeys()} - */ - public static int getDefaultMaximumDictionaryKeys() { - return defaultMaximumDictionaryKeys; - } - - private static volatile int defaultMaximumDictionarySize = 1 << 20; - - /** - * Set the default for {@link #getMaximumDictionarySize()}. 
- * - * @param maximumDictionarySize The new default - * @see Builder#setMaximumDictionarySize(int) - */ - public static void setDefaultMaximumDictionarySize(final int maximumDictionarySize) { - defaultMaximumDictionarySize = Require.geqZero(maximumDictionarySize, "maximumDictionarySize"); - } - - /** - * @return The default for {@link #getMaximumDictionarySize()} - */ - public static int getDefaultMaximumDictionarySize() { - return defaultMaximumDictionarySize; - } - - public static final int MIN_TARGET_PAGE_SIZE = - Configuration.getInstance().getIntegerWithDefault("Parquet.minTargetPageSize", 1 << 11); // 2KB - private static final int DEFAULT_TARGET_PAGE_SIZE = - Configuration.getInstance().getIntegerWithDefault("Parquet.defaultTargetPageSize", 1 << 16); // 64KB - private static volatile int defaultTargetPageSize = DEFAULT_TARGET_PAGE_SIZE; - private static final boolean DEFAULT_IS_REFRESHING = false; - /** - * Set the default target page size (in bytes) used to section rows of data into pages during column writing. This - * number should be no smaller than {@link #MIN_TARGET_PAGE_SIZE}. - * - * @param newDefaultSizeBytes the new default target page size. - */ - public static void setDefaultTargetPageSize(final int newDefaultSizeBytes) { - if (newDefaultSizeBytes < MIN_TARGET_PAGE_SIZE) { - throw new IllegalArgumentException( - "Default target page size should be larger than " + MIN_TARGET_PAGE_SIZE + " bytes"); - } - defaultTargetPageSize = newDefaultSizeBytes; - } - - /** - * Get the current default target page size in bytes. - * - * @return the current default target page size in bytes. - */ - public static int getDefaultTargetPageSize() { - return defaultTargetPageSize; - } - public enum ParquetFileLayout { /** * A single parquet file. 
@@ -168,7 +91,7 @@ public enum ParquetFileLayout { static final String FILE_INDEX_TOKEN = "{i}"; private static final String DEFAULT_BASE_NAME_FOR_PARTITIONED_PARQUET_DATA = UUID_TOKEN; - public ParquetInstructions() {} + private ParquetInstructions() {} public final String getColumnNameFromParquetColumnNameOrDefault(final String parquetColumnName) { final String mapped = getColumnNameFromParquetColumnName(parquetColumnName); @@ -313,17 +236,17 @@ public Object getSpecialInstructions() { @Override public String getCompressionCodecName() { - return defaultCompressionCodecName; + return DEFAULT_COMPRESSION_CODEC_NAME; } @Override public int getMaximumDictionaryKeys() { - return defaultMaximumDictionaryKeys; + return DEFAULT_MAXIMUM_DICTIONARY_KEYS; } @Override public int getMaximumDictionarySize() { - return defaultMaximumDictionarySize; + return DEFAULT_MAXIMUM_DICTIONARY_SIZE; } @Override @@ -333,7 +256,7 @@ public boolean isLegacyParquet() { @Override public int getTargetPageSize() { - return defaultTargetPageSize; + return DEFAULT_TARGET_PAGE_SIZE; } @Override @@ -692,11 +615,11 @@ public static class Builder { // We only store entries in parquetColumnNameToInstructions when the parquetColumnName is // different than the columnName (ie, the column name mapping is not the default mapping) private KeyedObjectHashMap parquetColumnNameToInstructions; - private String compressionCodecName = defaultCompressionCodecName; - private int maximumDictionaryKeys = defaultMaximumDictionaryKeys; - private int maximumDictionarySize = defaultMaximumDictionarySize; + private String compressionCodecName = DEFAULT_COMPRESSION_CODEC_NAME; + private int maximumDictionaryKeys = DEFAULT_MAXIMUM_DICTIONARY_KEYS; + private int maximumDictionarySize = DEFAULT_MAXIMUM_DICTIONARY_SIZE; private boolean isLegacyParquet; - private int targetPageSize = defaultTargetPageSize; + private int targetPageSize = DEFAULT_TARGET_PAGE_SIZE; private boolean isRefreshing = DEFAULT_IS_REFRESHING; private 
Object specialInstructions; private boolean generateMetadataFiles = DEFAULT_GENERATE_METADATA_FILES;