refactor: Removed some unused Parquet APIs (#6373)
malhotrashivam authored Nov 15, 2024
1 parent 08c596d commit 9692a02
Showing 2 changed files with 22 additions and 106 deletions.
MappedSchema (first of the two changed files):

@@ -26,21 +26,14 @@ static MappedSchema create(
             final TableDefinition definition,
             final RowSet rowSet,
             final Map<String, ? extends ColumnSource<?>> columnSourceMap,
-            final ParquetInstructions instructions,
-            final ColumnDefinition<?>... extraColumns) {
+            final ParquetInstructions instructions) {
         final MessageTypeBuilder builder = Types.buildMessage();
         for (final ColumnDefinition<?> columnDefinition : definition.getColumns()) {
-            TypeInfos.TypeInfo typeInfo =
+            final TypeInfos.TypeInfo typeInfo =
                     getTypeInfo(computedCache, columnDefinition, rowSet, columnSourceMap, instructions);
-            Type schemaType = typeInfo.createSchemaType(columnDefinition, instructions);
+            final Type schemaType = typeInfo.createSchemaType(columnDefinition, instructions);
             builder.addField(schemaType);
         }
-
-        for (final ColumnDefinition<?> extraColumn : extraColumns) {
-            builder.addField(getTypeInfo(computedCache, extraColumn, rowSet, columnSourceMap, instructions)
-                    .createSchemaType(extraColumn, instructions));
-        }
-
         final MessageType schema = builder.named("root");
         return new MappedSchema(definition, schema);
     }
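Note: with the extraColumns varargs parameter gone, every column that should appear in the written Parquet schema must now come from the TableDefinition itself. The sketch below shows how an affected call site might adapt; it assumes computedCache, rowSet, columnSourceMap, and indexColumnDef are already in scope, and those names are illustrative rather than taken from this diff.

// Hypothetical call-site adaptation (sketch only; helper values assumed to be in scope).
// Before this commit, an extra column could be appended at schema-build time:
//     MappedSchema.create(computedCache, definition, rowSet, columnSourceMap, instructions, indexColumnDef);
// Now any such column has to be part of the TableDefinition that is passed in:
final TableDefinition definition = TableDefinition.of(
        ColumnDefinition.ofString("Sym"),
        ColumnDefinition.ofLong("Size"),
        indexColumnDef); // hypothetical extra column, folded into the definition itself
final MappedSchema schema =
        MappedSchema.create(computedCache, definition, rowSet, columnSourceMap, instructions);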
ParquetInstructions (second of the two changed files):

@@ -33,106 +33,29 @@
  */
 public abstract class ParquetInstructions implements ColumnToCodecMappings {
 
-    private static volatile String defaultCompressionCodecName = CompressionCodecName.SNAPPY.toString();
+    public static final String DEFAULT_COMPRESSION_CODEC_NAME = CompressionCodecName.SNAPPY.toString();
+    public static final int DEFAULT_MAXIMUM_DICTIONARY_KEYS = 1 << 20;
+    public static final int DEFAULT_MAXIMUM_DICTIONARY_SIZE = 1 << 20;
+
+    public static final int MIN_TARGET_PAGE_SIZE = Configuration.getInstance().getIntegerWithDefault(
+            "Parquet.minTargetPageSize", 1 << 11); // 2KB
+
+    public static final int DEFAULT_TARGET_PAGE_SIZE = Configuration.getInstance().getIntegerWithDefault(
+            "Parquet.defaultTargetPageSize", 1 << 16); // 64KB
 
     /**
      * Throws an exception if {@link ParquetInstructions#getTableDefinition()} is empty.
      *
      * @param parquetInstructions the parquet instructions
      * @throws IllegalArgumentException if there is not a table definition
      */
-    public static TableDefinition ensureDefinition(ParquetInstructions parquetInstructions) {
+    static TableDefinition ensureDefinition(final ParquetInstructions parquetInstructions) {
         return parquetInstructions.getTableDefinition()
                 .orElseThrow(() -> new IllegalArgumentException("Table definition must be provided"));
     }
 
-    /**
-     * Set the default for {@link #getCompressionCodecName()}.
-     *
-     * @deprecated Use {@link Builder#setCompressionCodecName(String)} instead.
-     * @param name The new default
-     */
-    @Deprecated
-    public static void setDefaultCompressionCodecName(final String name) {
-        defaultCompressionCodecName = name;
-    }
-
-    /**
-     * @return The default for {@link #getCompressionCodecName()}
-     */
-    public static String getDefaultCompressionCodecName() {
-        return defaultCompressionCodecName;
-    }
-
-    private static volatile int defaultMaximumDictionaryKeys = 1 << 20;
-
-    /**
-     * Set the default for {@link #getMaximumDictionaryKeys()}.
-     *
-     * @param maximumDictionaryKeys The new default
-     * @see Builder#setMaximumDictionaryKeys(int)
-     */
-    public static void setDefaultMaximumDictionaryKeys(final int maximumDictionaryKeys) {
-        defaultMaximumDictionaryKeys = Require.geqZero(maximumDictionaryKeys, "maximumDictionaryKeys");
-    }
-
-    /**
-     * @return The default for {@link #getMaximumDictionaryKeys()}
-     */
-    public static int getDefaultMaximumDictionaryKeys() {
-        return defaultMaximumDictionaryKeys;
-    }
-
-    private static volatile int defaultMaximumDictionarySize = 1 << 20;
-
-    /**
-     * Set the default for {@link #getMaximumDictionarySize()}.
-     *
-     * @param maximumDictionarySize The new default
-     * @see Builder#setMaximumDictionarySize(int)
-     */
-    public static void setDefaultMaximumDictionarySize(final int maximumDictionarySize) {
-        defaultMaximumDictionarySize = Require.geqZero(maximumDictionarySize, "maximumDictionarySize");
-    }
-
-    /**
-     * @return The default for {@link #getMaximumDictionarySize()}
-     */
-    public static int getDefaultMaximumDictionarySize() {
-        return defaultMaximumDictionarySize;
-    }
-
-    public static final int MIN_TARGET_PAGE_SIZE =
-            Configuration.getInstance().getIntegerWithDefault("Parquet.minTargetPageSize", 1 << 11); // 2KB
-    private static final int DEFAULT_TARGET_PAGE_SIZE =
-            Configuration.getInstance().getIntegerWithDefault("Parquet.defaultTargetPageSize", 1 << 16); // 64KB
-    private static volatile int defaultTargetPageSize = DEFAULT_TARGET_PAGE_SIZE;
-
     private static final boolean DEFAULT_IS_REFRESHING = false;
 
-    /**
-     * Set the default target page size (in bytes) used to section rows of data into pages during column writing. This
-     * number should be no smaller than {@link #MIN_TARGET_PAGE_SIZE}.
-     *
-     * @param newDefaultSizeBytes the new default target page size.
-     */
-    public static void setDefaultTargetPageSize(final int newDefaultSizeBytes) {
-        if (newDefaultSizeBytes < MIN_TARGET_PAGE_SIZE) {
-            throw new IllegalArgumentException(
-                    "Default target page size should be larger than " + MIN_TARGET_PAGE_SIZE + " bytes");
-        }
-        defaultTargetPageSize = newDefaultSizeBytes;
-    }
-
-    /**
-     * Get the current default target page size in bytes.
-     *
-     * @return the current default target page size in bytes.
-     */
-    public static int getDefaultTargetPageSize() {
-        return defaultTargetPageSize;
-    }
-
     public enum ParquetFileLayout {
         /**
          * A single parquet file.
@@ -168,7 +91,7 @@ public enum ParquetFileLayout {
     static final String FILE_INDEX_TOKEN = "{i}";
     private static final String DEFAULT_BASE_NAME_FOR_PARTITIONED_PARQUET_DATA = UUID_TOKEN;
 
-    public ParquetInstructions() {}
+    private ParquetInstructions() {}
 
     public final String getColumnNameFromParquetColumnNameOrDefault(final String parquetColumnName) {
         final String mapped = getColumnNameFromParquetColumnName(parquetColumnName);
@@ -313,17 +236,17 @@ public Object getSpecialInstructions() {
 
         @Override
         public String getCompressionCodecName() {
-            return defaultCompressionCodecName;
+            return DEFAULT_COMPRESSION_CODEC_NAME;
         }
 
         @Override
         public int getMaximumDictionaryKeys() {
-            return defaultMaximumDictionaryKeys;
+            return DEFAULT_MAXIMUM_DICTIONARY_KEYS;
         }
 
         @Override
         public int getMaximumDictionarySize() {
-            return defaultMaximumDictionarySize;
+            return DEFAULT_MAXIMUM_DICTIONARY_SIZE;
         }
 
         @Override
@@ -333,7 +256,7 @@ public boolean isLegacyParquet() {
 
         @Override
         public int getTargetPageSize() {
-            return defaultTargetPageSize;
+            return DEFAULT_TARGET_PAGE_SIZE;
        }
 
         @Override
@@ -692,11 +615,11 @@ public static class Builder {
         // We only store entries in parquetColumnNameToInstructions when the parquetColumnName is
         // different than the columnName (ie, the column name mapping is not the default mapping)
         private KeyedObjectHashMap<String, ColumnInstructions> parquetColumnNameToInstructions;
-        private String compressionCodecName = defaultCompressionCodecName;
-        private int maximumDictionaryKeys = defaultMaximumDictionaryKeys;
-        private int maximumDictionarySize = defaultMaximumDictionarySize;
+        private String compressionCodecName = DEFAULT_COMPRESSION_CODEC_NAME;
+        private int maximumDictionaryKeys = DEFAULT_MAXIMUM_DICTIONARY_KEYS;
+        private int maximumDictionarySize = DEFAULT_MAXIMUM_DICTIONARY_SIZE;
         private boolean isLegacyParquet;
-        private int targetPageSize = defaultTargetPageSize;
+        private int targetPageSize = DEFAULT_TARGET_PAGE_SIZE;
         private boolean isRefreshing = DEFAULT_IS_REFRESHING;
         private Object specialInstructions;
         private boolean generateMetadataFiles = DEFAULT_GENERATE_METADATA_FILES;
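Note: the process-wide mutable defaults and their accessors (setDefaultCompressionCodecName, setDefaultMaximumDictionaryKeys, setDefaultMaximumDictionarySize, setDefaultTargetPageSize, and the matching getters) are removed outright; per-write configuration now goes through the Builder, as the removed @deprecated javadoc already suggested. A minimal migration sketch, assuming the package is io.deephaven.parquet.table, the Builder has a public no-arg constructor with fluent setters, and build() and setTargetPageSize(int) exist alongside the setters named in the removed javadoc:

import io.deephaven.parquet.table.ParquetInstructions; // package path assumed, not shown in this diff

public class ParquetInstructionsMigrationSketch {
    static ParquetInstructions buildWriteInstructions() {
        // Before (removed in this commit): process-wide mutable defaults, e.g.
        //     ParquetInstructions.setDefaultMaximumDictionaryKeys(1 << 16);
        // After: configure each write explicitly and pass the instructions to the write call.
        return new ParquetInstructions.Builder()
                .setCompressionCodecName(ParquetInstructions.DEFAULT_COMPRESSION_CODEC_NAME)
                .setMaximumDictionaryKeys(1 << 16)
                .setMaximumDictionarySize(ParquetInstructions.DEFAULT_MAXIMUM_DICTIONARY_SIZE)
                .setTargetPageSize(ParquetInstructions.DEFAULT_TARGET_PAGE_SIZE) // setter name assumed
                .build(); // build() assumed
    }
}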