Skip to content

Commit

Permalink
add per column config
Browse files Browse the repository at this point in the history
  • Loading branch information
wgtmac committed Nov 13, 2024
1 parent 009a63d commit b9c22c1
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 0 deletions.
9 changes: 9 additions & 0 deletions parquet-hadoop/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -515,4 +515,13 @@ if not found then the library will use the classic non-vectored reads: it is saf
**Description:** Whether to enable column statistics collection.
If `true`, statistics will be collected for all columns unless explicitly disabled for specific columns.
If `false`, statistics will be disabled for all columns regardless of column-specific settings.
It is possible to enable or disable statistics for specific columns by appending `#` followed by the column path to the property name.
**Default value:** `true`
**Example:**
```java
// Enable statistics for all columns
conf.setBoolean("parquet.column.statistics.enabled", true);
// Disable statistics for 'column.path'
conf.setBoolean("parquet.column.statistics.enabled#column.path", false);
// The final configuration will be: Enable statistics for all columns except 'column.path'
```
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,18 @@ public static boolean getStatisticsEnabled(Configuration conf) {
return conf.getBoolean(STATISTICS_ENABLED, ParquetProperties.DEFAULT_STATISTICS_ENABLED);
}

/**
 * Records a per-column statistics switch in the job's configuration.
 *
 * <p>The value is stored under the key {@code STATISTICS_ENABLED + "#" + columnPath} as the
 * string {@code "true"} or {@code "false"}.
 *
 * @param jobContext the job whose configuration receives the setting
 * @param columnPath the column path appended after {@code #} to form the column-specific key
 * @param enabled whether statistics should be enabled for that column
 */
public static void setStatisticsEnabled(JobContext jobContext, String columnPath, boolean enabled) {
  final String columnKey = STATISTICS_ENABLED + "#" + columnPath;
  getConfiguration(jobContext).set(columnKey, Boolean.toString(enabled));
}

/**
 * Resolves whether statistics are enabled for a single column.
 *
 * <p>The column-specific key ({@code STATISTICS_ENABLED + "#" + columnPath}) is consulted first.
 * When it is absent, the global {@code STATISTICS_ENABLED} key is used, which in turn falls back
 * to {@link ParquetProperties#DEFAULT_STATISTICS_ENABLED}.
 *
 * <p>Both keys are parsed with {@code Configuration.getBoolean} so that the column-specific value
 * is interpreted the same way as the global one: surrounding whitespace is tolerated, and a value
 * that is empty or not a valid boolean falls back to the global setting instead of being silently
 * read as {@code false}.
 *
 * <p>NOTE(review): the README describes a global {@code false} as disabling statistics for all
 * columns regardless of column-specific settings, whereas this method lets the column-specific
 * key take precedence. Confirm which behavior is intended.
 *
 * @param conf the configuration to read from
 * @param columnPath the column path appended after {@code #} to form the column-specific key
 * @return {@code true} if statistics are enabled for the given column
 */
public static boolean getStatisticsEnabled(Configuration conf, String columnPath) {
  // The global setting doubles as the default for the column-specific lookup.
  boolean globalSetting = conf.getBoolean(STATISTICS_ENABLED, ParquetProperties.DEFAULT_STATISTICS_ENABLED);
  return conf.getBoolean(STATISTICS_ENABLED + "#" + columnPath, globalSetting);
}

private WriteSupport<T> writeSupport;
private ParquetOutputCommitter committer;

Expand Down Expand Up @@ -489,6 +501,10 @@ public RecordWriter<Void, T> getRecordWriter(Configuration conf, Path file, Comp
BLOOM_FILTER_CANDIDATES_NUMBER,
key -> conf.getInt(key, ParquetProperties.DEFAULT_BLOOM_FILTER_CANDIDATES_NUMBER),
propsBuilder::withBloomFilterCandidatesNumber)
.withColumnConfig(
STATISTICS_ENABLED,
key -> conf.getBoolean(key, ParquetProperties.DEFAULT_STATISTICS_ENABLED),
propsBuilder::withStatisticsEnabled)
.parseConfig(conf);

ParquetProperties props = propsBuilder.build();
Expand Down

0 comments on commit b9c22c1

Please sign in to comment.