Skip to content

Commit

Permalink
KE-41399 Avoid reading the Parquet footer twice in the vectorized reader (#71)
Browse files Browse the repository at this point in the history
 KE-41399 Avoid reading the Parquet footer twice in the vectorized reader
  • Loading branch information
yabola authored May 22, 2023
1 parent 1f2ac54 commit f4b689b
Show file tree
Hide file tree
Showing 23 changed files with 40 additions and 36 deletions.
2 changes: 1 addition & 1 deletion parquet-arrow/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion parquet-avro/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion parquet-benchmarks/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion parquet-cascading-deprecated/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion parquet-cascading3-deprecated/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion parquet-cli/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion parquet-column/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion parquet-common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion parquet-encoding/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion parquet-format-structures/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<artifactId>parquet-format-structures</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion parquet-generator/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion parquet-hadoop-bundle/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion parquet-hadoop/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -663,12 +663,12 @@ public static ParquetFileReader open(InputFile file, ParquetReadOptions options)

protected final SeekableInputStream f;
private final InputFile file;
private final ParquetReadOptions options;
private ParquetReadOptions options;
private final Map<ColumnPath, ColumnDescriptor> paths = new HashMap<>();
private final FileMetaData fileMetaData; // may be null
private final List<BlockMetaData> blocks;
private final List<ColumnIndexStore> blockIndexStores;
private final List<RowRanges> blockRowRanges;
private List<BlockMetaData> blocks;
private List<ColumnIndexStore> blockIndexStores;
private List<RowRanges> blockRowRanges;

// not final. in some cases, this may be lazily loaded for backward-compat.
private ParquetMetadata footer;
Expand Down Expand Up @@ -754,17 +754,8 @@ public ParquetFileReader(Configuration conf, Path file, ParquetMetadata footer)
this.f = this.file.newStream();
this.fileMetaData = footer.getFileMetaData();
this.fileDecryptor = fileMetaData.getFileDecryptor();
if (null == fileDecryptor) {
this.options = HadoopReadOptions.builder(conf).build();
} else {
this.options = HadoopReadOptions.builder(conf)
.withDecryption(fileDecryptor.getDecryptionProperties())
.build();
}
this.footer = footer;
this.blocks = filterRowGroups(footer.getBlocks());
this.blockIndexStores = listWithNulls(this.blocks.size());
this.blockRowRanges = listWithNulls(this.blocks.size());
resetBlocks(conf);
for (ColumnDescriptor col : footer.getFileMetaData().getSchema().getColumns()) {
paths.put(ColumnPath.get(col.getPath()), col);
}
Expand Down Expand Up @@ -808,6 +799,19 @@ public ParquetFileReader(InputFile file, ParquetReadOptions options) throws IOEx
}
}

/**
 * Rebuilds this reader's read options from {@code conf} and recomputes the row-group state
 * from the footer currently held by the reader.
 *
 * <p>The previously assigned options are replaced by Hadoop read options built from
 * {@code conf}; when a file decryptor is present, its decryption properties are added to the
 * new options. The footer's blocks are then run through {@code filterRowGroups} again, and
 * the per-block column index store and row range lists are replaced with null-filled lists
 * sized to the filtered block list.
 *
 * <p>NOTE(review): {@code footer} is dereferenced directly here, while its field comment says
 * it may be lazily loaded -- confirm callers only invoke this once the footer has been set.
 *
 * @param conf configuration from which the new read options are built
 * @throws IOException declared by the signature; presumably propagated from row-group
 *     filtering -- TODO confirm which call raises it
 */
public void resetBlocks(Configuration conf) throws IOException {
  final boolean hasDecryptor = fileDecryptor != null;
  this.options = hasDecryptor
      ? HadoopReadOptions.builder(conf)
          .withDecryption(fileDecryptor.getDecryptionProperties())
          .build()
      : HadoopReadOptions.builder(conf).build();

  // Statement order is kept from the original: options are assigned before filtering
  // (presumably filterRowGroups consults them -- verify before reordering).
  this.blocks = filterRowGroups(footer.getBlocks());

  // Both per-block lists are sized to the filtered block list and start out all-null.
  final int blockCount = this.blocks.size();
  this.blockIndexStores = listWithNulls(blockCount);
  this.blockRowRanges = listWithNulls(blockCount);
}

/**
 * Creates a mutable {@link ArrayList} holding {@code size} {@code null} elements.
 *
 * @param size number of null entries to place in the list
 * @param <T> element type of the returned list
 * @return a new list containing exactly {@code size} nulls
 * @throws IllegalArgumentException if {@code size} is negative
 */
private static <T> List<T> listWithNulls(int size) {
  // A negative size is rejected by the ArrayList constructor with IllegalArgumentException.
  final ArrayList<T> placeholders = new ArrayList<T>(size);
  for (int slot = 0; slot < size; slot++) {
    placeholders.add(null);
  }
  return placeholders;
}
Expand Down
2 changes: 1 addition & 1 deletion parquet-jackson/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion parquet-pig-bundle/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion parquet-pig/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion parquet-protobuf/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion parquet-scala/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion parquet-scrooge-deprecated/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion parquet-thrift/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion parquet-tools-deprecated/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<relativePath>../pom.xml</relativePath>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

<groupId>org.apache.parquet</groupId>
<artifactId>parquet</artifactId>
<version>1.12.2-kylin-r5</version>
<version>1.12.2-kylin-r6</version>
<packaging>pom</packaging>

<name>Apache Parquet MR</name>
Expand Down

0 comments on commit f4b689b

Please sign in to comment.