Skip to content

Commit

Permalink
Support multiple GZip members in parquet page (#4951)
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold authored Nov 15, 2023
1 parent 7941577 commit 7fa78b7
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 2 deletions.
2 changes: 1 addition & 1 deletion parquet-testing
2 changes: 1 addition & 1 deletion parquet/src/compression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ mod gzip_codec {
output_buf: &mut Vec<u8>,
_uncompress_size: Option<usize>,
) -> Result<usize> {
let mut decoder = read::GzDecoder::new(input_buf);
let mut decoder = read::MultiGzDecoder::new(input_buf);
decoder.read_to_end(output_buf).map_err(|e| e.into())
}

Expand Down
25 changes: 25 additions & 0 deletions parquet/src/file/serialized_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -775,6 +775,7 @@ mod tests {
use crate::format::BoundaryOrder;

use crate::basic::{self, ColumnOrder};
use crate::column::reader::ColumnReader;
use crate::data_type::private::ParquetValueType;
use crate::data_type::{AsBytes, FixedLenByteArrayType};
use crate::file::page_index::index::{Index, NativeIndex};
Expand Down Expand Up @@ -1730,4 +1731,28 @@ mod tests {
_ => unreachable!(),
}
}

/// Reads column 0 of row group 0 from `concatenated_gzip_members.parquet`
/// and checks that 513 records are returned with values `1..=513`.
///
/// NOTE(review): judging by the file name, the fixture presumably stores a
/// page made of several concatenated gzip members — confirm against the
/// parquet-testing repository.
#[test]
fn test_multi_gz() {
    // Capacity of the scratch buffers; larger than the expected row count.
    const BATCH: usize = 1024;
    // Number of rows the fixture is expected to yield.
    const EXPECTED_ROWS: usize = 513;

    let file = get_test_file("concatenated_gzip_members.parquet");
    let file_reader = SerializedFileReader::new(file).unwrap();
    let row_group = file_reader.get_row_group(0).unwrap();
    let column = row_group.get_column_reader(0).unwrap();

    match column {
        ColumnReader::Int64ColumnReader(mut int64_reader) => {
            let mut values = [0; BATCH];
            let mut def_levels = [0; BATCH];

            let (records_read, values_read, levels_read) = int64_reader
                .read_records(BATCH, Some(&mut def_levels), None, &mut values)
                .unwrap();

            assert_eq!(records_read, EXPECTED_ROWS);
            assert_eq!(values_read, EXPECTED_ROWS);
            assert_eq!(levels_read, EXPECTED_ROWS);

            // The decoded column must be the consecutive integers 1..=513.
            let expected: Vec<i64> = (1..=513).collect();
            assert_eq!(&values[..EXPECTED_ROWS], &expected);
        }
        _ => unreachable!(),
    }
}
}

0 comments on commit 7fa78b7

Please sign in to comment.