Skip to content

Commit

Permalink
Error if page starts not on record boundary
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold committed Oct 16, 2023
1 parent 684f749 commit ff217be
Showing 1 changed file with 37 additions and 0 deletions.
37 changes: 37 additions & 0 deletions parquet/src/column/reader/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,7 @@ pub struct RepetitionLevelDecoderImpl {
buffer_len: usize,
buffer_offset: usize,
has_partial: bool,
page_start: bool,
}

impl RepetitionLevelDecoderImpl {
Expand All @@ -407,13 +408,23 @@ impl RepetitionLevelDecoderImpl {
buffer_offset: 0,
buffer_len: 0,
has_partial: false,
page_start: false,
}
}

/// Refills the internal level buffer from the underlying decoder.
///
/// On success `buffer_offset` is reset to zero and `buffer_len` is set to the
/// number of values the decoder reported reading. While `page_start` is set,
/// the first non-empty refill additionally requires the first buffered value
/// to be `0`; once that check passes the flag is cleared so later refills
/// skip it.
///
/// # Errors
///
/// Propagates any error returned by the decoder's `read`, and returns a
/// general error when the first value buffered after `page_start` was set
/// is non-zero (the record would be split across a page boundary, see #4943).
///
/// # Panics
///
/// Panics if `self.decoder` holds no decoder (the `unwrap` below).
fn fill_buf(&mut self) -> Result<()> {
    let decoder = self.decoder.as_mut().unwrap();
    let filled = decoder.read(self.buffer.as_mut())?;
    self.buffer_len = filled;
    self.buffer_offset = 0;

    // Only the first non-empty fill after `page_start` was raised is validated.
    if !self.page_start || filled == 0 {
        return Ok(());
    }

    // A non-zero leading value is rejected: per the error text it means a
    // record straddles the page boundary.
    if self.buffer[0] != 0 {
        return Err(general_err!(
            "Record must not be split across page boundary (#4943)"
        ));
    }

    self.page_start = false;
    Ok(())
}

Expand Down Expand Up @@ -452,6 +463,7 @@ impl ColumnLevelDecoder for RepetitionLevelDecoderImpl {
self.buffer_len = 0;
self.buffer_offset = 0;
self.has_partial = false;
self.page_start = true;
}
}

Expand Down Expand Up @@ -548,6 +560,31 @@ mod tests {
assert_eq!(levels, 6);
}

/// Checks that both the skip path and the read path fail with the
/// record-boundary error when the first encoded level handed to the decoder
/// is `1` rather than `0`.
#[test]
fn test_record_boundary() {
    const EXPECTED: &str =
        "Parquet error: Record must not be split across page boundary (#4943)";

    // Encode a single level with value 1 as the entire page payload.
    let mut rle = RleEncoder::new(1, 1024);
    rle.put(1);
    let page = ByteBufferPtr::new(rle.consume());

    let mut decoder = RepetitionLevelDecoderImpl::new(1);

    // Skip path.
    decoder.set_data(Encoding::RLE, page.clone());
    let skip_err = decoder.skip_rep_levels(1, 4).unwrap_err().to_string();
    assert_eq!(skip_err, EXPECTED);

    // Read path: `set_data` is called again before the second attempt.
    decoder.set_data(Encoding::RLE, page);
    let mut levels = [0; 8];
    let read_err = decoder
        .read_rep_levels(&mut levels, 0..1, 4)
        .unwrap_err()
        .to_string();
    assert_eq!(read_err, EXPECTED);
}

#[test]
fn test_skip_rep_levels() {
for _ in 0..10 {
Expand Down

0 comments on commit ff217be

Please sign in to comment.