Skip to content

Commit

Permalink
fix: fix deserialization of parquets with large string list columns causing stack overflow (#11471)
Browse files Browse the repository at this point in the history
  • Loading branch information
ByteNybbler authored Oct 3, 2023
1 parent e3a491e commit 414130d
Showing 1 changed file with 20 additions and 16 deletions.
36 changes: 20 additions & 16 deletions crates/nano-arrow/src/io/parquet/read/deserialize/binary/nested.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,22 +166,26 @@ impl<O: Offset, I: Pages> Iterator for NestedIter<O, I> {
type Item = Result<(NestedState, Box<dyn Array>)>;

fn next(&mut self) -> Option<Self::Item> {
let maybe_state = next(
&mut self.iter,
&mut self.items,
&mut self.dict,
&mut self.remaining,
&self.init,
self.chunk_size,
&BinaryDecoder::<O>::default(),
);
match maybe_state {
MaybeNext::Some(Ok((nested, decoded))) => {
Some(finish(&self.data_type, decoded.0, decoded.1).map(|array| (nested, array)))
},
MaybeNext::Some(Err(e)) => Some(Err(e)),
MaybeNext::None => None,
MaybeNext::More => self.next(),
loop {
let maybe_state = next(
&mut self.iter,
&mut self.items,
&mut self.dict,
&mut self.remaining,
&self.init,
self.chunk_size,
&BinaryDecoder::<O>::default(),
);
match maybe_state {
MaybeNext::Some(Ok((nested, decoded))) => {
return Some(
finish(&self.data_type, decoded.0, decoded.1).map(|array| (nested, array)),
)
},
MaybeNext::Some(Err(e)) => return Some(Err(e)),
MaybeNext::None => return None,
MaybeNext::More => continue, // Using continue in a loop instead of calling next helps prevent stack overflow.
}
}
}
}

0 comments on commit 414130d

Please sign in to comment.