diff --git a/crates/nano-arrow/src/io/parquet/read/deserialize/nested_utils.rs b/crates/nano-arrow/src/io/parquet/read/deserialize/nested_utils.rs index fc68080e0799..595f161bb73e 100644 --- a/crates/nano-arrow/src/io/parquet/read/deserialize/nested_utils.rs +++ b/crates/nano-arrow/src/io/parquet/read/deserialize/nested_utils.rs @@ -498,9 +498,6 @@ where if items.len() > 1 { return MaybeNext::Some(Ok(items.pop_front().unwrap())); } - if (items.len() == 1) && items.front().unwrap().0.len() == chunk_size.unwrap_or(usize::MAX) { - return MaybeNext::Some(Ok(items.pop_front().unwrap())); - } if *remaining == 0 { return match items.pop_front() { Some(decoded) => MaybeNext::Some(Ok(decoded)), @@ -541,11 +538,11 @@ where }; if (items.len() == 1) - && items.front().unwrap().0.len() < chunk_size.unwrap_or(usize::MAX) + && items.front().unwrap().0.len() > chunk_size.unwrap_or(usize::MAX) { - MaybeNext::More - } else { MaybeNext::Some(Ok(items.pop_front().unwrap())) + } else { + MaybeNext::More } }, } diff --git a/py-polars/tests/unit/io/test_parquet.py b/py-polars/tests/unit/io/test_parquet.py index 0ca782b8bc87..18a4d6863669 100644 --- a/py-polars/tests/unit/io/test_parquet.py +++ b/py-polars/tests/unit/io/test_parquet.py @@ -8,6 +8,7 @@ import pandas as pd import pyarrow as pa import pyarrow.dataset as ds +import pyarrow.parquet as pq import pytest import polars as pl @@ -494,3 +495,22 @@ def test_tz_aware_parquet_9586(io_files_path: Path) -> None: {"UTC_DATETIME_ID": [datetime(2023, 6, 26, 14, 15, 0, tzinfo=timezone.utc)]} ).select(pl.col("*").cast(pl.Datetime("ns", "UTC"))) assert_frame_equal(result, expected) + + +def test_nested_list_page_reads_to_end_11548() -> None: + df = pl.select( + pl.repeat(pl.arange(0, 2048, dtype=pl.UInt64).implode(), 2).alias("x"), + ) + + f = io.BytesIO() + + pq.write_table(df.to_arrow(), f, data_page_size=1) + + f.seek(0) + + assert pl.read_parquet(f).select( + pl.col("x").list.lengths() + ).to_series().to_list() == [ + 2048, + 2048, + ]