Skip to content

Commit 7e6f939

Browse files
committed
wrap read_table with partial load checking and use everywhere
1 parent 1d99e93 commit 7e6f939

File tree

2 files changed

+23
-15
lines changed
  • src/nested_pandas/nestedframe
  • tests/nested_pandas/nestedframe

2 files changed

+23
-15
lines changed

src/nested_pandas/nestedframe/io.py

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -229,28 +229,36 @@ def _read_parquet_into_table(
229229
fs=filesystem,
230230
engine="pyarrow",
231231
) as parquet_file:
232-
try:
233-
return pq.read_table(parquet_file, columns=columns, **kwargs)
234-
except ArrowInvalid as e:
235-
if columns is not None:
236-
check_schema = any("." in col for col in columns) # Check for potential partial loads
237-
if check_schema:
238-
try:
239-
_validate_structs_from_schema(data, columns=columns, filesystem=filesystem)
240-
except ValueError as validation_error:
241-
raise validation_error from e # Chain the exceptions for better context
242-
raise e
232+
return _read_table_with_partial_load_check(parquet_file, columns=columns, **kwargs)
243233

244234
# All other cases, including file-like objects, directories, and
245235
# even lists of the foregoing.
246236

247237
# If `filesystem` is specified - use it, passing it as part of **kwargs
248238
if kwargs.get("filesystem") is not None:
249-
return pq.read_table(data, columns=columns, **kwargs)
239+
return _read_table_with_partial_load_check(data, columns=columns, **kwargs)
250240

251241
# Otherwise convert with a special function
252242
data, filesystem = _transform_read_parquet_data_arg(data)
253-
return pq.read_table(data, filesystem=filesystem, columns=columns, **kwargs)
243+
return _read_table_with_partial_load_check(data, columns=columns, filesystem=filesystem, **kwargs)
244+
245+
246+
def _read_table_with_partial_load_check(data, columns=None, filesystem=None, **kwargs):
    """Read a pyarrow table, checking for partial loads of nested structures.

    Thin wrapper around ``pq.read_table``. When the read fails with an
    ``ArrowInvalid`` whose message contains "No match for" and at least one
    requested column name contains a ".", the request is re-checked with
    ``_validate_structs_from_schema`` so that any ``ValueError`` it raises
    replaces the pyarrow error (chained to it for context).

    Parameters
    ----------
    data
        Source handed to ``pq.read_table`` as its first argument.
    columns : list of str, optional
        Columns to load. Names containing "." trigger the extra schema
        validation on failure.
    filesystem : optional
        Filesystem forwarded both to ``pq.read_table`` and to
        ``_validate_structs_from_schema``.
    **kwargs
        Extra keyword arguments forwarded to ``pq.read_table``.

    Returns
    -------
    The table returned by ``pq.read_table``.

    Raises
    ------
    ValueError
        Raised by ``_validate_structs_from_schema`` when it rejects the
        requested columns; chained from the original ``ArrowInvalid``.
    ArrowInvalid
        The original pyarrow error, in every other failure case.
    """
    try:
        # `filesystem` is captured by the named parameter above (even when a
        # caller supplies it through **kwargs), so it must be forwarded
        # explicitly or the read silently falls back to the default filesystem.
        return pq.read_table(data, columns=columns, filesystem=filesystem, **kwargs)
    except ArrowInvalid as e:
        # If it's not related to a failed column lookup, re-raise untouched.
        if "No match for" not in str(e):
            raise
        # A "." in a requested column name signals a potential partial load
        # of a nested structure; only then is the schema check worth running.
        if columns is not None and any("." in col for col in columns):
            try:
                _validate_structs_from_schema(data, columns=columns, filesystem=filesystem)
            except ValueError as validation_error:
                # Chain the exceptions for better context
                raise validation_error from e
        # Validation found nothing to report: surface the pyarrow error.
        raise
254262

255263

256264
def _validate_structs_from_schema(data, columns=None, filesystem=None):
@@ -317,7 +325,7 @@ def _read_remote_parquet_directory(
317325
fs=filesystem,
318326
engine="pyarrow",
319327
) as parquet_file:
320-
table = pq.read_table(parquet_file, columns=columns, **kwargs)
328+
table = _read_table_with_partial_load_check(parquet_file, columns=columns, **kwargs)
321329
tables.append(table)
322330
return pa.concat_tables(tables)
323331

tests/nested_pandas/nestedframe/test_io.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -471,5 +471,5 @@ def test_list_struct_partial_loading_error():
471471
def test_normal_loading_error():
    """Test that making a normal naming mistake raises the normal pyarrow error."""
    # Load in the example file.
    # `match` is a regular expression applied with `re.search`; a trailing "*"
    # would quantify the final "r" and loosen the check to "No match fo".
    with pytest.raises(ValueError, match="No match for"):
        read_parquet("tests/test_data/nested.parquet", columns=["not_a_column"])

0 commit comments

Comments
 (0)