diff --git a/api/delta_table/index.html b/api/delta_table/index.html
index 8920a28750..dafc57aed6 100644
--- a/api/delta_table/index.html
+++ b/api/delta_table/index.html
@@ -3703,7 +3703,7 @@

-to_pyarrow_dataset(partitions: Optional[List[Tuple[str, str, Any]]] = None, filesystem: Optional[Union[str, pa_fs.FileSystem]] = None, parquet_read_options: Optional[ParquetReadOptions] = None) -> pyarrow.dataset.Dataset
+to_pyarrow_dataset(partitions: Optional[List[Tuple[str, str, Any]]] = None, filesystem: Optional[Union[str, pa_fs.FileSystem]] = None, parquet_read_options: Optional[ParquetReadOptions] = None, schema: Optional[pyarrow.Schema] = None, as_large_types: bool = False) -> pyarrow.dataset.Dataset
 
@@ -3765,6 +3765,38 @@

None

+schema
+
+Optional[Schema]
+
+The schema to use for the dataset. If None, the schema of the DeltaTable is used. This can be used to force reading Parquet/Arrow datatypes that DeltaLake cannot represent in its schema (e.g. LargeString). If you only need to read the schema with large types (e.g. for compatibility with Polars), you may want to use the as_large_types parameter instead.
+
+None
+
+as_large_types
+
+bool
+
+Get the schema with all variable-size types (list, binary, string) as large variants (with int64 indices). This is for compatibility with systems like Polars that only support the large versions of Arrow types. If schema is passed, it takes precedence over this option.
+
+False

More info: https://arrow.apache.org/docs/python/generated/pyarrow.dataset.ParquetReadOptions.html
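To illustrate the two new parameters, a minimal usage sketch in Python (the table path and column names below are hypothetical, not taken from the docs):

```python
import pyarrow as pa
from deltalake import DeltaTable

# Hypothetical local table path, for illustration only.
dt = DeltaTable("path/to/table")

# Read with all variable-size types (list, binary, string) as their large
# Arrow variants, e.g. for downstream consumers such as Polars.
ds_large = dt.to_pyarrow_dataset(as_large_types=True)

# Or pass an explicit schema to force particular Arrow datatypes; an explicit
# schema takes precedence over as_large_types. Column names here are invented.
schema = pa.schema([("id", pa.int64()), ("name", pa.large_string())])
ds_typed = dt.to_pyarrow_dataset(schema=schema)
```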

diff --git a/api/storage/index.html b/api/storage/index.html
index eb3c5a37e9..d2e6de3697 100644
--- a/api/storage/index.html
+++ b/api/storage/index.html
@@ -1277,11 +1277,74 @@