From a07b64a6ce7c4cc9770e8ccbdbf1996e3bfe9432 Mon Sep 17 00:00:00 2001 From: Github Action Date: Sat, 11 May 2024 12:15:12 +0000 Subject: [PATCH] doc update for tag `python-v0.17.4` --- api/delta_table/index.html | 34 +++- api/storage/index.html | 327 ++++++++++++++++++++++++++++++++- objects.inv | Bin 1732 -> 1795 bytes search/search_index.json | 2 +- sitemap.xml | 78 ++++---- sitemap.xml.gz | Bin 606 -> 606 bytes usage/constraints/index.html | 10 +- usage/loading-table/index.html | 2 +- usage/read-cdf/index.html | 6 +- usage/writing/index.html | 26 +-- 10 files changed, 420 insertions(+), 65 deletions(-) diff --git a/api/delta_table/index.html b/api/delta_table/index.html index 8920a28750..dafc57aed6 100644 --- a/api/delta_table/index.html +++ b/api/delta_table/index.html @@ -3703,7 +3703,7 @@

-
to_pyarrow_dataset(partitions: Optional[List[Tuple[str, str, Any]]] = None, filesystem: Optional[Union[str, pa_fs.FileSystem]] = None, parquet_read_options: Optional[ParquetReadOptions] = None) -> pyarrow.dataset.Dataset
+
to_pyarrow_dataset(partitions: Optional[List[Tuple[str, str, Any]]] = None, filesystem: Optional[Union[str, pa_fs.FileSystem]] = None, parquet_read_options: Optional[ParquetReadOptions] = None, schema: Optional[pyarrow.Schema] = None, as_large_types: bool = False) -> pyarrow.dataset.Dataset
 
@@ -3765,6 +3765,38 @@

None + + schema + + Optional[Schema] + + +
+

The schema to use for the dataset. If None, the schema of the DeltaTable will be used. This can be used to force reading of Parquet/Arrow datatypes +that DeltaLake can't represent in its schema (e.g. LargeString). +If you only need to read the schema with large types (e.g. for compatibility with Polars) you may want to use the as_large_types parameter instead.

+
+ + + None + + + + as_large_types + + bool + + +
+

Get schema with all variable-size types (list, binary, string) as large variants (with int64 indices). +This is for compatibility with systems like Polars that only support the large versions of Arrow types. +If schema is passed, it takes precedence over this option.

+
+ + + False + +

More info: https://arrow.apache.org/docs/python/generated/pyarrow.dataset.ParquetReadOptions.html

diff --git a/api/storage/index.html b/api/storage/index.html index eb3c5a37e9..d2e6de3697 100644 --- a/api/storage/index.html +++ b/api/storage/index.html @@ -1277,11 +1277,74 @@