@@ -984,6 +984,7 @@ def to_parquet(
     map_symbols: bool = True,
     schema: Schema | str | None = None,
     mode: Literal["w", "x"] = "w",
+    parquet_schema: pa.Schema | None = None,
     **kwargs: Any,
 ) -> None:
     """
@@ -1010,6 +1011,9 @@ def to_parquet(
         This is only required when reading a DBN stream with mixed record types.
     mode : str, default "w"
         The file write mode to use, either "x" or "w".
+    parquet_schema : pyarrow.Schema, optional
+        The pyarrow parquet schema to use to write the parquet file.
+        This defaults to a detected schema based on the DataFrame representation.
     **kwargs : Any
         Keyword arguments to pass to the `pyarrow.parquet.ParquetWriter`.
         These can be used to override the default behavior of the writer.
@@ -1046,10 +1050,11 @@ def to_parquet(
     for frame in dataframe_iter:
         if writer is None:
             # Initialize the writer using the first DataFrame
-            parquet_schema = pa.Schema.from_pandas(frame)
+            if parquet_schema is None:
+                parquet_schema = pa.Schema.from_pandas(frame)
             writer = pq.ParquetWriter(
                 where=kwargs.pop("where", file_path),
-                schema=kwargs.pop("schema", parquet_schema),
+                schema=parquet_schema,
                 **kwargs,
             )
         writer.write_table(
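
A minimal usage sketch of the new parameter, assuming this is the `DBNStore.to_parquet` method from the databento package; the file paths and the column names/types in the schema below are illustrative assumptions, not taken from this change:

import pyarrow as pa
from databento import DBNStore

# Illustrative input file path (assumption, not from the diff).
store = DBNStore.from_file("example.dbn.zst")

# Hypothetical explicit schema; the fields must match the DataFrame
# representation that to_parquet produces for the loaded data.
custom_schema = pa.schema(
    [
        pa.field("ts_event", pa.timestamp("ns", tz="UTC")),
        pa.field("price", pa.float64()),
        pa.field("size", pa.uint32()),
    ]
)

# With parquet_schema supplied, the writer uses it instead of the schema
# detected from the first DataFrame chunk.
store.to_parquet("example.parquet", parquet_schema=custom_schema)

If parquet_schema is left as None, the default behavior is unchanged: the schema is still inferred from the first DataFrame via pa.Schema.from_pandas.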