Skip to content

Commit

Permalink
Also upcast from normal to large when large = True
Browse files Browse the repository at this point in the history
  • Loading branch information
ion-elgreco committed Nov 7, 2023
1 parent 2302148 commit 843c1d8
Showing 1 changed file with 10 additions and 5 deletions.
15 changes: 10 additions & 5 deletions python/deltalake/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,32 @@ def _convert_pa_schema_to_delta(
) -> pa.schema:
"""Convert a PyArrow schema to a schema compatible with Delta Lake. Converts unsigned to signed equivalent, and
converts all timestamps to `us` timestamps. With the boolean flag large_dtypes you can control if the schema
should keep large types in the schema.
should cast normal types to large types, or large types to normal types.
Args
schema: Source schema
large_dtypes: If True, the pyarrow schema is kept in large_dtypes
large_dtypes: If True, the pyarrow schema is cast to large dtypes; if False, large dtypes are cast to normal dtypes
"""
dtype_map = {
pa.uint8(): pa.int8(),
pa.uint16(): pa.int16(),
pa.uint32(): pa.int32(),
pa.uint64(): pa.int64(),
}
if not large_dtypes:
if large_dtypes:
dtype_map = {
**dtype_map,
**{pa.string(): pa.large_string(), pa.binary(): pa.large_binary()},
}
else:
dtype_map = {
**dtype_map,
**{pa.large_string(): pa.string(), pa.large_binary(): pa.binary()},
}

def dtype_to_delta_dtype(dtype: pa.DataType) -> pa.DataType:
# Handle nested types
if isinstance(dtype, pa.LargeListType):
if isinstance(dtype, (pa.LargeListType, pa.ListType)):
return list_to_delta_dtype(dtype)
elif isinstance(dtype, pa.StructType):
return struct_to_delta_dtype(dtype)
Expand All @@ -55,7 +60,7 @@ def dtype_to_delta_dtype(dtype: pa.DataType) -> pa.DataType:
return dtype

def list_to_delta_dtype(
dtype: pa.LargeListType,
dtype: Union[pa.LargeListType, pa.ListType],
) -> Union[pa.LargeListType, pa.ListType]:
nested_dtype = dtype.value_type
nested_dtype_cast = dtype_to_delta_dtype(nested_dtype)
Expand Down

0 comments on commit 843c1d8

Please sign in to comment.