diff --git a/python/deltalake/schema.py b/python/deltalake/schema.py index a22725fdc5..2f89cfaa2d 100644 --- a/python/deltalake/schema.py +++ b/python/deltalake/schema.py @@ -46,7 +46,7 @@ def _convert_pa_schema_to_delta( def dtype_to_delta_dtype(dtype: pa.DataType) -> pa.DataType: # Handle nested types - if isinstance(dtype, (pa.LargeListType, pa.ListType)): + if isinstance(dtype, (pa.LargeListType, pa.ListType, pa.FixedSizeListType)): return list_to_delta_dtype(dtype) elif isinstance(dtype, pa.StructType): return struct_to_delta_dtype(dtype) @@ -54,6 +54,8 @@ def dtype_to_delta_dtype(dtype: pa.DataType) -> pa.DataType: return pa.timestamp( "us" ) # TODO(ion): propagate also timezone information during writeonce we can properly read TZ in delta schema + elif type(dtype) is pa.FixedSizeBinaryType: + return pa.binary() try: return dtype_map[dtype] except KeyError: diff --git a/python/stubs/pyarrow/__init__.pyi b/python/stubs/pyarrow/__init__.pyi index 10edfcf663..aaf92ea962 100644 --- a/python/stubs/pyarrow/__init__.pyi +++ b/python/stubs/pyarrow/__init__.pyi @@ -10,6 +10,8 @@ DataType: Any ListType: Any StructType: Any MapType: Any +FixedSizeListType: Any +FixedSizeBinaryType: Any schema: Any map_: Any list_: Any diff --git a/python/tests/test_schema.py b/python/tests/test_schema.py index 57d66849df..7721774d21 100644 --- a/python/tests/test_schema.py +++ b/python/tests/test_schema.py @@ -235,12 +235,16 @@ def test_delta_schema(): [ pa.field("some_int", pa.uint32(), nullable=True), pa.field("some_string", pa.string(), nullable=False), + pa.field("some_fixed_binary", pa.binary(5), nullable=False), + pa.field("some_decimal", pa.decimal128(10, 2), nullable=False), ] ), pa.schema( [ pa.field("some_int", pa.int32(), nullable=True), pa.field("some_string", pa.string(), nullable=False), + pa.field("some_fixed_binary", pa.binary(), nullable=False), + pa.field("some_decimal", pa.decimal128(10, 2), nullable=False), ] ), False, @@ -293,6 +297,7 @@ def test_delta_schema(): pa.schema( [ ("some_list", pa.list_(pa.string())), + ("some_fixed_list_int", pa.list_(pa.uint32(), 5)), ("some_list_binary", pa.list_(pa.binary())), ("some_string", pa.large_string()), ] @@ -300,6 +305,7 @@ def test_delta_schema(): pa.schema( [ ("some_list", pa.large_list(pa.large_string())), + ("some_fixed_list_int", pa.large_list(pa.int32())), ("some_list_binary", pa.large_list(pa.large_binary())), ("some_string", pa.large_string()), ]