Skip to content

Commit

Permalink
fix-schema-comparaison
Browse files Browse the repository at this point in the history
  • Loading branch information
Pierre Dubrulle committed Nov 13, 2023
1 parent 2b913b3 commit 446a2bb
Showing 1 changed file with 10 additions and 1 deletion.
11 changes: 10 additions & 1 deletion python/deltalake/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
Union,
)
from urllib.parse import unquote
from hashlib import md5

from deltalake.fs import DeltaStorageHandler

Expand Down Expand Up @@ -61,6 +62,10 @@ class AddAction:
data_change: bool
stats: str

def _hash_schema(schema: pa.Schema) -> str:
dict_schema = dict(zip(schema.names, [str(pa_types) for pa_types in schema.types]))
hash_dt_schema = md5(json.dumps(dict_schema, sort_keys=True).encode('utf-8')).hexdigest()
return hash_dt_schema

def write_deltalake(
table_or_uri: Union[str, Path, DeltaTable],
Expand Down Expand Up @@ -195,7 +200,11 @@ def write_deltalake(
partition_by = [partition_by]

if table: # already exists
if schema != table.schema().to_pyarrow(as_large_types=large_dtypes) and not (

hash_table_schema = _hash_schema(table.schema().to_pyarrow(as_large_types=large_dtypes))
hash_schema_provided = _hash_schema(schema)

if hash_schema_provided != hash_table_schema and not (
mode == "overwrite" and overwrite_schema
):
raise ValueError(
Expand Down

0 comments on commit 446a2bb

Please sign in to comment.