Skip to content

Commit

Permalink
make duckdb handle iceberg table with nested types
Browse files Browse the repository at this point in the history
  • Loading branch information
jorritsandbrink committed Dec 12, 2024
1 parent 4e5a240 commit 865e0e9
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 2 deletions.
4 changes: 3 additions & 1 deletion dlt/destinations/impl/filesystem/sql_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,9 @@ def create_views_for_tables(self, tables: Dict[str, str]) -> None:
self._setup_iceberg(self._conn)
metadata_path = f"{resolved_folder}/metadata"
last_metadata_file = _get_last_metadata_file(metadata_path, self.fs_client)
from_statement = f"iceberg_scan('{last_metadata_file}')"
# skip schema inference to make nested data types work
# https://github.com/duckdb/duckdb_iceberg/issues/47
from_statement = f"iceberg_scan('{last_metadata_file}', skip_schema_inference=True)"
elif first_file_type == "parquet":
from_statement = f"read_parquet([{resolved_files_string}])"
elif first_file_type == "jsonl":
Expand Down
10 changes: 9 additions & 1 deletion tests/load/filesystem/test_sql_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
)
from dlt.destinations import filesystem
from tests.utils import TEST_STORAGE_ROOT
from tests.cases import arrow_table_all_data_types
from dlt.destinations.exceptions import DatabaseUndefinedRelation


Expand Down Expand Up @@ -81,7 +82,11 @@ def double_items():
for i in range(total_records)
]

return [items, double_items]
@dlt.resource(table_format=table_format)
def arrow_all_types():
yield arrow_table_all_data_types("arrow-table")[0]

return [items, double_items, arrow_all_types]

# run source
pipeline.run(source(), loader_file_format=destination_config.file_format)
Expand All @@ -98,6 +103,9 @@ def double_items():

# check we can create new tables from the views
with pipeline.sql_client() as c:
# check if all data types are handled properly
c.execute_sql("SELECT * FROM arrow_all_types;")

c.execute_sql(
"CREATE TABLE items_joined AS (SELECT i.id, di.double_id FROM items as i JOIN"
" double_items as di ON (i.id = di.id));"
Expand Down

0 comments on commit 865e0e9

Please sign in to comment.