From 865e0e974268654c98a60c7496a109e69868f7de Mon Sep 17 00:00:00 2001
From: Jorrit Sandbrink <jorritsandbrink@proton.me>
Date: Thu, 12 Dec 2024 11:13:58 +0400
Subject: [PATCH] make duckdb handle iceberg table with nested types

---
 dlt/destinations/impl/filesystem/sql_client.py |  4 +++-
 tests/load/filesystem/test_sql_client.py       | 10 +++++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/dlt/destinations/impl/filesystem/sql_client.py b/dlt/destinations/impl/filesystem/sql_client.py
index d39f4c3431..dd1fc6a330 100644
--- a/dlt/destinations/impl/filesystem/sql_client.py
+++ b/dlt/destinations/impl/filesystem/sql_client.py
@@ -266,7 +266,9 @@ def create_views_for_tables(self, tables: Dict[str, str]) -> None:
                 self._setup_iceberg(self._conn)
                 metadata_path = f"{resolved_folder}/metadata"
                 last_metadata_file = _get_last_metadata_file(metadata_path, self.fs_client)
-                from_statement = f"iceberg_scan('{last_metadata_file}')"
+                # skip schema inference to make nested data types work
+                # https://github.com/duckdb/duckdb_iceberg/issues/47
+                from_statement = f"iceberg_scan('{last_metadata_file}', skip_schema_inference=True)"
             elif first_file_type == "parquet":
                 from_statement = f"read_parquet([{resolved_files_string}])"
             elif first_file_type == "jsonl":
diff --git a/tests/load/filesystem/test_sql_client.py b/tests/load/filesystem/test_sql_client.py
index a73b0f7e31..1d5eaa9984 100644
--- a/tests/load/filesystem/test_sql_client.py
+++ b/tests/load/filesystem/test_sql_client.py
@@ -22,6 +22,7 @@
 )
 from dlt.destinations import filesystem
 from tests.utils import TEST_STORAGE_ROOT
+from tests.cases import arrow_table_all_data_types
 from dlt.destinations.exceptions import DatabaseUndefinedRelation
 
 
@@ -81,7 +82,11 @@ def double_items():
                 for i in range(total_records)
             ]
 
-        return [items, double_items]
+        @dlt.resource(table_format=table_format)
+        def arrow_all_types():
+            yield arrow_table_all_data_types("arrow-table")[0]
+
+        return [items, double_items, arrow_all_types]
 
     # run source
     pipeline.run(source(), loader_file_format=destination_config.file_format)
@@ -98,6 +103,9 @@ def double_items():
 
     # check we can create new tables from the views
     with pipeline.sql_client() as c:
+        # check if all data types are handled properly
+        c.execute_sql("SELECT * FROM arrow_all_types;")
+
         c.execute_sql(
             "CREATE TABLE items_joined AS (SELECT i.id, di.double_id FROM items as i JOIN"
             " double_items as di ON (i.id = di.id));"