From 58c3968cd13b1e60b8b0b22bdcc6e1ca91294503 Mon Sep 17 00:00:00 2001
From: Steinthor Palsson
Date: Wed, 8 Nov 2023 18:07:13 -0500
Subject: [PATCH] Add passing test loading empty rows

---
 tests/pipeline/test_pipeline.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py
index 917dac75c4..560a683709 100644
--- a/tests/pipeline/test_pipeline.py
+++ b/tests/pipeline/test_pipeline.py
@@ -1201,4 +1201,21 @@ def nested_resource():
     # print(pipeline.default_schema.to_pretty_yaml())
     assert pipeline.default_schema.get_table("flattened_dict__values")["columns"]["value__timestamp_value"]["data_type"] == "timestamp"
     # make sure data is there
-    assert pipeline.last_trace.last_normalize_info.row_counts["flattened_dict__values"] == 4
\ No newline at end of file
+    assert pipeline.last_trace.last_normalize_info.row_counts["flattened_dict__values"] == 4
+
+
+def test_empty_rows_are_included() -> None:
+    """Empty rows where all values are `None` or empty dicts
+    create rows in the dataset with `NULL` in all columns
+    """
+    pipeline = dlt.pipeline(destination='duckdb')
+
+    pipeline.run(iter([{}, {}, {}]), table_name="empty_rows")
+    pipeline.run(iter([{"a": 1}, {}, {}]), table_name="empty_rows")
+    pipeline.run(iter([{"a": None}, {}]), table_name="empty_rows")
+
+    with pipeline.sql_client() as client:
+        rows = client.execute_sql("SELECT a FROM empty_rows ORDER BY a")
+
+    values = [r[0] for r in rows]
+    assert values == [1, None, None, None, None, None, None, None]