From bd46a0fdcb8ed4fb9cec6731b16c2e6e45501cd6 Mon Sep 17 00:00:00 2001
From: Dave
Date: Thu, 18 Jan 2024 16:40:48 +0100
Subject: [PATCH] add pyarrow normalizer tests

---
 tests/libs/pyarrow/test_pyarrow_normalizer.py | 49 ++++++++++++++++++-
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/tests/libs/pyarrow/test_pyarrow_normalizer.py b/tests/libs/pyarrow/test_pyarrow_normalizer.py
index 1f4eb5f9b5..97a3c21d23 100644
--- a/tests/libs/pyarrow/test_pyarrow_normalizer.py
+++ b/tests/libs/pyarrow/test_pyarrow_normalizer.py
@@ -1,8 +1,9 @@
 from typing import List, Any
 
 import pyarrow as pa
+import pytest
 
-from dlt.common.libs.pyarrow import normalize_py_arrow_schema
+from dlt.common.libs.pyarrow import normalize_py_arrow_schema, NameNormalizationClash
 from dlt.common.normalizers import explicit_normalizers, import_normalizers
 from dlt.common.schema.utils import new_column, TColumnSchema
 from dlt.common.destination import DestinationCapabilitiesContext
@@ -56,3 +57,49 @@ def test_pyarrow_add_empty_types() -> None:
     assert result.column_names == ["col1", "col2"]
     assert _row_at_index(result, 0) == [1, None]
     assert result.schema.field(1).type == "string"
+
+
+def test_field_normalization_clash() -> None:
+    table = pa.Table.from_pylist(
+        [
+            {"col^New": "hello", "col_new": 1},  # both names are expected to normalize to "col_new"
+        ]
+    )
+    with pytest.raises(NameNormalizationClash):  # colliding normalized names must be rejected
+        _normalize(table, [])
+
+
+def test_field_normalization() -> None:
+    table = pa.Table.from_pylist(
+        [
+            {"col^New": "hello", "col2": 1},
+        ]
+    )
+    result = _normalize(table, [])  # no extra schema columns are passed
+    assert result.column_names == ["col_new", "col2"]  # "col^New" is renamed, "col2" is unchanged
+    assert _row_at_index(result, 0) == ["hello", 1]  # row values are carried over as-is
+
+
+def test_dlt_columns_not_added() -> None:
+    table = pa.Table.from_pylist(
+        [
+            {"col1": 1},
+        ]
+    )
+    columns = [new_column("_dlt_something", "bigint"), new_column("col2", "text")]
+    result = _normalize(table, columns)
+    # no dlt columns
+    assert result.column_names == ["col2", "col1"]  # col2 is added; "_dlt_something" is not
+    assert _row_at_index(result, 0) == [None, 1]  # the added col2 holds no value for the row
+
+
+@pytest.mark.skip(reason="Somehow this does not fail, should we add an exception??")
+def test_fails_if_adding_non_nullable_column() -> None:  # skipped: see the reason on the marker
+    table = pa.Table.from_pylist(
+        [
+            {"col1": 1},  # the data only carries col1
+        ]
+    )
+    columns = [new_column("col1", "bigint"), new_column("col2", "text", nullable=False)]
+    with pytest.raises(Exception):  # NOTE(review): broad match; narrow once an error type exists
+        _normalize(table, columns)  # col2 is declared nullable=False but is absent from the table