From 607a3af4f7d11102c81565c298c50c33e23a9df1 Mon Sep 17 00:00:00 2001 From: Dave Date: Tue, 23 Apr 2024 16:32:40 +0200 Subject: [PATCH] update tests --- tests/libs/pyarrow/test_pyarrow_normalizer.py | 16 +++++++++++----- tests/normalize/test_normalize.py | 16 ++++++++++++---- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/tests/libs/pyarrow/test_pyarrow_normalizer.py b/tests/libs/pyarrow/test_pyarrow_normalizer.py index 6059b97600..25871edd45 100644 --- a/tests/libs/pyarrow/test_pyarrow_normalizer.py +++ b/tests/libs/pyarrow/test_pyarrow_normalizer.py @@ -80,17 +80,23 @@ def test_field_normalization() -> None: assert _row_at_index(result, 0) == ["hello", 1] -def test_dlt_columns_not_added() -> None: +def test_default_dlt_columns_not_added() -> None: table = pa.Table.from_pylist( [ {"col1": 1}, ] ) - columns = [new_column("_dlt_something", "bigint"), new_column("col2", "text")] + columns = [ + new_column("_dlt_something", "bigint"), + new_column("_dlt_id", "text"), + new_column("_dlt_load_id", "text"), + new_column("col2", "text"), + new_column("col1", "text"), + ] result = _normalize(table, columns) - # no dlt columns - assert result.column_names == ["col2", "col1"] - assert _row_at_index(result, 0) == [None, 1] + # no dlt_id or dlt_load_id columns + assert result.column_names == ["_dlt_something", "col2", "col1"] + assert _row_at_index(result, 0) == [None, None, 1] @pytest.mark.skip(reason="Somehow this does not fail, should we add an exception??") diff --git a/tests/normalize/test_normalize.py b/tests/normalize/test_normalize.py index 4e5bc7dbaa..3891c667c3 100644 --- a/tests/normalize/test_normalize.py +++ b/tests/normalize/test_normalize.py @@ -711,7 +711,7 @@ def assert_timestamp_data_type(load_storage: LoadStorage, data_type: TDataType) assert event_schema.get_table_columns("event")["timestamp"]["data_type"] == data_type -def test_removal_of_normalizer_schema_section(raw_normalize: Normalize) -> None: +def test_removal_of_normalizer_schema_section_and_add_seen_data(raw_normalize: Normalize) -> None: extract_cases( raw_normalize, [ @@ -727,14 +727,22 @@ def test_removal_of_normalizer_schema_section(raw_normalize: Normalize) -> None: extracted_schema.tables["event__parse_data__intent_ranking"] = new_table( "event__parse_data__intent_ranking" ) + extracted_schema.tables["event__random_table"] = new_table("event__random_table") # add x-normalizer info (and other block to control) extracted_schema.tables["event"]["x-normalizer"] = {"evolve-columns-once": True} # type: ignore extracted_schema.tables["event"]["x-other-info"] = "blah" # type: ignore - extracted_schema.tables["event__parse_data__intent_ranking"]["x-normalizer"] = {} # type: ignore + extracted_schema.tables["event__parse_data__intent_ranking"]["x-normalizer"] = {"seen-data": True, "random-entry": 1234} # type: ignore + extracted_schema.tables["event__random_table"]["x-normalizer"] = {"evolve-columns-once": True} # type: ignore normalize_pending(raw_normalize, extracted_schema) schema = raw_normalize.schema_storage.load_schema("event") - assert "x-normalizer" not in schema.tables["event"] - assert "x-normalizer" not in schema.tables["event__parse_data__intent_ranking"] + # seen data gets added, schema settings get removed + assert schema.tables["event"]["x-normalizer"] == {"seen-data": True} # type: ignore + assert schema.tables["event__parse_data__intent_ranking"]["x-normalizer"] == { # type: ignore + "seen-data": True, + "random-entry": 1234, + } + # no data seen here, so seen-data is not set and evolve settings stays until first data is seen + assert schema.tables["event__random_table"]["x-normalizer"] == {"evolve-columns-once": True} # type: ignore assert "x-other-info" in schema.tables["event"]