Skip to content

Commit

Permalink
update tests
Browse files: browse the repository at this point in the history
  • Loading branch information
sh-rp committed Apr 23, 2024
1 parent e4c8684 commit 607a3af
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 9 deletions.
16 changes: 11 additions & 5 deletions tests/libs/pyarrow/test_pyarrow_normalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,17 +80,23 @@ def test_field_normalization() -> None:
assert _row_at_index(result, 0) == ["hello", 1]


def test_dlt_columns_not_added() -> None:
def test_default_dlt_columns_not_added() -> None:
table = pa.Table.from_pylist(
[
{"col1": 1},
]
)
columns = [new_column("_dlt_something", "bigint"), new_column("col2", "text")]
columns = [
new_column("_dlt_something", "bigint"),
new_column("_dlt_id", "text"),
new_column("_dlt_load_id", "text"),
new_column("col2", "text"),
new_column("col1", "text"),
]
result = _normalize(table, columns)
# no dlt columns
assert result.column_names == ["col2", "col1"]
assert _row_at_index(result, 0) == [None, 1]
# no _dlt_id or _dlt_load_id columns
assert result.column_names == ["_dlt_something", "col2", "col1"]
assert _row_at_index(result, 0) == [None, None, 1]


@pytest.mark.skip(reason="Somehow this does not fail, should we add an exception??")
Expand Down
16 changes: 12 additions & 4 deletions tests/normalize/test_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -711,7 +711,7 @@ def assert_timestamp_data_type(load_storage: LoadStorage, data_type: TDataType)
assert event_schema.get_table_columns("event")["timestamp"]["data_type"] == data_type


def test_removal_of_normalizer_schema_section(raw_normalize: Normalize) -> None:
def test_removal_of_normalizer_schema_section_and_add_seen_data(raw_normalize: Normalize) -> None:
extract_cases(
raw_normalize,
[
Expand All @@ -727,14 +727,22 @@ def test_removal_of_normalizer_schema_section(raw_normalize: Normalize) -> None:
extracted_schema.tables["event__parse_data__intent_ranking"] = new_table(
"event__parse_data__intent_ranking"
)
extracted_schema.tables["event__random_table"] = new_table("event__random_table")

# add x-normalizer info (and another block to serve as a control)
extracted_schema.tables["event"]["x-normalizer"] = {"evolve-columns-once": True} # type: ignore
extracted_schema.tables["event"]["x-other-info"] = "blah" # type: ignore
extracted_schema.tables["event__parse_data__intent_ranking"]["x-normalizer"] = {} # type: ignore
extracted_schema.tables["event__parse_data__intent_ranking"]["x-normalizer"] = {"seen-data": True, "random-entry": 1234} # type: ignore
extracted_schema.tables["event__random_table"]["x-normalizer"] = {"evolve-columns-once": True} # type: ignore

normalize_pending(raw_normalize, extracted_schema)
schema = raw_normalize.schema_storage.load_schema("event")
assert "x-normalizer" not in schema.tables["event"]
assert "x-normalizer" not in schema.tables["event__parse_data__intent_ranking"]
# seen data gets added, schema settings get removed
assert schema.tables["event"]["x-normalizer"] == {"seen-data": True} # type: ignore
assert schema.tables["event__parse_data__intent_ranking"]["x-normalizer"] == { # type: ignore
"seen-data": True,
"random-entry": 1234,
}
# no data seen here, so seen-data is not set and the evolve setting stays until the first data is seen
assert schema.tables["event__random_table"]["x-normalizer"] == {"evolve-columns-once": True} # type: ignore
assert "x-other-info" in schema.tables["event"]

0 comments on commit 607a3af

Please sign in to comment.