From 7f2f0bc2691e50a1b1167e654a749e270ee967f3 Mon Sep 17 00:00:00 2001
From: jorgenherje
Date: Fri, 29 Nov 2024 14:53:20 +0100
Subject: [PATCH] Unit tests back-end

---
Review notes (free-form text between the three-dash line and the first
"diff --git" header; not part of the commit message):

 * Line structure restored: one mail-header / diff line per physical line.
   Hunk sizes (14, 118, 72) and the diffstat (11 + 118 + 72 = 201) match
   the hunks below; no added or context code token was changed.
 * Python indentation inside the hunks was reconstructed from syntax. The
   final assert of test_validate_summary_vector_table_pa_sumo_service is
   placed after the with-block, because code following the raising call
   inside a pytest.raises block never runs. Confirm against the commit.
 * Only the DATE type error reports the actual column type; the REAL and
   vector-column messages do not.
 * test_validate_summary_vector_table_pa_valid wraps the call in
   try/except + pytest.fail; an unexpected InvalidDataError would fail
   the test without that wrapper as well.

 .../utils/summary_vector_table_helpers.py     |  11 ++
 .../services/test_summary_delta_vectors.py    | 118 ++++++++++++++++++
 .../test_summary_vector_table_helpers.py      |  72 +++++++++++
 3 files changed, 201 insertions(+)
 create mode 100644 backend_py/primary/tests/unit/services/test_summary_delta_vectors.py
 create mode 100644 backend_py/primary/tests/unit/services/utils/test_summary_vector_table_helpers.py

diff --git a/backend_py/primary/primary/services/utils/summary_vector_table_helpers.py b/backend_py/primary/primary/services/utils/summary_vector_table_helpers.py
index 1a1bb9554..4dcc8e43e 100644
--- a/backend_py/primary/primary/services/utils/summary_vector_table_helpers.py
+++ b/backend_py/primary/primary/services/utils/summary_vector_table_helpers.py
@@ -18,3 +18,14 @@ def validate_summary_vector_table_pa(
     if actual_columns != expected_columns:
         unexpected_columns = actual_columns - expected_columns
         raise InvalidDataError(f"Unexpected columns in table {unexpected_columns}", service)
+
+    # Validate table column types
+
+    if vector_table.field("DATE").type != pa.timestamp("ms"):
+        raise InvalidDataError(
+            f'DATE column must be of type timestamp(ms), but got {vector_table.field("DATE").type}', service
+        )
+    if vector_table.field("REAL").type != pa.int16():
+        raise InvalidDataError("REAL column must be of type int16", service)
+    if vector_table.field(vector_name).type != pa.float32():
+        raise InvalidDataError(f"{vector_name} column must be of type float32", service)
diff --git a/backend_py/primary/tests/unit/services/test_summary_delta_vectors.py b/backend_py/primary/tests/unit/services/test_summary_delta_vectors.py
new file mode 100644
index 000000000..046f234fc
--- /dev/null
+++ b/backend_py/primary/tests/unit/services/test_summary_delta_vectors.py
@@ -0,0 +1,118 @@
+import pyarrow as pa
+
+from primary.services.summary_delta_vectors import create_delta_vector_table
+from primary.services.summary_delta_vectors import create_realization_delta_vector_list, RealizationDeltaVector
+
+
+VECTOR_TABLE_SCHEMA = pa.schema([("DATE", pa.timestamp("ms")), ("REAL", pa.int16()), ("vector", pa.float32())])
+
+
+def test_create_delta_vector_table():
+    # Create sample data for compare_vector_table
+    compare_data = {"DATE": [1, 2, 3, 4], "REAL": [1, 1, 2, 2], "vector": [10.0, 20.0, 30.0, 40.0]}
+    compare_vector_table = pa.table(compare_data, schema=VECTOR_TABLE_SCHEMA)
+
+    # Create sample data for reference_vector_table
+    reference_data = {"DATE": [1, 2, 3, 4], "REAL": [1, 1, 2, 2], "vector": [5.0, 15.0, 25.0, 35.0]}
+    reference_vector_table = pa.table(reference_data, schema=VECTOR_TABLE_SCHEMA)
+
+    # Expected delta values
+    expected_delta_data = {"DATE": [1, 2, 3, 4], "REAL": [1, 1, 2, 2], "vector": [5.0, 5.0, 5.0, 5.0]}
+    expected_delta_table = pa.table(expected_delta_data, schema=VECTOR_TABLE_SCHEMA)
+
+    # Call the function
+    result_table = create_delta_vector_table(compare_vector_table, reference_vector_table, "vector")
+
+    # Validate the result
+    assert result_table.equals(expected_delta_table)
+
+
+def test_create_delta_vector_table_with_missing_dates():
+    # Create sample data for compare_vector_table
+    compare_data = {"DATE": [1, 2, 4], "REAL": [1, 1, 2], "vector": [10.0, 20.0, 40.0]}
+    compare_vector_table = pa.table(compare_data, schema=VECTOR_TABLE_SCHEMA)
+
+    # Create sample data for reference_vector_table
+    reference_data = {"DATE": [1, 2, 3], "REAL": [1, 1, 2], "vector": [5.0, 15.0, 25.0]}
+    reference_vector_table = pa.table(reference_data, schema=VECTOR_TABLE_SCHEMA)
+
+    # Expected delta values
+    expected_delta_data = {"DATE": [1, 2], "REAL": [1, 1], "vector": [5.0, 5.0]}
+    expected_delta_table = pa.table(expected_delta_data, schema=VECTOR_TABLE_SCHEMA)
+
+    # Call the function
+    result_table = create_delta_vector_table(compare_vector_table, reference_vector_table, "vector")
+
+    # Validate the result
+    assert result_table.equals(expected_delta_table)
+
+
+def test_create_delta_vector_table_with_different_reals():
+    # Create sample data for compare_vector_table
+    compare_data = {"DATE": [1, 2, 3, 4], "REAL": [1, 1, 2, 3], "vector": [10.0, 20.0, 30.0, 40.0]}
+    compare_vector_table = pa.table(compare_data, schema=VECTOR_TABLE_SCHEMA)
+
+    # Create sample data for reference_vector_table
+    reference_data = {"DATE": [1, 2, 3, 4], "REAL": [1, 1, 2, 2], "vector": [5.0, 15.0, 25.0, 35.0]}
+    reference_vector_table = pa.table(reference_data, schema=VECTOR_TABLE_SCHEMA)
+
+    # Expected delta values
+    expected_delta_data = {"DATE": [1, 2, 3], "REAL": [1, 1, 2], "vector": [5.0, 5.0, 5.0]}
+    expected_delta_table = pa.table(expected_delta_data, schema=VECTOR_TABLE_SCHEMA)
+
+    # Call the function
+    result_table = create_delta_vector_table(compare_vector_table, reference_vector_table, "vector")
+
+    # Validate the result
+    assert result_table.equals(expected_delta_table)
+
+
+def test_create_realization_delta_vector_list():
+    # Create sample data for delta_vector_table
+    delta_data = {"DATE": [1, 2, 3, 4], "REAL": [1, 1, 2, 2], "vector": [5.0, 10.0, 15.0, 20.0]}
+    delta_vector_table = pa.table(delta_data, schema=VECTOR_TABLE_SCHEMA)
+
+    # Expected result
+    expected_result = [
+        RealizationDeltaVector(realization=1, timestamps_utc_ms=[1, 2], values=[5.0, 10.0], is_rate=True, unit="unit"),
+        RealizationDeltaVector(realization=2, timestamps_utc_ms=[3, 4], values=[15.0, 20.0], is_rate=True, unit="unit"),
+    ]
+
+    # Call the function
+    result = create_realization_delta_vector_list(delta_vector_table, "vector", is_rate=True, unit="unit")
+
+    # Validate the result
+    assert result == expected_result
+
+
+def test_create_realization_delta_vector_list_with_single_real():
+    # Create sample data for delta_vector_table
+    delta_data = {"DATE": [1, 2, 3, 4], "REAL": [1, 1, 1, 1], "vector": [5.0, 10.0, 15.0, 20.0]}
+    delta_vector_table = pa.table(delta_data, schema=VECTOR_TABLE_SCHEMA)
+
+    # Expected result
+    expected_result = [
+        RealizationDeltaVector(
+            realization=1, timestamps_utc_ms=[1, 2, 3, 4], values=[5.0, 10.0, 15.0, 20.0], is_rate=False, unit="unit"
+        )
+    ]
+
+    # Call the function
+    result = create_realization_delta_vector_list(delta_vector_table, "vector", is_rate=False, unit="unit")
+
+    # Validate the result
+    assert result == expected_result
+
+
+def test_create_realization_delta_vector_list_with_empty_table():
+    # Create an empty delta_vector_table
+    delta_vector_table = pa.table({"DATE": [], "REAL": [], "vector": []}, schema=VECTOR_TABLE_SCHEMA)
+
+    # Expected result
+    expected_result = []
+
+    # Call the function
+    result = create_realization_delta_vector_list(delta_vector_table, "vector", is_rate=True, unit="unit")
+
+    # Validate the result
+    assert result == expected_result
diff --git a/backend_py/primary/tests/unit/services/utils/test_summary_vector_table_helpers.py b/backend_py/primary/tests/unit/services/utils/test_summary_vector_table_helpers.py
new file mode 100644
index 000000000..c0fa35b82
--- /dev/null
+++ b/backend_py/primary/tests/unit/services/utils/test_summary_vector_table_helpers.py
@@ -0,0 +1,72 @@
+import pytest
+import pyarrow as pa
+from primary.services.service_exceptions import InvalidDataError, Service
+from primary.services.utils.summary_vector_table_helpers import validate_summary_vector_table_pa
+
+
+def test_validate_summary_vector_table_pa_valid():
+    vector_name = "VECTOR"
+    data = {"DATE": [1, 2, 3], "REAL": [4, 5, 6], vector_name: [7.0, 8.0, 9.0]}
+    schema = pa.schema([("DATE", pa.timestamp("ms")), ("REAL", pa.int16()), (vector_name, pa.float32())])
+    table = pa.Table.from_pydict(data, schema=schema)
+    try:
+        validate_summary_vector_table_pa(table, vector_name)
+    except InvalidDataError:
+        pytest.fail("validate_summary_vector_table_pa raised InvalidDataError unexpectedly!")
+
+
+def test_validate_summary_vector_table_pa_missing_column():
+    vector_name = "VECTOR"
+    data = {"DATE": [1, 2, 3], "REAL": [4, 5, 6]}
+    schema = pa.schema([("DATE", pa.timestamp("ms")), ("REAL", pa.int16())])
+    table = pa.Table.from_pydict(data, schema=schema)
+    with pytest.raises(InvalidDataError):
+        validate_summary_vector_table_pa(table, vector_name)
+
+
+def test_validate_summary_vector_table_pa_unexpected_column():
+    vector_name = "VECTOR"
+    data = {"DATE": [1, 2, 3], "REAL": [4, 5, 6], vector_name: [7.0, 8.0, 9.0], "EXTRA": [10.0, 11.0, 12.0]}
+    schema = pa.schema(
+        [("DATE", pa.timestamp("ms")), ("REAL", pa.int16()), (vector_name, pa.float32()), ("EXTRA", pa.float32())]
+    )
+    table = pa.Table.from_pydict(data, schema=schema)
+    with pytest.raises(InvalidDataError):
+        validate_summary_vector_table_pa(table, vector_name)
+
+
+def test_validate_summary_vector_table_pa_invalid_date_type():
+    vector_name = "VECTOR"
+    data = {"DATE": [1, 2, 3], "REAL": [4, 5, 6], vector_name: [7.0, 8.0, 9.0]}
+    schema = pa.schema([("DATE", pa.int32()), ("REAL", pa.int16()), (vector_name, pa.float32())])
+    table = pa.Table.from_pydict(data, schema=schema)
+    with pytest.raises(InvalidDataError):
+        validate_summary_vector_table_pa(table, vector_name)
+
+
+def test_validate_summary_vector_table_pa_invalid_real_type():
+    vector_name = "VECTOR"
+    data = {"DATE": [1, 2, 3], "REAL": [4.0, 5.0, 6.0], vector_name: [7.0, 8.0, 9.0]}
+    schema = pa.schema([("DATE", pa.timestamp("ms")), ("REAL", pa.float32()), (vector_name, pa.float32())])
+    table = pa.Table.from_pydict(data, schema=schema)
+    with pytest.raises(InvalidDataError):
+        validate_summary_vector_table_pa(table, vector_name)
+
+
+def test_validate_summary_vector_table_pa_invalid_vector_type():
+    vector_name = "VECTOR"
+    data = {"DATE": [1, 2, 3], "REAL": [4, 5, 6], vector_name: [7, 8, 9]}
+    schema = pa.schema([("DATE", pa.timestamp("ms")), ("REAL", pa.int16()), (vector_name, pa.int32())])
+    table = pa.Table.from_pydict(data, schema=schema)
+    with pytest.raises(InvalidDataError):
+        validate_summary_vector_table_pa(table, vector_name)
+
+
+def test_validate_summary_vector_table_pa_sumo_service():
+    vector_name = "VECTOR"
+    data = {"DATE": [1, 2, 3], "REAL": [4, 5, 6]}
+    schema = pa.schema([("DATE", pa.timestamp("ms")), ("REAL", pa.int16())])
+    table = pa.Table.from_pydict(data, schema=schema)
+    with pytest.raises(InvalidDataError) as excinfo:
+        validate_summary_vector_table_pa(table, vector_name, Service.SUMO)
+    assert excinfo.value.service == Service.SUMO