Skip to content

Commit

Permalink
Refactor util in back-end
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgenherje committed Nov 27, 2024
1 parent 401874b commit 281b6cb
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 24 deletions.
22 changes: 4 additions & 18 deletions backend_py/primary/primary/services/summary_delta_vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
import pyarrow.compute as pc
import numpy as np


from primary.services.service_exceptions import InvalidDataError, Service
from .utils.summary_vector_table_helpers import validate_summary_vector_table


@dataclass
Expand All @@ -17,18 +16,6 @@ class RealizationDeltaVector:
unit: str


def _is_valid_vector_table(vector_table: pa.Table, vector_name: str) -> bool:
    """
    Check if the vector table is valid.

    Expect the table to contain exactly the following columns: DATE, REAL, vector_name.
    The column names are compared as a set, so column order does not matter.

    Returns True if the table is valid.
    Raises InvalidDataError (with Service.GENERAL) if the table is missing expected columns
    or contains columns other than the expected ones.
    """
    expected_columns = {"DATE", "REAL", vector_name}
    actual_columns = set(vector_table.column_names)
    if actual_columns == expected_columns:
        # Honor the declared bool return type instead of implicitly returning None
        return True

    unexpected_columns = actual_columns - expected_columns
    missing_columns = expected_columns - actual_columns

    # Report both directions of the mismatch. Reporting only the unexpected columns yields the
    # misleading message "Unexpected columns in table set()" when a column is missing.
    problems = []
    if unexpected_columns:
        problems.append(f"Unexpected columns in table {unexpected_columns}")
    if missing_columns:
        problems.append(f"Missing columns in table {missing_columns}")
    raise InvalidDataError("; ".join(problems), Service.GENERAL)


def create_delta_vector_table(
first_vector_table: pa.Table, second_vector_table: pa.Table, vector_name: str
) -> pa.Table:
Expand All @@ -42,8 +29,8 @@ def create_delta_vector_table(
`Note`: Pre-processing of DATE-columns, e.g. resampling, should be done before calling this function.
"""
_is_valid_vector_table(first_vector_table, vector_name)
_is_valid_vector_table(second_vector_table, vector_name)
validate_summary_vector_table(first_vector_table, vector_name)
validate_summary_vector_table(second_vector_table, vector_name)

joined_vector_table = first_vector_table.join(
second_vector_table, keys=["DATE", "REAL"], join_type="inner", right_suffix="_second"
Expand All @@ -52,7 +39,6 @@ def create_delta_vector_table(
joined_vector_table.column(vector_name), joined_vector_table.column(f"{vector_name}_second")
)

# TODO: Should a schema be defined for the delta vector?
delta_table = pa.table(
{
"DATE": joined_vector_table.column("DATE"),
Expand All @@ -70,7 +56,7 @@ def create_realization_delta_vector_list(
"""
Create a list of RealizationDeltaVector from the delta vector table.
"""
_is_valid_vector_table(delta_vector_table, vector_name)
validate_summary_vector_table(delta_vector_table, vector_name)

real_arr_np = delta_vector_table.column("REAL").to_numpy()
unique_reals, first_occurrence_idx, real_counts = np.unique(real_arr_np, return_index=True, return_counts=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,12 @@
from fmu.sumo.explorer.objects import TableCollection, Table
from webviz_pkg.core_utils.perf_timer import PerfTimer

from primary.services.utils.arrow_helpers import sort_table_on_real_then_date, is_date_column_monotonically_increasing
from primary.services.utils.arrow_helpers import find_first_non_increasing_date_pair
from primary.services.utils.arrow_helpers import (
find_first_non_increasing_date_pair,
sort_table_on_real_then_date,
is_date_column_monotonically_increasing,
)
from primary.services.utils.summary_vector_table_helpers import validate_summary_vector_table
from primary.services.service_exceptions import (
Service,
NoDataError,
Expand Down Expand Up @@ -160,10 +164,7 @@ async def get_vector_async(
table, vector_metadata = await self.get_vector_table_async(vector_name, resampling_frequency, realizations)

# Verify that columns are as we expect
expected_columns = {"DATE", "REAL", vector_name}
if set(table.column_names) != expected_columns:
unexpected_columns = set(table.column_names) - expected_columns
raise InvalidDataError(f"Unexpected columns in table {unexpected_columns}", Service.SUMO)
validate_summary_vector_table(table, vector_name, Service.SUMO)

real_arr_np = table.column("REAL").to_numpy()
unique_reals, first_occurrence_idx, real_counts = np.unique(real_arr_np, return_index=True, return_counts=True)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import pyarrow as pa

from primary.services.service_exceptions import InvalidDataError, Service


def validate_summary_vector_table(vector_table: pa.Table, vector_name: str, service: Service = Service.GENERAL) -> None:
    """
    Validate that a single-vector summary table has exactly the expected columns.

    The pyarrow table must contain exactly the columns DATE, REAL and vector_name.
    The column names are compared as a set, so column order does not matter.

    The `service` argument is forwarded to the raised exception and defaults to Service.GENERAL.

    Raises InvalidDataError if the table is missing expected columns or contains columns
    other than the expected ones.
    """
    expected_columns = {"DATE", "REAL", vector_name}
    actual_columns = set(vector_table.column_names)
    if actual_columns == expected_columns:
        return

    unexpected_columns = actual_columns - expected_columns
    missing_columns = expected_columns - actual_columns

    # Report both directions of the mismatch. Reporting only the unexpected columns yields the
    # misleading message "Unexpected columns in table set()" when a column is missing.
    problems = []
    if unexpected_columns:
        problems.append(f"Unexpected columns in table {unexpected_columns}")
    if missing_columns:
        problems.append(f"Missing columns in table {missing_columns}")
    raise InvalidDataError("; ".join(problems), service)

0 comments on commit 281b6cb

Please sign in to comment.