Skip to content

Commit

Permalink
Merge branch 'develop' into issue1303-refine-test-sequencing-and-vers…
Browse files Browse the repository at this point in the history
…ions
  • Loading branch information
nj1973 authored Nov 7, 2024
2 parents 6520ed3 + 783d1b0 commit 4e6f557
Show file tree
Hide file tree
Showing 9 changed files with 160 additions and 96 deletions.
39 changes: 25 additions & 14 deletions data_validation/cli_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,12 @@ def _add_common_arguments(
# TODO: update if we start to support other statuses
help="Comma separated list of statuses to filter the validation results. Supported statuses are (success, fail). If no list is provided, all statuses are returned",
)
optional_arguments.add_argument(
"--run-id",
"-rid",
default=None,
help="Set a string for the run_id, if None is input then a randomly generated UUID will be used, which is the default behaviour",
)


def _check_positive(value: int) -> int:
Expand Down Expand Up @@ -1407,8 +1413,8 @@ def cols_from_arg(concat_arg: str, client, table_obj: dict, query_str: str) -> l
result_handler_config = None

# Set filter_config and threshold. Not supported in case of schema validation
filter_config = getattr(args, "filters", [])
threshold = getattr(args, "threshold", 0.0)
filter_config = getattr(args, consts.CONFIG_FILTERS, [])
threshold = getattr(args, consts.CONFIG_THRESHOLD, 0.0)

# Get labels
if args.labels is None:
Expand All @@ -1425,8 +1431,8 @@ def cols_from_arg(concat_arg: str, client, table_obj: dict, query_str: str) -> l
format = args.format if args.format else "table"

# Get random row arguments. Only in row validations these attributes can be present.
use_random_rows = getattr(args, "use_random_row", False)
random_row_batch_size = getattr(args, "random_row_batch_size", None)
use_random_rows = getattr(args, consts.CONFIG_USE_RANDOM_ROWS, False)
random_row_batch_size = getattr(args, consts.CONFIG_RANDOM_ROW_BATCH_SIZE, None)

# Get table list. Not supported in case of custom query validation
is_filesystem = source_client._source_type == "FileSystem"
Expand Down Expand Up @@ -1459,23 +1465,28 @@ def cols_from_arg(concat_arg: str, client, table_obj: dict, query_str: str) -> l
for table_obj in tables_list:
pre_build_configs = {
"config_type": config_type,
"source_conn_name": args.source_conn,
"target_conn_name": args.target_conn,
consts.CONFIG_SOURCE_CONN_NAME: args.source_conn,
consts.CONFIG_TARGET_CONN_NAME: args.target_conn,
"table_obj": table_obj,
"labels": labels,
"threshold": threshold,
"format": format,
"use_random_rows": use_random_rows,
"random_row_batch_size": random_row_batch_size,
consts.CONFIG_LABELS: labels,
consts.CONFIG_THRESHOLD: threshold,
consts.CONFIG_FORMAT: format,
consts.CONFIG_USE_RANDOM_ROWS: use_random_rows,
consts.CONFIG_RANDOM_ROW_BATCH_SIZE: random_row_batch_size,
"source_client": source_client,
"target_client": target_client,
"result_handler_config": result_handler_config,
"filter_config": filter_config,
"filter_status": filter_status,
"trim_string_pks": getattr(args, "trim_string_pks", False),
"case_insensitive_match": getattr(args, "case_insensitive_match", False),
consts.CONFIG_FILTER_STATUS: filter_status,
consts.CONFIG_TRIM_STRING_PKS: getattr(
args, consts.CONFIG_TRIM_STRING_PKS, False
),
consts.CONFIG_CASE_INSENSITIVE_MATCH: getattr(
args, consts.CONFIG_CASE_INSENSITIVE_MATCH, False
),
consts.CONFIG_ROW_CONCAT: getattr(args, consts.CONFIG_ROW_CONCAT, None),
consts.CONFIG_ROW_HASH: getattr(args, consts.CONFIG_ROW_HASH, None),
consts.CONFIG_RUN_ID: getattr(args, consts.CONFIG_RUN_ID, None),
"verbose": args.verbose,
}
if (
Expand Down
7 changes: 7 additions & 0 deletions data_validation/config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,11 @@ def hash(self):
"""Return field from Config"""
return self._config.get(consts.CONFIG_ROW_HASH, [])

@property
def run_id(self):
    """Return the run_id entry of the config, or None when it is absent."""
    return self._config.get(consts.CONFIG_RUN_ID)

@property
def filters(self):
"""Return Filters from Config"""
Expand Down Expand Up @@ -504,6 +509,7 @@ def build_config_manager(
case_insensitive_match=None,
concat=None,
hash=None,
run_id=None,
verbose=False,
):
if isinstance(filter_config, dict):
Expand Down Expand Up @@ -536,6 +542,7 @@ def build_config_manager(
consts.CONFIG_CASE_INSENSITIVE_MATCH: case_insensitive_match,
consts.CONFIG_ROW_CONCAT: concat,
consts.CONFIG_ROW_HASH: hash,
consts.CONFIG_RUN_ID: run_id,
}

return ConfigManager(
Expand Down
1 change: 1 addition & 0 deletions data_validation/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
CONFIG_CASE_INSENSITIVE_MATCH = "case_insensitive_match"
CONFIG_ROW_CONCAT = "concat"
CONFIG_ROW_HASH = "hash"
CONFIG_RUN_ID = "run_id"
CONFIG_SOURCE_COLUMN = "source_column"
CONFIG_TARGET_COLUMN = "target_column"
CONFIG_THRESHOLD = "threshold"
Expand Down
5 changes: 4 additions & 1 deletion data_validation/data_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
import logging
import warnings
from concurrent.futures import ThreadPoolExecutor

import ibis.backends.pandas
import pandas
import uuid

from data_validation import combiner, consts, metadata
from data_validation.config_manager import ConfigManager
Expand Down Expand Up @@ -63,6 +63,9 @@ def __init__(
self.run_metadata = metadata.RunMetadata()
self.run_metadata.labels = self.config_manager.labels

# Use a generated uuid for the run_id if None was supplied via config
self.run_metadata.run_id = self.config_manager.run_id or str(uuid.uuid4())

# Initialize Validation Builder if None was supplied
self.validation_builder = validation_builder or ValidationBuilder(
self.config_manager
Expand Down
4 changes: 1 addition & 3 deletions data_validation/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,9 @@

"""Metadata classes with data about the validation run."""


import dataclasses
import datetime
import typing
import uuid

from data_validation import consts

Expand Down Expand Up @@ -64,7 +62,7 @@ def get_column_name(self, result_type: str) -> str:

@dataclasses.dataclass
class RunMetadata(object):
run_id: str = dataclasses.field(default_factory=lambda: str(uuid.uuid4()))
run_id: str = dataclasses.field(default_factory=str)
validations: dict = dataclasses.field(default_factory=dict)
labels: list = dataclasses.field(default_factory=list)
start_time: typing.Optional[datetime.datetime] = dataclasses.field(
Expand Down
11 changes: 6 additions & 5 deletions tests/system/data_sources/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@
CLI_CONFIG_FILE_GCS,
]

EXPECTED_NUM_YAML_LINES = 25 # Expected number of lines for validation config generated by CLI_STORE_COLUMN_ARGS
EXPECTED_NUM_YAML_LINES = 26 # Expected number of lines for validation config generated by CLI_STORE_COLUMN_ARGS
CLI_CONFIGS_RUN_ARGS_LOCAL = ["configs", "run", "--config-file", CLI_CONFIG_FILE]
CLI_CONFIGS_RUN_ARGS_GCS = ["configs", "run", "--config-file", CLI_CONFIG_FILE_GCS]

Expand All @@ -298,7 +298,7 @@
"--config-file",
CLI_CONFIG_FILE,
]
EXPECTED_NUM_YAML_LINES_WILDCARD = 155
EXPECTED_NUM_YAML_LINES_WILDCARD = 156

CLI_TIMESTAMP_MIN_MAX_ARGS = [
"validate",
Expand All @@ -316,7 +316,7 @@
"--config-file",
CLI_CONFIG_FILE,
]
EXPECTED_NUM_YAML_LINES_TIMESTAMP_MIN_MAX = 33
EXPECTED_NUM_YAML_LINES_TIMESTAMP_MIN_MAX = 34

CLI_TIMESTAMP_SUM_AVG_BITXOR_ARGS = [
"validate",
Expand All @@ -336,7 +336,7 @@
"--config-file",
CLI_CONFIG_FILE,
]
EXPECTED_NUM_YAML_LINES_TIMESTAMP_SUM_AVG_BITXOR = 53
EXPECTED_NUM_YAML_LINES_TIMESTAMP_SUM_AVG_BITXOR = 54

CLI_BQ_DATETIME_SUM_AVG_BITXOR_ARGS = [
"validate",
Expand All @@ -356,7 +356,7 @@
"--config-file",
CLI_CONFIG_FILE,
]
EXPECTED_NUM_YAML_LINES_BQ_DATETIME_SUM_AVG_BITXOR = 53
EXPECTED_NUM_YAML_LINES_BQ_DATETIME_SUM_AVG_BITXOR = 54

CLI_FIND_TABLES_ARGS = [
"find-tables",
Expand Down Expand Up @@ -392,6 +392,7 @@
consts.CONFIG_THRESHOLD: 0.0,
consts.CONFIG_FORMAT: "table",
consts.CONFIG_RESULT_HANDLER: None,
consts.CONFIG_RUN_ID: None,
consts.CONFIG_FILTERS: [],
consts.CONFIG_USE_RANDOM_ROWS: False,
consts.CONFIG_RANDOM_ROW_BATCH_SIZE: None,
Expand Down
3 changes: 3 additions & 0 deletions tests/unit/test_cli_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"config_file": "example_test.yaml",
"labels": "name=test_run",
"threshold": 30.0,
"run_id": "aa000000-0000-0000-0000-000000000001",
"verbose": True,
}

Expand Down Expand Up @@ -98,6 +99,7 @@
"type": "count",
}
],
"run_id": "aa000000-0000-0000-0000-000000000001",
}
],
}
Expand Down Expand Up @@ -125,6 +127,7 @@ def test_get_parsed_args(mock_args):
assert args.command == "validate"
assert args.labels == "name=test_run"
assert args.threshold == 30.0
assert args.run_id == "aa000000-0000-0000-0000-000000000001"
assert args.verbose


Expand Down
16 changes: 15 additions & 1 deletion tests/unit/test_config_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import copy
import pytest
from unittest import mock

import ibis.expr.datatypes as dt

from data_validation import consts
Expand Down Expand Up @@ -60,6 +59,7 @@
consts.CONFIG_THRESHOLD: 0.0,
consts.CONFIG_PRIMARY_KEYS: "id",
consts.CONFIG_CALCULATED_FIELDS: ["name", "station_id"],
consts.CONFIG_RUN_ID: "aa000000-0000-0000-0000-000000000001",
}

SAMPLE_ROW_CONFIG_DEP_ALIASES = {
Expand Down Expand Up @@ -594,3 +594,17 @@ def test_build_dependent_aliases_exception(module_under_test):
str(excinfo.value)
== "Exclude columns flag cannot be present with column list '*'"
)


def test_get_correct_run_id(module_under_test):
    """ConfigManager.run_id echoes the run_id entry of SAMPLE_ROW_CONFIG."""
    source_client, target_client = MockIbisClient(), MockIbisClient()
    manager = module_under_test.ConfigManager(
        SAMPLE_ROW_CONFIG, source_client, target_client, verbose=False
    )
    expected_run_id = SAMPLE_ROW_CONFIG[consts.CONFIG_RUN_ID]
    assert manager.run_id == expected_run_id


def test_get_none_run_id(module_under_test):
    """ConfigManager.run_id is None when built from SAMPLE_CONFIG.

    NOTE(review): presumably SAMPLE_CONFIG carries no run_id entry; confirm
    against its definition elsewhere in this module.
    """
    source_client, target_client = MockIbisClient(), MockIbisClient()
    manager = module_under_test.ConfigManager(
        SAMPLE_CONFIG, source_client, target_client, verbose=False
    )
    observed_run_id = manager.run_id
    assert observed_run_id is None
Loading

0 comments on commit 4e6f557

Please sign in to comment.