diff --git a/data_validation/cli_tools.py b/data_validation/cli_tools.py
index cabffb20..f5e3b8ce 100644
--- a/data_validation/cli_tools.py
+++ b/data_validation/cli_tools.py
@@ -990,6 +990,12 @@ def _add_common_arguments(
         # TODO: update if we start to support other statuses
         help="Comma separated list of statuses to filter the validation results. Supported statuses are (success, fail). If no list is provided, all statuses are returned",
     )
+    optional_arguments.add_argument(
+        "--run-id",
+        "-rid",
+        default=None,
+        help="Set a string for the run_id; if not provided, a randomly generated UUID will be used (the default behaviour)",
+    )


 def _check_positive(value: int) -> int:
@@ -1407,8 +1413,8 @@ def cols_from_arg(concat_arg: str, client, table_obj: dict, query_str: str) -> l
     result_handler_config = None

     # Set filter_config and threshold. Not supported in case of schema validation
-    filter_config = getattr(args, "filters", [])
-    threshold = getattr(args, "threshold", 0.0)
+    filter_config = getattr(args, consts.CONFIG_FILTERS, [])
+    threshold = getattr(args, consts.CONFIG_THRESHOLD, 0.0)

     # Get labels
     if args.labels is None:
@@ -1425,8 +1431,8 @@ def cols_from_arg(concat_arg: str, client, table_obj: dict, query_str: str) -> l
     format = args.format if args.format else "table"

     # Get random row arguments. Only in row validations these attributes can be present.
-    use_random_rows = getattr(args, "use_random_row", False)
-    random_row_batch_size = getattr(args, "random_row_batch_size", None)
+    use_random_rows = getattr(args, consts.CONFIG_USE_RANDOM_ROWS, False)
+    random_row_batch_size = getattr(args, consts.CONFIG_RANDOM_ROW_BATCH_SIZE, None)

     # Get table list. Not supported in case of custom query validation
     is_filesystem = source_client._source_type == "FileSystem"
@@ -1459,23 +1465,28 @@ def cols_from_arg(concat_arg: str, client, table_obj: dict, query_str: str) -> l
     for table_obj in tables_list:
         pre_build_configs = {
             "config_type": config_type,
-            "source_conn_name": args.source_conn,
-            "target_conn_name": args.target_conn,
+            consts.CONFIG_SOURCE_CONN_NAME: args.source_conn,
+            consts.CONFIG_TARGET_CONN_NAME: args.target_conn,
             "table_obj": table_obj,
-            "labels": labels,
-            "threshold": threshold,
-            "format": format,
-            "use_random_rows": use_random_rows,
-            "random_row_batch_size": random_row_batch_size,
+            consts.CONFIG_LABELS: labels,
+            consts.CONFIG_THRESHOLD: threshold,
+            consts.CONFIG_FORMAT: format,
+            consts.CONFIG_USE_RANDOM_ROWS: use_random_rows,
+            consts.CONFIG_RANDOM_ROW_BATCH_SIZE: random_row_batch_size,
             "source_client": source_client,
             "target_client": target_client,
             "result_handler_config": result_handler_config,
             "filter_config": filter_config,
-            "filter_status": filter_status,
-            "trim_string_pks": getattr(args, "trim_string_pks", False),
-            "case_insensitive_match": getattr(args, "case_insensitive_match", False),
+            consts.CONFIG_FILTER_STATUS: filter_status,
+            consts.CONFIG_TRIM_STRING_PKS: getattr(
+                args, consts.CONFIG_TRIM_STRING_PKS, False
+            ),
+            consts.CONFIG_CASE_INSENSITIVE_MATCH: getattr(
+                args, consts.CONFIG_CASE_INSENSITIVE_MATCH, False
+            ),
             consts.CONFIG_ROW_CONCAT: getattr(args, consts.CONFIG_ROW_CONCAT, None),
             consts.CONFIG_ROW_HASH: getattr(args, consts.CONFIG_ROW_HASH, None),
+            consts.CONFIG_RUN_ID: getattr(args, consts.CONFIG_RUN_ID, None),
             "verbose": args.verbose,
         }
         if (
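The hunk above only registers the option; argparse is what maps "--run-id" to the args.run_id attribute later read via getattr(args, consts.CONFIG_RUN_ID, None). A minimal standalone sketch of that behaviour, assuming nothing beyond stock argparse (the option strings and None default mirror the patch; everything else is illustrative, not part of the change):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--run-id",
        "-rid",
        default=None,
        help="Set a string for the run_id; a random UUID is generated when omitted",
    )

    # argparse exposes "--run-id" as args.run_id, which is why the
    # getattr(args, consts.CONFIG_RUN_ID, None) lookup above finds it.
    args = parser.parse_args(["--run-id", "aa000000-0000-0000-0000-000000000001"])
    assert args.run_id == "aa000000-0000-0000-0000-000000000001"
    assert parser.parse_args([]).run_id is None  # downstream code fills in a UUID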
diff --git a/data_validation/config_manager.py b/data_validation/config_manager.py
index ae46680a..f1d8f9d9 100644
--- a/data_validation/config_manager.py
+++ b/data_validation/config_manager.py
@@ -245,6 +245,11 @@ def hash(self):
         """Return field from Config"""
         return self._config.get(consts.CONFIG_ROW_HASH, [])

+    @property
+    def run_id(self):
+        """Return field from Config"""
+        return self._config.get(consts.CONFIG_RUN_ID, None)
+
     @property
     def filters(self):
         """Return Filters from Config"""
@@ -504,6 +509,7 @@ def build_config_manager(
     case_insensitive_match=None,
     concat=None,
     hash=None,
+    run_id=None,
     verbose=False,
 ):
     if isinstance(filter_config, dict):
@@ -536,6 +542,7 @@
         consts.CONFIG_CASE_INSENSITIVE_MATCH: case_insensitive_match,
         consts.CONFIG_ROW_CONCAT: concat,
         consts.CONFIG_ROW_HASH: hash,
+        consts.CONFIG_RUN_ID: run_id,
     }

     return ConfigManager(
diff --git a/data_validation/consts.py b/data_validation/consts.py
index 5955918b..8d79e8c7 100644
--- a/data_validation/consts.py
+++ b/data_validation/consts.py
@@ -49,6 +49,7 @@
 CONFIG_CASE_INSENSITIVE_MATCH = "case_insensitive_match"
 CONFIG_ROW_CONCAT = "concat"
 CONFIG_ROW_HASH = "hash"
+CONFIG_RUN_ID = "run_id"
 CONFIG_SOURCE_COLUMN = "source_column"
 CONFIG_TARGET_COLUMN = "target_column"
 CONFIG_THRESHOLD = "threshold"
diff --git a/data_validation/data_validation.py b/data_validation/data_validation.py
index d8bb1cbf..1fbc450a 100644
--- a/data_validation/data_validation.py
+++ b/data_validation/data_validation.py
@@ -16,9 +16,9 @@
 import logging
 import warnings
 from concurrent.futures import ThreadPoolExecutor
-
 import ibis.backends.pandas
 import pandas
+import uuid

 from data_validation import combiner, consts, metadata
 from data_validation.config_manager import ConfigManager
@@ -63,6 +63,9 @@ def __init__(
         self.run_metadata = metadata.RunMetadata()
         self.run_metadata.labels = self.config_manager.labels

+        # Use a generated uuid for the run_id if None was supplied via config
+        self.run_metadata.run_id = self.config_manager.run_id or str(uuid.uuid4())
+
         # Initialize Validation Builder if None was supplied
         self.validation_builder = validation_builder or ValidationBuilder(
             self.config_manager
diff --git a/data_validation/metadata.py b/data_validation/metadata.py
index 5297dfa5..2b7c6600 100644
--- a/data_validation/metadata.py
+++ b/data_validation/metadata.py
@@ -14,11 +14,9 @@
 """Metadata classes with data about the validation run."""
-
 import dataclasses
 import datetime
 import typing
-import uuid

 from data_validation import consts
@@ -64,7 +62,7 @@ def get_column_name(self, result_type: str) -> str:

 @dataclasses.dataclass
 class RunMetadata(object):
-    run_id: str = dataclasses.field(default_factory=lambda: str(uuid.uuid4()))
+    run_id: str = dataclasses.field(default_factory=str)
     validations: dict = dataclasses.field(default_factory=dict)
     labels: list = dataclasses.field(default_factory=list)
     start_time: typing.Optional[datetime.datetime] = dataclasses.field(
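Net effect of the four library files above: RunMetadata no longer mints its own run_id; DataValidation resolves it once, preferring the configured value and only then generating a UUID. A minimal sketch of that precedence (the helper name is illustrative, not part of the patch):

    import uuid

    def resolve_run_id(configured_run_id=None) -> str:
        # Mirrors DataValidation.__init__ above: honour a configured run_id,
        # otherwise fall back to a freshly generated UUID4 string.
        return configured_run_id or str(uuid.uuid4())

    assert resolve_run_id("my-fixed-id") == "my-fixed-id"
    assert len(resolve_run_id()) == 36  # canonical UUID string length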
diff --git a/tests/system/data_sources/test_bigquery.py b/tests/system/data_sources/test_bigquery.py
index 65e894b8..3610b1d1 100644
--- a/tests/system/data_sources/test_bigquery.py
+++ b/tests/system/data_sources/test_bigquery.py
@@ -278,7 +278,7 @@
     CLI_CONFIG_FILE_GCS,
 ]

-EXPECTED_NUM_YAML_LINES = 25  # Expected number of lines for validation config generated by CLI_STORE_COLUMN_ARGS
+EXPECTED_NUM_YAML_LINES = 26  # Expected number of lines for validation config generated by CLI_STORE_COLUMN_ARGS
 CLI_CONFIGS_RUN_ARGS_LOCAL = ["configs", "run", "--config-file", CLI_CONFIG_FILE]
 CLI_CONFIGS_RUN_ARGS_GCS = ["configs", "run", "--config-file", CLI_CONFIG_FILE_GCS]

@@ -298,7 +298,7 @@
     "--config-file",
     CLI_CONFIG_FILE,
 ]
-EXPECTED_NUM_YAML_LINES_WILDCARD = 155
+EXPECTED_NUM_YAML_LINES_WILDCARD = 156

 CLI_TIMESTAMP_MIN_MAX_ARGS = [
     "validate",
@@ -316,7 +316,7 @@
     "--config-file",
     CLI_CONFIG_FILE,
 ]
-EXPECTED_NUM_YAML_LINES_TIMESTAMP_MIN_MAX = 33
+EXPECTED_NUM_YAML_LINES_TIMESTAMP_MIN_MAX = 34

 CLI_TIMESTAMP_SUM_AVG_BITXOR_ARGS = [
     "validate",
@@ -336,7 +336,7 @@
     "--config-file",
     CLI_CONFIG_FILE,
 ]
-EXPECTED_NUM_YAML_LINES_TIMESTAMP_SUM_AVG_BITXOR = 53
+EXPECTED_NUM_YAML_LINES_TIMESTAMP_SUM_AVG_BITXOR = 54

 CLI_BQ_DATETIME_SUM_AVG_BITXOR_ARGS = [
     "validate",
@@ -356,7 +356,7 @@
     "--config-file",
     CLI_CONFIG_FILE,
 ]
-EXPECTED_NUM_YAML_LINES_BQ_DATETIME_SUM_AVG_BITXOR = 53
+EXPECTED_NUM_YAML_LINES_BQ_DATETIME_SUM_AVG_BITXOR = 54

 CLI_FIND_TABLES_ARGS = [
     "find-tables",
@@ -392,6 +392,7 @@
     consts.CONFIG_THRESHOLD: 0.0,
     consts.CONFIG_FORMAT: "table",
     consts.CONFIG_RESULT_HANDLER: None,
+    consts.CONFIG_RUN_ID: None,
     consts.CONFIG_FILTERS: [],
     consts.CONFIG_USE_RANDOM_ROWS: False,
     consts.CONFIG_RANDOM_ROW_BATCH_SIZE: None,
diff --git a/tests/unit/test_cli_tools.py b/tests/unit/test_cli_tools.py
index faeaa33d..5188528a 100644
--- a/tests/unit/test_cli_tools.py
+++ b/tests/unit/test_cli_tools.py
@@ -32,6 +32,7 @@
     "config_file": "example_test.yaml",
     "labels": "name=test_run",
     "threshold": 30.0,
+    "run_id": "aa000000-0000-0000-0000-000000000001",
     "verbose": True,
 }

@@ -98,6 +99,7 @@
                     "type": "count",
                 }
             ],
+            "run_id": "aa000000-0000-0000-0000-000000000001",
         }
     ],
 }
@@ -125,6 +127,7 @@ def test_get_parsed_args(mock_args):
     assert args.command == "validate"
     assert args.labels == "name=test_run"
     assert args.threshold == 30.0
+    assert args.run_id == "aa000000-0000-0000-0000-000000000001"
     assert args.verbose
diff --git a/tests/unit/test_config_manager.py b/tests/unit/test_config_manager.py
index 76976282..8706952a 100644
--- a/tests/unit/test_config_manager.py
+++ b/tests/unit/test_config_manager.py
@@ -15,7 +15,6 @@
 import copy
 import pytest
 from unittest import mock
-
 import ibis.expr.datatypes as dt

 from data_validation import consts
@@ -60,6 +59,7 @@
     consts.CONFIG_THRESHOLD: 0.0,
     consts.CONFIG_PRIMARY_KEYS: "id",
     consts.CONFIG_CALCULATED_FIELDS: ["name", "station_id"],
+    consts.CONFIG_RUN_ID: "aa000000-0000-0000-0000-000000000001",
 }

 SAMPLE_ROW_CONFIG_DEP_ALIASES = {
@@ -594,3 +594,17 @@ def test_build_dependent_aliases_exception(module_under_test):
         str(excinfo.value)
         == "Exclude columns flag cannot be present with column list '*'"
     )
+
+
+def test_get_correct_run_id(module_under_test):
+    config_manager = module_under_test.ConfigManager(
+        SAMPLE_ROW_CONFIG, MockIbisClient(), MockIbisClient(), verbose=False
+    )
+    assert config_manager.run_id == SAMPLE_ROW_CONFIG[consts.CONFIG_RUN_ID]
+
+
+def test_get_none_run_id(module_under_test):
+    config_manager = module_under_test.ConfigManager(
+        SAMPLE_CONFIG, MockIbisClient(), MockIbisClient(), verbose=False
+    )
+    assert config_manager.run_id is None
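The unit tests below depend on pinning the run_id through config so the BigQuery-handler log lines become deterministic. The pattern in miniature (the key name mirrors consts.py but is hard-coded here so the sketch runs stand-alone; the rest of the validation config is elided):

    CONFIG_RUN_ID = "run_id"  # mirrors consts.CONFIG_RUN_ID above
    DUMMY_RUN_ID = "aa000000-0000-0000-0000-000000000001"

    config = {
        # ... connection, table and validation keys as in the samples below ...
        CONFIG_RUN_ID: DUMMY_RUN_ID,  # pinned, so no random UUID at run time
    }
    # With the id pinned, the expected log line is known before the run:
    expected_log = f"Results written to BigQuery, run id: {config[CONFIG_RUN_ID]}"
    assert expected_log.endswith(DUMMY_RUN_ID)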
- "table_name": "my_table", + consts.SOURCE_TYPE: "FileSystem", + consts.CONFIG_TABLE_NAME: "my_table", "file_path": TARGET_TABLE_FILE_PATH, "file_type": "json", } SAMPLE_CONFIG = { # BigQuery Specific Connection Config - "source_conn": SOURCE_CONN_CONFIG, - "target_conn": TARGET_CONN_CONFIG, + consts.CONFIG_SOURCE_CONN: SOURCE_CONN_CONFIG, + consts.CONFIG_TARGET_CONN: TARGET_CONN_CONFIG, # Validation Type - consts.CONFIG_TYPE: "Column", + consts.CONFIG_TYPE: consts.COLUMN_VALIDATION, # Configuration Required Depending on Validator Type - "schema_name": None, - "table_name": "my_table", - "target_schema_name": None, - "target_table_name": "my_table", + consts.CONFIG_SCHEMA_NAME: None, + consts.CONFIG_TABLE_NAME: "my_table", + consts.CONFIG_TARGET_SCHEMA_NAME: None, + consts.CONFIG_TARGET_TABLE_NAME: "my_table", consts.CONFIG_GROUPED_COLUMNS: [], consts.CONFIG_AGGREGATES: [ { @@ -77,15 +79,15 @@ SAMPLE_THRESHOLD_CONFIG = { # BigQuery Specific Connection Config - "source_conn": SOURCE_CONN_CONFIG, - "target_conn": TARGET_CONN_CONFIG, + consts.CONFIG_SOURCE_CONN: SOURCE_CONN_CONFIG, + consts.CONFIG_TARGET_CONN: TARGET_CONN_CONFIG, # Validation Type - consts.CONFIG_TYPE: "Column", + consts.CONFIG_TYPE: consts.COLUMN_VALIDATION, # Configuration Required Depending on Validator Type - "schema_name": None, - "table_name": "my_table", - "target_schema_name": None, - "target_table_name": "my_table", + consts.CONFIG_SCHEMA_NAME: None, + consts.CONFIG_TABLE_NAME: "my_table", + consts.CONFIG_TARGET_SCHEMA_NAME: None, + consts.CONFIG_TARGET_TABLE_NAME: "my_table", consts.CONFIG_GROUPED_COLUMNS: [], consts.CONFIG_AGGREGATES: [ { @@ -110,16 +112,16 @@ # Grouped Column Row config SAMPLE_GC_CONFIG = { # BigQuery Specific Connection Config - "source_conn": SOURCE_CONN_CONFIG, - "target_conn": TARGET_CONN_CONFIG, + consts.CONFIG_SOURCE_CONN: SOURCE_CONN_CONFIG, + consts.CONFIG_TARGET_CONN: TARGET_CONN_CONFIG, # Validation Type consts.CONFIG_TYPE: consts.COLUMN_VALIDATION, consts.CONFIG_MAX_RECURSIVE_QUERY_SIZE: 50, # Configuration Required Depending on Validator Type - "schema_name": None, - "table_name": "my_table", - "target_schema_name": None, - "target_table_name": "my_table", + consts.CONFIG_SCHEMA_NAME: None, + consts.CONFIG_TABLE_NAME: "my_table", + consts.CONFIG_TARGET_SCHEMA_NAME: None, + consts.CONFIG_TARGET_TABLE_NAME: "my_table", consts.CONFIG_GROUPED_COLUMNS: [ { consts.CONFIG_FIELD_ALIAS: "date_value", @@ -152,16 +154,16 @@ # Grouped Column Row config SAMPLE_MULTI_GC_CONFIG = { # BigQuery Specific Connection Config - "source_conn": SOURCE_CONN_CONFIG, - "target_conn": TARGET_CONN_CONFIG, + consts.CONFIG_SOURCE_CONN: SOURCE_CONN_CONFIG, + consts.CONFIG_TARGET_CONN: TARGET_CONN_CONFIG, # Validation Type consts.CONFIG_TYPE: consts.COLUMN_VALIDATION, consts.CONFIG_MAX_RECURSIVE_QUERY_SIZE: 50, # Configuration Required Depending on Validator Type - "schema_name": None, - "table_name": "my_table", - "target_schema_name": None, - "target_table_name": "my_table", + consts.CONFIG_SCHEMA_NAME: None, + consts.CONFIG_TABLE_NAME: "my_table", + consts.CONFIG_TARGET_SCHEMA_NAME: None, + consts.CONFIG_TARGET_TABLE_NAME: "my_table", consts.CONFIG_GROUPED_COLUMNS: [ { consts.CONFIG_FIELD_ALIAS: "date_value", @@ -199,16 +201,16 @@ SAMPLE_GC_CALC_CONFIG = { # BigQuery Specific Connection Config - "source_conn": SOURCE_CONN_CONFIG, - "target_conn": TARGET_CONN_CONFIG, + consts.CONFIG_SOURCE_CONN: SOURCE_CONN_CONFIG, + consts.CONFIG_TARGET_CONN: TARGET_CONN_CONFIG, # Validation Type consts.CONFIG_TYPE: 
@@ -199,16 +201,16 @@

 SAMPLE_GC_CALC_CONFIG = {
     # BigQuery Specific Connection Config
-    "source_conn": SOURCE_CONN_CONFIG,
-    "target_conn": TARGET_CONN_CONFIG,
+    consts.CONFIG_SOURCE_CONN: SOURCE_CONN_CONFIG,
+    consts.CONFIG_TARGET_CONN: TARGET_CONN_CONFIG,
     # Validation Type
     consts.CONFIG_TYPE: consts.COLUMN_VALIDATION,
     consts.CONFIG_MAX_RECURSIVE_QUERY_SIZE: 50,
     # Configuration Required Depending on Validator Type
-    "schema_name": None,
-    "table_name": "my_table",
-    "target_schema_name": None,
-    "target_table_name": "my_table",
+    consts.CONFIG_SCHEMA_NAME: None,
+    consts.CONFIG_TABLE_NAME: "my_table",
+    consts.CONFIG_TARGET_SCHEMA_NAME: None,
+    consts.CONFIG_TARGET_TABLE_NAME: "my_table",
     consts.CONFIG_GROUPED_COLUMNS: [
         {
             consts.CONFIG_FIELD_ALIAS: "date_value",
@@ -297,15 +299,15 @@
 # Row config
 SAMPLE_ROW_CONFIG = {
     # BigQuery Specific Connection Config
-    "source_conn": SOURCE_CONN_CONFIG,
-    "target_conn": TARGET_CONN_CONFIG,
+    consts.CONFIG_SOURCE_CONN: SOURCE_CONN_CONFIG,
+    consts.CONFIG_TARGET_CONN: TARGET_CONN_CONFIG,
     # Validation Type
     consts.CONFIG_TYPE: consts.ROW_VALIDATION,
     # Configuration Required Depending on Validator Type
-    "schema_name": None,
-    "table_name": "my_table",
-    "target_schema_name": None,
-    "target_table_name": "my_table",
+    consts.CONFIG_SCHEMA_NAME: None,
+    consts.CONFIG_TABLE_NAME: "my_table",
+    consts.CONFIG_TARGET_SCHEMA_NAME: None,
+    consts.CONFIG_TARGET_TABLE_NAME: "my_table",
     consts.CONFIG_PRIMARY_KEYS: [
         {
             consts.CONFIG_FIELD_ALIAS: "id",
@@ -328,23 +330,27 @@
             consts.CONFIG_CAST: None,
         },
     ],
-    consts.CONFIG_RESULT_HANDLER: None,
-    consts.CONFIG_FORMAT: "table",
+    consts.CONFIG_RESULT_HANDLER: {
+        consts.CONFIG_TYPE: "BigQuery",
+        consts.PROJECT_ID: "my-project",
+        consts.TABLE_ID: "dataset.table_name",
+    },
+    consts.CONFIG_FORMAT: "text",
     consts.CONFIG_FILTER_STATUS: None,
 }

 # Row config
 SAMPLE_JSON_ROW_CONFIG = {
     # BigQuery Specific Connection Config
-    "source_conn": SOURCE_CONN_CONFIG,
-    "target_conn": TARGET_CONN_CONFIG,
+    consts.CONFIG_SOURCE_CONN: SOURCE_CONN_CONFIG,
+    consts.CONFIG_TARGET_CONN: TARGET_CONN_CONFIG,
     # Validation Type
     consts.CONFIG_TYPE: consts.ROW_VALIDATION,
     # Configuration Required Depending on Validator Type
-    "schema_name": None,
-    "table_name": "my_table",
-    "target_schema_name": None,
-    "target_table_name": "my_table",
+    consts.CONFIG_SCHEMA_NAME: None,
+    consts.CONFIG_TABLE_NAME: "my_table",
+    consts.CONFIG_TARGET_SCHEMA_NAME: None,
+    consts.CONFIG_TARGET_TABLE_NAME: "my_table",
     consts.CONFIG_PRIMARY_KEYS: [
         {
             consts.CONFIG_FIELD_ALIAS: "pkey",
@@ -369,15 +375,15 @@

 # Row validation where we only care about failures and we write them to BQ
 SAMPLE_ROW_CONFIG_BQ_FAILURES = {
     # BigQuery Specific Connection Config
-    "source_conn": SOURCE_CONN_CONFIG,
-    "target_conn": TARGET_CONN_CONFIG,
+    consts.CONFIG_SOURCE_CONN: SOURCE_CONN_CONFIG,
+    consts.CONFIG_TARGET_CONN: TARGET_CONN_CONFIG,
     # Validation Type
     consts.CONFIG_TYPE: consts.ROW_VALIDATION,
     # Configuration Required Depending on Validator Type
-    "schema_name": None,
-    "table_name": "my_table",
-    "target_schema_name": None,
-    "target_table_name": "my_table",
+    consts.CONFIG_SCHEMA_NAME: None,
+    consts.CONFIG_TABLE_NAME: "my_table",
+    consts.CONFIG_TARGET_SCHEMA_NAME: None,
+    consts.CONFIG_TARGET_TABLE_NAME: "my_table",
     consts.CONFIG_PRIMARY_KEYS: [
         {
             consts.CONFIG_FIELD_ALIAS: "id",
@@ -407,6 +413,7 @@
     },
     consts.CONFIG_FORMAT: "text",
     consts.CONFIG_FILTER_STATUS: ["fail"],
+    consts.CONFIG_RUN_ID: DUMMY_RUN_ID,
 }

 JSON_DATA = """[{"col_a":1,"col_b":"a"},{"col_a":1,"col_b":"b"}]"""
@@ -698,21 +705,22 @@ def test_grouped_column_level_validation_multiple_aggregations(module_under_test
     assert validation_df["target_agg_value"].astype(float).sum() == 11


-def test_row_level_validation(module_under_test, fs):
+def test_row_level_validation(module_under_test, fs, monkeypatch):
+    # Mock the BigQuery client
+    mock_bq_client = mock.create_autospec(bigquery.Client)
+    monkeypatch.setattr(bigquery, "Client", value=mock_bq_client)
+    # With some mocked data - source and target the same
     data = _generate_fake_data(rows=100, second_range=0)
-
     source_json_data = _get_fake_json_data(data)
     target_json_data = _get_fake_json_data(data)
-
     _create_table_file(SOURCE_TABLE_FILE_PATH, source_json_data)
     _create_table_file(TARGET_TABLE_FILE_PATH, target_json_data)
-
+    # When we validate
     client = module_under_test.DataValidation(SAMPLE_ROW_CONFIG)
     result_df = client.execute()
-
+    # Then we expect
     str_comparison_df = result_df[result_df["validation_name"] == "text_value"]
     int_comparison_df = result_df[result_df["validation_name"] == "int_value"]
-
     assert len(result_df) == 200
     assert len(str_comparison_df) == 100
     assert len(int_comparison_df) == 100
@@ -730,25 +738,39 @@ def test_fail_row_level_validation(module_under_test, fs):
     assert len(fail_df) == 5


-def test_bad_join_row_level_validation(module_under_test, fs):
+def test_bad_join_row_level_validation(module_under_test, fs, caplog, monkeypatch):
+    # Mock the BigQuery client
+    mock_bq_client = mock.create_autospec(bigquery.Client)
+    monkeypatch.setattr(bigquery, "Client", value=mock_bq_client)
+    # With some mocked data - source and target different
     data = _generate_fake_data(rows=100, second_range=0)
     target_data = _generate_fake_data(initial_id=100, rows=1, second_range=0)
-
     source_json_data = _get_fake_json_data(data)
     target_json_data = _get_fake_json_data(target_data)
-
     _create_table_file(SOURCE_TABLE_FILE_PATH, source_json_data)
     _create_table_file(TARGET_TABLE_FILE_PATH, target_json_data)
-
+    # ... and the log level set to DEBUG
+    caplog.set_level(logging.DEBUG)
+    # When we validate
     client = module_under_test.DataValidation(SAMPLE_ROW_CONFIG)
     result_df = client.execute()
-
     comparison_df = result_df[
         result_df["validation_status"] == consts.VALIDATION_STATUS_FAIL
     ]
+    # Then we expect
     # 2 validations * (100 source + 1 target)
     assert len(result_df) == 202
     assert len(comparison_df) == 202
+    # The "Results written" message happens + info about the failed data, all against a generated run_id
+    # assert len(caplog.records) == 202
+    run_id = result_df.iloc[0]["run_id"]
+    assert run_id != DUMMY_RUN_ID
+    assert caplog.records[0].message == f"Results written to BigQuery, run id: {run_id}"
+    assert (
+        "validation_name validation_type source_table_name source_column_name source_agg_value target_agg_value pct_difference validation_status"
+        in caplog.records[1].message
+    )
+    assert f"fail {run_id}" in caplog.records[1].message


 def test_no_console_data_shown_for_validation_with_result_written_to_bq_in_info_mode(
@@ -777,8 +799,10 @@ def test_no_console_data_shown_for_validation_with_result_written_to_bq_in_info_
     # Only the "Results written" message happens
     # Important because the results could include sensitive data, which some users need to exclude
     assert len(caplog.records) == 1
result_df.iloc[0]["run_id"] - assert caplog.records[0].message == f"Results written to BigQuery, run id: {run_id}" + assert ( + caplog.records[0].message + == f"Results written to BigQuery, run id: {DUMMY_RUN_ID}" + ) assert ( "validation_name validation_type source_table_name source_column_name source_agg_value target_agg_value pct_difference validation_status" in caplog.records[1].message ) - assert f"fail {run_id}" in caplog.records[1].message + assert f"fail {DUMMY_RUN_ID}" in caplog.records[1].message def test_console_data_shown_for_matching_validation_with_result_written_to_bq_in_debug_mode(