diff --git a/e2e/tools/validator/src/validator/cli/__init__.py b/e2e/tools/validator/src/validator/cli/__init__.py index e1e69c56bd..858527b32d 100644 --- a/e2e/tools/validator/src/validator/cli/__init__.py +++ b/e2e/tools/validator/src/validator/cli/__init__.py @@ -45,6 +45,7 @@ class ValidationResult: mse: ValueOrError mape: ValueOrError + mae: ValueOrError actual_dropped: int = 0 predicted_dropped: int = 0 @@ -54,6 +55,7 @@ class ValidationResult: mse_passed: bool = True mape_passed: bool = True + mae_passed: bool = True unexpected_error: str = "" @@ -71,10 +73,10 @@ def __init__( def verdict(self) -> str: note = " (dropped)" if self.actual_dropped > 0 or self.predicted_dropped > 0 else "" - if self.unexpected_error or self.mse.error or self.mape.error: + if self.unexpected_error or self.mse.error or self.mape.error or self.mae.error: return f"ERROR{note}" - if self.mse_passed and self.mape_passed: + if self.mse_passed and self.mape_passed and self.mae_passed: return f"PASS{note}" return f"FAIL{note}" @@ -203,9 +205,15 @@ def rel_path(x: str) -> str: md.h2("Validations") md.h3("Summary") md.table( - ["Name", "MSE", "MAPE", "Pass / Fail"], + ["Name", "MSE", "MAPE", "MAE", "Pass / Fail"], [ - [f"[{v.name}](#{v.name.replace(' ', '-')})", f"{v.mse.value:.2f}", f"{v.mape.value:.2f}", v.verdict] + [ + f"[{v.name}](#{v.name.replace(' ', '-')})", + f"{v.mse.value:.2f}", + f"{v.mape.value:.2f}", + f"{v.mae.value:.2f}", + v.verdict, + ] for v in r.validations.results if not v.unexpected_error ], @@ -231,6 +239,7 @@ def rel_path(x: str) -> str: md.write("\n**Results**:\n") md.li(f"MSE : `{v.mse}`") md.li(f"MAPE : `{v.mape} %`") + md.li(f"MAE : `{v.mae}`") md.write("\n**Charts**:\n") img_path = create_charts_for_result(results_dir, v) md.img(v.name, img_path) @@ -293,6 +302,9 @@ def create_charts_for_result(results_dir: str, r: ValidationResult) -> str: if r.mape.error is None: err_report += f"\nMAPE: {r.mape.value:.2f}%" + if r.mae.error is None: + err_report += f"\nMAE: 
{r.mae.value:.2f}" + ax.text( 0.98, 1.10, @@ -527,7 +539,8 @@ def run_validation( v.predicted.promql, ) click.secho(f"\t MSE : {cmp.mse}", fg="bright_blue") - click.secho(f"\t MAPE: {cmp.mape} %\n", fg="bright_blue") + click.secho(f"\t MAPE: {cmp.mape} %", fg="bright_blue") + click.secho(f"\t MAE : {cmp.mae}\n", fg="bright_blue") result.predicted_dropped = cmp.predicted_dropped result.actual_dropped = cmp.predicted_dropped @@ -539,10 +552,11 @@ def run_validation( cmp.predicted_dropped, ) - result.mse, result.mape = cmp.mse, cmp.mape + result.mse, result.mape, result.mae = cmp.mse, cmp.mape, cmp.mae result.mse_passed = v.max_mse is None or (cmp.mse.error is None and cmp.mse.value <= v.max_mse) result.mape_passed = v.max_mape is None or (cmp.mape.error is None and cmp.mape.value <= v.max_mape) + result.mae_passed = v.max_mae is None or (cmp.mae.error is None and cmp.mae.value <= v.max_mae) if not result.mse_passed: click.secho(f"MSE exceeded threshold. mse: {cmp.mse}, max_mse: {v.max_mse}", fg="red") @@ -550,6 +564,9 @@ def run_validation( if not result.mape_passed: click.secho(f"MAPE exceeded threshold. mape: {cmp.mape}, max_mape: {v.max_mape}", fg="red") + if not result.mae_passed: + click.secho(f"MAE exceeded threshold. 
mae: {cmp.mae}, max_mae: {v.max_mae}", fg="red") + result.actual_filepath = dump_query_result(results_dir, v.actual_label, v.actual, cmp.actual_series) result.predicted_filepath = dump_query_result(results_dir, v.predicted_label, v.predicted, cmp.predicted_series) @@ -617,7 +634,18 @@ def custom_encode(input_string): value["mape"] = float(i.mape.value) else: value["mape"] = float(i.mape.error) - value["status"] = "mape passed: " + str(i.mape_passed) + ", mse passed: " + str(i.mse_passed) + if i.mae_passed: + value["mae"] = float(i.mae.value) + else: + value["mae"] = float(i.mae.error) + value["status"] = ( + "mape passed: " + + str(i.mape_passed) + + ", mse passed: " + + str(i.mse_passed) + + ", mae passed: " + + str(i.mae_passed) + ) m_name = i.name.replace(" - ", "_") result.append({m_name: value}) diff --git a/e2e/tools/validator/src/validator/prometheus/__init__.py b/e2e/tools/validator/src/validator/prometheus/__init__.py index 13dceb47ce..fdb10e6276 100644 --- a/e2e/tools/validator/src/validator/prometheus/__init__.py +++ b/e2e/tools/validator/src/validator/prometheus/__init__.py @@ -87,6 +87,7 @@ class Result(NamedTuple): mse: ValueOrError mape: ValueOrError + mae: ValueOrError def validate_arrays(actual: npt.ArrayLike, predicted: npt.ArrayLike) -> tuple[npt.ArrayLike, npt.ArrayLike]: @@ -123,6 +124,16 @@ def mape(actual: npt.ArrayLike, predicted: npt.ArrayLike) -> ValueOrError: return ValueOrError(value=0, error=str(e)) +def mae(actual: npt.ArrayLike, predicted: npt.ArrayLike) -> ValueOrError: + try: + actual, predicted = validate_arrays(actual, predicted) + return ValueOrError(value=np.abs(np.subtract(actual, predicted)).mean()) + + # ruff: noqa: BLE001 (Suppressed as we want to catch all exceptions here) + except Exception as e: + return ValueOrError(value=0, error=str(e)) + + def filter_by_equal_timestamps(a: Series, b: Series) -> tuple[Series, Series]: """ filter_by_equal_timestamps will filter out samples from a and b @@ -244,6 +255,7 @@ def compare( 
return Result( mse=mse(actual.values, predicted.values), mape=mape(actual.values, predicted.values), + mae=mae(actual.values, predicted.values), actual_series=actual_series, predicted_series=predicted_series, actual_dropped=actual_dropped, diff --git a/e2e/tools/validator/src/validator/validations/__init__.py b/e2e/tools/validator/src/validator/validations/__init__.py index b7681991ab..21ffe8de01 100644 --- a/e2e/tools/validator/src/validator/validations/__init__.py +++ b/e2e/tools/validator/src/validator/validations/__init__.py @@ -53,6 +53,7 @@ class Validation(NamedTuple): units: str = "" max_mse: float | None = None max_mape: float | None = None + max_mae: float | None = None def yaml_node(yml: dict[str, Any], key_path: list[str], default: Any) -> Any: diff --git a/e2e/tools/validator/tests/validator/prometheus/test_prometheus.py b/e2e/tools/validator/tests/validator/prometheus/test_prometheus.py index 06b24abebb..2f6ad9beec 100644 --- a/e2e/tools/validator/tests/validator/prometheus/test_prometheus.py +++ b/e2e/tools/validator/tests/validator/prometheus/test_prometheus.py @@ -9,13 +9,7 @@ from validator.config import ( PrometheusJob as Job, ) -from validator.prometheus import ( - Comparator, - Series, - filter_by_equal_timestamps, - mape, - mse, -) +from validator.prometheus import Comparator, Series, filter_by_equal_timestamps, mae, mape, mse @pytest.fixture @@ -138,36 +132,43 @@ def test_mse(): "b": [ 1.0, 2.0, 3.0, 4.0, ], "mse": 0.0, "mape": 0.0, + "mae": 0.0, }, { "a": [ -1.0, -2.0, -3.0, -4.0, ], "b": [ -1.0, -2.0, -3.0, -4.0, ], "mse": 0.0, "mape": 0.0, + "mae": 0.0, }, { "a": [ 1.0, -2.0, 3.0, 4.0, ], "b": [ 1.0, -2.0, 3.0, 4.0, ], "mse": 0.0, "mape": 0.0, + "mae": 0.0, }, { "a": [ 1, 2, 3, 4, ], "b": [ 1.0, 2.0, 3.0, 4.0, ], "mse": 0.0, "mape": 0.0, + "mae": 0.0, }, { "a": [ 1, 2, 3, ], "b": [ 4, 5, 6, ], "mse": 9.0, # (1 - 4)^2 + (2 - 5)^2 + (3 - 6)^2 / 3 "mape": 183.3333, + "mae": 3.0, # (|1-4| + |2-5| + |3-6|) / 3 }, { "a": [ 1.5, 2.5, 3.5 ], "b": 
[ 1.0, 2.0, 3.0 ], "mse": 0.25, # 3 x (0.5^2) / 3 "mape": 22.5396, + "mae": 0.5, # (|1.5 - 1.0| + |2.5 - 2.0| + |3.5 - 3.0|) / 3 }, { "a": [ 1, -2, 3 ], "b": [ -1, 2, -3 ], "mse": 18.6666, # 2.0^2 + 4.0^2 + 6.0^2 / 3 "mape": 200.0, + "mae": 4.0 # (|1-(-1)| + |-2-2| + |3-(-3)|) / 3 }] # fmt: on @@ -185,6 +186,11 @@ def test_mse(): expected_mape = s["mape"] assert expected_mape == pytest.approx(actual_mape.value, rel=1e-3) + actual_mae = mae(a, b) + assert actual_mae.error is None + expected_mae = s["mae"] + assert expected_mae == pytest.approx(actual_mae.value, rel=1e-3) + def test_mse_with_large_arrays(): actual = np.random.rand(1000)