Merge pull request #1815 from vprashar2929/add-mae

feat(validator): incorporate MAE in validations
sustainable-computing-io · Oct 22, 2024 · 058d088 · 058d088
2 parents 6b1d41f + fe78fd5
commit 058d088
Show file tree

Hide file tree

Showing 6 changed files with 73 additions and 26 deletions.
diff --git a/e2e/tools/validator/pyproject.toml b/e2e/tools/validator/pyproject.toml
@@ -45,6 +45,7 @@ dependencies = [
   "pytest",
   "ipython",
   "ipdb",
+  "scikit-learn",
 ]
 
 [tool.hatch.envs.default.scripts]

diff --git a/e2e/tools/validator/src/validator/cli/__init__.py b/e2e/tools/validator/src/validator/cli/__init__.py
@@ -45,6 +45,7 @@ class ValidationResult:
 
     mse: ValueOrError
     mape: ValueOrError
+    mae: ValueOrError
 
     actual_dropped: int = 0
     predicted_dropped: int = 0
@@ -54,6 +55,7 @@ class ValidationResult:
 
     mse_passed: bool = True
     mape_passed: bool = True
+    mae_passed: bool = True
 
     unexpected_error: str = ""
 
@@ -71,10 +73,10 @@ def __init__(
     def verdict(self) -> str:
         note = " (dropped)" if self.actual_dropped > 0 or self.predicted_dropped > 0 else ""
 
-        if self.unexpected_error or self.mse.error or self.mape.error:
+        if self.unexpected_error or self.mse.error or self.mape.error or self.mae.error:
             return f"ERROR{note}"
 
-        if self.mse_passed and self.mape_passed:
+        if self.mse_passed and self.mape_passed and self.mae_passed:
             return f"PASS{note}"
 
         return f"FAIL{note}"
@@ -203,9 +205,15 @@ def rel_path(x: str) -> str:
     md.h2("Validations")
     md.h3("Summary")
     md.table(
-        ["Name", "MSE", "MAPE", "Pass / Fail"],
+        ["Name", "MSE", "MAPE", "MAE", "Pass / Fail"],
         [
-            [f"[{v.name}](#{v.name.replace(' ', '-')})", f"{v.mse.value:.2f}", f"{v.mape.value:.2f}", v.verdict]
+            [
+                f"[{v.name}](#{v.name.replace(' ', '-')})",
+                f"{v.mse.value:.2f}",
+                f"{v.mape.value:.2f}",
+                f"{v.mae.value:.2f}",
+                v.verdict,
+            ]
             for v in r.validations.results
             if not v.unexpected_error
         ],
@@ -231,6 +239,7 @@ def rel_path(x: str) -> str:
         md.write("\n**Results**:\n")
         md.li(f"MSE  : `{v.mse}`")
         md.li(f"MAPE : `{v.mape} %`")
+        md.li(f"MAE  : `{v.mae}`")
         md.write("\n**Charts**:\n")
         img_path = create_charts_for_result(results_dir, v)
         md.img(v.name, img_path)
@@ -293,6 +302,9 @@ def create_charts_for_result(results_dir: str, r: ValidationResult) -> str:
     if r.mape.error is None:
         err_report += f"\nMAPE: {r.mape.value:.2f}%"
 
+    if r.mae.error is None:
+        err_report += f"\nMAE: {r.mae.value:.2f}"
+
     ax.text(
         0.98,
         1.10,
@@ -527,7 +539,8 @@ def run_validation(
             v.predicted.promql,
         )
         click.secho(f"\t MSE : {cmp.mse}", fg="bright_blue")
-        click.secho(f"\t MAPE: {cmp.mape} %\n", fg="bright_blue")
+        click.secho(f"\t MAPE: {cmp.mape} %", fg="bright_blue")
+        click.secho(f"\t MAE : {cmp.mae}\n", fg="bright_blue")
 
         result.predicted_dropped = cmp.predicted_dropped
         result.actual_dropped = cmp.predicted_dropped
@@ -539,17 +552,21 @@ def run_validation(
                 cmp.predicted_dropped,
             )
 
-        result.mse, result.mape = cmp.mse, cmp.mape
+        result.mse, result.mape, result.mae = cmp.mse, cmp.mape, cmp.mae
 
         result.mse_passed = v.max_mse is None or (cmp.mse.error is None and cmp.mse.value <= v.max_mse)
         result.mape_passed = v.max_mape is None or (cmp.mape.error is None and cmp.mape.value <= v.max_mape)
+        result.mae_passed = v.max_mae is None or (cmp.mae.error is None and cmp.mae.value <= v.max_mae)
 
         if not result.mse_passed:
             click.secho(f"MSE exceeded threshold. mse: {cmp.mse}, max_mse: {v.max_mse}", fg="red")
 
         if not result.mape_passed:
             click.secho(f"MAPE exceeded threshold. mape: {cmp.mape}, max_mape: {v.max_mape}", fg="red")
 
+        if not result.mae_passed:
+            click.secho(f"MAE exceeded threshold. mae: {cmp.mae}, max_mae: {v.max_mae}", fg="red")
+
         result.actual_filepath = dump_query_result(results_dir, v.actual_label, v.actual, cmp.actual_series)
         result.predicted_filepath = dump_query_result(results_dir, v.predicted_label, v.predicted, cmp.predicted_series)
 
@@ -617,7 +634,18 @@ def custom_encode(input_string):
             value["mape"] = float(i.mape.value)
         else:
             value["mape"] = float(i.mape.error)
-        value["status"] = "mape passed: " + str(i.mape_passed) + ", mse passed: " + str(i.mse_passed)
+        if i.mae_passed:
+            value["mae"] = float(i.mae.value)
+        else:
+            value["mae"] = float(i.mae.error)
+        value["status"] = (
+            "mape passed: "
+            + str(i.mape_passed)
+            + ", mse passed: "
+            + str(i.mse_passed)
+            + ", mae passed: "
+            + str(i.mae_passed)
+        )
         m_name = i.name.replace(" - ", "_")
 
         result.append({m_name: value})

diff --git a/e2e/tools/validator/src/validator/prometheus/__init__.py b/e2e/tools/validator/src/validator/prometheus/__init__.py
@@ -6,6 +6,7 @@
 import numpy as np
 import numpy.typing as npt
 from prometheus_api_client import PrometheusConnect
+from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
 
 from validator.config import Prometheus as PromConfig
 
@@ -87,6 +88,7 @@ class Result(NamedTuple):
 
     mse: ValueOrError
     mape: ValueOrError
+    mae: ValueOrError
 
 
 def validate_arrays(actual: npt.ArrayLike, predicted: npt.ArrayLike) -> tuple[npt.ArrayLike, npt.ArrayLike]:
@@ -105,8 +107,7 @@ def validate_arrays(actual: npt.ArrayLike, predicted: npt.ArrayLike) -> tuple[np
 
 def mse(actual: npt.ArrayLike, predicted: npt.ArrayLike) -> ValueOrError:
     try:
-        actual, predicted = validate_arrays(actual, predicted)
-        return ValueOrError(value=np.square(np.subtract(actual, predicted)).mean())
+        return ValueOrError(value=mean_squared_error(actual, predicted))
 
     # ruff: noqa: BLE001 (Suppressed as we want to catch all exceptions here)
     except Exception as e:
@@ -115,8 +116,16 @@ def mse(actual: npt.ArrayLike, predicted: npt.ArrayLike) -> ValueOrError:
 
 def mape(actual: npt.ArrayLike, predicted: npt.ArrayLike) -> ValueOrError:
     try:
-        actual, predicted = validate_arrays(actual, predicted)
-        return ValueOrError(value=100 * np.abs(np.divide(np.subtract(actual, predicted), actual)).mean())
+        return ValueOrError(value=mean_absolute_percentage_error(actual, predicted))
+
+    # ruff: noqa: BLE001 (Suppressed as we want to catch all exceptions here)
+    except Exception as e:
+        return ValueOrError(value=0, error=str(e))
+
+
+def mae(actual: npt.ArrayLike, predicted: npt.ArrayLike) -> ValueOrError:
+    try:
+        return ValueOrError(value=mean_absolute_error(actual, predicted))
 
     # ruff: noqa: BLE001 (Suppressed as we want to catch all exceptions here)
     except Exception as e:
@@ -244,6 +253,7 @@ def compare(
         return Result(
             mse=mse(actual.values, predicted.values),
             mape=mape(actual.values, predicted.values),
+            mae=mae(actual.values, predicted.values),
             actual_series=actual_series,
             predicted_series=predicted_series,
             actual_dropped=actual_dropped,

diff --git a/e2e/tools/validator/src/validator/report/__init__.py b/e2e/tools/validator/src/validator/report/__init__.py
@@ -3,16 +3,17 @@
 
 
 class Value:
-    def __init__(self, mse: str = "", mape: str = "", status: str = ""):
+    def __init__(self, mse: str = "", mape: str = "", mae: str = "", status: str = ""):
         self.mse = mse
         self.mape = mape
+        self.mae = mae
         self.status = status
 
     def to_dict(self):
-        return {"mse": self.mse, "mape": self.mape, "status": self.status}
+        return {"mse": self.mse, "mape": self.mape, "mae": self.mae, "status": self.status}
 
     def __repr__(self):
-        return f"Value(mse='{self.mse}', mape='{self.mape}', status='{self.status}')"
+        return f"Value(mse='{self.mse}', mape='{self.mape}', mae='{self.mae}', status='{self.status}')"
 
 
 class Result:

diff --git a/e2e/tools/validator/src/validator/validations/__init__.py b/e2e/tools/validator/src/validator/validations/__init__.py
@@ -53,6 +53,7 @@ class Validation(NamedTuple):
     units: str = ""
     max_mse: float | None = None
     max_mape: float | None = None
+    max_mae: float | None = None
 
 
 def yaml_node(yml: dict[str, Any], key_path: list[str], default: Any) -> Any:

diff --git a/e2e/tools/validator/tests/validator/prometheus/test_prometheus.py b/e2e/tools/validator/tests/validator/prometheus/test_prometheus.py
@@ -9,13 +9,7 @@
 from validator.config import (
     PrometheusJob as Job,
 )
-from validator.prometheus import (
-    Comparator,
-    Series,
-    filter_by_equal_timestamps,
-    mape,
-    mse,
-)
+from validator.prometheus import Comparator, Series, filter_by_equal_timestamps, mae, mape, mse
 
 
 @pytest.fixture
@@ -138,36 +132,43 @@ def test_mse():
         "b": [ 1.0, 2.0, 3.0, 4.0, ],
         "mse": 0.0,
         "mape": 0.0,
+        "mae": 0.0,
     }, {
         "a": [ -1.0, -2.0, -3.0, -4.0, ],
         "b": [ -1.0, -2.0, -3.0, -4.0, ],
         "mse": 0.0,
         "mape": 0.0,
+        "mae": 0.0,
     }, {
         "a": [ 1.0, -2.0, 3.0, 4.0, ],
         "b": [ 1.0, -2.0, 3.0, 4.0, ],
         "mse": 0.0,
         "mape": 0.0,
+        "mae": 0.0,
     }, {
         "a": [ 1, 2, 3, 4, ],
         "b": [ 1.0, 2.0, 3.0, 4.0, ],
         "mse": 0.0,
         "mape": 0.0,
+        "mae": 0.0,
     }, {
         "a": [ 1, 2, 3, ],
         "b": [ 4, 5, 6, ],
         "mse": 9.0, # ((1 - 4)^2 + (2 - 5)^2 + (3 - 6)^2) / 3
-        "mape": 183.3333,
+        "mape": 1.833333,
+        "mae": 3.0, # (|1-4| + |2-5| + |3-6|) / 3
     }, {
         "a": [ 1.5, 2.5, 3.5 ],
         "b": [ 1.0, 2.0, 3.0 ],
         "mse": 0.25, # 3 x (0.5^2) / 3
-        "mape": 22.5396,
+        "mape": 0.225396,
+        "mae": 0.5, # (|1.5 - 1.0| + |2.5 - 2.0| + |3.5 - 3.0|) / 3
     }, {
         "a": [ 1, -2, 3 ],
         "b": [ -1, 2, -3 ],
         "mse": 18.6666, # (2.0^2 + 4.0^2 + 6.0^2) / 3
-        "mape": 200.0,
+        "mape": 2.000,
+        "mae": 4.0 # (|1-(-1)| + |-2-2| + |3-(-3)|) / 3
     }]
     # fmt: on
 
@@ -185,6 +186,11 @@ def test_mse():
         expected_mape = s["mape"]
         assert expected_mape == pytest.approx(actual_mape.value, rel=1e-3)
 
+        actual_mae = mae(a, b)
+        assert actual_mae.error is None
+        expected_mae = s["mae"]
+        assert expected_mae == pytest.approx(actual_mae.value, rel=1e-3)
+
 
 def test_mse_with_large_arrays():
     actual = np.random.rand(1000)
@@ -196,7 +202,7 @@ def test_mse_expections():
     v = mse([], [])
     assert v.value == 0.0
     assert v.error is not None
-    assert str(v) == "Error: actual (0) and predicted (0) must not be empty"
+    assert str(v) == "Error: Found array with 0 sample(s) (shape=(0,)) while a minimum of 1 is required."
 
 
 def test_mse_with_different_lengths():
@@ -205,7 +211,7 @@ def test_mse_with_different_lengths():
     v = mse(actual, predicted)
     assert v.value == 0.0
     assert v.error is not None
-    assert str(v) == "Error: actual and predicted must be of equal length: 3 != 2"
+    assert str(v) == "Error: Found input variables with inconsistent numbers of samples: [3, 2]"
 
 
 class MockPromClient: