From 60f9ae1b8064f15320c633f85071ac2abeedfab7 Mon Sep 17 00:00:00 2001
From: Sunil Thaha
Date: Tue, 10 Sep 2024 09:26:55 +1000
Subject: [PATCH 1/3] chore(validator): support config mapping for actual and
 predicted

This commit

* renames `expected` to `predicted` for consistent naming everywhere
  (including in the MSE and MAPE computations)

* adds support for an optional `config.mapping` that maps the `actual`
  and `predicted` series to custom keys

* adds support for attaching `units` to a validation

This allows validations to be written as follows:

```
config:
  mapping:
    actual: metal
    predicted: vm

validations:
  - name: ...
    metal: ...
    vm: ...
    units: Watts
```

Signed-off-by: Sunil Thaha
---
 e2e/tools/validator/acpi_validations.yaml     |  4 +-
 .../src/validator/validations/__init__.py     | 32 ++++++++--
 e2e/tools/validator/validations.yaml          | 60 ++++++++++++-------
 3 files changed, 68 insertions(+), 28 deletions(-)

diff --git a/e2e/tools/validator/acpi_validations.yaml b/e2e/tools/validator/acpi_validations.yaml
index 1561f2570d..232a1434ef 100644
--- a/e2e/tools/validator/acpi_validations.yaml
+++ b/e2e/tools/validator/acpi_validations.yaml
@@ -1,6 +1,8 @@
 validations:
   - name: mock - node
     actual: |
+      mock_acpi_power1_average/10^6
+    predicted: |
       sum(
         rate(
           kepler_node_platform_joules_total{{
@@ -8,7 +10,5 @@ validations:
           }}[{rate_interval}]
         )
       )
-    expected: |
-      mock_acpi_power1_average/10^6
     max_mse: 0.0001
     # max_mape: 0
diff --git a/e2e/tools/validator/src/validator/validations/__init__.py b/e2e/tools/validator/src/validator/validations/__init__.py
index 3635dc4d80..c58ce73fd7 100644
--- a/e2e/tools/validator/src/validator/validations/__init__.py
+++ b/e2e/tools/validator/src/validator/validations/__init__.py
@@ -1,6 +1,6 @@
 import logging
 import re
-from typing import NamedTuple
+from typing import NamedTuple, Any
 
 import yaml
 
@@ -45,21 +45,45 @@ def mode(self) -> str:
 
 class Validation(NamedTuple):
     name: str
-    expected: QueryTemplate
     actual: QueryTemplate
+    predicted: QueryTemplate
+    actual_label: str
+    predicted_label: str
+
+    units: str = ""
 
     max_mse: float | None = None
     max_mape: float | None = None
 
 
+def yaml_node(yml: dict[str, Any], key_path: list[str], default: Any) -> Any:
+    node = yml
+
+    for x in key_path:
+        if x in node:
+            node = node[x]
+        else:
+            return default
+
+    return node
+
+
 def read_validations(file_path: str, promql_vars: dict[str, str]) -> list[Validation]:
     with open(file_path) as file:
         yml = yaml.safe_load(file)
+
+    mapping = yaml_node(yml, ["config", "mapping"], {})
+    actual_label = mapping.get("actual", "actual")
+    predicted_label = mapping.get("predicted", "predicted")
+
     return [
         Validation(
             name=v["name"],
-            expected=QueryTemplate(v["expected"], promql_vars),
-            actual=QueryTemplate(v["actual"], promql_vars),
+            actual=QueryTemplate(v[actual_label], promql_vars),
+            predicted=QueryTemplate(v[predicted_label], promql_vars),
+            actual_label=actual_label,
+            predicted_label=predicted_label,
+            units=v.get("units", ""),
             max_mse=v.get("max_mse", None),
             max_mape=v.get("max_mape", None),
         )
         for v in yml["validations"]
diff --git a/e2e/tools/validator/validations.yaml b/e2e/tools/validator/validations.yaml
index 46268ed7e6..00d9c71adb 100644
--- a/e2e/tools/validator/validations.yaml
+++ b/e2e/tools/validator/validations.yaml
@@ -1,7 +1,13 @@
+config:
+  mapping:
+    actual: metal
+    predicted: vm
+
 validations:
   # absolute power comparison
   - name: platform - absolute
-    actual: |
+    units: Watts
+    metal: |
       sum(
         rate(
           kepler_{level}_platform_joules_total{{
@@ -10,7 +16,7 @@ validations:
           }}[{rate_interval}]
         )
       )
-    expected: |
+    vm: |
       sum(
         rate(
kepler_node_platform_joules_total{{ @@ -20,7 +26,8 @@ validations: ) - name: package - absolute - actual: | + units: Watts + metal: | sum( rate( kepler_{level}_package_joules_total{{ @@ -29,7 +36,7 @@ validations: }}[{rate_interval}] ) ) - expected: | + vm: | sum( rate( kepler_node_package_joules_total{{ @@ -39,7 +46,8 @@ validations: ) - name: core - absolute - actual: | + units: Watts + metal: | sum( rate( kepler_{level}_core_joules_total{{ @@ -48,7 +56,7 @@ validations: }}[{rate_interval}] ) ) - expected: | + vm: | sum( rate( kepler_node_core_joules_total{{ @@ -59,7 +67,8 @@ validations: # dynamic power comparison - name: platform - dynamic - actual: | + units: Watts + metal: | rate( kepler_{level}_platform_joules_total{{ job="{metal_job_name}", @@ -67,7 +76,7 @@ validations: mode="dynamic", }}[{rate_interval}] ) - expected: | + vm: | rate( kepler_node_platform_joules_total{{ job="{vm_job_name}", @@ -76,7 +85,8 @@ validations: ) - name: package - dynamic - actual: | + units: Watts + metal: | rate( kepler_{level}_package_joules_total{{ job="{metal_job_name}", @@ -84,7 +94,7 @@ validations: mode="dynamic", }}[{rate_interval}] ) - expected: | + vm: | rate( kepler_node_package_joules_total{{ job="{vm_job_name}", @@ -93,7 +103,8 @@ validations: ) - name: core - dynamic - actual: | + units: Watts + metal: | rate( kepler_{level}_core_joules_total{{ job="{metal_job_name}", @@ -101,7 +112,7 @@ validations: mode="dynamic", }}[{rate_interval}] ) - expected: | + vm: | rate( kepler_node_core_joules_total{{ job="{vm_job_name}", @@ -110,7 +121,8 @@ validations: ) - name: dram - dynamic - actual: | + units: Watts + metal: | rate( kepler_{level}_dram_joules_total{{ job="{metal_job_name}", @@ -118,7 +130,7 @@ validations: mode="dynamic", }}[{rate_interval}] ) - expected: | + vm: | rate( kepler_node_dram_joules_total{{ job="{vm_job_name}", @@ -129,7 +141,8 @@ validations: # idle power comparison - name: platform - idle - actual: | + units: Watts + metal: | rate( kepler_{level}_platform_joules_total{{ job="{metal_job_name}", @@ -137,7 +150,7 @@ validations: mode="idle", }}[{rate_interval}] ) - expected: | + vm: | rate( kepler_node_platform_joules_total{{ job="{vm_job_name}", @@ -146,7 +159,8 @@ validations: ) - name: package - idle - actual: | + units: Watts + metal: | rate( kepler_{level}_package_joules_total{{ job="{metal_job_name}", @@ -154,7 +168,7 @@ validations: mode="idle", }}[{rate_interval}] ) - expected: | + vm: | rate( kepler_node_package_joules_total{{ job="{vm_job_name}", @@ -163,7 +177,8 @@ validations: ) - name: core - idle - actual: | + units: Watts + metal: | rate( kepler_{level}_core_joules_total{{ job="{metal_job_name}", @@ -171,7 +186,7 @@ validations: mode="idle", }}[{rate_interval}] ) - expected: | + vm: | rate( kepler_node_core_joules_total{{ job="{vm_job_name}", @@ -180,7 +195,8 @@ validations: ) - name: dram - idle - actual: | + units: Watts + metal: | rate( kepler_{level}_dram_joules_total{{ job="{metal_job_name}", @@ -188,7 +204,7 @@ validations: mode="idle", }}[{rate_interval}] ) - expected: | + vm: | rate( kepler_node_dram_joules_total{{ job="{vm_job_name}", From f5a8cbd9d149a274b6fd8e39c765022b38e553be Mon Sep 17 00:00:00 2001 From: Sunil Thaha Date: Tue, 10 Sep 2024 09:35:03 +1000 Subject: [PATCH 2/3] chore(validator): use predicted instead of expected Signed-off-by: Sunil Thaha --- .../src/validator/prometheus/__init__.py | 71 ++++++++----------- 1 file changed, 29 insertions(+), 42 deletions(-) diff --git a/e2e/tools/validator/src/validator/prometheus/__init__.py 
b/e2e/tools/validator/src/validator/prometheus/__init__.py index a3a34ac497..13dceb47ce 100644 --- a/e2e/tools/validator/src/validator/prometheus/__init__.py +++ b/e2e/tools/validator/src/validator/prometheus/__init__.py @@ -79,58 +79,45 @@ def __str__(self) -> str: class Result(NamedTuple): - expected_series: Series actual_series: Series - expected_dropped: int + predicted_series: Series + actual_dropped: int + predicted_dropped: int mse: ValueOrError mape: ValueOrError - def print(self): - # ruff: noqa: T201 (Suppressed as printing is intentional and necessary in this context) - print("Expected:") - print("────────────────────────────────────────") - print(f" {self.expected_series.query}") - print(f" {self.expected_series.values}") - print("\t\t\t\t⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯\n") - - print("Actual:") - print("────────────────────────────────────────\n") - print(f"{self.actual_series.query}") - print(f"{self.actual_series.values}") - print("\t\t\t\t⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯\n") - - print(f"MSE : {self.mse}") - print(f"MAPE: {self.mape}") - print("\t\t\t\t━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━") - - -def validate_arrays(actual: npt.ArrayLike, expected: npt.ArrayLike) -> tuple[npt.ArrayLike, npt.ArrayLike]: - actual, expected = np.array(actual), np.array(expected) - if len(actual) != len(expected): - msg = f"actual and expected must be of equal length: {len(actual)} != {len(expected)}" + +def validate_arrays(actual: npt.ArrayLike, predicted: npt.ArrayLike) -> tuple[npt.ArrayLike, npt.ArrayLike]: + actual, predicted = np.array(actual), np.array(predicted) + + if len(actual) != len(predicted): + msg = f"actual and predicted must be of equal length: {len(actual)} != {len(predicted)}" raise ValueError(msg) - if len(actual) == 0 or len(expected) == 0: - msg = f"actual ({len(actual)}) and expected ({len(expected)}) must not be empty" + + if len(actual) == 0 or len(predicted) == 0: + msg = f"actual ({len(actual)}) and predicted ({len(predicted)}) must not be empty" raise ValueError(msg) - return (actual, expected) + return (actual, predicted) -def mse(actual: npt.ArrayLike, expected: npt.ArrayLike) -> ValueOrError: +def mse(actual: npt.ArrayLike, predicted: npt.ArrayLike) -> ValueOrError: try: - actual, expected = validate_arrays(actual, expected) - return ValueOrError(value=np.square(np.subtract(actual, expected)).mean()) + actual, predicted = validate_arrays(actual, predicted) + return ValueOrError(value=np.square(np.subtract(actual, predicted)).mean()) + # ruff: noqa: BLE001 (Suppressed as we want to catch all exceptions here) except Exception as e: return ValueOrError(value=0, error=str(e)) -def mape(actual: npt.ArrayLike, expected: npt.ArrayLike) -> ValueOrError: +def mape(actual: npt.ArrayLike, predicted: npt.ArrayLike) -> ValueOrError: try: - actual, expected = validate_arrays(actual, expected) - return ValueOrError(value=100 * np.abs(np.divide(np.subtract(actual, expected), actual)).mean()) + actual, predicted = validate_arrays(actual, predicted) + return ValueOrError(value=100 * np.abs(np.divide(np.subtract(actual, predicted), actual)).mean()) + # ruff: noqa: BLE001 (Suppressed as we want to catch all exceptions here) except Exception as e: return ValueOrError(value=0, error=str(e)) @@ -245,20 +232,20 @@ def compare( start: datetime, end: datetime, actual_query: str, - expected_query: str, + predicted_query: str, ) -> Result: - expected_series = self.single_series(expected_query, start, end) actual_series = self.single_series(actual_query, start, end) + 
predicted_series = self.single_series(predicted_query, start, end)
 
-        expected, actual = filter_by_equal_timestamps(expected_series, actual_series)
-        expected_dropped = len(expected_series.samples) - len(expected.samples)
+        actual, predicted = filter_by_equal_timestamps(actual_series, predicted_series)
         actual_dropped = len(actual_series.samples) - len(actual.samples)
+        predicted_dropped = len(predicted_series.samples) - len(predicted.samples)
 
         return Result(
-            mse=mse(actual.values, expected.values),
-            mape=mape(actual.values, expected.values),
-            expected_series=expected_series,
+            mse=mse(actual.values, predicted.values),
+            mape=mape(actual.values, predicted.values),
             actual_series=actual_series,
-            expected_dropped=expected_dropped,
+            predicted_series=predicted_series,
             actual_dropped=actual_dropped,
+            predicted_dropped=predicted_dropped,
         )

From de99d53a8bd38075f74d4ea89eb5eb0dfa2062e6 Mon Sep 17 00:00:00 2001
From: Sunil Thaha
Date: Tue, 10 Sep 2024 09:36:43 +1000
Subject: [PATCH 3/3] chore(validator): use predicted in reports

Signed-off-by: Sunil Thaha
---
 .../validator/src/validator/cli/__init__.py   | 73 +++++++++++--------
 .../src/validator/validations/__init__.py     |  2 +-
 .../validator/prometheus/test_prometheus.py   | 14 ++--
 3 files changed, 51 insertions(+), 38 deletions(-)

diff --git a/e2e/tools/validator/src/validator/cli/__init__.py b/e2e/tools/validator/src/validator/cli/__init__.py
index d850e04f78..e2f76d913b 100644
--- a/e2e/tools/validator/src/validator/cli/__init__.py
+++ b/e2e/tools/validator/src/validator/cli/__init__.py
@@ -37,30 +37,39 @@ class ValidationResult:
     name: str
     actual: str
-    expected: str
+    predicted: str
+
+    actual_label: str
+    predicted_label: str
+    units: str
 
     mse: ValueOrError
     mape: ValueOrError
     actual_dropped: int = 0
-    expected_dropped: int = 0
+    predicted_dropped: int = 0
 
     actual_filepath: str = ""
-    expected_filepath: str = ""
+    predicted_filepath: str = ""
 
     mse_passed: bool = True
     mape_passed: bool = True
 
     unexpected_error: str = ""
 
-    def __init__(self, name: str, actual: str, expected: str) -> None:
+    def __init__(
+        self, name: str, actual: str, predicted: str, actual_label: str, predicted_label: str, units: str
+    ) -> None:
         self.name = name
         self.actual = actual
-        self.expected = expected
+        self.predicted = predicted
+        self.actual_label = actual_label
+        self.predicted_label = predicted_label
+        self.units = units
 
     @property
     def verdict(self) -> str:
-        note = " (dropped)" if self.actual_dropped > 0 or self.expected_dropped > 0 else ""
+        note = " (dropped)" if self.actual_dropped > 0 or self.predicted_dropped > 0 else ""
 
         if self.unexpected_error or self.mse.error or self.mape.error:
             return f"ERROR{note}"
@@ -202,18 +211,18 @@ def write_md_report(results_dir: str, r: TestResult):
     for v in r.validations.results:
         md.h4(v.name)
         md.write("\n**Queries**:\n")
-        md.li(f"Actual  : `{v.actual}`")
-        md.li(f"Expected: `{v.expected}`")
+        md.li(f"Actual ({v.actual_label}) : `{v.actual}`")
+        md.li(f"Predicted ({v.predicted_label}) : `{v.predicted}`")
 
         if v.unexpected_error:
             md.write("\n**Errors**:\n")
             md.code(v.unexpected_error)
             continue
 
-        if v.actual_dropped or v.expected_dropped:
+        if v.actual_dropped or v.predicted_dropped:
             md.write("\n**Dropped**:\n")
-            md.li(f"Actual  : `{v.actual_dropped}`")
-            md.li(f"Expected: `{v.expected_dropped}`")
+            md.li(f"Actual ({v.actual_label}) : `{v.actual_dropped}`")
+            md.li(f"Predicted ({v.predicted_label}) : `{v.predicted_dropped}`")
 
         md.write("\n**Results**:\n")
         md.li(f"MSE : `{v.mse}`")
@@ -245,21 +254,22 @@ def snake_case(s: str) -> str:
 
 def create_charts_for_result(results_dir: str, r: ValidationResult) -> str:
     actual_json_path = r.actual_filepath
-    expected_json_path = r.expected_filepath
+    predicted_json_path = r.predicted_filepath
 
     images_dir = os.path.join(results_dir, "images")
     os.makedirs(images_dir, exist_ok=True)
 
     fig, ax = plt.subplots(figsize=(18, 7), sharex=True, sharey=True)
     plt.title(r.name)
+    ax.set_ylabel(r.units)
 
     # actual in blue
     time, values = extract_dates_and_values(actual_json_path)
-    ax.plot(time, values, marker="x", color="#024abf", label=r.actual)
+    ax.plot(time, values, marker="x", color="#024abf", label=f"{r.actual_label}: {r.actual}")
 
-    # expected in orange
-    time, values = extract_dates_and_values(expected_json_path)
-    ax.plot(time, values, marker="o", color="#ff742e", label=r.expected)
+    # predicted in orange
+    time, values = extract_dates_and_values(predicted_json_path)
+    ax.plot(time, values, marker="o", color="#ff742e", label=f"{r.predicted_label}: {r.predicted}")
 
     # Set the x-axis tick format to display time
     ax.xaxis.set_major_formatter(DateFormatter("%H:%M:%S"))
@@ -295,7 +305,7 @@ def create_charts_for_result(results_dir: str, r: ValidationResult) -> str:
 
     # export it
     filename = snake_case(r.name)
-    out_file = os.path.join(images_dir, f"{filename}.png")
+    out_file = os.path.join(images_dir, f"{r.actual_label}-vs-{r.predicted_label}-{filename}.png")
 
     plt.savefig(out_file, format="png")
 
@@ -312,11 +322,11 @@ def create_report_dir(report_dir: str) -> tuple[str, str]:
     return results_dir, tag
 
 
-def dump_query_result(raw_results_dir: str, query: QueryTemplate, series: Series) -> str:
+def dump_query_result(raw_results_dir: str, prefix: str, query: QueryTemplate, series: Series) -> str:
     artifacts_dir = os.path.join(raw_results_dir, "artifacts")
     os.makedirs(artifacts_dir, exist_ok=True)
 
-    filename = f"{query.metric_name}--{query.mode}.json"
+    filename = f"{prefix}-{query.metric_name}--{query.mode}.json"
     out_file = os.path.join(artifacts_dir, filename)
 
     with open(out_file, "w") as f:
@@ -495,31 +505,34 @@ def run_validation(
         result = ValidationResult(
             v.name,
             v.actual.one_line,
-            v.expected.one_line,
+            v.predicted.one_line,
+            v.actual_label,
+            v.predicted_label,
+            v.units,
         )
 
         click.secho(f"{v.name}", fg="cyan")
-        click.secho(f" - actual  : {v.actual.one_line}")
-        click.secho(f" - expected: {v.expected.one_line}")
+        click.secho(f" - {v.actual_label} : {v.actual.one_line}")
+        click.secho(f" - {v.predicted_label} : {v.predicted.one_line}")
 
         try:
             cmp = comparator.compare(
                 start_time,
                 end_time,
                 v.actual.promql,
-                v.expected.promql,
+                v.predicted.promql,
             )
             click.secho(f"\t MSE : {cmp.mse}", fg="bright_blue")
             click.secho(f"\t MAPE: {cmp.mape} %\n", fg="bright_blue")
 
-            result.expected_dropped = cmp.expected_dropped
-            result.actual_dropped = cmp.expected_dropped
+            result.predicted_dropped = cmp.predicted_dropped
+            result.actual_dropped = cmp.actual_dropped
 
-            if cmp.expected_dropped > 0 or cmp.actual_dropped > 0:
+            if cmp.predicted_dropped > 0 or cmp.actual_dropped > 0:
                 logger.warning(
-                    "dropped %d samples from expected and %d samples from actual",
-                    cmp.expected_dropped,
+                    "dropped %d samples from actual and %d samples from predicted",
                     cmp.actual_dropped,
+                    cmp.predicted_dropped,
                 )
 
             result.mse, result.mape = cmp.mse, cmp.mape
@@ -533,8 +546,8 @@ def run_validation(
             if not result.mape_passed:
                 click.secho(f"MAPE exceeded threshold. mape: {cmp.mape}, max_mape: {v.max_mape}", fg="red")
 
-            result.actual_filepath = dump_query_result(results_dir, v.expected, cmp.actual_series)
-            result.expected_filepath = dump_query_result(results_dir, v.actual, cmp.expected_series)
+            result.actual_filepath = dump_query_result(results_dir, v.actual_label, v.actual, cmp.actual_series)
+            result.predicted_filepath = dump_query_result(results_dir, v.predicted_label, v.predicted, cmp.predicted_series)
 
         # ruff: noqa: BLE001 (Suppressed as we want to catch all exceptions here)
         except Exception as e:
diff --git a/e2e/tools/validator/src/validator/validations/__init__.py b/e2e/tools/validator/src/validator/validations/__init__.py
index c58ce73fd7..dcb9766d45 100644
--- a/e2e/tools/validator/src/validator/validations/__init__.py
+++ b/e2e/tools/validator/src/validator/validations/__init__.py
@@ -1,6 +1,6 @@
 import logging
 import re
-from typing import NamedTuple, Any
+from typing import Any, NamedTuple
 
 import yaml
 
diff --git a/e2e/tools/validator/tests/validator/prometheus/test_prometheus.py b/e2e/tools/validator/tests/validator/prometheus/test_prometheus.py
index e7e6c647c5..06b24abebb 100644
--- a/e2e/tools/validator/tests/validator/prometheus/test_prometheus.py
+++ b/e2e/tools/validator/tests/validator/prometheus/test_prometheus.py
@@ -188,24 +188,24 @@ def test_mse():
 
 def test_mse_with_large_arrays():
     actual = np.random.rand(1000)
-    expected = np.random.rand(1000)
-    assert mse(actual, expected).value >= 0.0  # MSE should always be non-negative
+    predicted = np.random.rand(1000)
+    assert mse(actual, predicted).value >= 0.0  # MSE should always be non-negative
 
 
-def test_mse_expections():
+def test_mse_exceptions():
     v = mse([], [])
     assert v.value == 0.0
     assert v.error is not None
-    assert str(v) == "Error: actual (0) and expected (0) must not be empty"
+    assert str(v) == "Error: actual (0) and predicted (0) must not be empty"
 
 
 def test_mse_with_different_lengths():
     actual = [1, 2, 3]
-    expected = [1, 2]
-    v = mse(actual, expected)
+    predicted = [1, 2]
+    v = mse(actual, predicted)
     assert v.value == 0.0
     assert v.error is not None
-    assert str(v) == "Error: actual and expected must be of equal length: 3 != 2"
+    assert str(v) == "Error: actual and predicted must be of equal length: 3 != 2"
 
 
 class MockPromClient:
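
For reference, a minimal sketch of how the new `config.mapping` is resolved. The YAML document and the metric names in it are illustrative only; `yaml_node` is copied verbatim from PATCH 1/3, and the label lookup mirrors what `read_validations` does:

```
from typing import Any

import yaml


def yaml_node(yml: dict[str, Any], key_path: list[str], default: Any) -> Any:
    # copied from validator.validations (PATCH 1/3): walks key_path into a
    # nested dict and returns `default` as soon as a key is missing
    node = yml
    for x in key_path:
        if x in node:
            node = node[x]
        else:
            return default
    return node


# illustrative config: actual series live under `metal:`, predicted under `vm:`
doc = yaml.safe_load("""
config:
  mapping:
    actual: metal
    predicted: vm

validations:
  - name: platform - absolute
    units: Watts
    metal: node_platform_power      # hypothetical PromQL query
    vm: kepler_node_platform_power  # hypothetical PromQL query
    max_mape: 10
""")

mapping = yaml_node(doc, ["config", "mapping"], {})
actual_label = mapping.get("actual", "actual")           # -> "metal"
predicted_label = mapping.get("predicted", "predicted")  # -> "vm"

for v in doc["validations"]:
    # read_validations looks up each query under the mapped keys
    print(v["name"], v[actual_label], v[predicted_label], v.get("units", ""))
```

When a file defines no `config.mapping`, the labels fall back to `actual` and `predicted`, so existing validation files keep working unchanged.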