From d0fe96686e46ab3534fca60a68b522220255862f Mon Sep 17 00:00:00 2001 From: Sunil Thaha Date: Thu, 24 Oct 2024 16:08:35 +1000 Subject: [PATCH] chore(validator): MAPE and json generation This commit fixes MAPE calculation and its tests. Additionally, the json generation is cleaned and added to `gen-report` as well. Signed-off-by: Sunil Thaha --- .../validator/src/validator/cli/__init__.py | 134 +++++++++--------- .../src/validator/prometheus/__init__.py | 17 +-- .../src/validator/report/__init__.py | 56 ++++---- .../validator/prometheus/test_prometheus.py | 32 +++-- 4 files changed, 111 insertions(+), 128 deletions(-) diff --git a/e2e/tools/validator/src/validator/cli/__init__.py b/e2e/tools/validator/src/validator/cli/__init__.py index 858527b32d..d9909acf8c 100644 --- a/e2e/tools/validator/src/validator/cli/__init__.py +++ b/e2e/tools/validator/src/validator/cli/__init__.py @@ -30,7 +30,6 @@ logger = logging.getLogger(__name__) pass_config = click.make_pass_decorator(config.Validator) -data_dict = {} @dataclass @@ -415,8 +414,6 @@ def stress(cfg: config.Validator, script_path: str, report_dir: str): click.secho(" * Generating report dir and tag", fg="green") results_dir, tag = create_report_dir(report_dir) click.secho(f"\tresults dir: {results_dir}, tag: {tag}", fg="bright_green") - filepath = results_dir + "/" + tag + ".json" - data_dict.update({"file_path": filepath}) res = TestResult(tag) @@ -437,7 +434,7 @@ def stress(cfg: config.Validator, script_path: str, report_dir: str): time.sleep(10) res.validations = run_validations(cfg, stress_test, results_dir) - create_json(res) + write_json_report(results_dir, res) write_md_report(results_dir, res) @@ -473,6 +470,7 @@ def gen_report(cfg: config.Validator, start: datetime.datetime, end: datetime.da script_result = ScriptResult(start, end) res.validations = run_validations(cfg, script_result, results_dir) + write_json_report(results_dir, res) write_md_report(results_dir, res) @@ -612,84 +610,80 @@ def validate_acpi(cfg: config.Validator, duration: datetime.timedelta, report_di return int(res.validations.passed) -def create_json(res): - def update_list_json(new_value: list, new_key: str): - data_dict[new_key] = new_value +def write_json_report(results_dir: str, res: TestResult): + pattern = re.compile(r'[{]?(\w+)=("[^"]*"|[^,]+)[},]?') - def custom_encode(input_string): - pattern = re.compile(r'(\w+)=("[^"]*"|[^,]+)') + def extract_label_value(input_string): matches = pattern.findall(input_string) - parsed_dict = {key: value.strip('"') for key, value in matches} - return json.dumps(parsed_dict) - - result = [] - - for i in res.validations.results: - value = {} - if i.mse_passed: - value["mse"] = float(i.mse.value) - else: - value["mse"] = float(i.mse.error) - if i.mape_passed: - value["mape"] = float(i.mape.value) - else: - value["mape"] = float(i.mape.error) - if i.mae_passed: - value["mae"] = float(i.mae.value) - else: - value["mae"] = float(i.mae.error) - value["status"] = ( - "mape passed: " - + str(i.mape_passed) - + ", mse passed: " - + str(i.mse_passed) - + ", mae passed: " - + str(i.mae_passed) - ) - m_name = i.name.replace(" - ", "_") - - result.append({m_name: value}) + return {key: value.strip('"') for key, value in matches} + + data_dict = {} + results = [] + + for r in res.validations.results: + value = { + "mae": str(r.mae), + "mape": str(r.mape), + "mse": str(r.mse), + "status": ( + "mape passed: " + + str(r.mape_passed).lower() + + ", mse passed: " + + str(r.mse_passed).lower() + + ", mae passed: " + + str(r.mae_passed).lower() + ), + } + results.append({r.name: value}) build_info = [] - for i in res.build_info: - tmp = i.replace("kepler_exporter_build_info", "") - build_info.append(custom_encode(tmp)) + for r in res.build_info: + selector = r.replace("kepler_exporter_build_info", "") + build_info.append(extract_label_value(selector)) node_info = [] - for i in res.node_info: - tmp = i.replace("kepler_exporter_node_info", "") - node_info.append(custom_encode(tmp)) + for r in res.node_info: + selector = r.replace("kepler_exporter_node_info", "") + node_info.append(extract_label_value(selector)) - update_list_json(build_info, "build_info") - update_list_json(node_info, "node_info") + data_dict["build_info"] = build_info + data_dict["node_info"] = node_info - machine_spec = [] - machine_spec.append( + machine_specs = [] + machine_specs.append( { "type": "host", - "model": res.host_spec[0][0], - "cores": res.host_spec[0][1], - "threads": res.host_spec[0][2], - "sockets": res.host_spec[0][3], - "flags": res.host_spec[0][4], - "dram": res.host_spec[1], - } - ) - machine_spec.append( - { - "type": "vm", - "model": res.vm_spec[0][0], - "cores": res.vm_spec[0][1], - "threads": res.vm_spec[0][2], - "sockets": res.vm_spec[0][3], - "flags": res.vm_spec[0][4], - "dram": res.vm_spec[1], + "model": res.host_spec.cpu_spec.model, + "cores": res.host_spec.cpu_spec.cores, + "threads": res.host_spec.cpu_spec.threads, + "sockets": res.host_spec.cpu_spec.sockets, + "flags": res.host_spec.cpu_spec.flags, + "dram": res.host_spec.dram_size, } ) - update_list_json(machine_spec, "machine_spec") + if res.vm_spec is not None: + machine_specs.append( + { + "type": "vm", + "model": res.vm_spec.cpu_spec.model, + "cores": res.vm_spec.cpu_spec.cores, + "threads": res.vm_spec.cpu_spec.threads, + "sockets": res.vm_spec.cpu_spec.sockets, + "flags": res.vm_spec.cpu_spec.flags, + "dram": res.vm_spec.dram_size, + } + ) + + data_dict["machine_specs"] = machine_specs + data_dict["results"] = results - update_list_json(result, "result") json_template = JsonTemplate(**data_dict) - file_name = data_dict["file_path"] - with open(file_name, "w") as file: + + path = os.path.join(results_dir, f"{res.tag}.json") + with open(path, "w") as file: json.dump(json_template, file, cls=CustomEncoder, indent=2) + + # + # TODO: remove all the above in favor of below + # with open(file_name+"sane.json", "w") as file: + # json.dump(dataclasses.asdict(res), file, cls=CustomEncoder, indent=2) diff --git a/e2e/tools/validator/src/validator/prometheus/__init__.py b/e2e/tools/validator/src/validator/prometheus/__init__.py index de5c42a18d..df6c8b5f49 100644 --- a/e2e/tools/validator/src/validator/prometheus/__init__.py +++ b/e2e/tools/validator/src/validator/prometheus/__init__.py @@ -3,7 +3,6 @@ from datetime import datetime from typing import NamedTuple, Protocol -import numpy as np import numpy.typing as npt from prometheus_api_client import PrometheusConnect from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error @@ -91,20 +90,6 @@ class Result(NamedTuple): mae: ValueOrError -def validate_arrays(actual: npt.ArrayLike, predicted: npt.ArrayLike) -> tuple[npt.ArrayLike, npt.ArrayLike]: - actual, predicted = np.array(actual), np.array(predicted) - - if len(actual) != len(predicted): - msg = f"actual and predicted must be of equal length: {len(actual)} != {len(predicted)}" - raise ValueError(msg) - - if len(actual) == 0 or len(predicted) == 0: - msg = f"actual ({len(actual)}) and predicted ({len(predicted)}) must not be empty" - raise ValueError(msg) - - return (actual, predicted) - - def mse(actual: npt.ArrayLike, predicted: npt.ArrayLike) -> ValueOrError: try: return ValueOrError(value=mean_squared_error(actual, predicted)) @@ -116,7 +101,7 @@ def mse(actual: npt.ArrayLike, predicted: npt.ArrayLike) -> ValueOrError: def mape(actual: npt.ArrayLike, predicted: npt.ArrayLike) -> ValueOrError: try: - return ValueOrError(value=mean_absolute_percentage_error(actual, predicted)) + return ValueOrError(value=mean_absolute_percentage_error(actual, predicted) * 100) # ruff: noqa: BLE001 (Suppressed as we want to catch all exceptions here) except Exception as e: diff --git a/e2e/tools/validator/src/validator/report/__init__.py b/e2e/tools/validator/src/validator/report/__init__.py index 956c67d777..6cde114c32 100644 --- a/e2e/tools/validator/src/validator/report/__init__.py +++ b/e2e/tools/validator/src/validator/report/__init__.py @@ -1,3 +1,4 @@ +import datetime import json from typing import Any @@ -33,49 +34,50 @@ def __repr__(self): class JsonTemplate: def __init__( self, - file_path: str, build_info: list[Any], node_info: list[Any], - machine_spec: list[Any], - result: list[dict[str, Any]], + machine_specs: list[Any], + results: list[dict[str, Any]], ): - if build_info is None: - build_info = [] - if node_info is None: - node_info = [] - if machine_spec is None: - machine_spec = [] - if result is None: - result = [] - - self.file_path = file_path self.build_info = build_info self.node_info = node_info - self.machine_spec = machine_spec - self.result = [] - for res in result: + self.machine_specs = machine_specs + self.results = [] + for res in results: for key, value in res.items(): - self.result.append(Result(key, value)) + self.results.append(Result(key, value)) def to_dict(self): return { - "file_path": self.file_path, "build_info": self.build_info, "node_info": self.node_info, - "machine_spec": self.machine_spec, - "result": [res.to_dict() for res in self.result], + "machine_specs": self.machine_specs, + "results": [res.to_dict() for res in self.results], } def __repr__(self): return ( - f"JsonTemplate(file_path='{self.file_path}', build_info={self.build_info}, " - f"node_info={self.node_info}, machine_spec={self.machine_spec}, " - f"result={self.result})" + f"JsonTemplate('build_info={self.build_info}, " + f"node_info={self.node_info}, machine_spec={self.machine_specs}, " + f"result={self.results})" ) class CustomEncoder(json.JSONEncoder): - def default(self, obj): - if hasattr(obj, "to_dict"): - return obj.to_dict() - return super().default(obj) + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def default(self, o): + if hasattr(o, "_asdict"): + return o._asdict() + + if hasattr(o, "to_dict"): + return o.to_dict() + + if type(o) == datetime.datetime: + return o.isoformat() + + if type(o).__name__ == "bool": + return str(o).lower() + + return super().default(o) diff --git a/e2e/tools/validator/tests/validator/prometheus/test_prometheus.py b/e2e/tools/validator/tests/validator/prometheus/test_prometheus.py index cb650d5d0a..5ccdac33c8 100644 --- a/e2e/tools/validator/tests/validator/prometheus/test_prometheus.py +++ b/e2e/tools/validator/tests/validator/prometheus/test_prometheus.py @@ -155,41 +155,43 @@ def test_mse(): "a": [ 1, 2, 3, ], "b": [ 4, 5, 6, ], "mse": 9.0, # (1 - 4)^2 + (2 - 5)^2 + (3 - 6)^2 / 3 - "mape": 1.833333, + "mape": 183.3333, "mae": 3.0, # (|1-4| + |2-5| + |3-6|) / 3 }, { "a": [ 1.5, 2.5, 3.5 ], "b": [ 1.0, 2.0, 3.0 ], "mse": 0.25, # 3 x (0.5^2) / 3 - "mape": 0.225396, + "mape": 22.5396, "mae": 0.5, # |1.5 - 1.0| + |2.5 - 2.0| + |3.5 - 3.0| }, { "a": [ 1, -2, 3 ], "b": [ -1, 2, -3 ], "mse": 18.6666, # 2.0^2 + 4.0^2 + 6.0^2 / 3 - "mape": 2.000, + "mape": 200.0, "mae": 4.0 # (|1-(-1)| + |-2-2| + |3-(-3)|) / 3 }] # fmt: on + for s in inputs: + for a, b in ([s["a"], s["b"]], [s["b"], s["a"]]): + expected_mse = s["mse"] + actual_mse = mse(a, b) + assert actual_mse.error is None + assert pytest.approx(actual_mse.value, rel=1e-3) == expected_mse + + actual_mae = mae(a, b) + assert actual_mae.error is None + expected_mae = s["mae"] + assert pytest.approx(actual_mae.value, rel=1e-3) == expected_mae + + # NOTE: MAPE(a , b) != MAPE(b, a) unlike MSE and MAE for s in inputs: a = s["a"] b = s["b"] - - expected_mse = s["mse"] - actual_mse = mse(a, b) - assert actual_mse.error is None - assert expected_mse == pytest.approx(actual_mse.value, rel=1e-3) - actual_mape = mape(a, b) assert actual_mape.error is None expected_mape = s["mape"] - assert expected_mape == pytest.approx(actual_mape.value, rel=1e-3) - - actual_mae = mae(a, b) - assert actual_mae.error is None - expected_mae = s["mae"] - assert expected_mae == pytest.approx(actual_mae.value, rel=1e-3) + assert pytest.approx(actual_mape.value, rel=1e-3) == expected_mape def test_mse_with_large_arrays():