Merge pull request #1773 from sthaha/feat-validator-mappings
chore(validator): support validation config to map actual and predicted
sthaha authored Sep 10, 2024
2 parents 7b0a7dd + de99d53 commit ac5ee8b
Showing 6 changed files with 145 additions and 106 deletions.
4 changes: 2 additions & 2 deletions e2e/tools/validator/acpi_validations.yaml
@@ -1,14 +1,14 @@
validations:
- name: mock - node
actual: |
mock_acpi_power1_average/10^6
predicted: |
sum(
rate(
kepler_node_platform_joules_total{{
job="{metal_job_name}"
}}[{rate_interval}]
)
)
expected: |
mock_acpi_power1_average/10^6
max_mse: 0.0001
# max_mape: 0
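
The mapping support added in this commit (see read_validations further down) lets a validations file declare, under config.mapping, which per-validation keys hold the actual and the predicted query. A minimal sketch of how those keys would be resolved — the file contents and the labels "mock"/"kepler" are hypothetical, but the lookup mirrors the mapping.get(...) calls added below:

    import yaml

    # Hypothetical validations file: config.mapping renames the per-validation
    # keys that the validator treats as "actual" and "predicted".
    doc = """
    config:
      mapping:
        actual: mock
        predicted: kepler
    validations:
      - name: mock - node
        mock: mock_acpi_power1_average/10^6
        kepler: kepler_node_platform_joules_total
    """

    yml = yaml.safe_load(doc)
    mapping = yml.get("config", {}).get("mapping", {})
    actual_label = mapping.get("actual", "actual")           # "mock"
    predicted_label = mapping.get("predicted", "predicted")  # "kepler"

    for v in yml["validations"]:
        print(v["name"], v[actual_label], v[predicted_label])
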
71 changes: 42 additions & 29 deletions e2e/tools/validator/src/validator/cli/__init__.py
@@ -37,30 +37,39 @@
class ValidationResult:
name: str
actual: str
expected: str
predicted: str

actual_label: str
predicted_label: str
units: str

mse: ValueOrError
mape: ValueOrError

actual_dropped: int = 0
expected_dropped: int = 0
predicted_dropped: int = 0

actual_filepath: str = ""
expected_filepath: str = ""
predicted_filepath: str = ""

mse_passed: bool = True
mape_passed: bool = True

unexpected_error: str = ""

def __init__(self, name: str, actual: str, expected: str) -> None:
def __init__(
self, name: str, actual: str, predicted: str, actual_label: str, predicted_label: str, units: str
) -> None:
self.name = name
self.actual = actual
self.expected = expected
self.predicted = predicted
self.actual_label = actual_label
self.predicted_label = predicted_label
self.units = units

@property
def verdict(self) -> str:
note = " (dropped)" if self.actual_dropped > 0 or self.expected_dropped > 0 else ""
note = " (dropped)" if self.actual_dropped > 0 or self.predicted_dropped > 0 else ""

if self.unexpected_error or self.mse.error or self.mape.error:
return f"ERROR{note}"
@@ -202,18 +211,18 @@ def write_md_report(results_dir: str, r: TestResult):
for v in r.validations.results:
md.h4(v.name)
md.write("\n**Queries**:\n")
md.li(f"Actual : `{v.actual}`")
md.li(f"Expected: `{v.expected}`")
md.li(f"Actual ({v.actual_label}) : `{v.actual}`")
md.li(f"Predicted ({v.predicted_label}) : `{v.predicted}`")

if v.unexpected_error:
md.write("\n**Errors**:\n")
md.code(v.unexpected_error)
continue

if v.actual_dropped or v.expected_dropped:
if v.actual_dropped or v.predicted_dropped:
md.write("\n**Dropped**:\n")
md.li(f"Actual : `{v.actual_dropped}`")
md.li(f"Expected: `{v.expected_dropped}`")
md.li(f"Actual ({v.actual_label}) : `{v.actual_dropped}`")
md.li(f"Predicted ({v.predicted_label}) : `{v.predicted_dropped}`")

md.write("\n**Results**:\n")
md.li(f"MSE : `{v.mse}`")
@@ -245,21 +254,22 @@ def snake_case(s: str) -> str:

def create_charts_for_result(results_dir: str, r: ValidationResult) -> str:
actual_json_path = r.actual_filepath
expected_json_path = r.expected_filepath
predicted_json_path = r.predicted_filepath

images_dir = os.path.join(results_dir, "images")
os.makedirs(images_dir, exist_ok=True)

fig, ax = plt.subplots(figsize=(18, 7), sharex=True, sharey=True)
plt.title(r.name)
ax.set_ylabel(r.units)

# actual in blue
time, values = extract_dates_and_values(actual_json_path)
ax.plot(time, values, marker="x", color="#024abf", label=r.actual)
ax.plot(time, values, marker="x", color="#024abf", label=f"{r.actual_label}: {r.actual}")

# expected in orange
time, values = extract_dates_and_values(expected_json_path)
ax.plot(time, values, marker="o", color="#ff742e", label=r.expected)
time, values = extract_dates_and_values(predicted_json_path)
ax.plot(time, values, marker="o", color="#ff742e", label=f"{r.predicted_label}: {r.predicted}")

# Set the x-axis tick format to display time
ax.xaxis.set_major_formatter(DateFormatter("%H:%M:%S"))
@@ -295,7 +305,7 @@ def create_charts_for_result(results_dir: str, r: ValidationResult) -> str:

# export it
filename = snake_case(r.name)
out_file = os.path.join(images_dir, f"{filename}.png")
out_file = os.path.join(images_dir, f"{r.actual_label}-vs-{r.predicted_label}-{filename}.png")

plt.savefig(out_file, format="png")

@@ -312,11 +322,11 @@ def create_report_dir(report_dir: str) -> tuple[str, str]:
return results_dir, tag


def dump_query_result(raw_results_dir: str, query: QueryTemplate, series: Series) -> str:
def dump_query_result(raw_results_dir: str, prefix: str, query: QueryTemplate, series: Series) -> str:
artifacts_dir = os.path.join(raw_results_dir, "artifacts")
os.makedirs(artifacts_dir, exist_ok=True)

filename = f"{query.metric_name}--{query.mode}.json"
filename = f"{prefix}-{query.metric_name}--{query.mode}.json"
out_file = os.path.join(artifacts_dir, filename)

with open(out_file, "w") as f:
@@ -495,31 +505,34 @@ def run_validation(
result = ValidationResult(
v.name,
v.actual.one_line,
v.expected.one_line,
v.predicted.one_line,
v.actual_label,
v.predicted_label,
v.units,
)

click.secho(f"{v.name}", fg="cyan")
click.secho(f" - actual : {v.actual.one_line}")
click.secho(f" - expected: {v.expected.one_line}")
click.secho(f" - {v.actual_label} : {v.actual.one_line}")
click.secho(f" - {v.predicted_label} : {v.predicted.one_line}")

try:
cmp = comparator.compare(
start_time,
end_time,
v.actual.promql,
v.expected.promql,
v.predicted.promql,
)
click.secho(f"\t MSE : {cmp.mse}", fg="bright_blue")
click.secho(f"\t MAPE: {cmp.mape} %\n", fg="bright_blue")

result.expected_dropped = cmp.expected_dropped
result.actual_dropped = cmp.expected_dropped
result.predicted_dropped = cmp.predicted_dropped
result.actual_dropped = cmp.predicted_dropped

if cmp.expected_dropped > 0 or cmp.actual_dropped > 0:
if cmp.predicted_dropped > 0 or cmp.actual_dropped > 0:
logger.warning(
"dropped %d samples from expected and %d samples from actual",
cmp.expected_dropped,
"dropped %d samples from actual and %d samples from predicted",
cmp.actual_dropped,
cmp.predicted_dropped,
)

result.mse, result.mape = cmp.mse, cmp.mape
@@ -533,8 +546,8 @@ def run_validation(
if not result.mape_passed:
click.secho(f"MAPE exceeded threshold. mape: {cmp.mape}, max_mape: {v.max_mape}", fg="red")

result.actual_filepath = dump_query_result(results_dir, v.expected, cmp.actual_series)
result.expected_filepath = dump_query_result(results_dir, v.actual, cmp.expected_series)
result.actual_filepath = dump_query_result(results_dir, v.actual_label, v.actual, cmp.actual_series)
result.predicted_filepath = dump_query_result(results_dir, v.predicted_label, v.predicted, cmp.predicted_series)

# ruff: noqa: BLE001 (Suppressed as we want to catch all exceptions here)
except Exception as e:
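
Taken together, the prefix argument to dump_query_result and the label-aware chart name mean each artifact now records which series, and which pair of labels, it came from. A sketch of the two filename formats with hypothetical values:

    prefix, metric_name, mode = "mock", "mock_acpi_power1_average", "node"  # hypothetical
    actual_label, predicted_label = "mock", "kepler"                        # hypothetical

    json_artifact = f"{prefix}-{metric_name}--{mode}.json"
    # -> mock-mock_acpi_power1_average--node.json  (written under <results>/artifacts/)

    chart = f"{actual_label}-vs-{predicted_label}-mock_node.png"
    # -> mock-vs-kepler-mock_node.png  (written under <results>/images/;
    #    "mock_node" stands in for snake_case(r.name))
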
71 changes: 29 additions & 42 deletions e2e/tools/validator/src/validator/prometheus/__init__.py
@@ -79,58 +79,45 @@ def __str__(self) -> str:


class Result(NamedTuple):
expected_series: Series
actual_series: Series
expected_dropped: int
predicted_series: Series

actual_dropped: int
predicted_dropped: int

mse: ValueOrError
mape: ValueOrError

def print(self):
# ruff: noqa: T201 (Suppressed as printing is intentional and necessary in this context)
print("Expected:")
print("────────────────────────────────────────")
print(f" {self.expected_series.query}")
print(f" {self.expected_series.values}")
print("\t\t\t\t⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯\n")

print("Actual:")
print("────────────────────────────────────────\n")
print(f"{self.actual_series.query}")
print(f"{self.actual_series.values}")
print("\t\t\t\t⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯\n")

print(f"MSE : {self.mse}")
print(f"MAPE: {self.mape}")
print("\t\t\t\t━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")


def validate_arrays(actual: npt.ArrayLike, expected: npt.ArrayLike) -> tuple[npt.ArrayLike, npt.ArrayLike]:
actual, expected = np.array(actual), np.array(expected)
if len(actual) != len(expected):
msg = f"actual and expected must be of equal length: {len(actual)} != {len(expected)}"

def validate_arrays(actual: npt.ArrayLike, predicted: npt.ArrayLike) -> tuple[npt.ArrayLike, npt.ArrayLike]:
actual, predicted = np.array(actual), np.array(predicted)

if len(actual) != len(predicted):
msg = f"actual and predicted must be of equal length: {len(actual)} != {len(predicted)}"
raise ValueError(msg)
if len(actual) == 0 or len(expected) == 0:
msg = f"actual ({len(actual)}) and expected ({len(expected)}) must not be empty"

if len(actual) == 0 or len(predicted) == 0:
msg = f"actual ({len(actual)}) and predicted ({len(predicted)}) must not be empty"
raise ValueError(msg)

return (actual, expected)
return (actual, predicted)


def mse(actual: npt.ArrayLike, expected: npt.ArrayLike) -> ValueOrError:
def mse(actual: npt.ArrayLike, predicted: npt.ArrayLike) -> ValueOrError:
try:
actual, expected = validate_arrays(actual, expected)
return ValueOrError(value=np.square(np.subtract(actual, expected)).mean())
actual, predicted = validate_arrays(actual, predicted)
return ValueOrError(value=np.square(np.subtract(actual, predicted)).mean())

# ruff: noqa: BLE001 (Suppressed as we want to catch all exceptions here)
except Exception as e:
return ValueOrError(value=0, error=str(e))


def mape(actual: npt.ArrayLike, expected: npt.ArrayLike) -> ValueOrError:
def mape(actual: npt.ArrayLike, predicted: npt.ArrayLike) -> ValueOrError:
try:
actual, expected = validate_arrays(actual, expected)
return ValueOrError(value=100 * np.abs(np.divide(np.subtract(actual, expected), actual)).mean())
actual, predicted = validate_arrays(actual, predicted)
return ValueOrError(value=100 * np.abs(np.divide(np.subtract(actual, predicted), actual)).mean())

# ruff: noqa: BLE001 (Suppressed as we want to catch all exceptions here)
except Exception as e:
return ValueOrError(value=0, error=str(e))
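
A worked example of the two metrics as defined above (MSE is the mean of the squared differences; MAPE is the mean absolute error relative to actual, expressed as a percentage):

    import numpy as np

    actual = np.array([10.0, 20.0, 40.0])
    predicted = np.array([12.0, 18.0, 44.0])

    mse_value = np.square(np.subtract(actual, predicted)).mean()
    # (4 + 4 + 16) / 3 = 8.0
    mape_value = 100 * np.abs(np.divide(np.subtract(actual, predicted), actual)).mean()
    # 100 * (0.2 + 0.1 + 0.1) / 3 ≈ 13.33
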
@@ -245,20 +232,20 @@ def compare(
start: datetime,
end: datetime,
actual_query: str,
expected_query: str,
predicted_query: str,
) -> Result:
expected_series = self.single_series(expected_query, start, end)
actual_series = self.single_series(actual_query, start, end)
predicted_series = self.single_series(predicted_query, start, end)

expected, actual = filter_by_equal_timestamps(expected_series, actual_series)
expected_dropped = len(expected_series.samples) - len(expected.samples)
actual, predicted = filter_by_equal_timestamps(actual_series, predicted_series)
actual_dropped = len(actual_series.samples) - len(actual.samples)
predicted_dropped = len(predicted_series.samples) - len(predicted.samples)

return Result(
mse=mse(actual.values, expected.values),
mape=mape(actual.values, expected.values),
expected_series=expected_series,
mse=mse(actual.values, predicted.values),
mape=mape(actual.values, predicted.values),
actual_series=actual_series,
expected_dropped=expected_dropped,
predicted_series=predicted_series,
actual_dropped=actual_dropped,
predicted_dropped=predicted_dropped,
)
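
compare() now reports how many samples each side lost when the two series were aligned on shared timestamps. A minimal sketch of that bookkeeping, assuming filter_by_equal_timestamps keeps only samples whose timestamps appear in both series (the helper itself is not part of this diff):

    # Hypothetical (timestamp, value) samples; real ones come from Prometheus range queries.
    actual_samples = [(1, 10.0), (2, 11.0), (3, 12.0), (4, 13.0)]
    predicted_samples = [(2, 10.5), (3, 12.5), (5, 14.0)]

    common = {t for t, _ in actual_samples} & {t for t, _ in predicted_samples}
    actual_kept = [s for s in actual_samples if s[0] in common]
    predicted_kept = [s for s in predicted_samples if s[0] in common]

    actual_dropped = len(actual_samples) - len(actual_kept)           # 2
    predicted_dropped = len(predicted_samples) - len(predicted_kept)  # 1
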
33 changes: 28 additions & 5 deletions e2e/tools/validator/src/validator/validations/__init__.py
@@ -1,6 +1,6 @@
import logging
import re
from typing import NamedTuple
from typing import Any, NamedTuple

import yaml

@@ -45,21 +45,44 @@ def mode(self) -> str:

class Validation(NamedTuple):
name: str
expected: QueryTemplate
actual: QueryTemplate
predicted: QueryTemplate
actual_label: str
predicted_label: str

units: str = ""
max_mse: float | None = None
max_mape: float | None = None


def yaml_node(yml: dict[str, Any], key_path: list[str], default: Any) -> Any:
node = yml

for x in key_path:
if x in node:
node = node[x]
else:
return default

return node


def read_validations(file_path: str, promql_vars: dict[str, str]) -> list[Validation]:
with open(file_path) as file:
yml = yaml.safe_load(file)

mapping = yaml_node(yml, ["config", "mapping"], {})
actual_label = mapping.get("actual", "actual")
predicted_label = mapping.get("predicted", "predicted")

return [
Validation(
name=v["name"],
expected=QueryTemplate(v["expected"], promql_vars),
actual=QueryTemplate(v["actual"], promql_vars),
max_mse=v.get("max_mse", None),
actual=QueryTemplate(v[actual_label], promql_vars),
predicted=QueryTemplate(v[predicted_label], promql_vars),
actual_label=actual_label,
predicted_label=predicted_label,
units=v.get("units", ""),
max_mape=v.get("max_mape", None),
)
for v in yml["validations"]
Expand Down
@@ -188,24 +188,24 @@ def test_mse():

def test_mse_with_large_arrays():
actual = np.random.rand(1000)
expected = np.random.rand(1000)
assert mse(actual, expected).value >= 0.0 # MSE should always be non-negative
predicted = np.random.rand(1000)
assert mse(actual, predicted).value >= 0.0 # MSE should always be non-negative


def test_mse_expections():
v = mse([], [])
assert v.value == 0.0
assert v.error is not None
assert str(v) == "Error: actual (0) and expected (0) must not be empty"
assert str(v) == "Error: actual (0) and predicted (0) must not be empty"


def test_mse_with_different_lengths():
actual = [1, 2, 3]
expected = [1, 2]
v = mse(actual, expected)
predicted = [1, 2]
v = mse(actual, predicted)
assert v.value == 0.0
assert v.error is not None
assert str(v) == "Error: actual and expected must be of equal length: 3 != 2"
assert str(v) == "Error: actual and predicted must be of equal length: 3 != 2"


class MockPromClient: