Merge pull request #1754 from sthaha/validator-md-graphs
chore(validator): add graphs to markdown report
vprashar2929 authored Sep 3, 2024
2 parents 05af659 + b37f514 commit a6e5cf8
Showing 3 changed files with 119 additions and 14 deletions.
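At a glance, the commit threads chart generation through the existing report flow: dump_query_result now returns the path of the JSON dump it writes, each ValidationResult records those paths, and write_md_report renders a PNG chart from them and links it into the markdown report. A rough sketch of that flow using names from the diff below (the result, v, cmp, and md objects are simplified stand-ins, not code from this commit):

    result.expected_filepath = dump_query_result(results_dir, v.expected, cmp.expected_series)  # returns the JSON dump path
    result.actual_filepath = dump_query_result(results_dir, v.actual, cmp.actual_series)
    img_path = create_charts_for_result(result)  # plots both series, saves images/<name>.png, returns a relative path
    md.img(result.name, img_path)                # embeds the chart into the markdown report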
2 changes: 2 additions & 0 deletions e2e/tools/validator/pyproject.toml
@@ -28,6 +28,8 @@ dependencies = [
  "prometheus-api-client",
  "pyyaml",
  "numpy",
  "pandas",
  "matplotlib",
]

[project.scripts]
119 changes: 112 additions & 7 deletions e2e/tools/validator/src/validator/cli/__init__.py
@@ -13,6 +13,11 @@
from dataclasses import dataclass

import click
import matplotlib.pyplot as plt
import numpy as np
import numpy.typing as npt
from matplotlib import ticker
from matplotlib.dates import DateFormatter

from validator import config
from validator.__about__ import __version__
@@ -33,8 +38,13 @@ class ValidationResult:
    name: str
    actual: str
    expected: str

    mse: ValueOrError
    mape: ValueOrError

    actual_filepath: str = ""
    expected_filepath: str = ""

    mse_passed: bool = True
    mape_passed: bool = True
    unexpected_error: str = ""
@@ -88,13 +98,17 @@ def duration(self) -> datetime.timedelta:
        return self.end_time - self.start_time


-class MarkdownReport(typing.NamedTuple):
+class MarkdownReport:
    file: typing.TextIO

    def __init__(self, file: typing.TextIO) -> None:
        self.file = file

    def write(self, text: str) -> None:
        self.file.write(text)

    def close(self) -> None:
        self.flush()
        self.file.close()

    def flush(self) -> None:
@@ -118,6 +132,9 @@ def code(self, text: str) -> None:
    def li(self, text: str) -> None:
        self.write(f" - {text}\n")

    def img(self, alt_text: str, image_path: str) -> None:
        self.write(f"![{alt_text}]({image_path})\n")

    def table(self, headers: list[str], rows: list[list[str]]) -> None:
        self.write("| " + " | ".join(headers) + " |\n")
        self.write("| " + " | ".join(["---" for _ in headers]) + " |\n")
@@ -190,15 +207,91 @@ def write_md_report(results_dir: str, r: TestResult):
        md.write("\n**Results**:\n")
        md.li(f"MSE : `{v.mse}`")
        md.li(f"MAPE : `{v.mape} %`")
        md.write("\n**Charts**:\n")
        img_path = create_charts_for_result(v)
        md.img(v.name, img_path)

    md.flush()
    md.close()

    click.secho("Report Generated ", fg="bright_green")
    click.secho(f" * report: {path} ", fg="bright_green")
    click.secho(f" * time-range: {r.start_time} - {r.end_time}", fg="bright_green")


def extract_dates_and_values(json_path: str) -> tuple[npt.NDArray, list[float]]:
    with open(json_path) as f:
        json_data = json.load(f)

    timestamps = json_data["timestamps"]
    time_list = np.array([datetime.datetime.fromtimestamp(ts, tz=datetime.UTC) for ts in timestamps])
    values = json_data["values"]
    return time_list, values


def snake_case(s: str) -> str:
    return re.sub("[_-]+", "_", re.sub(r"[/\s]+", "_", s)).lower().strip()


def create_charts_for_result(r: ValidationResult) -> str:
    actual_json_path = r.actual_filepath
    expected_json_path = r.expected_filepath

    dirname = os.path.dirname(r.actual_filepath)
    images_dir = os.path.join(dirname, "images")
    os.makedirs(images_dir, exist_ok=True)

    fig, ax = plt.subplots(figsize=(18, 7), sharex=True, sharey=True)
    plt.title(r.name)

    # actual in blue
    time, values = extract_dates_and_values(actual_json_path)
    ax.plot(time, values, marker="x", color="#024abf", label=r.actual)

    # expected in orange
    time, values = extract_dates_and_values(expected_json_path)
    ax.plot(time, values, marker="o", color="#ff742e", label=r.expected)

    # Set the x-axis tick format to display time
    ax.xaxis.set_major_formatter(DateFormatter("%H:%M:%S"))

    # Set the x-axis ticks to roughly one every 10 seconds
    ax.xaxis.set_major_locator(ticker.MaxNLocator(int((time[-1] - time[0]).total_seconds() / 10) + 1))
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

    # TODO: add units to validation queries

    ax.legend(bbox_to_anchor=(0.5, -0.15), ncol=1)

    err_report = ""
    if r.mse.error is None:
        err_report += f"\nMSE: {r.mse.value:.2f}"

    if r.mape.error is None:
        err_report += f"\nMAPE: {r.mape.value:.2f}%"

    ax.text(
        0.98,
        1.10,
        err_report.lstrip(),
        transform=ax.transAxes,
        fontsize=14,
        verticalalignment="top",
        horizontalalignment="right",
        bbox={"facecolor": "white", "alpha": 0.5},
    )

    ax.grid(True)
    plt.tight_layout()

    # export it
    filename = snake_case(r.name)
    out_file = os.path.join(images_dir, f"{filename}.png")

    plt.savefig(out_file, format="png")

    return os.path.relpath(out_file, dirname)


def create_report_dir(report_dir: str) -> tuple[str, str]:
    # run git describe command and get the output as the report name
    git_describe = subprocess.run(["/usr/bin/git", "describe", "--tag"], stdout=subprocess.PIPE, check=False)
@@ -209,9 +302,11 @@ def create_report_dir(report_dir: str) -> tuple[str, str]:
    return results_dir, tag


-def dump_query_result(raw_results_dir: str, query: QueryTemplate, series: Series):
-    out_file = f"{query.metric_name}--{query.mode}.json"
-    with open(os.path.join(raw_results_dir, out_file), "w") as f:
+def dump_query_result(raw_results_dir: str, query: QueryTemplate, series: Series) -> str:
+    filename = f"{query.metric_name}--{query.mode}.json"
+    out_file = os.path.join(raw_results_dir, filename)
+
+    with open(out_file, "w") as f:
        f.write(
            json.dumps(
                {
@@ -222,6 +317,7 @@ def dump_query_result(raw_results_dir: str, query: QueryTemplate, series: Series):
                }
            )
        )
    return out_file


@click.group(
@@ -402,6 +498,14 @@ def run_validation(
            )
            click.secho(f"\t MSE : {cmp.mse}", fg="bright_blue")
            click.secho(f"\t MAPE: {cmp.mape} %\n", fg="bright_blue")

            if cmp.expected_dropped > 0 or cmp.actual_dropped > 0:
                logger.warning(
                    "dropped %d samples from expected and %d samples from actual",
                    cmp.expected_dropped,
                    cmp.actual_dropped,
                )

            result.mse, result.mape = cmp.mse, cmp.mape

            result.mse_passed = v.max_mse is None or (cmp.mse.error is None and cmp.mse.value <= v.max_mse)
@@ -413,8 +517,9 @@
            if not result.mape_passed:
                click.secho(f"MAPE exceeded threshold. mape: {cmp.mape}, max_mape: {v.max_mape}", fg="red")

-            dump_query_result(results_dir, v.expected, cmp.expected_series)
-            dump_query_result(results_dir, v.actual, cmp.actual_series)
+            result.expected_filepath = dump_query_result(results_dir, v.expected, cmp.expected_series)
+            result.actual_filepath = dump_query_result(results_dir, v.actual, cmp.actual_series)

        # ruff: noqa: BLE001 (Suppressed as we want to catch all exceptions here)
        except Exception as e:
            click.secho(f"\t {v.name} failed: {e} ", fg="red")
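For reference, the per-validation section of the generated markdown report now ends roughly like this (the metric name and error values are placeholders, not output from a real run):

    **Results**:
     - MSE : `0.42`
     - MAPE : `1.87 %`

    **Charts**:
    ![node_platform_power](images/node_platform_power.png)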
12 changes: 5 additions & 7 deletions e2e/tools/validator/src/validator/prometheus/__init__.py
@@ -81,6 +81,9 @@ def __str__(self) -> str:
class Result(NamedTuple):
    expected_series: Series
    actual_series: Series
    expected_dropped: int
    actual_dropped: int

    mse: ValueOrError
    mape: ValueOrError

@@ -248,19 +251,14 @@ def compare(
        actual_series = self.single_series(actual_query, start, end)

        expected, actual = filter_by_equal_timestamps(expected_series, actual_series)

        expected_dropped = len(expected_series.samples) - len(expected.samples)
        actual_dropped = len(actual_series.samples) - len(actual.samples)
-        if expected_dropped > 0 or actual_dropped > 0:
-            logger.warning(
-                "dropped %d samples from expected and %d samples from actual",
-                expected_dropped,
-                actual_dropped,
-            )

        return Result(
            mse=mse(actual.values, expected.values),
            mape=mape(actual.values, expected.values),
            expected_series=expected_series,
            actual_series=actual_series,
            expected_dropped=expected_dropped,
            actual_dropped=actual_dropped,
        )
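With the dropped-sample counts now carried on Result rather than logged inside compare(), the caller decides how to surface them. A minimal sketch of that caller-side check, mirroring the run_validation change above (the compare() call is abbreviated since its full signature is collapsed in this diff):

    cmp = comparator.compare(start, end, expected_query, actual_query)  # illustrative call only
    if cmp.expected_dropped > 0 or cmp.actual_dropped > 0:
        logger.warning(
            "dropped %d samples from expected and %d samples from actual",
            cmp.expected_dropped,
            cmp.actual_dropped,
        )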
