From 18cc85dbc511153dd53b91772af83d1ddbab7e3c Mon Sep 17 00:00:00 2001
From: Sunil Thaha
Date: Wed, 4 Sep 2024 11:38:36 +1000
Subject: [PATCH 1/4] chore(compose/metal): add node-exporter

Signed-off-by: Sunil Thaha
---
 .../compose/validation/metal/compose.yaml     | 25 +++++++++++++++++++
 .../compose/validation/metal/override.yaml    |  1 +
 .../prometheus/scrape-configs/metal.yaml      |  4 +++
 3 files changed, 30 insertions(+)

diff --git a/manifests/compose/validation/metal/compose.yaml b/manifests/compose/validation/metal/compose.yaml
index 21ceceefce..174a463095 100644
--- a/manifests/compose/validation/metal/compose.yaml
+++ b/manifests/compose/validation/metal/compose.yaml
@@ -86,10 +86,35 @@ services:
       retries: ${HEALTHCHECK_RETRIES:-3}
       start_period: ${HEALTHCHECK_START_PERIOD:-1m}
 
+  node-exporter:
+    image: quay.io/prometheus/node-exporter:latest
+    pid: host
+    ports:
+      - 9100:9100
+    volumes:
+      - type: bind
+        source: /proc
+        target: /host/proc
+      - type: bind
+        source: /sys
+        target: /host/sys
+      - type: bind
+        source: /
+        target: /rootfs
+    command:
+      - --path.procfs=/host/proc
+      - --path.sysfs=/host/sys
+      - --path.rootfs=/rootfs
+      - --collector.rapl # Enable RAPL collector
+      - --collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)
+    user: root
+    networks:
+      - node-exporter-network
 
 networks:
   scaph-network:
   kepler-network:
+  node-exporter-network:
   #
   #
   # NOTE: To allow access to VM from prometheus container
diff --git a/manifests/compose/validation/metal/override.yaml b/manifests/compose/validation/metal/override.yaml
index 5c162eda3c..25b276b9e4 100644
--- a/manifests/compose/validation/metal/override.yaml
+++ b/manifests/compose/validation/metal/override.yaml
@@ -15,6 +15,7 @@ services:
     networks:
       - scaph-network
       - kepler-network
+      - node-exporter-network
       - virt-net # external n/w for accessing VM
     volumes:
       - type: bind
diff --git a/manifests/compose/validation/metal/prometheus/scrape-configs/metal.yaml b/manifests/compose/validation/metal/prometheus/scrape-configs/metal.yaml
index 923dde87ff..3079d5cd50 100644
--- a/manifests/compose/validation/metal/prometheus/scrape-configs/metal.yaml
+++ b/manifests/compose/validation/metal/prometheus/scrape-configs/metal.yaml
@@ -10,3 +10,7 @@ scrape_configs:
   - job_name: metal
     static_configs:
       - targets: [kepler-metal:8888]
+
+  - job_name: node-exporter
+    static_configs:
+      - targets: [node-exporter:9100]

From a1b69ce4955bc3d972333beef6d4d6d85a6acea0 Mon Sep 17 00:00:00 2001
From: Sunil Thaha
Date: Wed, 4 Sep 2024 11:44:55 +1000
Subject: [PATCH 2/4] chore(validator): show number of dropped samples in report

Signed-off-by: Sunil Thaha
---
Reviewer note: result.actual_dropped is now taken from cmp.actual_dropped;
it was previously assigned cmp.expected_dropped, so the report's "Actual"
dropped count always mirrored the "Expected" one.

 e2e/tools/validator/src/validator/cli/__init__.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/e2e/tools/validator/src/validator/cli/__init__.py b/e2e/tools/validator/src/validator/cli/__init__.py
index 33ddd5be9d..bdec89817f 100644
--- a/e2e/tools/validator/src/validator/cli/__init__.py
+++ b/e2e/tools/validator/src/validator/cli/__init__.py
@@ -42,6 +42,9 @@ class ValidationResult:
     mse: ValueOrError
     mape: ValueOrError
 
+    actual_dropped: int = 0
+    expected_dropped: int = 0
+
     actual_filepath: str = ""
     expected_filepath: str = ""
 
@@ -204,6 +207,11 @@ def write_md_report(results_dir: str, r: TestResult):
             md.code(v.unexpected_error)
             continue
 
+        if v.actual_dropped or v.expected_dropped:
+            md.write("\n**Dropped**:\n")
+            md.li(f"Actual  : `{v.actual_dropped}`")
+            md.li(f"Expected: `{v.expected_dropped}`")
+
         md.write("\n**Results**:\n")
         md.li(f"MSE  : `{v.mse}`")
         md.li(f"MAPE : `{v.mape} %`")
@@ -501,6 +509,9 @@ def run_validation(
             click.secho(f"\t MSE : {cmp.mse}", fg="bright_blue")
             click.secho(f"\t MAPE: {cmp.mape} %\n", fg="bright_blue")
 
+            result.expected_dropped = cmp.expected_dropped
+            result.actual_dropped = cmp.actual_dropped
+
             if cmp.expected_dropped > 0 or cmp.actual_dropped > 0:
                 logger.warning(
                     "dropped %d samples from expected and %d samples from actual",

From ca4f6b12167cae43f70e5f778ffc41ab7dd71b9f Mon Sep 17 00:00:00 2001
From: Sunil Thaha
Date: Wed, 4 Sep 2024 12:37:40 +1000
Subject: [PATCH 3/4] chore(validator): wait 5s before initial start

Signed-off-by: Sunil Thaha
---
 e2e/tools/validator/scripts/stressor.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/e2e/tools/validator/scripts/stressor.sh b/e2e/tools/validator/scripts/stressor.sh
index 5a9f159a62..c16465429a 100755
--- a/e2e/tools/validator/scripts/stressor.sh
+++ b/e2e/tools/validator/scripts/stressor.sh
@@ -33,6 +33,10 @@ main() {
 		0:5
 	)
 
+	# sleep 5 so that first run and the second run look the same
+	echo "Warmup .."
+	run stress-ng --cpu "$cpus" --cpu-method ackermann --cpu-load 0 --timeout 5
+
 	for i in $(seq 1 5); do
 		echo "Running: $i/5"
 		for x in "${load_curve[@]}"; do

From 76eab79d233b67f4049e05f95a934775997965b6 Mon Sep 17 00:00:00 2001
From: Sunil Thaha
Date: Wed, 4 Sep 2024 12:38:07 +1000
Subject: [PATCH 4/4] chore(validator): note if samples were dropped

Signed-off-by: Sunil Thaha
---
 e2e/tools/validator/src/validator/cli/__init__.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/e2e/tools/validator/src/validator/cli/__init__.py b/e2e/tools/validator/src/validator/cli/__init__.py
index bdec89817f..872627e1fd 100644
--- a/e2e/tools/validator/src/validator/cli/__init__.py
+++ b/e2e/tools/validator/src/validator/cli/__init__.py
@@ -50,6 +50,7 @@ class ValidationResult:
 
     mse_passed: bool = True
     mape_passed: bool = True
+
     unexpected_error: str = ""
 
     def __init__(self, name: str, actual: str, expected: str) -> None:
@@ -59,13 +60,15 @@ def __init__(self, name: str, actual: str, expected: str) -> None:
 
     @property
     def verdict(self) -> str:
+        note = " (dropped)" if self.actual_dropped > 0 or self.expected_dropped > 0 else ""
+
         if self.unexpected_error or self.mse.error or self.mape.error:
-            return "ERROR"
+            return f"ERROR{note}"
 
         if self.mse_passed and self.mape_passed:
-            return "PASS"
+            return f"PASS{note}"
 
-        return "FAIL"
+        return f"FAIL{note}"
 
 
 @dataclass