-
Notifications
You must be signed in to change notification settings - Fork 187
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(validator): add support to validate essential metrics produced by Kepler
This commit introduces functionality to validate essential metrics produced by Kepler. The following comparisons are included: - Node Exporter Comparison - Validates `node_rapl_<package|core|dram>` metrics against `kepler_node_<package|core|dram>{dev}` - Kepler Process Comparison - Compares `kepler_process_<package|core|dram|platform|other|uncore>{latest}` metrics to `kepler_process_<package|core|dram|platform|other|uncore>{dev}` - Kepler Node Comparison - Validates `kepler_node_<package|core|dram|platform|other|uncore>{latest}` against `kepler_node_<package|core|dram|platform|other|uncore>{dev}` Additionally, the following changes are made to existing functionality: - Adds a new `metric_validations.yaml` file, which includes PromQL queries for the comparisons along with threshold values - Updates the existing `stressor.sh` script to support a few more parameters that make it more flexible - warmup time: time to wait before starting the stressor - cooldown time: time to wait after the stressor has finished - repeats: number of times to repeat the stressor, since the regression test should not repeat the stressor multiple times - Adds a new `validator-regression.yaml` file, which includes the configuration for the regression test Signed-off-by: vprashar2929 <[email protected]>
- Loading branch information
1 parent
021b544
commit 28889fe
Showing
9 changed files
with
524 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,354 @@ | ||
config: | ||
mapping: | ||
actual: latest | ||
predicted: dev | ||
|
||
validations: | ||
# Node Exporter RAPL vs. Kepler node (dev) comparison | ||
- name: node-rapl - kepler-package | ||
units: Watts | ||
mapping: | ||
actual: node-rapl | ||
predicted: kepler-package | ||
|
||
node-rapl: | | ||
sum( | ||
rate( | ||
node_rapl_package_joules_total[{rate_interval}] | ||
) | ||
) | ||
kepler-package: | | ||
sum( | ||
rate( | ||
kepler_node_package_joules_total{{ | ||
job="dev", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
) | ||
max_mae: 1.01 | ||
|
||
- name: node-rapl - kepler-core | ||
units: Watts | ||
mapping: | ||
actual: node-rapl | ||
predicted: kepler-core | ||
|
||
node-rapl: | | ||
sum( | ||
rate( | ||
node_rapl_core_joules_total[{rate_interval}] | ||
) | ||
) | ||
kepler-core: | | ||
sum( | ||
rate( | ||
kepler_node_core_joules_total{{ | ||
job="dev", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
) | ||
max_mae: 1.01 | ||
|
||
- name: node-rapl - kepler-dram | ||
units: Watts | ||
mapping: | ||
actual: node-rapl | ||
predicted: kepler-dram | ||
|
||
node-rapl: | | ||
sum( | ||
rate( | ||
node_rapl_dram_joules_total[{rate_interval}] | ||
) | ||
) | ||
kepler-dram: | | ||
sum( | ||
rate( | ||
kepler_node_dram_joules_total{{ | ||
job="dev", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
) | ||
max_mae: 1.01 | ||
|
||
# absolute power comparison | ||
- name: Total - absolute | ||
latest: | | ||
sum( | ||
rate( | ||
kepler_process_joules_total{{ | ||
job="latest", | ||
}}[{rate_interval}] | ||
) | ||
) | ||
dev: | | ||
sum( | ||
rate( | ||
kepler_process_joules_total{{ | ||
job="dev", | ||
}}[{rate_interval}] | ||
) | ||
) | ||
max_mae: 2.01 | ||
|
||
# CPU time comparison | ||
- name: cpu-time | ||
units: Milliseconds | ||
latest: | | ||
sum( | ||
rate( | ||
kepler_process_bpf_cpu_time_ms_total{{ | ||
job="latest" | ||
}}[{rate_interval}] | ||
) | ||
) | ||
dev: | | ||
sum( | ||
rate( | ||
kepler_process_bpf_cpu_time_ms_total{{ | ||
job="dev", | ||
}}[{rate_interval}] | ||
) | ||
) | ||
# max_mae: 20.0 | ||
|
||
# Kepler process comparison (latest vs. dev) | ||
- name: platform - dynamic | ||
latest: | | ||
sum( | ||
rate( | ||
kepler_process_platform_joules_total{{ | ||
job="latest", mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
) | ||
dev: | | ||
sum( | ||
rate( | ||
kepler_process_platform_joules_total{{ | ||
job="dev", mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
) | ||
max_mae: 2.01 | ||
|
||
- name: package - dynamic | ||
units: Watts | ||
latest: | | ||
sum( | ||
rate( | ||
kepler_process_package_joules_total{{ | ||
job="latest", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
) | ||
dev: | | ||
sum( | ||
rate( | ||
kepler_process_package_joules_total{{ | ||
job="dev", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
) | ||
max_mae: 2.01 | ||
|
||
- name: core - dynamic | ||
units: Watts | ||
latest: | | ||
sum( | ||
rate( | ||
kepler_process_core_joules_total{{ | ||
job="latest", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
) | ||
dev: | | ||
sum( | ||
rate( | ||
kepler_process_core_joules_total{{ | ||
job="dev", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
) | ||
max_mae: 2.01 | ||
|
||
- name: dram - dynamic | ||
units: Watts | ||
latest: | | ||
sum( | ||
rate( | ||
kepler_process_dram_joules_total{{ | ||
job="latest", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
) | ||
dev: | | ||
sum( | ||
rate( | ||
kepler_process_dram_joules_total{{ | ||
job="dev", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
) | ||
max_mae: 2.01 | ||
|
||
- name: other - dynamic | ||
units: Watts | ||
latest: | | ||
sum( | ||
rate( | ||
kepler_process_other_joules_total{{ | ||
job="latest", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
) | ||
dev: | | ||
sum( | ||
rate( | ||
kepler_process_other_joules_total{{ | ||
job="dev", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
) | ||
max_mae: 2.01 | ||
|
||
- name: uncore - dynamic | ||
units: Watts | ||
latest: | | ||
sum( | ||
rate( | ||
kepler_process_uncore_joules_total{{ | ||
job="latest", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
) | ||
dev: | | ||
sum( | ||
rate( | ||
kepler_process_uncore_joules_total{{ | ||
job="dev", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
) | ||
max_mae: 2.01 | ||
|
||
# Kepler node comparison (latest vs. dev) | ||
- name: node platform - dynamic | ||
units: Watts | ||
latest: | | ||
rate(kepler_node_platform_joules_total{{ | ||
job="latest", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
dev: | | ||
rate(kepler_node_platform_joules_total{{ | ||
job="dev", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
max_mae: 2.01 | ||
|
||
- name: node package - dynamic | ||
units: Watts | ||
latest: | | ||
rate(kepler_node_package_joules_total{{ | ||
job="latest", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
dev: | | ||
rate(kepler_node_package_joules_total{{ | ||
job="dev", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
max_mae: 2.01 | ||
|
||
- name: node core - dynamic | ||
units: Watts | ||
latest: | | ||
rate(kepler_node_core_joules_total{{ | ||
job="latest", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
dev: | | ||
rate(kepler_node_core_joules_total{{ | ||
job="dev", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
max_mae: 2.01 | ||
|
||
- name: node dram - dynamic | ||
units: Watts | ||
latest: | | ||
rate(kepler_node_dram_joules_total{{ | ||
job="latest", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
dev: | | ||
rate(kepler_node_dram_joules_total{{ | ||
job="dev", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
max_mae: 2.01 | ||
|
||
- name: node other - dynamic | ||
units: Watts | ||
latest: | | ||
rate(kepler_node_other_joules_total{{ | ||
job="latest", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
dev: | | ||
rate(kepler_node_other_joules_total{{ | ||
job="dev", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
max_mae: 2.01 | ||
|
||
- name: node uncore - dynamic | ||
units: Watts | ||
latest: | | ||
rate(kepler_node_uncore_joules_total{{ | ||
job="latest", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
dev: | | ||
rate(kepler_node_uncore_joules_total{{ | ||
job="dev", | ||
mode="dynamic", | ||
}}[{rate_interval}] | ||
) | ||
max_mae: 2.01 |
Oops, something went wrong.