Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: make validator stressor configuration on runtime and load #1843

Merged
merged 1 commit into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 66 additions & 18 deletions e2e/tools/validator/scripts/stressor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,32 +13,80 @@ run() {
echo " ‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾"
}

# stepwise load curve: each step is 20 seconds
# Each entry is "<load>:<seconds>"; main() splits it on ':' into the load value
# (text before the colon) and the step duration (text after the colon).
# Presumably the load is a CPU percentage handed to stress-ng --cpu-load
# (TODO confirm against the loop body in main()).
# The curve ramps 0 -> 100 -> 0 in increments of 20; one full cycle is
# 11 steps * 20 seconds = 220 seconds.
declare -a load_curve_stepwise=(
0:20
20:20
40:20
60:20
80:20
100:20
80:20
60:20
40:20
20:20
0:20
)

# default load curve: varying durations
# Each entry is "<load>:<seconds>"; main() splits it on ':' into the load value
# (text before the colon) and the step duration (text after the colon).
# The first and last steps (load 0) last 5 seconds, the peak (load 100) lasts
# 30 seconds and every other step lasts 20 seconds, so one full cycle is
# 5 + 4*20 + 30 + 4*20 + 5 = 200 seconds.
declare -a load_curve_default=(
0:5
10:20
25:20
50:20
75:20
100:30
75:20
50:20
25:20
10:20
0:5
)

main() {
local total_time=0
local repeats=5
local curve_type="default"

while getopts "t:r:c:" opt; do
case $opt in
t) total_time=$OPTARG ;;
c) curve_type=$OPTARG ;;
*) echo "Usage: $0 [-t total_time_in_seconds] [-c curve_type(default|stepwise)]" >&2; exit 1 ;;
esac
done

# Select load curve based on curve_type
local -a load_curve
case $curve_type in
"default") load_curve=("${load_curve_default[@]}") ;;
"stepwise") load_curve=("${load_curve_stepwise[@]}") ;;
*) echo "Invalid curve type. Use 'default' or 'stepwise'" >&2; exit 1 ;;
esac

local cpus
cpus=$(nproc)

# load and time
local -a load_curve=(
0:5
10:20
25:20
50:20
75:20
100:30
75:20
50:20
25:20
10:20
0:5
)

# sleep 5 so that first run and the second run look the same
# calculate the total duration of one cycle of the load curve
local total_cycle_time=0
for x in "${load_curve[@]}"; do
local time="${x##*:}"
total_cycle_time=$((total_cycle_time + time))
done

# calculate the repeats if total_time is provided
if [ "$total_time" -gt 0 ]; then
repeats=$((total_time / total_cycle_time))
fi

echo "Total time: $total_time seconds, Repeats: $repeats, Curve type: $curve_type"

# sleep 5 so that first run and the second run look the same
echo "Warmup .."
run stress-ng --cpu "$cpus" --cpu-method ackermann --cpu-load 0 --timeout 5

for i in $(seq 1 5); do
echo "Running: $i/5"
for i in $(seq 1 "$repeats"); do
echo "Running: $i/$repeats"
for x in "${load_curve[@]}"; do
local load="${x%%:*}"
local time="${x##*:}s"
Expand Down
16 changes: 16 additions & 0 deletions e2e/tools/validator/src/validator/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,17 @@ class Prometheus(NamedTuple):
job: PrometheusJob


# Settings for the stress workload, built by load() from the `stressor`
# section of the validator config.
class Stressor(NamedTuple):
# Total run time in seconds; load() falls back to 1200 when it is not set.
# Presumably the value the stresser forwards to the stress script as `-t`
# (TODO confirm how it reaches the stresser).
total_runtime_seconds: int
# Name of the load curve; load() falls back to "default" when it is not set.
# Presumably forwarded to the stress script as `-c`, which accepts
# "default" or "stepwise" (TODO confirm).
curve_type: str


class Validator(NamedTuple):
log_level: str
remote: Remote
metal: Metal
prometheus: Prometheus
stressor: Stressor
validations_file: str

def __repr__(self):
Expand Down Expand Up @@ -105,13 +111,23 @@ def load(config_file: str) -> Validator:
job=job,
)

# The `stressor` section is optional: a missing, null or empty section falls
# back to the defaults (1200 seconds, "default" curve). Use .get() rather than
# indexing so that an absent key does not raise KeyError before the fallback
# can apply; individual keys left out of the section also use their defaults.
stressor_config = config.get("stressor") or {}
stressor = Stressor(
    total_runtime_seconds=stressor_config.get("total_runtime_seconds", 1200),
    curve_type=stressor_config.get("curve_type", "default"),
)

validations_file = config.get("validations_file", "validations.yaml")
log_level = config.get("log_level", "warn")

return Validator(
remote=remote,
metal=metal,
prometheus=prometheus,
stressor=stressor,
validations_file=validations_file,
log_level=log_level,
)
6 changes: 5 additions & 1 deletion e2e/tools/validator/src/validator/stresser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ def __init__(self, config: config.Remote):
self.user = config.user
self.port = config.port
self.password = config.password
self.total_runtime_seconds = config.total_runtime_seconds
self.curve_type = config.curve_type

self.ssh_client = paramiko.SSHClient()
self.ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
Expand Down Expand Up @@ -69,11 +71,13 @@ def run_script(self, script_path: str) -> ScriptResult:

# ruff: noqa: S108 (Suppressed hard-coded path because we want to intentionally copy stress.sh inside `/tmp` dir)
target_script = "/tmp/stress.sh"
cli_options = f"-t {self.total_runtime_seconds} -c {self.curve_type}"
command = f"{target_script} {cli_options}"
self.copy(script_path, target_script)

# ruff: noqa: DTZ005 (Suppressed non-time-zone aware object creation as it is not necessary for this use case)
start_time = datetime.now()
_, stdout, stderr = self.ssh_client.exec_command(target_script)
_, stdout, stderr = self.ssh_client.exec_command(command)

# ruff: noqa: T201 (Suppressed as printing is intentional and necessary in this context)
print("stdout output:")
Expand Down
44 changes: 44 additions & 0 deletions e2e/tools/validator/tests/validator/config/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ def minimal_config_file(config_file):
prometheus:
url: http://localhost:9090
stressor:
total_runtime_seconds: 1200
curve_type: default
"""
)

Expand Down Expand Up @@ -58,6 +62,34 @@ def test_minimal_config_file(minimal_config_file):
assert prometheus.job.vm == "vm"


# Fixture: writes a config (via the config_file fixture) that sets the
# `stressor` section explicitly, with total_runtime_seconds and curve_type.
@pytest.fixture
def stressor_config_file(config_file):
return config_file(
"""
remote:
host: example.com
metal:
vm:
pid: 1337
prometheus:
url: http://localhost:9090
stressor:
total_runtime_seconds: 1200
curve_type: default
"""
)


# Checks that load() exposes the values of the `stressor` section on
# config.stressor.
# NOTE(review): 1200 and "default" are also the fallback values load() uses,
# so this test would still pass if the section were ignored; non-default
# values in the fixture would make the check stricter.
def test_stressor_config(stressor_config_file):
config = load(stressor_config_file)
stressor = config.stressor
assert stressor.total_runtime_seconds == 1200
assert stressor.curve_type == "default"


@pytest.fixture
def config_file_use_password(config_file):
return config_file(
Expand All @@ -72,6 +104,10 @@ def config_file_use_password(config_file):
prometheus:
url: http://localhost:9090
stressor:
total_runtime_seconds: 1200
curve_type: default
"""
)

Expand All @@ -98,6 +134,10 @@ def config_file_job_override(config_file):
vm:
pid: 1337
stressor:
total_runtime_seconds: 1200
curve_type: default
prometheus:
url: http://localhost:9090
Expand Down Expand Up @@ -130,6 +170,10 @@ def config_file_password_empty_pkey(config_file):
prometheus:
url: http://localhost:9090
stressor:
total_runtime_seconds: 1200
curve_type: default
"""
)

Expand Down
3 changes: 3 additions & 0 deletions e2e/tools/validator/validations.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ config:
mapping:
actual: metal
predicted: vm
stressor:
total_runtime_seconds: 1200
curve_type: default

validations:
- name: node-rapl - kepler-package
Expand Down
Loading