Skip to content

Commit

Permalink
perf test improvements (#223)
Browse files Browse the repository at this point in the history
* kube-burner timeouts and metric collection improvements

* make the kube-burner timeout configurable, defaulting to 10m
* attempt to collect the resource usage metrics even if the job fails

* tests: configurable dqlite trace level

We're adding two new test settings that can be used to enable
dqlite tracing:

* TEST_DQLITE_TRACE_LEVEL
* TEST_RAFT_TRACE_LEVEL

If set, those settings are appended (as LIBDQLITE_TRACE and LIBRAFT_TRACE
entries) to the instance's /var/snap/k8s/common/args/k8s-dqlite-env file.

* tests: expose k8s-dqlite debug mode

* Update copyright headers

We'll update the copyright headers as expected by the
"tox -e fmt" job:

  Copyright 2025 Canonical, Ltd.
  • Loading branch information
petrutlucian94 authored Jan 17, 2025
1 parent 93efe11 commit 8dc178b
Show file tree
Hide file tree
Showing 10 changed files with 70 additions and 16 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/performance.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,13 @@ jobs:
mkdir -p ./results/base-code
sg lxd -c 'tox -e performance'
- name: Generate 3 node Graphs
if: always()
run: |
cd test/performance
sudo Rscript parse-performance-metrics.R -p ./results/head -o ./results/head -f *three-node.log
sudo Rscript parse-performance-metrics.R -p ./results/base-code -o ./results/base-code -f *three-node.log
- name: Generate single node Graphs
if: always()
run: |
cd test/performance
mkdir -p ./results/single-node
Expand All @@ -107,6 +109,7 @@ jobs:
sudo Rscript parse-performance-metrics.R -p ./results/single-node -o ./results/single-node -f *single-node.log
- name: Upload performance result
uses: actions/upload-artifact@v4
if: always()
with:
name: performance-results
path: ${{ github.workspace }}/test/performance/results
Expand Down
2 changes: 1 addition & 1 deletion test/performance/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright 2024 Canonical, Ltd.
# Copyright 2025 Canonical, Ltd.
#
import itertools
import logging
Expand Down
11 changes: 7 additions & 4 deletions test/performance/tests/test_multi_node.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright 2024 Canonical, Ltd.
# Copyright 2025 Canonical, Ltd.
#
from typing import List

Expand Down Expand Up @@ -31,6 +31,9 @@ def test_three_node_load(instances: List[harness.Instance]):

metrics.configure_kube_burner(cluster_node)
process_dict = metrics.collect_metrics(instances)
metrics.run_kube_burner(cluster_node)
metrics.stop_metrics(instances, process_dict)
metrics.pull_metrics(instances, "three-node")
try:
metrics.run_kube_burner(cluster_node)
finally:
# Collect the metrics even if kube-burner fails.
metrics.stop_metrics(instances, process_dict)
metrics.pull_metrics(instances, "three-node")
11 changes: 7 additions & 4 deletions test/performance/tests/test_single_node.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright 2024 Canonical, Ltd.
# Copyright 2025 Canonical, Ltd.
#
from test_util import harness, metrics

Expand All @@ -8,6 +8,9 @@ def test_single_node_load(session_instance: harness.Instance):
"""Test the performance of a single node cluster with all features enabled."""
metrics.configure_kube_burner(session_instance)
process_dict = metrics.collect_metrics([session_instance])
metrics.run_kube_burner(session_instance)
metrics.stop_metrics([session_instance], process_dict)
metrics.pull_metrics([session_instance], "single-node")
try:
metrics.run_kube_burner(session_instance)
finally:
# Collect the metrics even if kube-burner fails.
metrics.stop_metrics([session_instance], process_dict)
metrics.pull_metrics([session_instance], "single-node")
11 changes: 10 additions & 1 deletion test/performance/tests/test_util/config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright 2024 Canonical, Ltd.
# Copyright 2025 Canonical, Ltd.
#
import os
from pathlib import Path
Expand Down Expand Up @@ -36,6 +36,9 @@
or "https://github.com/kube-burner/kube-burner/releases/download/v1.2/kube-burner-1.2-Linux-x86_64.tar.gz"
)

# Global kube-burner invocation timeout.
KUBE_BURNER_TIMEOUT = os.getenv("TEST_KUBE_BURNER_TIMEOUT") or "10m"

# FLAVOR is the flavour to use for running the performance tests.
FLAVOR = os.getenv("TEST_FLAVOR") or ""

Expand All @@ -48,3 +51,9 @@

# LXD_PROFILE_NAME is the profile name to use for LXD containers.
LXD_PROFILE_NAME = os.getenv("TEST_LXD_PROFILE_NAME") or "k8s-performance"

# Enable k8s-dqlite debug logging.
K8S_DQLITE_DEBUG = os.getenv("TEST_K8S_DQLITE_DEBUG") == "1"
# Set the following to 1 for verbose dqlite trace messages.
DQLITE_TRACE_LEVEL = os.getenv("TEST_DQLITE_TRACE_LEVEL")
RAFT_TRACE_LEVEL = os.getenv("TEST_RAFT_TRACE_LEVEL")
2 changes: 1 addition & 1 deletion test/performance/tests/test_util/harness/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright 2024 Canonical, Ltd.
# Copyright 2025 Canonical, Ltd.
#
from test_util.harness.base import Harness, HarnessError, Instance
from test_util.harness.lxd import LXDHarness
Expand Down
2 changes: 1 addition & 1 deletion test/performance/tests/test_util/harness/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright 2024 Canonical, Ltd.
# Copyright 2025 Canonical, Ltd.
#
import subprocess
from functools import cached_property, partial
Expand Down
2 changes: 1 addition & 1 deletion test/performance/tests/test_util/harness/lxd.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright 2024 Canonical, Ltd.
# Copyright 2025 Canonical, Ltd.
#
import logging
import os
Expand Down
13 changes: 11 additions & 2 deletions test/performance/tests/test_util/metrics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright 2024 Canonical, Ltd.
# Copyright 2025 Canonical, Ltd.
#
import os
from typing import List
Expand Down Expand Up @@ -82,4 +82,13 @@ def run_kube_burner(instance: harness.Instance):
"""Copies kubeconfig and runs kube-burner on the instance."""
instance.exec(["mkdir", "-p", "/root/.kube"])
instance.exec(["k8s", "config", ">", "/root/.kube/config"])
instance.exec(["/root/kube-burner", "init", "-c", "/root/api-intensive.yaml"])
instance.exec(
[
"/root/kube-burner",
"init",
"--timeout",
config.KUBE_BURNER_TIMEOUT,
"-c",
"/root/api-intensive.yaml",
]
)
29 changes: 28 additions & 1 deletion test/performance/tests/test_util/util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright 2024 Canonical, Ltd.
# Copyright 2025 Canonical, Ltd.
#
import ipaddress
import json
Expand Down Expand Up @@ -146,6 +146,30 @@ def _as_int(value: Optional[str]) -> Optional[int]:
return None


def configure_dqlite_logging(instance: harness.Instance):
    """Apply the optional k8s-dqlite logging settings to an instance.

    Depending on which test settings are enabled, up to three lines are
    echoed towards the snap argument files:

    * ``LIBDQLITE_TRACE=<level>`` when ``config.DQLITE_TRACE_LEVEL`` is set
    * ``LIBRAFT_TRACE=<level>`` when ``config.RAFT_TRACE_LEVEL`` is set
    * ``--debug`` when ``config.K8S_DQLITE_DEBUG`` is true

    The first two target the ``k8s-dqlite-env`` file, the last one the
    ``k8s-dqlite`` arguments file. k8s-dqlite has to be restarted for the
    settings to take effect; this function does not restart it.

    Args:
        instance: the harness instance on which the commands are executed.
    """
    env_file = "/var/snap/k8s/common/args/k8s-dqlite-env"
    args_file = "/var/snap/k8s/common/args/k8s-dqlite"

    # Collect (line, destination) pairs first, preserving the order in which
    # the commands are issued: dqlite trace, raft trace, debug flag.
    pending = []

    dqlite_level = config.DQLITE_TRACE_LEVEL
    if dqlite_level:
        pending.append((f"LIBDQLITE_TRACE={dqlite_level}", env_file))

    raft_level = config.RAFT_TRACE_LEVEL
    if raft_level:
        pending.append((f"LIBRAFT_TRACE={raft_level}", env_file))

    if config.K8S_DQLITE_DEBUG:
        pending.append(("--debug", args_file))

    # NOTE(review): ">>" is passed as a plain argument; presumably
    # instance.exec runs the command through a shell so that it acts as an
    # append redirection -- confirm against the harness implementation.
    for line, destination in pending:
        instance.exec(["echo", line, ">>", destination])


def setup_k8s_snap(
instance: harness.Instance,
tmp_path: Path,
Expand Down Expand Up @@ -185,6 +209,9 @@ def setup_k8s_snap(
cmd += [config.SNAP_NAME, "--channel", channel]

instance.exec(cmd)

configure_dqlite_logging(instance)

if connect_interfaces:
LOG.info("Ensure k8s interfaces and network requirements")
instance.exec(["/snap/k8s/current/k8s/hack/init.sh"], stdout=subprocess.DEVNULL)
Expand Down

0 comments on commit 8dc178b

Please sign in to comment.