Skip to content

Commit

Permalink
test: add metrics e2e tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mykysha committed Dec 20, 2024
1 parent 88d83ff commit 297b90d
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 12 deletions.
2 changes: 2 additions & 0 deletions test/e2e/config/common/controller_manager_config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
apiVersion: config.kueue.x-k8s.io/v1beta1
kind: Configuration
metrics:
enableClusterQueueResources: true
leaderElection:
leaderElect: true
controller:
Expand Down
2 changes: 1 addition & 1 deletion test/e2e/config/common/manager_e2e_patch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
value: IfNotPresent
- op: add
path: /spec/template/spec/containers/0/args/-
value: --feature-gates=MultiKueueBatchJobWithManagedBy=true,TopologyAwareScheduling=true
value: --feature-gates=MultiKueueBatchJobWithManagedBy=true,TopologyAwareScheduling=true,LocalQueueMetrics=true
151 changes: 140 additions & 11 deletions test/e2e/singlecluster/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,15 @@ import (

"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
gomegaformat "github.com/onsi/gomega/format"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

config "sigs.k8s.io/kueue/apis/config/v1beta1"
"sigs.k8s.io/kueue/apis/kueue/v1beta1"
utiltesting "sigs.k8s.io/kueue/pkg/util/testing"
testingjobspod "sigs.k8s.io/kueue/pkg/util/testingjobs/pod"
"sigs.k8s.io/kueue/test/util"
)
Expand All @@ -39,8 +42,28 @@ const (
)

var _ = ginkgo.Describe("Metrics", func() {
var (
ns *corev1.Namespace
)

ginkgo.BeforeEach(func() {
ns = &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{GenerateName: "e2e-metrics-"}}
gomega.Expect(k8sClient.Create(ctx, ns)).To(gomega.Succeed())
})

ginkgo.AfterEach(func() {
gomega.Expect(util.DeleteNamespace(ctx, k8sClient, ns)).To(gomega.Succeed())
})

ginkgo.It("should ensure the metrics endpoint is serving metrics", func() {
ginkgo.By("Creating a ClusterRoleBinding for the service account to allow access to metrics")
var (
resourceFlavor *v1beta1.ResourceFlavor
clusterQueue *v1beta1.ClusterQueue
localQueue *v1beta1.LocalQueue
workload *v1beta1.Workload
)

metricsReaderClusterRoleBinding := &rbacv1.ClusterRoleBinding{
ObjectMeta: metav1.ObjectMeta{Name: "metrics-reader-rolebinding"},
Subjects: []rbacv1.Subject{
Expand All @@ -63,36 +86,142 @@ var _ = ginkgo.Describe("Metrics", func() {
})
})

ginkgo.By("creating resource flavor", func() {
resourceFlavor = utiltesting.MakeResourceFlavor("test-flavor").
Obj()

gomega.Expect(k8sClient.Create(ctx, resourceFlavor)).To(gomega.Succeed())
})
ginkgo.DeferCleanup(func() {
ginkgo.By("Deleting the resource flavor", func() {
util.ExpectObjectToBeDeleted(ctx, k8sClient, utiltesting.MakeResourceFlavor(resourceFlavor.GetName()).Obj(), true)
})
})

ginkgo.By("Creating a cluster queue", func() {
clusterQueue = utiltesting.MakeClusterQueue("test-cq").
ResourceGroup(
*utiltesting.MakeFlavorQuotas(resourceFlavor.GetName()).
Resource(corev1.ResourceCPU, "1").
Resource(corev1.ResourceMemory, "1Gi").
Obj(),
).
Obj()

gomega.Expect(k8sClient.Create(ctx, clusterQueue)).To(gomega.Succeed())
})
ginkgo.DeferCleanup(func() {
ginkgo.By("Deleting the cluster queue", func() {
util.ExpectObjectToBeDeleted(ctx, k8sClient, utiltesting.MakeClusterQueue(clusterQueue.GetName()).Obj(), true)
})
})

ginkgo.By("Creating a local queue", func() {
localQueue = utiltesting.MakeLocalQueue("test-lq", ns.GetName()).
ClusterQueue(clusterQueue.GetName()).
Obj()

gomega.Expect(k8sClient.Create(ctx, localQueue)).To(gomega.Succeed())
})
ginkgo.DeferCleanup(func() {
ginkgo.By("Deleting the local queue", func() {
util.ExpectObjectToBeDeleted(ctx, k8sClient, utiltesting.MakeLocalQueue(localQueue.GetName(), ns.GetName()).Obj(), true)
})
})

ginkgo.By("Creating a workload", func() {
workload = utiltesting.MakeWorkload("test-workload", ns.GetName()).
Queue(localQueue.GetName()).
PodSets(
*utiltesting.MakePodSet("ps1", 1).Obj(),
).
Request(corev1.ResourceCPU, "1").
Obj()

gomega.Expect(k8sClient.Create(ctx, workload)).To(gomega.Succeed())
})

ginkgo.DeferCleanup(func() {
ginkgo.By("Deleting the workload", func() {
util.ExpectObjectToBeDeleted(ctx, k8sClient, utiltesting.MakeWorkload(workload.GetName(), ns.GetName()).Obj(), true)
})
})

util.ExpectWorkloadsToBeAdmitted(ctx, k8sClient, workload)

ginkgo.By("Creating the curl-metrics pod to access the metrics endpoint")
pod := testingjobspod.MakePod("curl-metrics", config.DefaultNamespace).
ServiceAccountName(serviceAccountName).
Image(util.E2eTTestCurlImage, []string{
"/bin/sh", "-c", fmt.Sprintf(
"curl -s -k -H \"Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)\" https://%s.%s.svc.cluster.local:8443/metrics",
metricsServiceName, config.DefaultNamespace,
),
"sleep", "5m",
}).
Obj()
gomega.Expect(k8sClient.Create(ctx, pod)).Should(gomega.Succeed())
ginkgo.DeferCleanup(func() {
ginkgo.By("Deleting the pod", func() {
util.ExpectObjectToBeDeleted(ctx, k8sClient, pod, true)
util.ExpectObjectToBeDeletedWithTimeout(ctx, k8sClient, pod, true, util.LongTimeout)
})
})

ginkgo.By("Waiting for the curl-metrics pod to complete.", func() {
ginkgo.By("Waiting for the curl-metrics pod to run.", func() {
gomega.Eventually(func(g gomega.Gomega) {
createdPod := &corev1.Pod{}
g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(pod), createdPod)).To(gomega.Succeed())
g.Expect(createdPod.Status.Phase).To(gomega.Equal(corev1.PodSucceeded))
g.Expect(createdPod.Status.Phase).To(gomega.Equal(corev1.PodRunning))
}, util.LongTimeout).Should(gomega.Succeed())
})

// metrics lists the metric names that must each appear as a substring of the
// output scraped from the manager's /metrics endpoint (asserted in the
// Eventually loop further down in this test).
// NOTE(review): the kueue_cluster_queue_* resource/quota series presumably
// depend on `metrics.enableClusterQueueResources: true` in the e2e controller
// manager config — confirm.
metrics := []string{
"controller_runtime_reconcile_total",

"kueue_admission_attempts_total",
"kueue_admission_attempt_duration_seconds",
"kueue_pending_workloads",
"kueue_reserving_active_workloads",
"kueue_admitted_active_workloads",
"kueue_quota_reserved_workloads_total",
"kueue_quota_reserved_wait_time_seconds",
"kueue_admitted_workloads_total",
"kueue_admission_wait_time_seconds",
"kueue_cluster_queue_resource_usage",
"kueue_cluster_queue_status",
"kueue_cluster_queue_resource_reservation",
"kueue_cluster_queue_nominal_quota",
"kueue_cluster_queue_borrowing_limit",
"kueue_cluster_queue_lending_limit",
"kueue_cluster_queue_weighted_share",

// Per-LocalQueue metrics. Presumably only exported when the
// LocalQueueMetrics feature gate is on (the e2e manager patch passes
// LocalQueueMetrics=true via --feature-gates) — TODO confirm.
"kueue_local_queue_pending_workloads",
"kueue_local_queue_reserving_active_workloads",
"kueue_local_queue_admitted_active_workloads",
"kueue_local_queue_quota_reserved_workloads_total",
"kueue_local_queue_quota_reserved_wait_time_seconds",
"kueue_local_queue_admitted_workloads_total",
"kueue_local_queue_admission_wait_time_seconds",
"kueue_local_queue_status",
}

// Save gomega's current output-length limit, set it to 0 for the rest of this
// test, and restore the saved value during cleanup so the package-level
// setting does not leak into other specs.
// NOTE(review): per the gomega format docs, MaxLength = 0 turns truncation
// off; presumably this is so a failed substring assertion prints the whole
// metrics payload — confirm.
defaultGomegaMaxLength := gomegaformat.MaxLength
gomegaformat.MaxLength = 0
ginkgo.DeferCleanup(func() {
gomegaformat.MaxLength = defaultGomegaMaxLength
})

ginkgo.By("Getting the metrics by checking curl-metrics logs", func() {
cmd := exec.Command("kubectl", "logs", "curl-metrics", "-n", config.DefaultNamespace)
metricsOutput, err := cmd.CombinedOutput()
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Expect(metricsOutput).To(gomega.ContainSubstring("controller_runtime_reconcile_total"))
gomega.Eventually(func(g gomega.Gomega) {
cmd := exec.Command("kubectl", "exec", "-n", config.DefaultNamespace, "curl-metrics", "--", "/bin/sh", "-c",
fmt.Sprintf(
"curl -s -k -H \"Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)\" https://%s.%s.svc.cluster.local:8443/metrics ",
metricsServiceName, config.DefaultNamespace,
),
)
metricsOutput, err := cmd.CombinedOutput()
g.Expect(err).NotTo(gomega.HaveOccurred())
for _, metric := range metrics {
g.Expect(string(metricsOutput)).To(gomega.ContainSubstring(metric))
}
}, util.Timeout).Should(gomega.Succeed())
})
})
})

0 comments on commit 297b90d

Please sign in to comment.