From 0fb096ff7bec22e179b1cfd2b74515d1f5b789ac Mon Sep 17 00:00:00 2001 From: torredil Date: Thu, 7 Nov 2024 14:00:02 +0000 Subject: [PATCH] Add unit/e2e tests for NVMe log page metrics Signed-off-by: torredil --- Makefile | 2 +- pkg/metrics/metrics_test.go | 6 +- pkg/metrics/nvme_test.go | 330 +++++++++++++++++++++++++++++ tests/e2e/nvme_metrics.go | 166 +++++++++++++++ tests/e2e/testsuites/testsuites.go | 4 + 5 files changed, 504 insertions(+), 4 deletions(-) create mode 100644 pkg/metrics/nvme_test.go create mode 100644 tests/e2e/nvme_metrics.go diff --git a/Makefile b/Makefile index 8b5d58cd20..fc7b587c2a 100644 --- a/Makefile +++ b/Makefile @@ -129,7 +129,7 @@ e2e/single-az: bin/helm bin/ginkgo TEST_PATH=./tests/e2e/... \ GINKGO_FOCUS="\[ebs-csi-e2e\] \[single-az\]" \ GINKGO_PARALLEL=5 \ - HELM_EXTRA_FLAGS="--set=controller.volumeModificationFeature.enabled=true,sidecars.provisioner.additionalArgs[0]='--feature-gates=VolumeAttributesClass=true',sidecars.resizer.additionalArgs[0]='--feature-gates=VolumeAttributesClass=true'" \ + HELM_EXTRA_FLAGS="--set=controller.volumeModificationFeature.enabled=true,sidecars.provisioner.additionalArgs[0]='--feature-gates=VolumeAttributesClass=true',sidecars.resizer.additionalArgs[0]='--feature-gates=VolumeAttributesClass=true',node.enableMetrics=true" \ ./hack/e2e/run.sh .PHONY: e2e/multi-az diff --git a/pkg/metrics/metrics_test.go b/pkg/metrics/metrics_test.go index 070b5a8302..c9a0671f9f 100644 --- a/pkg/metrics/metrics_test.go +++ b/pkg/metrics/metrics_test.go @@ -34,7 +34,7 @@ func TestMetricRecorder(t *testing.T) { m.IncreaseCount("test_total", map[string]string{"key": "value"}) }, expected: ` -# HELP test_total [ALPHA] ebs_csi_aws_com metric +# HELP test_total ebs_csi_aws_com metric # TYPE test_total counter test_total{key="value"} 1 `, @@ -46,7 +46,7 @@ test_total{key="value"} 1 m.ObserveHistogram("test", 1.5, map[string]string{"key": "value"}, []float64{1, 2, 3}) }, expected: ` -# HELP test [ALPHA] ebs_csi_aws_com metric +# HELP test ebs_csi_aws_com metric # TYPE test histogram test{key="value",le="1"} 0 test{key="value",le="2"} 1 @@ -66,7 +66,7 @@ test_count{key="value"} 1 m.IncreaseCount("test_re_register_total", map[string]string{"key": "value2"}) }, expected: ` -# HELP test_re_register_total [ALPHA] ebs_csi_aws_com metric +# HELP test_re_register_total ebs_csi_aws_com metric # TYPE test_re_register_total counter test_re_register_total{key="value1"} 2 test_re_register_total{key="value2"} 1 diff --git a/pkg/metrics/nvme_test.go b/pkg/metrics/nvme_test.go new file mode 100644 index 0000000000..220ab0aafb --- /dev/null +++ b/pkg/metrics/nvme_test.go @@ -0,0 +1,330 @@ +// Copyright 2024 The Kubernetes Authors. +// +// Licensed under the Apache License, Version 2.0 (the 'License'); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an 'AS IS' BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build linux +// +build linux + +package metrics + +import ( + "bytes" + "encoding/binary" + "encoding/json" + "reflect" + "strings" + "testing" +) + +func TestNewNVMECollector(t *testing.T) { + testPath := "/test/path" + testInstanceID := "test-instance-1" + + collector := NewNVMECollector(testPath, testInstanceID) + + if collector == nil { + t.Fatal("NewNVMECollector returned nil") + } + + if collector.csiMountPointPath != testPath { + t.Errorf("csiMountPointPath = %v, want %v", collector.csiMountPointPath, testPath) + } + + if collector.instanceID != testInstanceID { + t.Errorf("instanceID = %v, want %v", collector.instanceID, testInstanceID) + } + + expectedMetrics := []string{ + "total_read_ops", + "total_write_ops", + "total_read_bytes", + "total_write_bytes", + "total_read_time", + "total_write_time", + "ebs_volume_performance_exceeded_iops", + "ebs_volume_performance_exceeded_tp", + "ec2_instance_ebs_performance_exceeded_iops", + "ec2_instance_ebs_performance_exceeded_tp", + "volume_queue_length", + "read_io_latency_histogram", + "write_io_latency_histogram", + } + + for _, metricName := range expectedMetrics { + if _, exists := collector.metrics[metricName]; !exists { + t.Errorf("metric %s not found in collector.metrics", metricName) + } + } +} + +func TestConvertHistogram(t *testing.T) { + tests := []struct { + name string + histogram Histogram + wantCount uint64 + wantBuckets map[float64]uint64 + }{ + { + name: "standard histogram with multiple bins", + histogram: Histogram{ + BinCount: 3, + Bins: [64]HistogramBin{ + {Lower: 0, Upper: 100, Count: 5}, + {Lower: 100, Upper: 200, Count: 3}, + {Lower: 200, Upper: 300, Count: 2}, + }, + }, + wantCount: 10, + wantBuckets: map[float64]uint64{ + 100: 5, + 200: 8, + 300: 10, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotCount, gotBuckets := convertHistogram(tt.histogram) + + if gotCount != tt.wantCount { + t.Errorf("convertHistogram() count = %v, want %v", gotCount, tt.wantCount) + } + + if !reflect.DeepEqual(gotBuckets, tt.wantBuckets) { + t.Errorf("convertHistogram() buckets = %v, want %v", gotBuckets, tt.wantBuckets) + } + }) + } +} + +func TestParseLogPage(t *testing.T) { + tests := []struct { + name string + input []byte + want EBSMetrics + wantErr string + }{ + { + name: "valid log page", + input: func() []byte { + metrics := EBSMetrics{ + EBSMagic: 0x3C23B510, + ReadOps: 100, + WriteOps: 200, + ReadBytes: 1024, + WriteBytes: 2048, + TotalReadTime: 5000, + TotalWriteTime: 6000, + EBSIOPSExceeded: 10, + EBSThroughputExceeded: 20, + } + buf := new(bytes.Buffer) + if err := binary.Write(buf, binary.LittleEndian, metrics); err != nil { + t.Fatalf("failed to create test data: %v", err) + } + return buf.Bytes() + }(), + want: EBSMetrics{ + EBSMagic: 0x3C23B510, + ReadOps: 100, + WriteOps: 200, + ReadBytes: 1024, + WriteBytes: 2048, + TotalReadTime: 5000, + TotalWriteTime: 6000, + EBSIOPSExceeded: 10, + EBSThroughputExceeded: 20, + }, + wantErr: "", + }, + { + name: "invalid magic number", + input: func() []byte { + metrics := EBSMetrics{ + EBSMagic: 0x12345678, + } + buf := new(bytes.Buffer) + if err := binary.Write(buf, binary.LittleEndian, metrics); err != nil { + t.Fatalf("failed to create test data: %v", err) + } + return buf.Bytes() + }(), + want: EBSMetrics{}, + wantErr: "invalid EBS magic number: 12345678", + }, + { + name: "empty data", + input: []byte{}, + want: EBSMetrics{}, + wantErr: "failed to parse log page", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := parseLogPage(tt.input) + + if tt.wantErr != "" { + if err == nil { + t.Errorf("parseLogPage() error = nil, wantErr %v", tt.wantErr) + return + } + if !strings.Contains(err.Error(), tt.wantErr) { + t.Errorf("parseLogPage() error = %v, wantErr %v", err, tt.wantErr) + return + } + return + } + + if err != nil { + t.Errorf("parseLogPage() unexpected error = %v", err) + return + } + + if got.EBSMagic != tt.want.EBSMagic { + t.Errorf("parseLogPage() magic number = %x, want %x", got.EBSMagic, tt.want.EBSMagic) + } + if got.ReadOps != tt.want.ReadOps { + t.Errorf("parseLogPage() ReadOps = %v, want %v", got.ReadOps, tt.want.ReadOps) + } + if got.WriteOps != tt.want.WriteOps { + t.Errorf("parseLogPage() WriteOps = %v, want %v", got.WriteOps, tt.want.WriteOps) + } + if got.ReadBytes != tt.want.ReadBytes { + t.Errorf("parseLogPage() ReadBytes = %v, want %v", got.ReadBytes, tt.want.ReadBytes) + } + if got.WriteBytes != tt.want.WriteBytes { + t.Errorf("parseLogPage() WriteBytes = %v, want %v", got.WriteBytes, tt.want.WriteBytes) + } + }) + } +} + +func TestMapDevicePathsToVolumeIDs(t *testing.T) { + tests := []struct { + name string + devicePaths []string + lsblkOutput []byte + want map[string]string + wantErr bool + }{ + { + name: "standard device mapping", + devicePaths: []string{ + "/dev/nvme1n1", + "/dev/nvme2n1", + }, + lsblkOutput: func() []byte { + data := LsblkOutput{ + BlockDevices: []BlockDevice{ + {Name: "nvme1n1", Serial: "vol-123456789"}, + {Name: "nvme2n1", Serial: "vol-987654321"}, + }, + } + b, err := json.Marshal(data) + if err != nil { + t.Fatalf("failed to create test data: %v", err) + } + return b + }(), + want: map[string]string{ + "/dev/nvme1n1": "vol-123456789", + "/dev/nvme2n1": "vol-987654321", + }, + wantErr: false, + }, + { + name: "device without hyphen", + devicePaths: []string{ + "/dev/nvme1n1", + }, + lsblkOutput: func() []byte { + data := LsblkOutput{ + BlockDevices: []BlockDevice{ + {Name: "nvme1n1", Serial: "vol123456789"}, + }, + } + b, err := json.Marshal(data) + if err != nil { + t.Fatalf("failed to create test data: %v", err) + } + return b + }(), + want: map[string]string{ + "/dev/nvme1n1": "vol-123456789", + }, + wantErr: false, + }, + { + name: "empty device paths", + devicePaths: []string{}, + lsblkOutput: func() []byte { + data := LsblkOutput{ + BlockDevices: []BlockDevice{ + {Name: "nvme1n1", Serial: "vol-123456789"}, + }, + } + b, err := json.Marshal(data) + if err != nil { + t.Fatalf("failed to create test data: %v", err) + } + return b + }(), + want: map[string]string{}, + wantErr: false, + }, + { + name: "invalid json", + devicePaths: []string{"/dev/nvme1n1"}, + lsblkOutput: []byte(`invalid json`), + want: nil, + wantErr: true, + }, + { + name: "no matching devices", + devicePaths: []string{ + "/dev/nvme3n1", + }, + lsblkOutput: func() []byte { + data := LsblkOutput{ + BlockDevices: []BlockDevice{ + {Name: "nvme1n1", Serial: "vol-123456789"}, + }, + } + b, err := json.Marshal(data) + if err != nil { + t.Fatalf("failed to create test data: %v", err) + } + return b + }(), + want: map[string]string{}, + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := mapDevicePathsToVolumeIDs(tt.devicePaths, tt.lsblkOutput) + + if (err != nil) != tt.wantErr { + t.Errorf("mapDevicePathsToVolumeIDs() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("mapDevicePathsToVolumeIDs() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/tests/e2e/nvme_metrics.go b/tests/e2e/nvme_metrics.go new file mode 100644 index 0000000000..4c7d1232ed --- /dev/null +++ b/tests/e2e/nvme_metrics.go @@ -0,0 +1,166 @@ +// Copyright 2024 The Kubernetes Authors. +// +// Licensed under the Apache License, Version 2.0 (the 'License'); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an 'AS IS' BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package e2e + +import ( + "context" + "fmt" + "io" + "net/http" + "time" + + awscloud "github.com/kubernetes-sigs/aws-ebs-csi-driver/pkg/cloud" + ebscsidriver "github.com/kubernetes-sigs/aws-ebs-csi-driver/pkg/driver" + "github.com/kubernetes-sigs/aws-ebs-csi-driver/tests/e2e/driver" + "github.com/kubernetes-sigs/aws-ebs-csi-driver/tests/e2e/testsuites" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/kubernetes/test/e2e/framework" + "k8s.io/kubernetes/test/e2e/framework/kubectl" + admissionapi "k8s.io/pod-security-admission/api" +) + +var _ = Describe("[ebs-csi-e2e] [single-az] NVMe Metrics", func() { + f := framework.NewDefaultFramework("ebs") + f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged + + var ( + cs clientset.Interface + ns *v1.Namespace + ebsDriver driver.PVTestDriver + ) + + BeforeEach(func() { + cs = f.ClientSet + ns = f.Namespace + ebsDriver = driver.InitEbsCSIDriver() + }) + + It("should create a stateful pod and read NVMe metrics from node plugin", func() { + pod := testsuites.PodDetails{ + Cmd: "while true; do echo $(date -u) >> /mnt/test-1/out.txt; sleep 5; done", + Volumes: []testsuites.VolumeDetails{ + { + CreateVolumeParameters: map[string]string{ + ebscsidriver.VolumeTypeKey: awscloud.VolumeTypeGP3, + ebscsidriver.FSTypeKey: ebscsidriver.FSTypeExt4, + }, + ClaimSize: driver.MinimumSizeForVolumeType(awscloud.VolumeTypeGP3), + VolumeMount: testsuites.VolumeMountDetails{ + NameGenerate: "test-volume-", + MountPathGenerate: "/mnt/test-", + }, + }, + }, + } + + By("Setting up pod and volumes") + tpod, _ := pod.SetupWithDynamicVolumes(cs, ns, ebsDriver) + defer tpod.Cleanup() + + By("Creating the stateful pod") + tpod.Create() + tpod.WaitForRunning() + + By("Getting name of the node where test pod is running") + p, err := cs.CoreV1().Pods(ns.Name).Get(context.TODO(), tpod.GetName(), metav1.GetOptions{}) + framework.ExpectNoError(err, "Failed to get pod") + nodeName := p.Spec.NodeName + Expect(nodeName).NotTo(BeEmpty(), "Node name should not be empty") + + By("Finding the CSI node plugin on the node where test pod is running") + csiPods, err := cs.CoreV1().Pods("kube-system").List(context.TODO(), metav1.ListOptions{ + LabelSelector: "app=ebs-csi-node", + }) + framework.ExpectNoError(err, "Failed to get CSI node pods") + + var csiNodePod *v1.Pod + for _, pod := range csiPods.Items { + if pod.Spec.NodeName == nodeName { + csiNodePod = &pod + break + } + } + Expect(csiNodePod).NotTo(BeNil(), "No CSI node pod found") + + By("Setting up port-forwarding") + args := []string{"port-forward", + "-n", "kube-system", + csiNodePod.Name, + "3302:3302"} + + go kubectl.RunKubectlOrDie("kube-system", args...) + + By("Getting metrics from endpoint") + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + + metricsOutput, err := getMetricsWithRetry(ctx) + framework.ExpectNoError(err, "Failed to get metrics after retries") + + By("Verifying NVMe metrics") + expectedMetrics := []string{ + "total_read_ops", + "total_write_ops", + "total_read_bytes", + "total_write_bytes", + "read_io_latency_histogram", + "write_io_latency_histogram", + } + + for _, metric := range expectedMetrics { + Expect(metricsOutput).To(ContainSubstring(metric), + fmt.Sprintf("Metric %s not found in response", metric)) + } + }) +}) + +func getMetricsWithRetry(ctx context.Context) (string, error) { + var metricsOutput string + + backoff := wait.Backoff{ + Duration: 5 * time.Second, + Factor: 2.0, + Steps: 5, + } + + err := wait.ExponentialBackoffWithContext(ctx, backoff, func(ctx context.Context) (bool, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost:3302/metrics", nil) + if err != nil { + return false, fmt.Errorf("failed to create request: %w", err) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + framework.Logf("Failed to get metrics: %v, retrying...", err) + return false, nil + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + framework.Logf("Failed to read metrics: %v, retrying...", err) + return false, nil + } + + metricsOutput = string(body) + return true, nil + }) + + return metricsOutput, err +} diff --git a/tests/e2e/testsuites/testsuites.go b/tests/e2e/testsuites/testsuites.go index 32ffe0f43b..a99c756197 100644 --- a/tests/e2e/testsuites/testsuites.go +++ b/tests/e2e/testsuites/testsuites.go @@ -654,6 +654,10 @@ func (t *TestPod) Create() { framework.ExpectNoError(err) } +func (t *TestPod) GetName() string { + return t.pod.Name +} + func (t *TestPod) WaitForSuccess() { err := e2epod.WaitForPodSuccessInNamespace(context.Background(), t.client, t.pod.Name, t.namespace.Name) framework.ExpectNoError(err)