Skip to content

Commit

Permalink
add windows test
Browse files Browse the repository at this point in the history
  • Loading branch information
matmerr committed Oct 1, 2024
1 parent a07a585 commit 067f818
Show file tree
Hide file tree
Showing 9 changed files with 145 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,15 @@ spec:
fieldRef:
apiVersion: v1
fieldPath: status.hostIP
livenessProbe:
httpGet:
path: /metrics
port: {{ .Values.retinaPort }}
initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds | default "30" }}
periodSeconds: {{ .Values.livenessProbe.periodSeconds | default "30" }}
timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds | default "1" }}
failureThreshold: {{ .Values.livenessProbe.failureThreshold | default "3" }}
successThreshold: {{ .Values.livenessProbe.successThreshold | default "1" }}
securityContext:
capabilities:
add:
Expand Down
1 change: 0 additions & 1 deletion test/e2e/framework/azure/create-cluster-with-npm.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ func (c *CreateNPMCluster) Run() error {

npmCluster.Properties.AutoUpgradeProfile = &armcontainerservice.ManagedClusterAutoUpgradeProfile{
NodeOSUpgradeChannel: to.Ptr(armcontainerservice.NodeOSUpgradeChannelNodeImage),
UpgradeChannel: to.Ptr(armcontainerservice.UpgradeChannelPatch),
}

// Deploy cluster
Expand Down
3 changes: 3 additions & 0 deletions test/e2e/framework/kubernetes/create-kapinger-deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,9 @@ func (c *CreateKapingerDeployment) GetKapingerDeployment() *appsv1.Deployment {
},

Spec: v1.PodSpec{
NodeSelector: map[string]string{
"kubernetes.io/os": "linux",
},
Affinity: &v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
// prefer an even spread across the cluster to avoid scheduling on the same node
Expand Down
20 changes: 11 additions & 9 deletions test/e2e/framework/kubernetes/exec-pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ func (e *ExecInPod) Run() error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

config, err := clientcmd.BuildConfigFromFlags("", e.PodName)
config, err := clientcmd.BuildConfigFromFlags("", e.KubeConfigFilePath)
if err != nil {
return fmt.Errorf("error building kubeconfig: %w", err)
}
Expand All @@ -55,7 +55,8 @@ func (e *ExecInPod) Stop() error {
return nil
}

func ExecPod(ctx context.Context, clientset *kubernetes.Clientset, config *rest.Config, namespace, podName, command string) (string, error) {
func ExecPod(ctx context.Context, clientset *kubernetes.Clientset, config *rest.Config, namespace, podName, command string) ([]byte, error) {
log.Printf("executing command \"%s\" on pod \"%s\" in namespace \"%s\"...", command, podName, namespace)
req := clientset.CoreV1().RESTClient().Post().Resource("pods").Name(podName).
Namespace(namespace).SubResource(ExecSubResources)
option := &v1.PodExecOptions{
Expand All @@ -71,21 +72,22 @@ func ExecPod(ctx context.Context, clientset *kubernetes.Clientset, config *rest.
scheme.ParameterCodec,
)

var buf bytes.Buffer
exec, err := remotecommand.NewSPDYExecutor(config, "POST", req.URL())
if err != nil {
return "", fmt.Errorf("error creating executor: %w", err)
return buf.Bytes(), fmt.Errorf("error creating executor: %w", err)
}

var stdout, stderr bytes.Buffer
log.Printf("executing command \"%s\" on pod \"%s\" in namespace \"%s\"...", command, podName, namespace)
err = exec.StreamWithContext(ctx, remotecommand.StreamOptions{
Stdin: os.Stdin,
Stdout: &stdout,
Stderr: &stderr,
Stdout: &buf,
Stderr: &buf,
})
if err != nil {
return "", fmt.Errorf("error executing command: %w", err)
return buf.Bytes(), fmt.Errorf("error executing command: %w", err)
}

return stdout.String(), nil
res := buf.Bytes()
log.Print(string(res))
return res, nil
}
18 changes: 14 additions & 4 deletions test/e2e/framework/kubernetes/port-forward.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"time"

retry "github.com/microsoft/retina/test/retry"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
Expand Down Expand Up @@ -120,14 +121,23 @@ func (p *PortForward) Run() error {
}

func (p *PortForward) findPodsWithAffinity(ctx context.Context, clientset *kubernetes.Clientset) (string, error) {
targetPods, errAffinity := clientset.CoreV1().Pods(p.Namespace).List(ctx, metav1.ListOptions{
targetPodsAll, errAffinity := clientset.CoreV1().Pods(p.Namespace).List(ctx, metav1.ListOptions{
LabelSelector: p.LabelSelector,
FieldSelector: "status.phase=Running",
})
if errAffinity != nil {
return "", fmt.Errorf("could not list pods in %q with label %q: %w", p.Namespace, p.LabelSelector, errAffinity)
}

// omit windows pods because we can't port-forward to them
targetPodsLinux := make([]v1.Pod, 0)
for i := range targetPodsAll.Items {
if targetPodsAll.Items[i].Spec.NodeSelector["kubernetes.io/os"] != "windows" {
targetPodsLinux = append(targetPodsLinux, targetPodsAll.Items[i])
}
}

// get all pods with optional label affinity
affinityPods, errAffinity := clientset.CoreV1().Pods(p.Namespace).List(ctx, metav1.ListOptions{
LabelSelector: p.OptionalLabelAffinity,
FieldSelector: "status.phase=Running",
Expand All @@ -143,10 +153,10 @@ func (p *PortForward) findPodsWithAffinity(ctx context.Context, clientset *kuber
}

// if a pod is found on the same node as an affinity pod, use it
for i := range targetPods.Items {
if affinityNodes[targetPods.Items[i].Spec.NodeName] {
for i := range targetPodsLinux {
if affinityNodes[targetPodsLinux[i].Spec.NodeName] {
// found a pod with the specified label, on a node with the optional label affinity
return targetPods.Items[i].Name, nil
return targetPodsLinux[i].Name, nil
}
}

Expand Down
42 changes: 38 additions & 4 deletions test/e2e/framework/prometheus/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"log"
"net/http"
"reflect"
"strings"
"time"

"github.com/microsoft/retina/test/retry"
Expand Down Expand Up @@ -34,7 +35,7 @@ func CheckMetric(promAddress, metricName string, validMetric map[string]string)
var err error

// obtain a full dump of all metrics on the endpoint
metrics, err = getAllPrometheusMetrics(promAddress)
metrics, err = getAllPrometheusMetricsFromURL(promAddress)
if err != nil {
return fmt.Errorf("could not start port forward within %ds: %w ", defaultTimeout, err)
}
Expand All @@ -57,6 +58,21 @@ func CheckMetric(promAddress, metricName string, validMetric map[string]string)
return nil
}

func CheckMetricFromBuffer(promethusMetricData []byte, metricName string, validMetric map[string]string) error {
metrics, err := getAllPrometheusMetricsFromBuffer(promethusMetricData)
if err != nil {
return fmt.Errorf("failed to parse prometheus metrics: %w", err)
}

err = verifyValidMetricPresent(metricName, metrics, validMetric)
if err != nil {
log.Printf("failed to find metric matching %s: %+v\n", metricName, validMetric)
return ErrNoMetricFound
}

return nil
}

func verifyValidMetricPresent(metricName string, data map[string]*promclient.MetricFamily, validMetric map[string]string) error {
for _, metric := range data {
if metric.GetName() == metricName {
Expand All @@ -77,7 +93,7 @@ func verifyValidMetricPresent(metricName string, data map[string]*promclient.Met
return fmt.Errorf("failed to find metric matching: %+v: %w", validMetric, ErrNoMetricFound)
}

func getAllPrometheusMetrics(url string) (map[string]*promclient.MetricFamily, error) {
func getAllPrometheusMetricsFromURL(url string) (map[string]*promclient.MetricFamily, error) {
client := http.Client{}
resp, err := client.Get(url) //nolint
if err != nil {
Expand All @@ -89,15 +105,33 @@ func getAllPrometheusMetrics(url string) (map[string]*promclient.MetricFamily, e
return nil, fmt.Errorf("HTTP request failed with status: %v", resp.Status) //nolint:goerr113,gocritic
}

metrics, err := parseReaderPrometheusMetrics(resp.Body)
metrics, err := ParseReaderPrometheusMetrics(resp.Body)
if err != nil {
return nil, err
}

return metrics, nil
}

func parseReaderPrometheusMetrics(input io.Reader) (map[string]*promclient.MetricFamily, error) {
func getAllPrometheusMetricsFromBuffer(buf []byte) (map[string]*promclient.MetricFamily, error) {
var parser expfmt.TextParser
reader := strings.NewReader(string(buf))
return parser.TextToMetricFamilies(reader) //nolint
}

func ParseReaderPrometheusMetrics(input io.Reader) (map[string]*promclient.MetricFamily, error) {
var parser expfmt.TextParser
return parser.TextToMetricFamilies(input) //nolint
}

// When capturing promethus output via curl and exect, there's a lot
// of garbage at the front
func stripExecGarbage(s string) string {
index := strings.Index(s, "#")
if index == -1 {
// If there's no `#`, return the original string
return s
}
// Slice the string up to the character before the first `#`
return s[:index]
}
6 changes: 3 additions & 3 deletions test/e2e/jobs/jobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func CreateTestInfra(subID, clusterName, location, kubeConfigFilePath string, cr

} else {
job.AddStep(&azure.GetAKSKubeConfig{
KubeConfigFilePath: "./test.pem",
KubeConfigFilePath: kubeConfigFilePath,
ClusterName: clusterName,
SubscriptionID: subID,
ResourceGroupName: clusterName,
Expand Down Expand Up @@ -85,8 +85,6 @@ func InstallAndTestRetinaBasicMetrics(kubeConfigFilePath, chartPath string) *typ
TagEnv: generic.DefaultTagEnv,
}, nil)

job.AddScenario(windows.ValidateWindowsBasicMetric())

job.AddScenario(drop.ValidateDropMetric())

job.AddScenario(tcp.ValidateTCPMetrics())
Expand Down Expand Up @@ -203,6 +201,8 @@ func UpgradeAndTestRetinaAdvancedMetrics(kubeConfigFilePath, chartPath, valuesFi

job.AddScenario(latency.ValidateLatencyMetric())

job.AddScenario(windows.ValidateWindowsBasicMetric())

job.AddStep(&kubernetes.EnsureStableCluster{
PodNamespace: "kube-system",
LabelSelector: "k8s-app=retina",
Expand Down
5 changes: 3 additions & 2 deletions test/e2e/scenarios/windows/scenario.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ func ValidateWindowsBasicMetric() *types.Scenario {
steps := []*types.StepWrapper{
{
Step: &ValidateHNSMetric{
KubeConfigFilePath: "./test.pem",
RetinaPodNamespace: "kube-system",
KubeConfigFilePath: "./test.pem",
RetinaDaemonSetNamespace: "kube-system",
RetinaDaemonSetName: "retina-agent-win",
},
},
}
Expand Down
76 changes: 64 additions & 12 deletions test/e2e/scenarios/windows/validate-hns-metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,39 @@ package windows

import (
"context"
"errors"
"fmt"
"log"
"time"

"github.com/microsoft/retina/test/e2e/common"
k8s "github.com/microsoft/retina/test/e2e/framework/kubernetes"
prom "github.com/microsoft/retina/test/e2e/framework/prometheus"
"github.com/microsoft/retina/test/retry"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubernetes "k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
)

const (
defaultRetryDelay = 5 * time.Second
defaultRetryAttempts = 5
defaultHTTPClientTimeout = 2 * time.Second
)

var (
ErrorNoWindowsPod = errors.New("no windows retina pod found")
ErrNoMetricFound = fmt.Errorf("no metric found")

hnsMetricName = "networkobservability_windows_hns_stats"
defaultRetrier = retry.Retrier{Attempts: defaultRetryAttempts, Delay: defaultRetryDelay}
)

type ValidateHNSMetric struct {
KubeConfigFilePath string
RetinaPodNamespace string
KubeConfigFilePath string
RetinaDaemonSetNamespace string
RetinaDaemonSetName string
}

func (v *ValidateHNSMetric) Run() error {
Expand All @@ -26,23 +48,53 @@ func (v *ValidateHNSMetric) Run() error {
return fmt.Errorf("error creating Kubernetes client: %w", err)
}

daemonset, err := clientset.AppsV1().DaemonSets(v.RetinaPodNamespace).Get(context.TODO(), "retina-agent-win", metav1.GetOptions{})
pods, err := clientset.CoreV1().Pods(v.RetinaDaemonSetNamespace).List(context.TODO(), metav1.ListOptions{
LabelSelector: "k8s-app=retina",
})
if err != nil {
return fmt.Errorf("error getting daemonset: %w", err)
panic(err.Error())
}

pods, err := clientset.CoreV1().Pods(daemonset.Namespace).List(context.TODO(), metav1.ListOptions{
LabelSelector: "app=retina-agent-win",
})
if err != nil {
return fmt.Errorf("error getting pods: %w", err)
var windowsRetinaPod *v1.Pod
for pod := range pods.Items {
if pods.Items[pod].Spec.NodeSelector["kubernetes.io/os"] == "windows" {
windowsRetinaPod = &pods.Items[pod]
}
}
if windowsRetinaPod == nil {
return ErrorNoWindowsPod
}

labels := map[string]string{
"direction": "win_packets_sent_count",
}

log.Printf("checking for metric %s with labels %+v\n", hnsMetricName, labels)

pod := pods.Items[0]
// wrap this in a retrier because windows is slow
var output []byte
err = defaultRetrier.Do(context.TODO(), func() error {
output, err = k8s.ExecPod(context.TODO(), clientset, config, windowsRetinaPod.Namespace, windowsRetinaPod.Name, fmt.Sprintf("curl -s http://localhost:%d/metrics", common.RetinaPort))
if err != nil {
return fmt.Errorf("error executing command in windows retina pod: %w", err)
}
if len(output) == 0 {
return ErrNoMetricFound
}

output, err := k8s.ExecPod(context.TODO(), clientset, config, pod.Namespace, pod.Name, "Get-Counter -Counter '\\Network Interface(*)\\Bytes Total/sec' | Format-Table -AutoSize")
if err != nil {
return fmt.Errorf("failed to get metrics from windows retina pod: %w", err)
}

err = prom.CheckMetricFromBuffer(output, hnsMetricName, labels)
if err != nil {
return fmt.Errorf("failed to verify prometheus metrics: %w", err)
}

return nil
})

fmt.Println(output)
log.Printf("found metric matching %+v: with labels %+v\n", hnsMetricName, labels)
return nil
}

Expand Down

0 comments on commit 067f818

Please sign in to comment.