Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add agent injector telemetry #703

Merged
merged 5 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 52 additions & 14 deletions agent-inject/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"io"
"net/http"
"strings"
"time"

"github.com/hashicorp/go-hclog"
"github.com/hashicorp/vault-k8s/agent-inject/agent"
Expand Down Expand Up @@ -85,6 +86,14 @@ type Handler struct {
func (h *Handler) Handle(w http.ResponseWriter, r *http.Request) {
h.Log.Info("Request received", "Method", r.Method, "URL", r.URL)

// Measure request processing duration and monitor request queue
requestQueue.Inc()
requestStart := time.Now()
defer func() {
requestProcessingTime.Observe(float64(time.Since(requestStart).Milliseconds()))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if would be worth tracking the failures as well. We do something like this in VSO here: https://github.com/hashicorp/vault-secrets-operator/blob/6ab056c22acb5dc4812620233e3b141fb9e02f9c/vault/client.go#L810

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense, and it would unblock @tvoran's feedback here.

@benashz Given that it goes beyond the scope we were asked for, is it OK if I push that to a follow-on PR?

requestQueue.Dec()
}()

if ct := r.Header.Get("Content-Type"); ct != "application/json" {
msg := fmt.Sprintf("Invalid content-type: %q", ct)
http.Error(w, msg, http.StatusBadRequest)
Expand All @@ -109,7 +118,10 @@ func (h *Handler) Handle(w http.ResponseWriter, r *http.Request) {
return
}

var admResp admissionv1.AdmissionReview
var (
mutateResp MutateResponse
admResp admissionv1.AdmissionReview
)

// Both v1 and v1beta1 AdmissionReview types are exactly the same, so the v1beta1 type can
// be decoded into the v1 type. However the runtime codec's decoder guesses which type to
Expand All @@ -126,7 +138,8 @@ func (h *Handler) Handle(w http.ResponseWriter, r *http.Request) {
h.Log.Error("error on request", "Error", msg, "Code", http.StatusInternalServerError)
return
} else {
admResp.Response = h.Mutate(admReq.Request)
mutateResp = h.Mutate(admReq.Request)
admResp.Response = mutateResp.Resp
}

// Default to a v1 AdmissionReview, otherwise the API server may not recognize the request
Expand All @@ -142,26 +155,43 @@ func (h *Handler) Handle(w http.ResponseWriter, r *http.Request) {
msg := fmt.Sprintf("error marshalling admission response: %s", err)
http.Error(w, msg, http.StatusInternalServerError)
h.Log.Error("error on request", "Error", msg, "Code", http.StatusInternalServerError)
incrementInjectionFailures(admReq.Request.Namespace)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We may also want to call incrementInjectionFailures() for the error returns further up in this function too? But this should suffice as-is; I could even see those being a different error metric added in the future.

return
}

if _, err := w.Write(resp); err != nil {
h.Log.Error("error writing response", "Error", err)
incrementInjectionFailures(admReq.Request.Namespace)
return
}

if admResp.Response.Allowed {
incrementInjections(admReq.Request.Namespace, mutateResp)
} else {
incrementInjectionFailures(admReq.Request.Namespace)
}
}

type MutateResponse struct {
Resp *admissionv1.AdmissionResponse
InjectedInit bool
InjectedSidecar bool
}

// Mutate takes an admission request and performs mutation if necessary,
// returning the final API response.
func (h *Handler) Mutate(req *admissionv1.AdmissionRequest) *admissionv1.AdmissionResponse {
func (h *Handler) Mutate(req *admissionv1.AdmissionRequest) MutateResponse {
// Decode the pod from the request
var pod corev1.Pod
if err := json.Unmarshal(req.Object.Raw, &pod); err != nil {
h.Log.Error("could not unmarshal request to pod: %s", err)
h.Log.Debug("%s", req.Object.Raw)
return &admissionv1.AdmissionResponse{
UID: req.UID,
Result: &metav1.Status{
Message: err.Error(),
return MutateResponse{
Resp: &admissionv1.AdmissionResponse{
UID: req.UID,
Result: &metav1.Status{
Message: err.Error(),
},
},
}
}
Expand All @@ -178,7 +208,9 @@ func (h *Handler) Mutate(req *admissionv1.AdmissionRequest) *admissionv1.Admissi
err := fmt.Errorf("error checking if should inject agent: %s", err)
return admissionError(req.UID, err)
} else if !inject {
return resp
return MutateResponse{
Resp: resp,
}
}

h.Log.Debug("checking namespaces..")
Expand Down Expand Up @@ -249,14 +281,20 @@ func (h *Handler) Mutate(req *admissionv1.AdmissionRequest) *admissionv1.Admissi
patchType := admissionv1.PatchTypeJSONPatch
resp.PatchType = &patchType

return resp
return MutateResponse{
Resp: resp,
InjectedInit: agentSidecar.PrePopulate,
InjectedSidecar: !agentSidecar.PrePopulateOnly,
}
}

func admissionError(UID types.UID, err error) *admissionv1.AdmissionResponse {
return &admissionv1.AdmissionResponse{
UID: UID,
Result: &metav1.Status{
Message: err.Error(),
func admissionError(UID types.UID, err error) MutateResponse {
return MutateResponse{
Resp: &admissionv1.AdmissionResponse{
UID: UID,
Result: &metav1.Status{
Message: err.Error(),
},
},
}
}
Expand Down
2 changes: 1 addition & 1 deletion agent-inject/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ func TestHandlerHandle(t *testing.T) {
for _, tt := range cases {
t.Run(tt.Name, func(t *testing.T) {
req := require.New(t)
resp := tt.Handler.Mutate(&tt.Req)
resp := (tt.Handler.Mutate(&tt.Req)).Resp
if (tt.Err == "") != resp.Allowed {
t.Fatalf("allowed: %v, expected err: %v", resp.Allowed, tt.Err)
}
Expand Down
76 changes: 76 additions & 0 deletions agent-inject/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package agent_inject
gsantos-hc marked this conversation as resolved.
Show resolved Hide resolved

import (
"github.com/prometheus/client_golang/prometheus"
)

const (
metricsNamespace = "vault"
metricsSubsystem = "agent_injector"
metricsLabelNamespace = "namespace"
metricsLabelType = "injection_type"
metricsLabelTypeBoth = "init_and_sidecar"
metricsLabelTypeInitOnly = "init_only"
metricsLabelTypeSidecarOnly = "sidecar_only"
)

var (
requestQueue = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Name: "request_queue_length",
Help: "Count of webhook requests in the injector's queue",
})

requestProcessingTime = prometheus.NewHistogram(prometheus.HistogramOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Name: "request_processing_duration_ms",
Buckets: []float64{5, 10, 25, 50, 75, 100, 250, 500, 1000, 2500, 5000, 7500, 10000},
})

injectionsByNamespace = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Name: "injections_by_namespace_total",
Help: "Total count of Agent Sidecar injections by namespace",
}, []string{metricsLabelNamespace, metricsLabelType})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose that the number of namespaces should be relatively low, so we should be okay with this high cardinality metric.


failedInjectionsByNamespace = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: metricsNamespace,
Subsystem: metricsSubsystem,
Name: "failed_injections_by_namespace_total",
Help: "Total count of failed Agent Sidecar injections by namespace",
}, []string{metricsLabelNamespace})
)

func incrementInjections(namespace string, res MutateResponse) {
// Injection type can be one of: init_and_sidecar (default); init_only; or sidecar_only
typeLabel := metricsLabelTypeBoth
if res.InjectedInit && !res.InjectedSidecar {
typeLabel = metricsLabelTypeInitOnly
} else if res.InjectedSidecar && !res.InjectedInit {
typeLabel = metricsLabelTypeSidecarOnly
}

injectionsByNamespace.With(prometheus.Labels{
metricsLabelNamespace: namespace,
metricsLabelType: typeLabel,
}).Inc()
}

func incrementInjectionFailures(namespace string) {
failedInjectionsByNamespace.With(prometheus.Labels{metricsLabelNamespace: namespace}).Inc()
}

func MustRegisterInjectorMetrics(registry prometheus.Registerer) {
registry.MustRegister(
requestQueue,
requestProcessingTime,
injectionsByNamespace,
failedInjectionsByNamespace,
)
}
2 changes: 2 additions & 0 deletions subcommand/injector/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/hashicorp/vault-k8s/leader"
"github.com/hashicorp/vault-k8s/version"
"github.com/mitchellh/cli"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
adminv1 "k8s.io/api/admissionregistration/v1"
adminv1beta "k8s.io/api/admissionregistration/v1beta1"
Expand Down Expand Up @@ -231,6 +232,7 @@ func (c *Command) Run(args []string) int {

// Registering path to expose metrics
if c.flagTelemetryPath != "" {
agentInject.MustRegisterInjectorMetrics(prometheus.DefaultRegisterer)
gsantos-hc marked this conversation as resolved.
Show resolved Hide resolved
c.UI.Info(fmt.Sprintf("Registering telemetry path on %q", c.flagTelemetryPath))
mux.Handle(c.flagTelemetryPath, promhttp.Handler())
}
Expand Down