-
Notifications
You must be signed in to change notification settings - Fork 172
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: add agent injector telemetry #703
Changes from 4 commits
f3f6b3a
d5db59a
a095cc4
687121f
3175b57
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ import ( | |
"io" | ||
"net/http" | ||
"strings" | ||
"time" | ||
|
||
"github.com/hashicorp/go-hclog" | ||
"github.com/hashicorp/vault-k8s/agent-inject/agent" | ||
|
@@ -85,6 +86,14 @@ type Handler struct { | |
func (h *Handler) Handle(w http.ResponseWriter, r *http.Request) { | ||
h.Log.Info("Request received", "Method", r.Method, "URL", r.URL) | ||
|
||
// Measure request processing duration and monitor request queue | ||
requestQueue.Inc() | ||
requestStart := time.Now() | ||
defer func() { | ||
requestProcessingTime.Observe(float64(time.Since(requestStart).Milliseconds())) | ||
requestQueue.Dec() | ||
}() | ||
|
||
if ct := r.Header.Get("Content-Type"); ct != "application/json" { | ||
msg := fmt.Sprintf("Invalid content-type: %q", ct) | ||
http.Error(w, msg, http.StatusBadRequest) | ||
|
@@ -109,7 +118,10 @@ func (h *Handler) Handle(w http.ResponseWriter, r *http.Request) { | |
return | ||
} | ||
|
||
var admResp admissionv1.AdmissionReview | ||
var ( | ||
mutateResp MutateResponse | ||
admResp admissionv1.AdmissionReview | ||
) | ||
|
||
// Both v1 and v1beta1 AdmissionReview types are exactly the same, so the v1beta1 type can | ||
// be decoded into the v1 type. However the runtime codec's decoder guesses which type to | ||
|
@@ -126,7 +138,8 @@ func (h *Handler) Handle(w http.ResponseWriter, r *http.Request) { | |
h.Log.Error("error on request", "Error", msg, "Code", http.StatusInternalServerError) | ||
return | ||
} else { | ||
admResp.Response = h.Mutate(admReq.Request) | ||
mutateResp = h.Mutate(admReq.Request) | ||
admResp.Response = mutateResp.Resp | ||
} | ||
|
||
// Default to a v1 AdmissionReview, otherwise the API server may not recognize the request | ||
|
@@ -142,26 +155,43 @@ func (h *Handler) Handle(w http.ResponseWriter, r *http.Request) { | |
msg := fmt.Sprintf("error marshalling admission response: %s", err) | ||
http.Error(w, msg, http.StatusInternalServerError) | ||
h.Log.Error("error on request", "Error", msg, "Code", http.StatusInternalServerError) | ||
incrementInjectionFailures(admReq.Request.Namespace) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We may also want to call |
||
return | ||
} | ||
|
||
if _, err := w.Write(resp); err != nil { | ||
h.Log.Error("error writing response", "Error", err) | ||
incrementInjectionFailures(admReq.Request.Namespace) | ||
return | ||
} | ||
|
||
if admResp.Response.Allowed { | ||
incrementInjections(admReq.Request.Namespace, mutateResp) | ||
} else { | ||
incrementInjectionFailures(admReq.Request.Namespace) | ||
} | ||
} | ||
|
||
type MutateResponse struct { | ||
Resp *admissionv1.AdmissionResponse | ||
InjectedInit bool | ||
InjectedSidecar bool | ||
} | ||
|
||
// Mutate takes an admission request and performs mutation if necessary, | ||
// returning the final API response. | ||
func (h *Handler) Mutate(req *admissionv1.AdmissionRequest) *admissionv1.AdmissionResponse { | ||
func (h *Handler) Mutate(req *admissionv1.AdmissionRequest) MutateResponse { | ||
// Decode the pod from the request | ||
var pod corev1.Pod | ||
if err := json.Unmarshal(req.Object.Raw, &pod); err != nil { | ||
h.Log.Error("could not unmarshal request to pod: %s", err) | ||
h.Log.Debug("%s", req.Object.Raw) | ||
return &admissionv1.AdmissionResponse{ | ||
UID: req.UID, | ||
Result: &metav1.Status{ | ||
Message: err.Error(), | ||
return MutateResponse{ | ||
Resp: &admissionv1.AdmissionResponse{ | ||
UID: req.UID, | ||
Result: &metav1.Status{ | ||
Message: err.Error(), | ||
}, | ||
}, | ||
} | ||
} | ||
|
@@ -178,7 +208,9 @@ func (h *Handler) Mutate(req *admissionv1.AdmissionRequest) *admissionv1.Admissi | |
err := fmt.Errorf("error checking if should inject agent: %s", err) | ||
return admissionError(req.UID, err) | ||
} else if !inject { | ||
return resp | ||
return MutateResponse{ | ||
Resp: resp, | ||
} | ||
} | ||
|
||
h.Log.Debug("checking namespaces..") | ||
|
@@ -249,14 +281,20 @@ func (h *Handler) Mutate(req *admissionv1.AdmissionRequest) *admissionv1.Admissi | |
patchType := admissionv1.PatchTypeJSONPatch | ||
resp.PatchType = &patchType | ||
|
||
return resp | ||
return MutateResponse{ | ||
Resp: resp, | ||
InjectedInit: agentSidecar.PrePopulate, | ||
InjectedSidecar: !agentSidecar.PrePopulateOnly, | ||
} | ||
} | ||
|
||
func admissionError(UID types.UID, err error) *admissionv1.AdmissionResponse { | ||
return &admissionv1.AdmissionResponse{ | ||
UID: UID, | ||
Result: &metav1.Status{ | ||
Message: err.Error(), | ||
func admissionError(UID types.UID, err error) MutateResponse { | ||
return MutateResponse{ | ||
Resp: &admissionv1.AdmissionResponse{ | ||
UID: UID, | ||
Result: &metav1.Status{ | ||
Message: err.Error(), | ||
}, | ||
}, | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
// Copyright (c) HashiCorp, Inc. | ||
// SPDX-License-Identifier: MPL-2.0 | ||
|
||
package agent_inject | ||
gsantos-hc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
import ( | ||
"github.com/prometheus/client_golang/prometheus" | ||
) | ||
|
||
const ( | ||
metricsNamespace = "vault" | ||
metricsSubsystem = "agent_injector" | ||
metricsLabelNamespace = "namespace" | ||
metricsLabelType = "injection_type" | ||
metricsLabelTypeBoth = "init_and_sidecar" | ||
metricsLabelTypeInitOnly = "init_only" | ||
metricsLabelTypeSidecarOnly = "sidecar_only" | ||
) | ||
|
||
var ( | ||
requestQueue = prometheus.NewGauge(prometheus.GaugeOpts{ | ||
Namespace: metricsNamespace, | ||
Subsystem: metricsSubsystem, | ||
Name: "request_queue_length", | ||
Help: "Count of webhook requests in the injector's queue", | ||
}) | ||
|
||
requestProcessingTime = prometheus.NewHistogram(prometheus.HistogramOpts{ | ||
Namespace: metricsNamespace, | ||
Subsystem: metricsSubsystem, | ||
Name: "request_processing_duration_ms", | ||
Buckets: []float64{5, 10, 25, 50, 75, 100, 250, 500, 1000, 2500, 5000, 7500, 10000}, | ||
}) | ||
|
||
injectionsByNamespace = prometheus.NewCounterVec(prometheus.CounterOpts{ | ||
Namespace: metricsNamespace, | ||
Subsystem: metricsSubsystem, | ||
Name: "injections_by_namespace_total", | ||
Help: "Total count of Agent Sidecar injections by namespace", | ||
}, []string{metricsLabelNamespace, metricsLabelType}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suppose that the number of namespaces should be relatively low, so we should be okay with this high cardinality metric. |
||
|
||
failedInjectionsByNamespace = prometheus.NewCounterVec(prometheus.CounterOpts{ | ||
Namespace: metricsNamespace, | ||
Subsystem: metricsSubsystem, | ||
Name: "failed_injections_by_namespace_total", | ||
Help: "Total count of failed Agent Sidecar injections by namespace", | ||
}, []string{metricsLabelNamespace}) | ||
) | ||
|
||
func incrementInjections(namespace string, res MutateResponse) { | ||
// Injection type can be one of: init_and_sidecar (default); init_only; or sidecar_only | ||
typeLabel := metricsLabelTypeBoth | ||
if res.InjectedInit && !res.InjectedSidecar { | ||
typeLabel = metricsLabelTypeInitOnly | ||
} else if res.InjectedSidecar && !res.InjectedInit { | ||
typeLabel = metricsLabelTypeSidecarOnly | ||
} | ||
|
||
injectionsByNamespace.With(prometheus.Labels{ | ||
metricsLabelNamespace: namespace, | ||
metricsLabelType: typeLabel, | ||
}).Inc() | ||
} | ||
|
||
func incrementInjectionFailures(namespace string) { | ||
failedInjectionsByNamespace.With(prometheus.Labels{metricsLabelNamespace: namespace}).Inc() | ||
} | ||
|
||
func MustRegisterInjectorMetrics(registry prometheus.Registerer) { | ||
registry.MustRegister( | ||
requestQueue, | ||
requestProcessingTime, | ||
injectionsByNamespace, | ||
failedInjectionsByNamespace, | ||
) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if would be worth tracking the failures as well. We do something like this in VSO here: https://github.com/hashicorp/vault-secrets-operator/blob/6ab056c22acb5dc4812620233e3b141fb9e02f9c/vault/client.go#L810
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense, and it would unblock @tvoran's feedback here.
@benashz Given that it goes beyond the scope we were asked for, is it OK if I push that to a follow-on PR?