diff --git a/chart/Chart.yaml b/chart/Chart.yaml
index 80710425c..0cdcf4150 100644
--- a/chart/Chart.yaml
+++ b/chart/Chart.yaml
@@ -5,4 +5,4 @@
 apiVersion: v2
 name: chaos-controller
 description: Datadog Chaos Controller chart
-version: 3.1.0
+version: 3.1.1
diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml
index f83da6cb8..354d12c48 100644
--- a/chart/templates/configmap.yaml
+++ b/chart/templates/configmap.yaml
@@ -136,3 +136,5 @@ data:
         enabled: {{ .Values.handler.enabled }}
         timeout: {{ .Values.handler.timeout | quote }}
         maxTimeout: {{ .Values.handler.maxTimeout | quote }}
+        cpu: {{ .Values.handler.resources.cpu | quote | default "" }}
+        memory: {{ .Values.handler.resources.memory | quote | default "" }}
diff --git a/chart/values.yaml b/chart/values.yaml
index 4630d36e9..7aa730161 100644
--- a/chart/values.yaml
+++ b/chart/values.yaml
@@ -140,6 +140,9 @@ handler:
   enabled: true # enable the chaos handler (required to use the onInit disruption feature)
   timeout: 10m # time the handler init container will wait before exiting if no signal is received
   maxTimeout: 2h # maximum amount of time to allow users to configure for their handler timeout
+  resources:
+    cpu: 10m
+    memory: 5Mi
 
 proxy:
   image:
diff --git a/config/config.go b/config/config.go
index cac8d27f6..93d230dc7 100644
--- a/config/config.go
+++ b/config/config.go
@@ -16,6 +16,7 @@ import (
 	"github.com/fsnotify/fsnotify"
 	"github.com/spf13/pflag"
 	"github.com/spf13/viper"
+	"k8s.io/apimachinery/pkg/api/resource"
 )
 
 type config struct {
@@ -101,6 +102,8 @@ type handlerConfig struct {
 	Image      string        `json:"image" yaml:"image"`
 	Timeout    time.Duration `json:"timeout" yaml:"timeout"`
 	MaxTimeout time.Duration `json:"maxTimeout" yaml:"maxTimeout"`
+	CPU        string        `json:"cpu" yaml:"cpu"`
+	Memory     string        `json:"memory" yaml:"memory"`
 }
 
 const DefaultDisruptionDeletionTimeout = time.Minute * 15
@@ -356,6 +359,32 @@ func New(logger *zap.SugaredLogger, osArgs []string) (config, error) {
 		return cfg, err
 	}
 
+	mainFS.StringVar(&cfg.Handler.CPU, "handler-cpu", "100m", "CPU limit/requests for handler init container")
+
+	if err := viper.BindPFlag("handler.cpu", mainFS.Lookup("handler-cpu")); err != nil {
+		return cfg, err
+	}
+
+	// NOTE(review): mainFS is still being populated here (further flags are
+	// registered below), so this check only ever sees the "100m" default that
+	// StringVar just assigned. Values coming from the command line or the
+	// config file are not validated and reach resource.MustParse in main.go,
+	// which panics on a malformed quantity. TODO: validate after parsing.
+	if _, err := resource.ParseQuantity(cfg.Handler.CPU); err != nil {
+		return cfg, err
+	}
+
+	mainFS.StringVar(&cfg.Handler.Memory, "handler-memory", "100Mi", "Memory limit/requests for handler init container")
+
+	if err := viper.BindPFlag("handler.memory", mainFS.Lookup("handler-memory")); err != nil {
+		return cfg, err
+	}
+
+	// NOTE(review): same caveat as handler-cpu: only the "100Mi" default is checked.
+	if _, err := resource.ParseQuantity(cfg.Handler.Memory); err != nil {
+		return cfg, err
+	}
+
 	mainFS.StringVar(&cfg.Controller.Webhook.CertDir, "admission-webhook-cert-dir", "", "Admission webhook certificate directory to search for tls.crt and tls.key files")
 
 	if err := viper.BindPFlag("controller.webhook.certDir", mainFS.Lookup("admission-webhook-cert-dir")); err != nil {
diff --git a/examples/demo.yaml b/examples/demo.yaml
index ccbcf1a7b..b4e56ee25 100644
--- a/examples/demo.yaml
+++ b/examples/demo.yaml
@@ -39,7 +39,7 @@ spec:
     spec:
       containers:
         - name: nginx
-          image: nginx:1.21.6
+          image: nginx:1.27
           livenessProbe:
             httpGet:
               path: /
@@ -76,10 +76,11 @@ spec:
         app.kubernetes.io/component: client
       labels:
         app: demo-curl
+# chaos.datadoghq.com/disrupt-on-init: "true" # uncomment this and run `kubectl -n chaos-demo apply -f examples/demo.yaml`, for testing the examples/on_init.yaml disruption
     spec:
       containers:
         - name: cpu-stress # jump into pod and run `kill -s TERM 7` to confirm re-start apply cpu reinjection
-          image: alpine/curl:3.14
+          image: alpine/curl:8.9.1
           command: [/bin/sh, -c]
           args:
             - ee(){exit 1}; trap 'ee' TERM; tail -f /dev/null
@@ -92,7 +93,7 @@ spec:
               memory: 32Mi
               cpu: 1500m
         - name: curl
-          image: alpine/curl:3.14
+          image: alpine/curl:8.9.1
           command: [/bin/sh]
           args:
             - -c
@@ -105,7 +106,7 @@ spec:
               memory: 32Mi
               cpu: 10m
         - name: curl-remote
-          image: alpine/curl:3.14
+          image: alpine/curl:8.9.1
           command: [/bin/sh]
           args:
             - -c
@@ -118,7 +119,7 @@ spec:
               memory: 32Mi
               cpu: 10m
         - name: read-file
-          image: ubuntu:bionic-20220128
+          image: ubuntu:focal-20240918
           command: ["/bin/bash"]
           args:
             - -c
@@ -134,7 +135,7 @@ spec:
               memory: 32Mi
               cpu: 100m
         - name: write-file
-          image: ubuntu:bionic-20220128
+          image: ubuntu:focal-20240918
           command: ["/bin/bash"]
           args:
             - -c
diff --git a/main.go b/main.go
index 296c7e452..8b80f8842 100644
--- a/main.go
+++ b/main.go
@@ -12,6 +12,7 @@ import (
 	"time"
 
 	"go.uber.org/zap"
+	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/klog/v2"
 	"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
 
@@ -407,6 +408,10 @@ func main() {
 				Timeout:    cfg.Handler.Timeout,
 				MaxTimeout: cfg.Handler.MaxTimeout,
 				Decoder:    webhookDecoder,
+				ResourceList: &corev1.ResourceList{
+					corev1.ResourceCPU:    resource.MustParse(cfg.Handler.CPU),
+					corev1.ResourceMemory: resource.MustParse(cfg.Handler.Memory),
+				},
 			},
 		})
 	}
diff --git a/webhook/chaos_handler.go b/webhook/chaos_handler.go
index bd3142e55..39abccf4f 100644
--- a/webhook/chaos_handler.go
+++ b/webhook/chaos_handler.go
@@ -19,12 +19,28 @@ import (
 )
 
 type ChaosHandlerMutator struct {
-	Client     client.Client
-	Log        *zap.SugaredLogger
-	Image      string
-	Timeout    time.Duration
-	MaxTimeout time.Duration
-	Decoder    *admission.Decoder
+	Client       client.Client
+	Log          *zap.SugaredLogger
+	Image        string
+	Timeout      time.Duration
+	MaxTimeout   time.Duration
+	Decoder      *admission.Decoder
+	ResourceList *corev1.ResourceList // handler init container limits/requests; nil leaves them unset
 }
 
+// initContainerResources builds the resource requirements of the chaos handler
+// init container. It returns empty requirements when no ResourceList is
+// configured, and hands out independent copies for limits and requests so that
+// mutated pods never share the mutator's map.
+func (m *ChaosHandlerMutator) initContainerResources() corev1.ResourceRequirements {
+	if m.ResourceList == nil {
+		return corev1.ResourceRequirements{}
+	}
+
+	return corev1.ResourceRequirements{
+		Limits:   m.ResourceList.DeepCopy(),
+		Requests: m.ResourceList.DeepCopy(),
+	}
+}
+
 func (m *ChaosHandlerMutator) Handle(ctx context.Context, req admission.Request) admission.Response {
@@ -90,6 +106,7 @@ func (m *ChaosHandlerMutator) Handle(ctx context.Context, req admission.Request)
 			handlerTimeout,
 			succeedOnTimeout,
 		},
+		Resources: m.initContainerResources(),
 	}
 
 	// prepend chaos handler init container to already existing init containers