Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,14 @@ spec:
- jsonPath: .metadata.creationTimestamp
name: Age
type: date
- jsonPath: .spec.updatePolicy.minReplicas
name: MinReplicas
priority: 1
type: integer
- jsonPath: .spec.updatePolicy.evictAfterOOMThreshold
name: OOMThreshold
priority: 1
type: string
name: v1
schema:
openAPIV3Schema:
Expand Down Expand Up @@ -425,6 +433,14 @@ spec:
If not specified, all fields in the `PodUpdatePolicy` are set to their
default values.
properties:
evictAfterOOMThreshold:
description: |-
EvictAfterOOMThreshold specifies the time to wait after an OOM event before
considering the pod for eviction. Pods that have OOMed in less than this threshold
since start will be evicted.
format: duration
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
evictionRequirements:
description: |-
EvictionRequirements is a list of EvictionRequirements that need to
Expand Down Expand Up @@ -478,6 +494,10 @@ spec:
- Auto
type: string
type: object
x-kubernetes-validations:
- message: evictAfterOOMThreshold must be greater than 0
rule: '!has(self.evictAfterOOMThreshold) || duration(self.evictAfterOOMThreshold)
>= duration(''0s'')'
required:
- targetRef
type: object
Expand Down
20 changes: 20 additions & 0 deletions vertical-pod-autoscaler/deploy/vpa-v1-crd-gen.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,14 @@ spec:
- jsonPath: .metadata.creationTimestamp
name: Age
type: date
- jsonPath: .spec.updatePolicy.minReplicas
name: MinReplicas
priority: 1
type: integer
- jsonPath: .spec.updatePolicy.evictAfterOOMThreshold
name: OOMThreshold
priority: 1
type: string
name: v1
schema:
openAPIV3Schema:
Expand Down Expand Up @@ -425,6 +433,14 @@ spec:
If not specified, all fields in the `PodUpdatePolicy` are set to their
default values.
properties:
evictAfterOOMThreshold:
description: |-
EvictAfterOOMThreshold specifies the time to wait after an OOM event before
considering the pod for eviction. Pods that have OOMed in less than this threshold
since start will be evicted.
format: duration
pattern: ^([0-9]+(\.[0-9]+)?(ns|us|µs|ms|s|m|h))+$
type: string
evictionRequirements:
description: |-
EvictionRequirements is a list of EvictionRequirements that need to
Expand Down Expand Up @@ -478,6 +494,10 @@ spec:
- Auto
type: string
type: object
x-kubernetes-validations:
- message: evictAfterOOMThreshold must be greater than 0
rule: '!has(self.evictAfterOOMThreshold) || duration(self.evictAfterOOMThreshold)
>= duration(''0s'')'
required:
- targetRef
type: object
Expand Down
3 changes: 1 addition & 2 deletions vertical-pod-autoscaler/docs/flags.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ This document is auto-generated from the flag definitions in the VPA updater cod
| `add-dir-header` | | | If true, adds the file directory to the header of the log messages |
| `address` | string | ":8943" | The address to expose Prometheus metrics. |
| `alsologtostderr` | | | log to standard error as well as files (no effect when -logtostderr=true) |
| `evict-after-oom-threshold` | | 10m0s | duration Evict pod that has OOMed in less than evict-after-oom-threshold since start. |
| `evict-after-oom-threshold` | | 10m0s | duration The default duration to evict pod that has OOMed in less than evict-after-oom-threshold since start. |
| `eviction-rate-burst` | int | 1 | Burst of pods that can be evicted. |
| `eviction-rate-limit` | float | | Number of pods that can be evicted per seconds. A rate limit set to 0 or -1 will disable<br>the rate limiter. (default -1) |
| `eviction-tolerance` | float | 0.5 | Fraction of replica count that can be evicted for update, if more than one pod can be evicted. |
Expand Down Expand Up @@ -174,4 +174,3 @@ This document is auto-generated from the flag definitions in the VPA updater cod
| `v,` | | : 4 | , --v Level set the log level verbosity (default 4) |
| `vmodule` | moduleSpec | | comma-separated list of pattern=N settings for file-filtered logging |
| `vpa-object-namespace` | string | | Specifies the namespace to search for VPA objects. Leave empty to include all namespaces. If provided, the garbage collector will only clean this namespace. |

5 changes: 4 additions & 1 deletion vertical-pod-autoscaler/e2e/utils/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ var RecommenderLabels = map[string]string{"app": "vpa-recommender"}
// HamsterLabels are labels of hamster app
var HamsterLabels = map[string]string{"app": "hamster"}

// OOMLabels are labels for OOM test pods
var OOMLabels = map[string]string{"app": "oom-test"}

// SIGDescribe adds sig-autoscaling tag to test description.
// Takes args that are passed to ginkgo.Describe.
func SIGDescribe(scenario, name string, args ...interface{}) bool {
Expand Down Expand Up @@ -226,7 +229,7 @@ func NewNHamstersDeployment(f *framework.Framework, n int) *appsv1.Deployment {
DefaultHamsterReplicas, /*replicas*/
HamsterLabels, /*podLabels*/
GetHamsterContainerNameByIndex(0), /*imageName*/
"registry.k8s.io/ubuntu-slim:0.14", /*image*/
"ubuntu:25.10", /*image*/
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did a quick grep of the kubernetes codebase, and I see they use ubuntu:latest
Unsure if it really matters though

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmmm I always prefer to set a specific version rather than latest, but no strong opinions here

appsv1.RollingUpdateDeploymentStrategyType, /*strategyType*/
)
d.ObjectMeta.Namespace = f.Namespace.Name
Expand Down
22 changes: 11 additions & 11 deletions vertical-pod-autoscaler/e2e/v1/admission_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -956,7 +956,7 @@ var _ = AdmissionControllerE2eDescribe("Admission-controller", func() {
expectedErr: "admission webhook \"vpa.k8s.io\" denied the request: oomBumpUpRatio must be greater than or equal to 1.0, got -1",
},
{
name: "Invalid oomBumpUpRatio (string value)",
name: "Invalid oomBumpUpRatio (less than 1)",
vpaJSON: `{
"apiVersion": "autoscaling.k8s.io/v1",
"kind": "VerticalPodAutoscaler",
Expand All @@ -973,16 +973,16 @@ var _ = AdmissionControllerE2eDescribe("Admission-controller", func() {
"resourcePolicy": {
"containerPolicies": [{
"containerName": "*",
"oomBumpUpRatio": "not-a-number",
"oomBumpUpRatio": 0.5,
"oomMinBumpUp": 104857600
}]
}
}
}`,
expectedErr: "admission webhook \"vpa\\.k8s\\.io\" denied the request: quantities must match the regular expression",
expectedErr: "admission webhook \"vpa.k8s.io\" denied the request: oomBumpUpRatio must be greater than or equal to 1.0, got 0.5",
},
{
name: "Invalid oomBumpUpRatio (less than 1)",
name: "Invalid oomMinBumpUp (negative value)",
vpaJSON: `{
"apiVersion": "autoscaling.k8s.io/v1",
"kind": "VerticalPodAutoscaler",
Expand All @@ -999,16 +999,16 @@ var _ = AdmissionControllerE2eDescribe("Admission-controller", func() {
"resourcePolicy": {
"containerPolicies": [{
"containerName": "*",
"oomBumpUpRatio": 0.5,
"oomMinBumpUp": 104857600
"oomBumpUpRatio": 2,
"oomMinBumpUp": -1
}]
}
}
}`,
expectedErr: "admission webhook \"vpa.k8s.io\" denied the request: oomBumpUpRatio must be greater than or equal to 1.0, got 0.5",
expectedErr: "admission webhook \"vpa.k8s.io\" denied the request: oomMinBumpUp must be greater than or equal to 0, got -1 bytes",
},
{
name: "Invalid oomMinBumpUp (negative value)",
name: "Invalid oomBumpUpRatio (string value)",
vpaJSON: `{
"apiVersion": "autoscaling.k8s.io/v1",
"kind": "VerticalPodAutoscaler",
Expand All @@ -1025,13 +1025,13 @@ var _ = AdmissionControllerE2eDescribe("Admission-controller", func() {
"resourcePolicy": {
"containerPolicies": [{
"containerName": "*",
"oomBumpUpRatio": 2,
"oomMinBumpUp": -1
"oomBumpUpRatio": "not-a-number",
"oomMinBumpUp": 104857600
}]
}
}
}`,
expectedErr: "admission webhook \"vpa.k8s.io\" denied the request: oomMinBumpUp must be greater than or equal to 0, got -1 bytes",
expectedErr: "admission webhook \"vpa.k8s.io\" denied the request: quantities must match the regular expression",
},
}
for _, tc := range testCases {
Expand Down
2 changes: 2 additions & 0 deletions vertical-pod-autoscaler/e2e/v1/autoscaling_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/autoscaler/vertical-pod-autoscaler/e2e/utils"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/framework"
e2edebug "k8s.io/kubernetes/test/e2e/framework/debug"
Expand Down Expand Up @@ -444,6 +445,7 @@ func runOomingReplicationController(c clientset.Interface, ns, name string, repl
Namespace: ns,
Timeout: timeoutRC,
Replicas: replicas,
Labels: utils.OOMLabels,
Annotations: make(map[string]string),
MemRequest: 1024 * 1024 * 1024,
MemLimit: 1024 * 1024 * 1024,
Expand Down
17 changes: 17 additions & 0 deletions vertical-pod-autoscaler/e2e/v1/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,13 @@ func GetHamsterPods(f *framework.Framework) (*apiv1.PodList, error) {
return f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(context.TODO(), options)
}

// GetOOMPods returns running OOM test pods (matched by utils.OOMLabels)
func GetOOMPods(f *framework.Framework) (*apiv1.PodList, error) {
label := labels.SelectorFromSet(labels.Set(utils.OOMLabels))
options := metav1.ListOptions{LabelSelector: label.String(), FieldSelector: getPodSelectorExcludingDonePodsOrDie()}
return f.ClientSet.CoreV1().Pods(f.Namespace.Name).List(context.TODO(), options)
}

// NewTestCronJob returns a CronJob for test purposes.
func NewTestCronJob(name, schedule string, replicas int32) *batchv1.CronJob {
backoffLimit := utils.DefaultHamsterBackoffLimit
Expand Down Expand Up @@ -345,6 +352,16 @@ func CheckNoPodsEvicted(f *framework.Framework, initialPodSet PodSet) {
gomega.Expect(restarted).To(gomega.Equal(0), "there should be no pod evictions")
}

// CheckNoPodsEvictedOOM waits for long enough period for VPA to start evicting
// TODO(omerap12): merge this CheckNoPodsEvicted
func CheckNoPodsEvictedOOM(f *framework.Framework, initialPodSet PodSet) {
time.Sleep(VpaEvictionTimeout)
currentPodList, err := GetOOMPods(f)
gomega.Expect(err).NotTo(gomega.HaveOccurred(), "unexpected error when listing hamster pods to check number of pod evictions")
restarted := GetEvictedPodsCount(MakePodSet(currentPodList), initialPodSet)
gomega.Expect(restarted).To(gomega.Equal(0), "there should be no pod evictions")
}

// WaitForUncappedCPURecommendationAbove pools VPA object until uncapped recommendation is above specified value.
// Returns polled VPA object. On timeout returns error.
func WaitForUncappedCPURecommendationAbove(c vpa_clientset.Interface, vpa *vpa_types.VerticalPodAutoscaler, minMilliCPU int64) (*vpa_types.VerticalPodAutoscaler, error) {
Expand Down
65 changes: 65 additions & 0 deletions vertical-pod-autoscaler/e2e/v1/updater.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/autoscaler/vertical-pod-autoscaler/e2e/utils"
vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/features"
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/status"
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/test"
"k8s.io/kubernetes/test/e2e/framework"
Expand Down Expand Up @@ -207,6 +208,70 @@ var _ = UpdaterE2eDescribe("Updater", func() {
})
})

var _ = UpdaterE2eDescribe("Updater with PerVPAConfig", func() {
const replicas = 3
const statusUpdateInterval = 10 * time.Second
f := framework.NewDefaultFramework("vertical-pod-autoscaling")
f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline

f.It("does not evict pods with OOM when threshold is very small", framework.WithFeatureGate(features.PerVPAConfig), func() {
ginkgo.By("Setting up the Admission Controller status")
stopCh := make(chan struct{})
statusUpdater := status.NewUpdater(
f.ClientSet,
status.AdmissionControllerStatusName,
status.AdmissionControllerStatusNamespace,
statusUpdateInterval,
"e2e test",
)
defer func() {
ginkgo.By("Deleting the Admission Controller status")
close(stopCh)
err := f.ClientSet.CoordinationV1().Leases(status.AdmissionControllerStatusNamespace).
Delete(context.TODO(), status.AdmissionControllerStatusName, metav1.DeleteOptions{})
gomega.Expect(err).NotTo(gomega.HaveOccurred())
}()
statusUpdater.Run(stopCh)

ginkgo.By("Setting up a deployment that will OOM")
runOomingReplicationController(
f.ClientSet,
f.Namespace.Name,
"hamster",
replicas,
)

ginkgo.By("Waiting for pods to be created and OOM")
time.Sleep(10 * time.Second)

podList, err := GetOOMPods(f)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Expect(len(podList.Items)).To(gomega.BeNumerically(">", 0))

disabledThreshold := 0 * time.Second // Disable quick OOM eviction
ginkgo.By("Setting up a VPA CRD with very short evictAfterOOMThreshold (1ns)")
targetRef := &autoscaling.CrossVersionObjectReference{
APIVersion: "apps/v1",
Kind: "Deployment",
Name: "hamster",
}
containerName := utils.GetHamsterContainerNameByIndex(0)
vpaCRD := test.VerticalPodAutoscaler().
WithName("hamster-vpa").
WithNamespace(f.Namespace.Name).
WithTargetRef(targetRef).
WithUpdateMode(vpa_types.UpdateModeRecreate).
WithEvictAfterOOMThreshold(&metav1.Duration{Duration: disabledThreshold}).
WithContainer(containerName).
Get()

utils.InstallVPA(f, vpaCRD)

ginkgo.By("Waiting to verify pods are NOT evicted (OOM time is 0)")
CheckNoPodsEvictedOOM(f, MakePodSet(podList))
})
})

func setupPodsForUpscalingEviction(f *framework.Framework) *apiv1.PodList {
return setupPodsForEviction(f, "100m", "100Mi", nil)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,11 @@ func parseVPA(raw []byte) (*vpa_types.VerticalPodAutoscaler, error) {

// ValidateVPA checks the correctness of VPA Spec and returns an error if there is a problem.
func ValidateVPA(vpa *vpa_types.VerticalPodAutoscaler, isCreate bool) error {
// check that perVPA is on if being used
if err := validatePerVPAFeatureFlag(vpa); err != nil {
return err
}

if vpa.Spec.UpdatePolicy != nil {
mode := vpa.Spec.UpdatePolicy.UpdateMode
if mode == nil {
Expand All @@ -130,6 +135,7 @@ func ValidateVPA(vpa *vpa_types.VerticalPodAutoscaler, isCreate bool) error {
if minReplicas := vpa.Spec.UpdatePolicy.MinReplicas; minReplicas != nil && *minReplicas <= 0 {
return fmt.Errorf("minReplicas has to be positive, got %v", *minReplicas)
}

}

if vpa.Spec.ResourcePolicy != nil {
Expand All @@ -138,11 +144,6 @@ func ValidateVPA(vpa *vpa_types.VerticalPodAutoscaler, isCreate bool) error {
return fmt.Errorf("containerPolicies.ContainerName is required")
}

// check that perVPA is on if being used
if err := validatePerVPAFeatureFlag(&policy); err != nil {
return err
}

// Validate OOMBumpUpRatio
if policy.OOMBumpUpRatio != nil {
ratio := float64(policy.OOMBumpUpRatio.MilliValue()) / 1000.0
Expand Down Expand Up @@ -224,11 +225,20 @@ func validateMemoryResolution(val apires.Quantity) error {
return nil
}

func validatePerVPAFeatureFlag(policy *vpa_types.ContainerResourcePolicy) error {
func validatePerVPAFeatureFlag(vpa *vpa_types.VerticalPodAutoscaler) error {
featureFlagOn := features.Enabled(features.PerVPAConfig)
perVPA := policy.OOMBumpUpRatio != nil || policy.OOMMinBumpUp != nil
if !featureFlagOn && perVPA {
return fmt.Errorf("OOMBumpUpRatio and OOMMinBumpUp are not supported when feature flag %s is disabled", features.PerVPAConfig)
if !featureFlagOn && vpa.Spec.UpdatePolicy.EvictAfterOOMThreshold != nil {
return fmt.Errorf("EvictAfterOOMThreshold is not supported when feature flag %s is disabled", features.PerVPAConfig)
}

if vpa.Spec.ResourcePolicy != nil {
for _, policy := range vpa.Spec.ResourcePolicy.ContainerPolicies {
perVPA := policy.OOMBumpUpRatio != nil || policy.OOMMinBumpUp != nil
if !featureFlagOn && perVPA {
return fmt.Errorf("OOMBumpUpRatio and OOMMinBumpUp are not supported when feature flag %s is disabled", features.PerVPAConfig)
}
}

}
return nil
}
Loading