From bfb977d44bd52b9756ff844f2feead1c7dea3291 Mon Sep 17 00:00:00 2001 From: Dave Protasowski Date: Fri, 19 Sep 2025 20:55:56 -0400 Subject: [PATCH 1/7] adjust revisionfailure test image to allow changing the state of the pod --- test/test_images/configmimic/configmimic.go | 154 ++++++++++++++++++ test/test_images/configmimic/service.yaml | 45 +++++ .../revisionfailure/revisionfailure.go | 71 -------- test/test_images/revisionfailure/service.yaml | 28 ---- 4 files changed, 199 insertions(+), 99 deletions(-) create mode 100644 test/test_images/configmimic/configmimic.go create mode 100644 test/test_images/configmimic/service.yaml delete mode 100644 test/test_images/revisionfailure/revisionfailure.go delete mode 100644 test/test_images/revisionfailure/service.yaml diff --git a/test/test_images/configmimic/configmimic.go b/test/test_images/configmimic/configmimic.go new file mode 100644 index 000000000000..73151dac6b73 --- /dev/null +++ b/test/test_images/configmimic/configmimic.go @@ -0,0 +1,154 @@ +/* +Copyright 2024 The Knative Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + "cmp" + "context" + "errors" + "fmt" + "io/fs" + "log" + "net/http" + "os" + "path/filepath" + "strings" + "sync/atomic" + "time" + + "k8s.io/apimachinery/pkg/util/sets" + pkgnet "knative.dev/pkg/network" + "knative.dev/pkg/signals" +) + +var ( + priority = []string{ + os.Getenv("K_SERVICE"), + os.Getenv("K_CONFIGURATION"), + os.Getenv("K_REVISION"), + cmp.Or( + os.Getenv("POD_NAME"), + os.Getenv("HOSTNAME"), + ), + } + + ready atomic.Bool + live atomic.Bool + startup atomic.Bool +) + +func handler(w http.ResponseWriter, _ *http.Request) { + log.Println("Hello world received a request.") + fmt.Fprintf(w, "Hello %v! 
How about some tasty noodles?", os.Getenv("NAME"))
+}
+
+func boolHandler(val *atomic.Bool) http.HandlerFunc {
+	return func(w http.ResponseWriter, _ *http.Request) {
+		if val.Load() {
+			w.WriteHeader(http.StatusOK)
+		} else {
+			w.WriteHeader(http.StatusServiceUnavailable)
+		}
+	}
+}
+
+func main() {
+	ready.Store(false)
+	live.Store(false)
+
+	log.Println("Hello world app started.")
+
+	checkForStatus()
+
+	sig := signals.SetupSignalHandler()
+
+	mux := http.NewServeMux()
+	mux.HandleFunc("/", handler)
+	mux.HandleFunc("/healthz/ready", boolHandler(&ready))
+	mux.HandleFunc("/healthz/live", boolHandler(&live))
+	mux.HandleFunc("/healthz/startup", boolHandler(&startup))
+
+	server := pkgnet.NewServer(addr(), mux)
+
+	go server.ListenAndServe()
+	defer server.Shutdown(context.Background())
+
+	for {
+		select {
+		case <-time.After(5 * time.Second):
+			checkForStatus()
+		case <-sig:
+			return
+		}
+	}
+}
+
+func checkForStatus() {
+	log.Println("Checking for status")
+
+	var path string
+	var states sets.Set[string]
+
+	for _, entry := range priority {
+		if entry == "" {
+			continue
+		}
+
+		path = filepath.Join("/etc/config", entry)
+		bytes, err := os.ReadFile(path)
+		if errors.Is(err, fs.ErrNotExist) {
+			continue
+		} else if err != nil {
+			log.Printf("failed to read file at %q: %s\n", path, err)
+			continue
+		}
+
+		states = sets.New(strings.Split(string(bytes), ",")...)
+	}
+
+	log.Println("Read state", filepath.Base(path), sets.List(states))
+
+	newReady := states.Has("ready")
+	oldReady := ready.Swap(newReady)
+	if oldReady != newReady {
+		log.Println("ready is now:", newReady)
+	}
+
+	newLive := states.Has("live")
+	oldLive := live.Swap(newLive)
+	if oldLive != newLive {
+		log.Println("live is now:", newLive)
+	}
+
+	newStartup := states.Has("startup")
+	oldStartup := startup.Swap(newStartup)
+	if oldStartup != newStartup {
+		log.Println("startup is now:", newStartup)
+	}
+
+	if states.Has("fail") {
+		log.Fatalf("you want me to fail")
+	}
+}
+
+func addr() string {
+	if port := os.Getenv("PORT"); port != "" {
+		return ":" + port
+
+	}
+	return ":8080"
+}
diff --git a/test/test_images/configmimic/service.yaml b/test/test_images/configmimic/service.yaml
new file mode 100644
index 000000000000..28e9dbbd8261
--- /dev/null
+++ b/test/test_images/configmimic/service.yaml
@@ -0,0 +1,45 @@
+apiVersion: serving.knative.dev/v1
+kind: Service
+metadata:
+  name: revision-state
+spec:
+  template:
+    spec:
+      timeoutSeconds: 30
+      containers:
+        - image: ko://knative.dev/serving/test/test_images/configmimic
+          env:
+            - name: NAME
+              value: "$NAME"
+          volumeMounts:
+            - name: state
+              mountPath: /etc/config
+
+          # This seems broken now
+          # startupProbe:
+          #   httpGet:
+          #     path: /healthz/startup
+
+          readinessProbe:
+            httpGet:
+              path: /healthz/ready
+
+          livenessProbe:
+            httpGet:
+              path: /healthz/live
+
+      volumes:
+        - name: state
+          configMap:
+            name: revision-state
+
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: revision-state
+data:
+  # By default the service is started, ready, and live
+  revision-state: "startup,ready,live"
+  # The second revision never becomes ready
+  revision-state-00002: ""
diff --git a/test/test_images/revisionfailure/revisionfailure.go b/test/test_images/revisionfailure/revisionfailure.go
deleted file mode 100644
index 970ef5489cd7..000000000000
--- a/test/test_images/revisionfailure/revisionfailure.go
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
-Copyright 2024 The Knative Authors
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package main
-
-import (
-	"flag"
-	"fmt"
-	"log"
-	"net/http"
-	"os"
-	"time"
-
-	"knative.dev/serving/test"
-)
-
-func handler(w http.ResponseWriter, _ *http.Request) {
-	log.Print("Hello world received a request.")
-	fmt.Fprintf(w, "Hello %v! How about some tasty noodles?", os.Getenv("NAME"))
-}
-
-func main() {
-	stop := make(chan struct{})
-
-	flag.Parse()
-	log.Println("Hello world app started.")
-
-	checkForFailure()
-
-	go func() {
-		test.ListenAndServeGracefully(":8080", handler)
-		close(stop)
-	}()
-
-	for {
-		select {
-		case <-time.After(5 * time.Second):
-			checkForFailure()
-		case <-stop:
-			return
-		}
-	}
-}
-
-func checkForFailure() {
-	log.Println("Checking for failure")
-
-	entries, err := os.ReadDir("/etc/config")
-	if err != nil {
-		log.Fatal("failed to read failure list", err)
-	}
-
-	for _, e := range entries {
-		if e.Name() == os.Getenv("K_REVISION") {
-			log.Fatalf("you want me to fail")
-		}
-	}
-}
diff --git a/test/test_images/revisionfailure/service.yaml b/test/test_images/revisionfailure/service.yaml
deleted file mode 100644
index 5f6bd70c0a5b..000000000000
--- a/test/test_images/revisionfailure/service.yaml
+++ /dev/null
@@ -1,28 +0,0 @@
-apiVersion: serving.knative.dev/v1
-kind: Service
-metadata:
-  name: revision-failure
-  namespace: default
-spec:
-  template:
-    spec:
-      timeoutSeconds: 30
-      containers:
-        - image: ko://knative.dev/serving/test/test_images/revisionfailure
-          env:
-            - name: NAME
-              value: "$NAME"
-          volumeMounts:
-            - name: failure-list
-              mountPath: /etc/config
-      volumes:
-        - name: failure-list
-          configMap:
-            name: revision-failure
----
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: revision-failure
-# data:
-#   revision-failure-00001: "1"

From df8d493371e94f95edf3242d06b3806036b7e7c5 Mon Sep 17 00:00:00 2001
From: Dave Protasowski
Date: Fri, 19 Sep 2025 21:19:57 -0400
Subject: [PATCH 2/7] include test to ensure min-scale transition works as
 expected

a previous revision should only scale down when the next revision is ready
---
 pkg/testing/v1/configuration.go     |  18 ++++
 test/conformance.go                 |   1 +
 test/e2e/minscale_readiness_test.go | 160 ++++++++++++++++++++++++++++
 3 files changed, 179 insertions(+)

diff --git a/pkg/testing/v1/configuration.go b/pkg/testing/v1/configuration.go
index e84e532d9d6d..65657316b8ad 100644
--- a/pkg/testing/v1/configuration.go
+++ b/pkg/testing/v1/configuration.go
@@ -138,3 +138,21 @@ func WithConfigRevisionIdleTimeoutSeconds(revisionIdleTimeoutSeconds int64) Conf
 		cfg.Spec.Template.Spec.IdleTimeoutSeconds = ptr.Int64(revisionIdleTimeoutSeconds)
 	}
 }
+
+// WithConfigVolume adds a volume and the corresponding volume mount to the Configuration's container
+func WithConfigVolume(name, mountPath string, volumeSource corev1.VolumeSource) ConfigOption {
+	return func(c *v1.Configuration) {
+		rt := &c.Spec.Template.Spec
+
+		rt.Containers[0].VolumeMounts = append(rt.Containers[0].VolumeMounts,
+			corev1.VolumeMount{
+				Name:      name,
+				MountPath: mountPath,
+			})
+
+		rt.Volumes = append(rt.Volumes, corev1.Volume{
+			Name:         name,
+			VolumeSource: volumeSource,
+		})
+	}
+}
diff --git a/test/conformance.go b/test/conformance.go
index ff31fd4d49f1..f505de7acdc6 100644
--- a/test/conformance.go
+++ 
b/test/conformance.go
@@ -53,6 +53,7 @@ const (
 	Timeout   = "timeout"
 	Volumes   = "volumes"
 	SlowStart = "slowstart"
+	ConfigMimic = "configmimic"
 
 	// Constants for test image output.
 	PizzaPlanetText1 = "What a spaceport!"
diff --git a/test/e2e/minscale_readiness_test.go b/test/e2e/minscale_readiness_test.go
index 95d6fb5c0e3a..cbb28943fb93 100644
--- a/test/e2e/minscale_readiness_test.go
+++ b/test/e2e/minscale_readiness_test.go
@@ -30,8 +30,15 @@ import (
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/selection"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/apimachinery/pkg/util/wait"
+	"knative.dev/pkg/apis/duck"
+	"knative.dev/pkg/kmeta"
 	"knative.dev/serving/pkg/apis/autoscaling"
+	"knative.dev/serving/pkg/apis/serving"
 	v1 "knative.dev/serving/pkg/apis/serving/v1"
 	"knative.dev/serving/pkg/resources"
 	rtesting "knative.dev/serving/pkg/testing/v1"
@@ -39,6 +46,159 @@ import (
 	v1test "knative.dev/serving/test/v1"
 )
 
+func TestMinScaleTransition(t *testing.T) {
+	t.Parallel()
+	ctx := t.Context()
+
+	const (
+		minScale      = 4
+		scalingWindow = autoscaling.WindowMin
+	)
+
+	clients := Setup(t)
+	cmClient := clients.KubeClient.CoreV1().ConfigMaps(test.ServingFlags.TestNamespace)
+
+	names := test.ResourceNames{
+		// Config and Route have different names to avoid false positives
+		Config: test.ObjectNameForTest(t),
+		Route:  test.ObjectNameForTest(t),
+		Image:  test.ConfigMimic,
+	}
+
+	test.EnsureTearDown(t, clients, &names)
+
+	secondRevisionName := kmeta.ChildName(names.Config, fmt.Sprintf("-%05d", 2))
+
+	cm := &corev1.ConfigMap{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      names.Config,
+			Namespace: test.ServingFlags.TestNamespace,
+		},
+		Data: map[string]string{
+			// By default the pods of the configuration are ready
+			names.Config: "startup,ready,live",
+
+			// By default the second revision doesn't become ready
+			secondRevisionName: "startup",
+		},
+	}
+	cm, err := cmClient.Create(t.Context(), cm, metav1.CreateOptions{})
+
+	test.EnsureCleanup(t, func() {
+		cmClient.Delete(t.Context(), cm.Name, metav1.DeleteOptions{})
+	})
+
+	t.Log("Creating route")
+	if _, err := v1test.CreateRoute(t, clients, names); err != nil {
+		t.Fatal("Failed to create Route:", err)
+	}
+
+	t.Log("Creating configuration")
+	cfg, err := v1test.CreateConfiguration(t, clients, names,
+		withMinScale(minScale),
+		// Make sure we scale down quickly after panic, before the autoscaler gets killed by chaosduck.
+ withWindow(scalingWindow), + rtesting.WithConfigReadinessProbe(&corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/healthz/ready", + Port: intstr.FromInt(8080), + }, + }, + }), + rtesting.WithConfigVolume("state", "/etc/config", corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: cm.Name, + }, + }, + }), + ) + + if err != nil { + t.Fatal("Failed to create Configuration:", err) + } + + firstRevName := latestRevisionName(t, clients, names.Config, "") + firstRevServiceName := PrivateServiceName(t, clients, firstRevName) + + t.Log("Waiting for first revision to become ready") + err = v1test.WaitForRevisionState(clients.ServingClient, firstRevName, v1test.IsRevisionReady, "RevisionIsReady") + if err != nil { + t.Fatalf("The Revision %q did not become ready: %v", firstRevName, err) + } + + t.Log("Holding revision at minScale after becoming ready") + if lr, ok := ensureDesiredScale(clients, t, firstRevServiceName, gte(minScale)); !ok { + t.Fatalf("The revision %q observed scale %d < %d after becoming ready", firstRevName, lr, minScale) + } + + t.Log("Updating configuration") + if _, err := v1test.PatchConfig(t, clients, cfg, rtesting.WithConfigEnv(corev1.EnvVar{Name: "FOO", Value: "BAR"})); err != nil { + t.Fatal("Failed to update Configuration:", err) + } + + t.Logf("Waiting for %v pods to be created", minScale) + var podList *corev1.PodList + err = wait.PollUntilContextTimeout(ctx, 250*time.Millisecond, 10*time.Second, true, func(context.Context) (bool, error) { + revLabel, err := labels.NewRequirement(serving.RevisionLabelKey, selection.Equals, []string{secondRevisionName}) + if err != nil { + return false, fmt.Errorf("unable to create rev label: %w", err) + } + + pods := clients.KubeClient.CoreV1().Pods(test.ServingFlags.TestNamespace) + podList, err = pods.List(ctx, metav1.ListOptions{ + LabelSelector: revLabel.String(), + }) + + if err != nil { + return false, err + } + + runningPods := 0 + for _, pod := range podList.Items { + if pod.Status.Phase == corev1.PodRunning { + runningPods++ + } + } + + return runningPods == minScale, nil + }) + + if errors.Is(err, context.DeadlineExceeded) { + t.Fatal("Timedout waiting for pods to be running", err) + } else if err != nil { + t.Fatal("Failed waiting for pods to be running", err) + } + + podName := podList.Items[0].Name + + t.Logf("Marking revision %s pod %s as ready", secondRevisionName, podName) + newCM := cm.DeepCopy() + newCM.Data[podName] = "startup,ready" + + patchBytes, err := duck.CreateBytePatch(cm, newCM) + if err != nil { + t.Fatal("Failed to create patch:", err) + } + + cm, err = cmClient.Patch(ctx, names.Config, types.JSONPatchType, patchBytes, metav1.PatchOptions{}) + if err != nil { + t.Fatal("Failed to patch ConfigMap", err) + } + + t.Logf("Waiting for scaling window %v to pass", scalingWindow) + + // Wait two autoscaling window durations + // Scaling decisions are made at the end of the window + time.Sleep(scalingWindow) + + t.Log("Check original revision holding at min scale", minScale) + if _, ok := ensureDesiredScale(clients, t, firstRevServiceName, gte(minScale)); !ok { + t.Fatalf("Revision %q was scaled down prematurely", firstRevName) + } +} + func TestMinScale(t *testing.T) { t.Parallel() From be98a164bb7f67886850678e1c4991d8bad4e5ce Mon Sep 17 00:00:00 2001 From: Dave Protasowski Date: Mon, 22 Sep 2025 12:18:22 -0400 Subject: [PATCH 3/7] fix linter warnings --- test/e2e/minscale_readiness_test.go | 5 
+++--
 test/test_images/configmimic/configmimic.go |  1 -
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/e2e/minscale_readiness_test.go b/test/e2e/minscale_readiness_test.go
index cbb28943fb93..568f7b1f5682 100644
--- a/test/e2e/minscale_readiness_test.go
+++ b/test/e2e/minscale_readiness_test.go
@@ -83,6 +83,9 @@ func TestMinScaleTransition(t *testing.T) {
 		},
 	}
 	cm, err := cmClient.Create(t.Context(), cm, metav1.CreateOptions{})
+	if err != nil {
+		t.Fatal("Failed to create ConfigMap:", err)
+	}
 
 	test.EnsureCleanup(t, func() {
 		cmClient.Delete(t.Context(), cm.Name, metav1.DeleteOptions{})
@@ -114,7 +117,6 @@ func TestMinScaleTransition(t *testing.T) {
 			},
 		}),
 	)
-
 	if err != nil {
 		t.Fatal("Failed to create Configuration:", err)
 	}
@@ -150,7 +152,6 @@ func TestMinScaleTransition(t *testing.T) {
 		podList, err = pods.List(ctx, metav1.ListOptions{
 			LabelSelector: revLabel.String(),
 		})
-
 		if err != nil {
 			return false, err
 		}
diff --git a/test/test_images/configmimic/configmimic.go b/test/test_images/configmimic/configmimic.go
index 73151dac6b73..b41d9b22b17c 100644
--- a/test/test_images/configmimic/configmimic.go
+++ b/test/test_images/configmimic/configmimic.go
@@ -148,7 +148,6 @@ func checkForStatus() {
 func addr() string {
 	if port := os.Getenv("PORT"); port != "" {
 		return ":" + port
-
 	}
 	return ":8080"
 }

From 427d870e2fae8bda9859a20b3e1e8ae8d2ba3cc9 Mon Sep 17 00:00:00 2001
From: Dave Protasowski
Date: Mon, 22 Sep 2025 16:23:34 -0400
Subject: [PATCH 4/7] fix revision conditions when initial scale isn't
 achieved

---
 pkg/apis/serving/v1/revision_lifecycle.go |  6 ++-
 .../serving/v1/revision_lifecycle_test.go | 38 +++++++++++++++++++
 pkg/reconciler/revision/table_test.go     |  1 +
 3 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/pkg/apis/serving/v1/revision_lifecycle.go b/pkg/apis/serving/v1/revision_lifecycle.go
index f0e54da9d872..fe10f1494468 100644
--- a/pkg/apis/serving/v1/revision_lifecycle.go
+++ b/pkg/apis/serving/v1/revision_lifecycle.go
@@ -204,8 +204,10 @@ func (rs *RevisionStatus) PropagateAutoscalerStatus(ps *autoscalingv1alpha1.PodA
 
 	// Mark resource unavailable if we don't have a Service Name and the deployment is ready
 	// This can happen when we have initial scale set to 0
-	if rs.GetCondition(RevisionConditionResourcesAvailable).IsTrue() && ps.ServiceName == "" {
-		rs.MarkResourcesAvailableUnknown(ReasonDeploying, "")
+	if rs.GetCondition(RevisionConditionResourcesAvailable).IsTrue() {
+		if ps.ServiceName == "" || !ps.IsScaleTargetInitialized() {
+			rs.MarkResourcesAvailableUnknown(ReasonDeploying, "")
+		}
 	}
 
 	switch cond.Status {
diff --git a/pkg/apis/serving/v1/revision_lifecycle_test.go b/pkg/apis/serving/v1/revision_lifecycle_test.go
index 62781c3b4641..32ce9b20b09b 100644
--- a/pkg/apis/serving/v1/revision_lifecycle_test.go
+++ b/pkg/apis/serving/v1/revision_lifecycle_test.go
@@ -584,6 +584,44 @@ func TestPropagateAutoscalerStatus(t *testing.T) {
 	apistest.CheckConditionSucceeded(r, RevisionConditionResourcesAvailable, t)
 }
 
+func TestPropagateAutoscalerStatus_ScaleTargetNotInitialized(t *testing.T) {
+	r := &RevisionStatus{}
+	r.InitializeConditions()
+	apistest.CheckConditionOngoing(r, RevisionConditionReady, t)
+
+	// PodAutoscaler has no active condition, so we are just coming up.
+	r.PropagateAutoscalerStatus(&autoscalingv1alpha1.PodAutoscalerStatus{
+		Status: duckv1.Status{},
+	})
+
+	apistest.CheckConditionOngoing(r, RevisionConditionActive, t)
+	apistest.CheckConditionOngoing(r, RevisionConditionResourcesAvailable, t)
+
+	// The Deployment has been created, so we mark the resources available
+	r.MarkResourcesAvailableTrue()
+
+	// PodAutoscaler resources have been created but initial scale target
+	// has not been achieved
+	r.PropagateAutoscalerStatus(&autoscalingv1alpha1.PodAutoscalerStatus{
+		ServiceName: "some-service",
+		Status: duckv1.Status{
+			Conditions: duckv1.Conditions{{
+				Type:   autoscalingv1alpha1.PodAutoscalerConditionReady,
+				Status: corev1.ConditionUnknown,
+			}, {
+				Type:   autoscalingv1alpha1.PodAutoscalerConditionScaleTargetInitialized,
+				Status: corev1.ConditionUnknown,
+			}, {
+				Type:   autoscalingv1alpha1.PodAutoscalerConditionSKSReady,
+				Status: corev1.ConditionTrue,
+			}},
+		},
+	})
+
+	// ResourcesAvailable should be reverted back to ongoing
+	apistest.CheckConditionOngoing(r, RevisionConditionResourcesAvailable, t)
+}
+
 func TestPropagateAutoscalerStatus_NoOverridingResourcesAvailable(t *testing.T) {
 	// Cases to test Ready condition
 	// we fix ScaleTargetInitialized to True
diff --git a/pkg/reconciler/revision/table_test.go b/pkg/reconciler/revision/table_test.go
index 9ef47b0b5338..9fdba4ca8525 100644
--- a/pkg/reconciler/revision/table_test.go
+++ b/pkg/reconciler/revision/table_test.go
@@ -283,6 +283,7 @@ func TestReconcile(t *testing.T) {
 			WithRoutingState(v1.RoutingStateActive, fc),
 			MarkRevisionReady, WithRevisionObservedGeneration(1)),
 		pa("foo", "pa-not-ready",
+			WithScaleTargetInitialized,
 			WithPAStatusService("its-not-confidential"),
 			WithBufferedTraffic,
 			WithReachabilityReachable),

From 8db05ad46c42c6651136a3777128f2e2d6cab15b Mon Sep 17 00:00:00 2001
From: Dave Protasowski
Date: Thu, 25 Sep 2025 10:13:24 -0400
Subject: [PATCH 5/7] give more time for the pods to be running

---
 test/e2e/minscale_readiness_test.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/e2e/minscale_readiness_test.go b/test/e2e/minscale_readiness_test.go
index 568f7b1f5682..f971952b4eb6 100644
--- a/test/e2e/minscale_readiness_test.go
+++ b/test/e2e/minscale_readiness_test.go
@@ -142,7 +142,8 @@ func TestMinScaleTransition(t *testing.T) {
 
 	t.Logf("Waiting for %v pods to be created", minScale)
 	var podList *corev1.PodList
-	err = wait.PollUntilContextTimeout(ctx, 250*time.Millisecond, 10*time.Second, true, func(context.Context) (bool, error) {
+
+	err = wait.PollUntilContextTimeout(ctx, time.Second, time.Minute, true, func(context.Context) (bool, error) {
 		revLabel, err := labels.NewRequirement(serving.RevisionLabelKey, selection.Equals, []string{secondRevisionName})
 		if err != nil {
 			return false, fmt.Errorf("unable to create rev label: %w", err)

From b7f4c48cc317487c581706c3c9498abc268aeb02 Mon Sep 17 00:00:00 2001
From: Dave Protasowski
Date: Thu, 25 Sep 2025 12:06:26 -0400
Subject: [PATCH 6/7] log pod states when it fails

---
 test/e2e/minscale_readiness_test.go | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/test/e2e/minscale_readiness_test.go b/test/e2e/minscale_readiness_test.go
index f971952b4eb6..134b95b82f30 100644
--- a/test/e2e/minscale_readiness_test.go
+++ b/test/e2e/minscale_readiness_test.go
@@ -168,7 +168,10 @@ func TestMinScaleTransition(t *testing.T) {
 	})
 
 	if errors.Is(err, context.DeadlineExceeded) {
-		t.Fatal("Timedout waiting for pods to be running", err)
+		for _, pod := range podList.Items {
+			t.Logf("pod %s is in phase %s", 
pod.Name, pod.Status.Phase) + } + t.Fatal("Timed out waiting for pods to be running", err) } else if err != nil { t.Fatal("Failed waiting for pods to be running", err) } From 34eb99588be57e2bd536a98529a9025fd2ccf18a Mon Sep 17 00:00:00 2001 From: Dave Protasowski Date: Thu, 25 Sep 2025 18:46:08 -0400 Subject: [PATCH 7/7] ensure new revision becomes ready and holds scale while old revision is scaled down --- test/e2e/minscale_readiness_test.go | 104 ++++++++++++++------ test/test_images/configmimic/configmimic.go | 7 +- 2 files changed, 80 insertions(+), 31 deletions(-) diff --git a/test/e2e/minscale_readiness_test.go b/test/e2e/minscale_readiness_test.go index 134b95b82f30..479a473f937a 100644 --- a/test/e2e/minscale_readiness_test.go +++ b/test/e2e/minscale_readiness_test.go @@ -35,7 +35,6 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/wait" - "knative.dev/pkg/apis/duck" "knative.dev/pkg/kmeta" "knative.dev/serving/pkg/apis/autoscaling" "knative.dev/serving/pkg/apis/serving" @@ -67,7 +66,8 @@ func TestMinScaleTransition(t *testing.T) { test.EnsureTearDown(t, clients, &names) - secondRevisionName := kmeta.ChildName(names.Config, fmt.Sprintf("-%05d", 2)) + firstRevision := kmeta.ChildName(names.Config, fmt.Sprintf("-%05d", 1)) + secondRevision := kmeta.ChildName(names.Config, fmt.Sprintf("-%05d", 2)) cm := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ @@ -79,7 +79,7 @@ func TestMinScaleTransition(t *testing.T) { names.Config: "startup,ready,live", // By default the second revision doesn't become ready - secondRevisionName: "startup", + secondRevision: "startup", }, } cm, err := cmClient.Create(t.Context(), cm, metav1.CreateOptions{}) @@ -88,7 +88,7 @@ func TestMinScaleTransition(t *testing.T) { } test.EnsureCleanup(t, func() { - cmClient.Delete(t.Context(), cm.Name, metav1.DeleteOptions{}) + cmClient.Delete(context.Background(), cm.Name, metav1.DeleteOptions{}) }) t.Log("Creating route") @@ -121,18 +121,18 @@ func TestMinScaleTransition(t *testing.T) { t.Fatal("Failed to create Configuration:", err) } - firstRevName := latestRevisionName(t, clients, names.Config, "") - firstRevServiceName := PrivateServiceName(t, clients, firstRevName) + // This will wait for the revision to be created + firstRevisionService := PrivateServiceName(t, clients, firstRevision) t.Log("Waiting for first revision to become ready") - err = v1test.WaitForRevisionState(clients.ServingClient, firstRevName, v1test.IsRevisionReady, "RevisionIsReady") + err = v1test.WaitForRevisionState(clients.ServingClient, firstRevision, v1test.IsRevisionReady, "RevisionIsReady") if err != nil { - t.Fatalf("The Revision %q did not become ready: %v", firstRevName, err) + t.Fatalf("The Revision %q did not become ready: %v", firstRevision, err) } t.Log("Holding revision at minScale after becoming ready") - if lr, ok := ensureDesiredScale(clients, t, firstRevServiceName, gte(minScale)); !ok { - t.Fatalf("The revision %q observed scale %d < %d after becoming ready", firstRevName, lr, minScale) + if lr, ok := ensureDesiredScale(clients, t, firstRevisionService, gte(minScale)); !ok { + t.Fatalf("The revision %q observed scale %d < %d after becoming ready", firstRevision, lr, minScale) } t.Log("Updating configuration") @@ -144,7 +144,7 @@ func TestMinScaleTransition(t *testing.T) { var podList *corev1.PodList err = wait.PollUntilContextTimeout(ctx, time.Second, time.Minute, true, func(context.Context) (bool, error) { - revLabel, err := 
labels.NewRequirement(serving.RevisionLabelKey, selection.Equals, []string{secondRevisionName})
+		revLabel, err := labels.NewRequirement(serving.RevisionLabelKey, selection.Equals, []string{secondRevision})
 		if err != nil {
 			return false, fmt.Errorf("unable to create rev label: %w", err)
 		}
@@ -176,31 +176,73 @@ func TestMinScaleTransition(t *testing.T) {
 		t.Fatal("Failed waiting for pods to be running", err)
 	}
 
-	podName := podList.Items[0].Name
+	secondRevisionService := PrivateServiceName(t, clients, secondRevision)
 
-	t.Logf("Marking revision %s pod %s as ready", secondRevisionName, podName)
-	newCM := cm.DeepCopy()
-	newCM.Data[podName] = "startup,ready"
+	// Go over all the new pods and mark each one ready; after the last
+	// one we skip checking that the previous revision held its scale
+	for i, pod := range podList.Items {
+		podName := pod.Name
 
-	patchBytes, err := duck.CreateBytePatch(cm, newCM)
-	if err != nil {
-		t.Fatal("Failed to create patch:", err)
+		t.Logf("Marking revision %s pod %s as ready", secondRevision, podName)
+		markPodAsReadyAndWait(t, clients, cm.Name, podName)
+		t.Logf("Revision %s pod %s is ready", secondRevision, podName)
+
+		t.Logf("Waiting for 2x scaling window %v to pass", 2*scalingWindow)
+
+		// Wait two autoscaling window durations
+		// Scaling decisions are made at the end of the window
+		time.Sleep(2 * scalingWindow)
+
+		if i >= len(podList.Items)-1 {
+			// When marking the last pod ready we want to skip
+			// ensuring the previous revision doesn't scale down
+			break
+		}
+
+		t.Log("Check original revision holding at min scale", minScale)
+		if _, ok := ensureDesiredScale(clients, t, firstRevisionService, gte(minScale)); !ok {
+			t.Fatalf("Revision %q was scaled down prematurely", firstRevision)
+		}
 	}
 
-	cm, err = cmClient.Patch(ctx, names.Config, types.JSONPatchType, patchBytes, metav1.PatchOptions{})
+	t.Log("Check new revision holding at min scale", minScale)
+	if _, ok := ensureDesiredScale(clients, t, secondRevisionService, gte(minScale)); !ok {
+		t.Fatalf("Revision %q was scaled down prematurely", secondRevision)
+	}
+
+	t.Log("Check old revision is scaled down")
+	if _, err := waitForDesiredScale(clients, firstRevisionService, eq(0)); err != nil {
+		t.Fatalf("Revision %q was not scaled down: %s", firstRevision, err)
+	}
+}
+
+func markPodAsReadyAndWait(t *testing.T, clients *test.Clients, cmName, podName string) {
+	ctx := t.Context()
+	coreClient := clients.KubeClient.CoreV1()
+	cmClient := coreClient.ConfigMaps(test.ServingFlags.TestNamespace)
+
+	patch := fmt.Sprintf(`[{"op":"add", "path":"/data/%s", "value": "startup,ready"}]`, podName)
+
+	_, err := cmClient.Patch(ctx, cmName, types.JSONPatchType, []byte(patch), metav1.PatchOptions{})
 	if err != nil {
 		t.Fatal("Failed to patch ConfigMap", err)
 	}
 
-	t.Logf("Waiting for scaling window %v to pass", scalingWindow)
+	if err = wait.PollUntilContextTimeout(ctx, 10*time.Second, 2*time.Minute, true, func(context.Context) (bool, error) {
+		pod, err := clients.KubeClient.CoreV1().Pods(test.ServingFlags.TestNamespace).Get(context.Background(), podName, metav1.GetOptions{})
+		if err != nil {
+			return false, err
+		}
 
-	// Wait two autoscaling window durations
-	// Scaling decisions are made at the end of the window
-	time.Sleep(scalingWindow)
+		for _, cond := range pod.Status.Conditions {
+			if cond.Type == corev1.PodReady {
+				return cond.Status == corev1.ConditionTrue, nil
+			}
+		}
 
-	t.Log("Check original revision holding at min scale", minScale)
-	if _, ok := ensureDesiredScale(clients, t, firstRevServiceName, gte(minScale)); !ok {
-		t.Fatalf("Revision %q was scaled 
down prematurely", firstRevName) + return false, nil + }); err != nil { + t.Fatalf("Pod %s didn't become ready: %s", podName, err) } } @@ -336,6 +378,12 @@ func lt(m int) func(int) bool { } } +func eq(m int) func(int) bool { + return func(n int) bool { + return n == m + } +} + func withMinScale(minScale int) func(cfg *v1.Configuration) { return func(cfg *v1.Configuration) { if cfg.Spec.Template.Annotations == nil { @@ -379,7 +427,7 @@ func waitForDesiredScale(clients *test.Clients, serviceName string, cond func(in endpoints := clients.KubeClient.CoreV1().Endpoints(test.ServingFlags.TestNamespace) // See https://github.com/knative/serving/issues/7727#issuecomment-706772507 for context. - return latestReady, wait.PollUntilContextTimeout(context.Background(), 250*time.Millisecond, 3*time.Minute, true, func(context.Context) (bool, error) { + return latestReady, wait.PollUntilContextTimeout(context.Background(), time.Second, 3*time.Minute, true, func(context.Context) (bool, error) { endpoint, err := endpoints.Get(context.Background(), serviceName, metav1.GetOptions{}) if err != nil { return false, nil //nolint:nilerr @@ -392,7 +440,7 @@ func waitForDesiredScale(clients *test.Clients, serviceName string, cond func(in func ensureDesiredScale(clients *test.Clients, t *testing.T, serviceName string, cond func(int) bool) (latestReady int, observed bool) { endpoints := clients.KubeClient.CoreV1().Endpoints(test.ServingFlags.TestNamespace) - err := wait.PollUntilContextTimeout(context.Background(), 250*time.Millisecond, 10*time.Second, true, func(context.Context) (bool, error) { + err := wait.PollUntilContextTimeout(context.Background(), time.Second, 30*time.Second, true, func(context.Context) (bool, error) { endpoint, err := endpoints.Get(context.Background(), serviceName, metav1.GetOptions{}) if err != nil { return false, nil //nolint:nilerr diff --git a/test/test_images/configmimic/configmimic.go b/test/test_images/configmimic/configmimic.go index b41d9b22b17c..e1c6f37fc222 100644 --- a/test/test_images/configmimic/configmimic.go +++ b/test/test_images/configmimic/configmimic.go @@ -100,7 +100,7 @@ func main() { func checkForStatus() { log.Println("Checking for status") - var path string + var lastPath string var states sets.Set[string] for _, entry := range priority { @@ -108,7 +108,7 @@ func checkForStatus() { continue } - path = filepath.Join("/etc/config", entry) + path := filepath.Join("/etc/config", entry) bytes, err := os.ReadFile(path) if errors.Is(err, fs.ErrNotExist) { continue @@ -117,10 +117,11 @@ func checkForStatus() { continue } + lastPath = path states = sets.New(strings.Split(string(bytes), ",")...) } - log.Println("Read state", filepath.Base(path), sets.List(states)) + log.Println("Read state", filepath.Base(lastPath), sets.List(states)) newReady := states.Has("ready") oldReady := ready.Swap(newReady)