Skip to content

Commit a12bddf

Browse files
committed
OCPEDGE-2088, OCPEDGE-1885: Updated state transitions & tests for TNF setup job
- Setup job now sets Available condition only when ExternalEtcd isn't enabled yet (includes test) - Fixed jobcontroller & test setup to handle variable operator status conditions - Added test coverage for updated expectations for job controller condition transitions (Progressing, Degraded, Available)
1 parent 61b7fb5 commit a12bddf

File tree

4 files changed

+247
-23
lines changed

4 files changed

+247
-23
lines changed

pkg/tnf/operator/starter.go

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@ import (
44
"bytes"
55
"context"
66
"fmt"
7-
operatorv1informers "github.com/openshift/client-go/operator/informers/externalversions/operator/v1"
87
"os"
98
"sync"
109

10+
operatorv1informers "github.com/openshift/client-go/operator/informers/externalversions/operator/v1"
11+
1112
operatorv1 "github.com/openshift/api/operator/v1"
1213
configv1informers "github.com/openshift/client-go/config/informers/externalversions"
1314
v1 "github.com/openshift/client-go/config/informers/externalversions/config/v1"
@@ -69,6 +70,12 @@ func HandleDualReplicaClusters(ctx context.Context,
6970

7071
klog.Infof("detected DualReplica topology")
7172

73+
// We only set the CEO available status to false during the initial setup
74+
initialSetup, err := isInitialSetup(operatorClient)
75+
if err != nil {
76+
return false, fmt.Errorf("could not determine if we are in the initial setup: %w", err)
77+
}
78+
7279
runExternalEtcdSupportController(ctx, controllerContext, operatorClient, envVarGetter, kubeInformersForNamespaces,
7380
configInformers, networkInformer, controlPlaneNodeInformer, etcdInformer, kubeClient)
7481
runTnfResourceController(ctx, controllerContext, kubeClient, dynamicClient, operatorClient, kubeInformersForNamespaces)
@@ -78,7 +85,7 @@ func HandleDualReplicaClusters(ctx context.Context,
7885
// we need node names for assigning auth and after-setup jobs to specific nodes
7986
var once sync.Once
8087
klog.Infof("watching for nodes...")
81-
_, err := controlPlaneNodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
88+
_, err = controlPlaneNodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
8289
AddFunc: func(obj interface{}) {
8390
node, ok := obj.(*corev1.Node)
8491
if !ok {
@@ -102,11 +109,17 @@ func HandleDualReplicaClusters(ctx context.Context,
102109
klog.Infof("found 2 control plane nodes (%q, %q), creating TNF jobs", nodeList[0].GetName(), nodeList[1].GetName())
103110
// the order of job creation does not matter, the jobs wait on each other as needed
104111
for _, node := range nodeList {
105-
runJobController(ctx, tools.JobTypeAuth, &node.Name, controllerContext, operatorClient, kubeClient, kubeInformersForNamespaces)
106-
runJobController(ctx, tools.JobTypeAfterSetup, &node.Name, controllerContext, operatorClient, kubeClient, kubeInformersForNamespaces)
112+
runJobController(ctx, tools.JobTypeAuth, &node.Name, controllerContext, operatorClient, kubeClient, kubeInformersForNamespaces, jobs.DefaultConditions)
113+
runJobController(ctx, tools.JobTypeAfterSetup, &node.Name, controllerContext, operatorClient, kubeClient, kubeInformersForNamespaces, jobs.DefaultConditions)
114+
}
115+
116+
setupConditions := jobs.DefaultConditions
117+
if initialSetup {
118+
setupConditions = append(setupConditions, operatorv1.OperatorStatusTypeAvailable)
107119
}
108-
runJobController(ctx, tools.JobTypeSetup, nil, controllerContext, operatorClient, kubeClient, kubeInformersForNamespaces)
109-
runJobController(ctx, tools.JobTypeFencing, nil, controllerContext, operatorClient, kubeClient, kubeInformersForNamespaces)
120+
121+
runJobController(ctx, tools.JobTypeSetup, nil, controllerContext, operatorClient, kubeClient, kubeInformersForNamespaces, setupConditions)
122+
runJobController(ctx, tools.JobTypeFencing, nil, controllerContext, operatorClient, kubeClient, kubeInformersForNamespaces, jobs.DefaultConditions)
110123
})
111124
},
112125
})
@@ -138,6 +151,20 @@ func HandleDualReplicaClusters(ctx context.Context,
138151
return true, nil
139152
}
140153

154+
func isInitialSetup(operatorClient v1helpers.StaticPodOperatorClient) (bool, error) {
155+
// Detect if the cluster is already running in ExternalEtcd mode
156+
operatorSpec, _, _, err := operatorClient.GetStaticPodOperatorState()
157+
if err != nil {
158+
return false, fmt.Errorf("could not get operator spec: %w", err)
159+
}
160+
externalEtcdMode, err := ceohelpers.IsExternalEtcdSupport(operatorSpec)
161+
if err != nil {
162+
return false, fmt.Errorf("could not determine if useExternalEtcdSupport config override is set: %w", err)
163+
}
164+
165+
return !externalEtcdMode, nil
166+
}
167+
141168
func isDualReplicaTopoly(ctx context.Context, featureGateAccessor featuregates.FeatureGateAccess, configInformers configv1informers.SharedInformerFactory) (bool, error) {
142169
if isDualReplicaTopology, err := ceohelpers.IsDualReplicaTopology(ctx, configInformers.Config().V1().Infrastructures().Lister()); err != nil {
143170
return false, fmt.Errorf("could not determine DualReplicaTopology, aborting controller start: %w", err)
@@ -201,7 +228,7 @@ func runTnfResourceController(ctx context.Context, controllerContext *controller
201228
go tnfResourceController.Run(ctx, 1)
202229
}
203230

204-
func runJobController(ctx context.Context, jobType tools.JobType, nodeName *string, controllerContext *controllercmd.ControllerContext, operatorClient v1helpers.StaticPodOperatorClient, kubeClient kubernetes.Interface, kubeInformersForNamespaces v1helpers.KubeInformersForNamespaces) {
231+
func runJobController(ctx context.Context, jobType tools.JobType, nodeName *string, controllerContext *controllercmd.ControllerContext, operatorClient v1helpers.StaticPodOperatorClient, kubeClient kubernetes.Interface, kubeInformersForNamespaces v1helpers.KubeInformersForNamespaces, conditions []string) {
205232
nodeNameForLogs := "n/a"
206233
if nodeName != nil {
207234
nodeNameForLogs = *nodeName
@@ -214,6 +241,7 @@ func runJobController(ctx context.Context, jobType tools.JobType, nodeName *stri
214241
operatorClient,
215242
kubeClient,
216243
kubeInformersForNamespaces.InformersFor(operatorclient.TargetNamespace).Batch().V1().Jobs(),
244+
conditions,
217245
[]factory.Informer{},
218246
[]jobs.JobHookFunc{
219247
func(_ *operatorv1.OperatorSpec, job *batchv1.Job) error {

pkg/tnf/operator/starter_test.go

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@ package operator
22

33
import (
44
"context"
5-
"github.com/stretchr/testify/require"
65
"maps"
76
"testing"
87
"time"
98

9+
"github.com/stretchr/testify/require"
10+
1011
configv1 "github.com/openshift/api/config/v1"
1112
operatorv1 "github.com/openshift/api/operator/v1"
1213

@@ -36,6 +37,7 @@ import (
3637
"github.com/openshift/cluster-etcd-operator/pkg/operator/ceohelpers"
3738
"github.com/openshift/cluster-etcd-operator/pkg/operator/operatorclient"
3839
"github.com/openshift/cluster-etcd-operator/pkg/tnf/operator/dualreplicahelpers"
40+
"github.com/openshift/cluster-etcd-operator/pkg/tnf/pkg/jobs"
3941
)
4042

4143
type args struct {
@@ -112,6 +114,72 @@ func TestHandleDualReplicaClusters(t *testing.T) {
112114
}
113115
}
114116

117+
func TestSetupJobConditionsBasedOnExternalEtcd(t *testing.T) {
118+
tests := []struct {
119+
name string
120+
externalEtcdEnabled bool
121+
expectedAvailableInSetup bool
122+
}{
123+
{
124+
name: "Job sets the Available condition during initial setup",
125+
externalEtcdEnabled: false,
126+
expectedAvailableInSetup: true,
127+
},
128+
{
129+
name: "Job does not set the Available condition once ExternalEtcd is enabled",
130+
externalEtcdEnabled: true,
131+
expectedAvailableInSetup: false,
132+
},
133+
}
134+
135+
for _, tt := range tests {
136+
t.Run(tt.name, func(t *testing.T) {
137+
// Create operator spec with or without external etcd support
138+
operatorSpec := &operatorv1.StaticPodOperatorSpec{}
139+
if tt.externalEtcdEnabled {
140+
operatorSpec.UnsupportedConfigOverrides = runtime.RawExtension{
141+
Raw: []byte(`{"useExternalEtcdSupport": true}`),
142+
}
143+
}
144+
145+
fakeOperatorClient := v1helpers.NewFakeStaticPodOperatorClient(
146+
operatorSpec,
147+
&operatorv1.StaticPodOperatorStatus{},
148+
nil,
149+
nil,
150+
)
151+
152+
// Test isInitialSetup function
153+
isInitialSetup, err := isInitialSetup(fakeOperatorClient)
154+
require.NoError(t, err)
155+
156+
// Verify the expected behavior
157+
expectedInitialSetup := !tt.externalEtcdEnabled
158+
require.Equal(t, expectedInitialSetup, isInitialSetup,
159+
"isInitialSetup should return %v when externalEtcdEnabled is %v",
160+
expectedInitialSetup, tt.externalEtcdEnabled)
161+
162+
// Verify setup conditions
163+
setupConditions := jobs.DefaultConditions
164+
if isInitialSetup {
165+
setupConditions = append(setupConditions, operatorv1.OperatorStatusTypeAvailable)
166+
}
167+
168+
hasAvailableCondition := false
169+
for _, condition := range setupConditions {
170+
if condition == operatorv1.OperatorStatusTypeAvailable {
171+
hasAvailableCondition = true
172+
break
173+
}
174+
}
175+
176+
require.Equal(t, tt.expectedAvailableInSetup, hasAvailableCondition,
177+
"Setup job should have Available condition: %v, but got: %v",
178+
tt.expectedAvailableInSetup, hasAvailableCondition)
179+
})
180+
}
181+
}
182+
115183
func getArgs(t *testing.T, dualReplicaControlPlaneEnabled, dualReplicaFeatureGateEnabled bool) args {
116184

117185
fakeKubeClient := fake.NewSimpleClientset()

pkg/tnf/pkg/jobs/jobcontroller.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ import (
2626
"github.com/openshift/cluster-etcd-operator/pkg/tnf/pkg/tools"
2727
)
2828

29+
// By default, we only set the progressing and degraded conditions for TNF setup.
30+
// This is because the operator is only unavailable during the transition of etcd
31+
// ownership between CEO and pacemaker.
32+
var DefaultConditions = []string{opv1.OperatorStatusTypeProgressing, opv1.OperatorStatusTypeDegraded}
33+
2934
// TODO This based on DeploymentController in openshift/library-go
3035
// TODO should be moved there once it proved to be useful
3136

@@ -71,6 +76,7 @@ func NewJobController(
7176
operatorClient v1helpers.OperatorClient,
7277
kubeClient kubernetes.Interface,
7378
jobInformer batchinformersv1.JobInformer,
79+
conditions []string,
7480
optionalInformers []factory.Informer,
7581
optionalJobHooks ...JobHookFunc,
7682
) factory.Controller {
@@ -82,9 +88,7 @@ func NewJobController(
8288
kubeClient,
8389
jobInformer,
8490
).WithConditions(
85-
opv1.OperatorStatusTypeAvailable,
86-
opv1.OperatorStatusTypeProgressing,
87-
opv1.OperatorStatusTypeDegraded,
91+
conditions...,
8892
).WithExtraInformers(
8993
optionalInformers...,
9094
).WithJobHooks(

0 commit comments

Comments
 (0)