@@ -4,10 +4,11 @@ import (
 	"bytes"
 	"context"
 	"fmt"
-	operatorv1informers "github.com/openshift/client-go/operator/informers/externalversions/operator/v1"
 	"os"
 	"sync"

+	operatorv1informers "github.com/openshift/client-go/operator/informers/externalversions/operator/v1"
+
 	operatorv1 "github.com/openshift/api/operator/v1"
 	configv1informers "github.com/openshift/client-go/config/informers/externalversions"
 	v1 "github.com/openshift/client-go/config/informers/externalversions/config/v1"
@@ -61,14 +62,30 @@ func HandleDualReplicaClusters(ctx context.Context,
 	kubeClient kubernetes.Interface,
 	dynamicClient dynamic.Interface) (bool, error) {

-	if isDualReplicaTopology, err := isDualReplicaTopoly(ctx, featureGateAccessor, configInformers); err != nil {
+	// Start the informers and wait for them to sync.
+	// The config informer is used to check the control plane topology.
+	// The operator client informer is used to check if CEO is managing etcd, since
+	// we need to set the available status to false during the transition to external etcd.
+	configInformers.Start(ctx.Done())
+	operatorClient.Informer().Run(ctx.Done())
+	if !cache.WaitForCacheSync(ctx.Done(), configInformers.Config().V1().APIServers().Informer().HasSynced, operatorClient.Informer().HasSynced) {
+		klog.Fatal("Failed to sync caches for static pod operator client")
+	}
+
+	if isDualReplicaTopology, err := isDualReplicaTopology(ctx, featureGateAccessor, configInformers); err != nil {
 		return false, err
 	} else if !isDualReplicaTopology {
 		return false, nil
 	}

 	klog.Infof("detected DualReplica topology")

+	// We only set the CEO available status to false during the initial transition to external etcd.
+	initialTransition, err := isCEOManagingEtcd(operatorClient)
+	if err != nil {
+		return false, fmt.Errorf("could not determine if we are in the initial setup: %w", err)
+	}
+
 	runExternalEtcdSupportController(ctx, controllerContext, operatorClient, envVarGetter, kubeInformersForNamespaces,
 		configInformers, networkInformer, controlPlaneNodeInformer, etcdInformer, kubeClient)
 	runTnfResourceController(ctx, controllerContext, kubeClient, dynamicClient, operatorClient, kubeInformersForNamespaces)
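
Note on the informer start/sync block in the hunk above: it follows the usual client-go flow of starting shared informers and then blocking on cache.WaitForCacheSync before reading from their caches. A shared informer's Run blocks until the stop channel closes, which is why the factory-style Start helpers launch it on its own goroutine. A minimal, self-contained sketch of that pattern (the in-cluster config and plain Kubernetes informer factory are assumptions for illustration, not taken from this change):

package main

import (
	"context"

	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	"k8s.io/client-go/tools/cache"
	"k8s.io/klog/v2"
)

func main() {
	ctx := context.Background()

	// Assumes the process runs inside a cluster; adjust config loading as needed.
	cfg, err := rest.InClusterConfig()
	if err != nil {
		klog.Fatalf("failed to load config: %v", err)
	}
	client := kubernetes.NewForConfigOrDie(cfg)

	factory := informers.NewSharedInformerFactory(client, 0)
	nodeInformer := factory.Core().V1().Nodes().Informer()

	// Start() launches each informer's Run on its own goroutine.
	factory.Start(ctx.Done())

	// Block until the watch cache is primed before using listers.
	if !cache.WaitForCacheSync(ctx.Done(), nodeInformer.HasSynced) {
		klog.Fatal("failed to sync node informer cache")
	}
	klog.Info("caches synced")
}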
@@ -78,7 +95,7 @@ func HandleDualReplicaClusters(ctx context.Context,
 	// we need node names for assigning auth and after-setup jobs to specific nodes
 	var once sync.Once
 	klog.Infof("watching for nodes...")
-	_, err := controlPlaneNodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
+	_, err = controlPlaneNodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
 		AddFunc: func(obj interface{}) {
 			node, ok := obj.(*corev1.Node)
 			if !ok {
@@ -102,11 +119,22 @@ func HandleDualReplicaClusters(ctx context.Context,
 				klog.Infof("found 2 control plane nodes (%q, %q), creating TNF jobs", nodeList[0].GetName(), nodeList[1].GetName())
 				// the order of job creation does not matter, the jobs wait on each other as needed
 				for _, node := range nodeList {
-					runJobController(ctx, tools.JobTypeAuth, &node.Name, controllerContext, operatorClient, kubeClient, kubeInformersForNamespaces)
-					runJobController(ctx, tools.JobTypeAfterSetup, &node.Name, controllerContext, operatorClient, kubeClient, kubeInformersForNamespaces)
+					runJobController(ctx, tools.JobTypeAuth, &node.Name, controllerContext, operatorClient, kubeClient, kubeInformersForNamespaces, jobs.DefaultConditions)
+					runJobController(ctx, tools.JobTypeAfterSetup, &node.Name, controllerContext, operatorClient, kubeClient, kubeInformersForNamespaces, jobs.DefaultConditions)
 				}
-				runJobController(ctx, tools.JobTypeSetup, nil, controllerContext, operatorClient, kubeClient, kubeInformersForNamespaces)
-				runJobController(ctx, tools.JobTypeFencing, nil, controllerContext, operatorClient, kubeClient, kubeInformersForNamespaces)
+
+				// Make a copy of the default conditions and add the available condition
+				conditionsWithAvailable := make([]string, len(jobs.DefaultConditions))
+				copy(conditionsWithAvailable, jobs.DefaultConditions)
+				conditionsWithAvailable = append(conditionsWithAvailable, operatorv1.OperatorStatusTypeAvailable)
+
+				setupConditions := jobs.DefaultConditions
+				if initialTransition {
+					setupConditions = conditionsWithAvailable
+				}
+
+				runJobController(ctx, tools.JobTypeSetup, nil, controllerContext, operatorClient, kubeClient, kubeInformersForNamespaces, setupConditions)
+				runJobController(ctx, tools.JobTypeFencing, nil, controllerContext, operatorClient, kubeClient, kubeInformersForNamespaces, jobs.DefaultConditions)
 			})
 		},
 	})
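
Note on the conditions handling added above: copying jobs.DefaultConditions before appending is the safe way to derive a per-job variant of a shared package-level slice, since a plain append on a slice with spare capacity would write into the backing array that every other caller sees. A tiny standalone illustration of the pattern (the condition names here are placeholders, not the operator's real ones):

package main

import "fmt"

// defaultConditions stands in for a shared, package-level default list.
var defaultConditions = []string{"Degraded", "Progressing"}

// withExtra returns a new slice so the shared default is never mutated.
func withExtra(base []string, extra string) []string {
	out := make([]string, len(base))
	copy(out, base)
	return append(out, extra)
}

func main() {
	setup := withExtra(defaultConditions, "Available")
	fmt.Println(defaultConditions) // [Degraded Progressing]
	fmt.Println(setup)             // [Degraded Progressing Available]
}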
@@ -138,7 +166,21 @@ func HandleDualReplicaClusters(ctx context.Context,
 	return true, nil
 }

-func isDualReplicaTopoly(ctx context.Context, featureGateAccessor featuregates.FeatureGateAccess, configInformers configv1informers.SharedInformerFactory) (bool, error) {
+func isCEOManagingEtcd(operatorClient v1helpers.StaticPodOperatorClient) (bool, error) {
+	// Detect if the cluster is already running in ExternalEtcd mode
+	operatorSpec, _, _, err := operatorClient.GetStaticPodOperatorState()
+	if err != nil {
+		return false, fmt.Errorf("could not get operator spec: %w", err)
+	}
+	externalEtcdMode, err := ceohelpers.IsExternalEtcdSupport(operatorSpec)
+	if err != nil {
+		return false, fmt.Errorf("could not determine if useExternalEtcdSupport config override is set: %w", err)
+	}
+
+	return !externalEtcdMode, nil
+}
+
+func isDualReplicaTopology(ctx context.Context, featureGateAccessor featuregates.FeatureGateAccess, configInformers configv1informers.SharedInformerFactory) (bool, error) {
 	if isDualReplicaTopology, err := ceohelpers.IsDualReplicaTopology(ctx, configInformers.Config().V1().Infrastructures().Lister()); err != nil {
 		return false, fmt.Errorf("could not determine DualReplicaTopology, aborting controller start: %w", err)
 	} else if !isDualReplicaTopology {
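
Note on isCEOManagingEtcd above: it reports true while etcd is still CEO-managed, i.e. before the useExternalEtcdSupport override has taken effect, and the actual override check is delegated to ceohelpers.IsExternalEtcdSupport. Purely as an illustration of how a boolean override can be read out of spec.unsupportedConfigOverrides, here is a hedged sketch; the JSON shape and helper name are assumptions, not the operator's real schema:

package main

import (
	"encoding/json"
	"fmt"

	operatorv1 "github.com/openshift/api/operator/v1"
	"k8s.io/apimachinery/pkg/runtime"
)

// hasBoolOverride decodes an unsupportedConfigOverrides blob and reports
// whether the named top-level key is set to true. The key and JSON layout
// used below are assumed examples.
func hasBoolOverride(spec *operatorv1.OperatorSpec, key string) (bool, error) {
	if len(spec.UnsupportedConfigOverrides.Raw) == 0 {
		return false, nil
	}
	overrides := map[string]interface{}{}
	if err := json.Unmarshal(spec.UnsupportedConfigOverrides.Raw, &overrides); err != nil {
		return false, fmt.Errorf("could not parse unsupportedConfigOverrides: %w", err)
	}
	val, ok := overrides[key].(bool)
	return ok && val, nil
}

func main() {
	spec := &operatorv1.OperatorSpec{
		UnsupportedConfigOverrides: runtime.RawExtension{
			Raw: []byte(`{"useExternalEtcdSupport": true}`),
		},
	}
	enabled, err := hasBoolOverride(spec, "useExternalEtcdSupport")
	fmt.Println(enabled, err) // true <nil>
}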
@@ -201,7 +243,7 @@ func runTnfResourceController(ctx context.Context, controllerContext *controller
 	go tnfResourceController.Run(ctx, 1)
 }

-func runJobController(ctx context.Context, jobType tools.JobType, nodeName *string, controllerContext *controllercmd.ControllerContext, operatorClient v1helpers.StaticPodOperatorClient, kubeClient kubernetes.Interface, kubeInformersForNamespaces v1helpers.KubeInformersForNamespaces) {
+func runJobController(ctx context.Context, jobType tools.JobType, nodeName *string, controllerContext *controllercmd.ControllerContext, operatorClient v1helpers.StaticPodOperatorClient, kubeClient kubernetes.Interface, kubeInformersForNamespaces v1helpers.KubeInformersForNamespaces, conditions []string) {
 	nodeNameForLogs := "n/a"
 	if nodeName != nil {
 		nodeNameForLogs = *nodeName
@@ -214,6 +256,7 @@ func runJobController(ctx context.Context, jobType tools.JobType, nodeName *stri
 		operatorClient,
 		kubeClient,
 		kubeInformersForNamespaces.InformersFor(operatorclient.TargetNamespace).Batch().V1().Jobs(),
+		conditions,
 		[]factory.Informer{},
 		[]jobs.JobHookFunc{
 			func(_ *operatorv1.OperatorSpec, job *batchv1.Job) error {
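
Note on the new conditions parameter threaded into runJobController above: the extra argument is passed through to the job controller constructor, presumably so each controller knows which operator status condition types it owns (with Available included only for the setup job during the initial transition to external etcd). As a rough sketch of what consuming such a list could look like, assuming the controller keeps its owned conditions false until its job has finished (the reason strings and the jobDone flag below are made up for illustration):

package main

import (
	"fmt"

	operatorv1 "github.com/openshift/api/operator/v1"
)

// conditionsForJob builds one OperatorCondition per owned type, flipping them
// all to True once the job is done. Reason names are illustrative only.
func conditionsForJob(owned []string, jobDone bool) []operatorv1.OperatorCondition {
	status := operatorv1.ConditionFalse
	reason := "SetupJobRunning"
	if jobDone {
		status = operatorv1.ConditionTrue
		reason = "SetupJobComplete"
	}
	out := make([]operatorv1.OperatorCondition, 0, len(owned))
	for _, conditionType := range owned {
		out = append(out, operatorv1.OperatorCondition{
			Type:   conditionType,
			Status: status,
			Reason: reason,
		})
	}
	return out
}

func main() {
	owned := []string{operatorv1.OperatorStatusTypeAvailable}
	for _, c := range conditionsForJob(owned, false) {
		fmt.Printf("%s=%s (%s)\n", c.Type, c.Status, c.Reason)
	}
}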