diff --git a/controllers/sync.go b/controllers/sync.go
index 00129eed..d28223ea 100644
--- a/controllers/sync.go
+++ b/controllers/sync.go
@@ -14,7 +14,7 @@ import (
 	apiProxy "github.com/ytsaurus/yt-k8s-operator/pkg/apiproxy"
 )
 
-func (r *YtsaurusReconciler) handleUpdatingStateFullMode(
+func (r *YtsaurusReconciler) handleFullStrategy(
 	ctx context.Context,
 	ytsaurus *apiProxy.Ytsaurus,
 	componentManager *ComponentManager,
@@ -160,7 +160,7 @@ func (r *YtsaurusReconciler) handleUpdatingStateFullMode(
 	return nil, nil
 }
 
-func (r *YtsaurusReconciler) handleUpdatingStateLocalMode(
+func (r *YtsaurusReconciler) handleStatelessStrategy(
 	ctx context.Context,
 	ytsaurus *apiProxy.Ytsaurus,
 	componentManager *ComponentManager,
@@ -232,6 +232,159 @@ func (r *YtsaurusReconciler) handleUpdatingStateLocalMode(
 	return nil, nil
 }
 
+// handleMasterOnlyStrategy moves the master-only update one step forward: when the condition
+// awaited by the current update state holds, it saves the next state and asks for a requeue.
+func (r *YtsaurusReconciler) handleMasterOnlyStrategy(
+	ctx context.Context,
+	ytsaurus *apiProxy.Ytsaurus,
+	componentManager *ComponentManager,
+) (*ctrl.Result, error) {
+	switch ytsaurus.GetResource().Status.UpdateStatus.State {
+	case ytv1.UpdateStateNone:
+		ytsaurus.LogUpdate(ctx, "Checking the possibility of updating")
+		err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStatePossibilityCheck)
+		return &ctrl.Result{Requeue: true}, err
+
+	case ytv1.UpdateStatePossibilityCheck:
+		if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionHasPossibility) {
+			ytsaurus.LogUpdate(ctx, "Waiting for safe mode enabled")
+			err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForSafeModeEnabled)
+			return &ctrl.Result{Requeue: true}, err
+		} else if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionNoPossibility) {
+			ytsaurus.LogUpdate(ctx, "Update is impossible, need to apply previous images")
+			err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateImpossibleToStart)
+			return &ctrl.Result{Requeue: true}, err
+		}
+
+	case ytv1.UpdateStateImpossibleToStart:
+		if !componentManager.needSync() || !ytsaurus.GetResource().Spec.EnableFullUpdate {
+			ytsaurus.LogUpdate(ctx, "Spec changed back or full update isn't enabled, update is canceling")
+			err := ytsaurus.SaveClusterState(ctx, ytv1.ClusterStateCancelUpdate)
+			return &ctrl.Result{Requeue: true}, err
+		}
+
+	case ytv1.UpdateStateWaitingForSafeModeEnabled:
+		if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionSafeModeEnabled) {
+			ytsaurus.LogUpdate(ctx, "Waiting for snapshots")
+			err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForSnapshots)
+			return &ctrl.Result{Requeue: true}, err
+		}
+
+	case ytv1.UpdateStateWaitingForSnapshots:
+		if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionSnaphotsSaved) {
+			ytsaurus.LogUpdate(ctx, "Waiting for pods removal")
+			err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForPodsRemoval)
+			return &ctrl.Result{Requeue: true}, err
+		}
+
+	case ytv1.UpdateStateWaitingForPodsRemoval:
+		if componentManager.arePodsRemoved() {
+			ytsaurus.LogUpdate(ctx, "Waiting for pods creation")
+			err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForPodsCreation)
+			return &ctrl.Result{Requeue: true}, err
+		}
+
+	case ytv1.UpdateStateWaitingForPodsCreation:
+		if componentManager.allReadyOrUpdating() {
+			ytsaurus.LogUpdate(ctx, "All components were recreated")
+			err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForMasterExitReadOnly)
+			return &ctrl.Result{RequeueAfter: time.Second * 7}, err
+		}
+
+	case ytv1.UpdateStateWaitingForMasterExitReadOnly:
+		if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionMasterExitedReadOnly) {
+			ytsaurus.LogUpdate(ctx, "Masters exited read-only state")
+			err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForSafeModeDisabled)
+			return &ctrl.Result{Requeue: true}, err
+		}
+
+	case ytv1.UpdateStateWaitingForSafeModeDisabled:
+		if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionSafeModeDisabled) {
+			ytsaurus.LogUpdate(ctx, "Finishing")
+			err := ytsaurus.SaveClusterState(ctx, ytv1.ClusterStateUpdateFinishing)
+			return &ctrl.Result{Requeue: true}, err
+		}
+	}
+	return nil, nil
+}
+
+// handleTabletNodesOnlyStrategy moves the tablet-nodes-only update one step forward: when the
+// condition awaited by the current update state holds, it saves the next state and asks for a requeue.
+func (r *YtsaurusReconciler) handleTabletNodesOnlyStrategy(
+	ctx context.Context,
+	ytsaurus *apiProxy.Ytsaurus,
+	componentManager *ComponentManager,
+) (*ctrl.Result, error) {
+	switch ytsaurus.GetResource().Status.UpdateStatus.State {
+	case ytv1.UpdateStateNone:
+		ytsaurus.LogUpdate(ctx, "Checking the possibility of updating")
+		err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStatePossibilityCheck)
+		return &ctrl.Result{Requeue: true}, err
+
+	case ytv1.UpdateStatePossibilityCheck:
+		if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionHasPossibility) {
+			ytsaurus.LogUpdate(ctx, "Waiting for tablet cells saving")
+			err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForTabletCellsSaving)
+			return &ctrl.Result{Requeue: true}, err
+		} else if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionNoPossibility) {
+			ytsaurus.LogUpdate(ctx, "Update is impossible, need to apply previous images")
+			err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateImpossibleToStart)
+			return &ctrl.Result{Requeue: true}, err
+		}
+
+	case ytv1.UpdateStateImpossibleToStart:
+		if !componentManager.needSync() || !ytsaurus.GetResource().Spec.EnableFullUpdate {
+			ytsaurus.LogUpdate(ctx, "Spec changed back or full update isn't enabled, update is canceling")
+			err := ytsaurus.SaveClusterState(ctx, ytv1.ClusterStateCancelUpdate)
+			return &ctrl.Result{Requeue: true}, err
+		}
+
+	case ytv1.UpdateStateWaitingForTabletCellsSaving:
+		if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionTabletCellsSaved) {
+			ytsaurus.LogUpdate(ctx, "Waiting for tablet cells removing to start")
+			err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForTabletCellsRemovingStart)
+			return &ctrl.Result{Requeue: true}, err
+		}
+
+	case ytv1.UpdateStateWaitingForTabletCellsRemovingStart:
+		if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionTabletCellsRemovingStarted) {
+			ytsaurus.LogUpdate(ctx, "Waiting for tablet cells removing to finish")
+			err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForTabletCellsRemoved)
+			return &ctrl.Result{Requeue: true}, err
+		}
+
+	case ytv1.UpdateStateWaitingForTabletCellsRemoved:
+		if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionTabletCellsRemoved) {
+			ytsaurus.LogUpdate(ctx, "Waiting for pods removal")
+			err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForPodsRemoval)
+			return &ctrl.Result{Requeue: true}, err
+		}
+
+	case ytv1.UpdateStateWaitingForPodsRemoval:
+		if componentManager.arePodsRemoved() {
+			ytsaurus.LogUpdate(ctx, "Waiting for pods creation")
+			err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForPodsCreation)
+			return &ctrl.Result{Requeue: true}, err
+		}
+
+	case ytv1.UpdateStateWaitingForPodsCreation:
+		if componentManager.allReadyOrUpdating() {
+			ytsaurus.LogUpdate(ctx, "All components were recreated")
+			err := ytsaurus.SaveUpdateState(ctx, ytv1.UpdateStateWaitingForTabletCellsRecovery)
+			return &ctrl.Result{RequeueAfter: time.Second * 7}, err
+		}
+
+	case ytv1.UpdateStateWaitingForTabletCellsRecovery:
+		if ytsaurus.IsUpdateStatusConditionTrue(consts.ConditionTabletCellsRecovered) {
+			ytsaurus.LogUpdate(ctx, "Finishing")
+			err := ytsaurus.SaveClusterState(ctx, ytv1.ClusterStateUpdateFinishing)
+			return &ctrl.Result{Requeue: true}, err
+		}
+	}
+
+	return nil, nil
+}
+
 func getComponentNames(components []components.Component) []string {
 	if components == nil {
 		return nil
@@ -243,11 +396,16 @@ func getComponentNames(components []components.Component) []string {
 	return names
 }
 
+type updateMeta struct {
+	strategy       ytv1.UpdateStrategy
+	componentNames []string
+}
+
-// chooseUpdateStrategy considers spec decides if operator should proceed with update or block.
+// chooseUpdateStrategy considers spec and decides if operator should proceed with update or block.
 // Block is indicated with non-empty blockMsg.
-// Component names which are chosen for update are return in names slice.
-// Nil names slice means "full update".
+// The chosen strategy and the names of components to update are returned in meta.
+// meta.componentNames is nil for the full update strategy.
-func chooseUpdateStrategy(spec ytv1.YtsaurusSpec, needUpdate []components.Component) (names []string, blockMsg string) {
+func chooseUpdateStrategy(spec ytv1.YtsaurusSpec, needUpdate []components.Component) (meta updateMeta, blockMsg string) {
 	isFullUpdateEnabled := spec.EnableFullUpdate
 
 	masterNeedsUpdate := false
@@ -266,12 +424,12 @@ func chooseUpdateStrategy(spec ytv1.YtsaurusSpec, needUpdate []components.Compon
 
 	if statefulNeedUpdate {
 		if isFullUpdateEnabled {
-			return nil, ""
+			return updateMeta{strategy: ytv1.UpdateStrategyFull, componentNames: nil}, ""
 		} else {
-			return nil, "Full update is not allowed by enableFullUpdate field, ignoring it"
+			return updateMeta{strategy: "", componentNames: nil}, "Full update is not allowed by enableFullUpdate field, ignoring it"
 		}
 	}
-	return getComponentNames(needUpdate), ""
+	return updateMeta{strategy: ytv1.UpdateStrategyStatelessOnly, componentNames: getComponentNames(needUpdate)}, ""
 }
 
 func (r *YtsaurusReconciler) Sync(ctx context.Context, resource *ytv1.Ytsaurus) (ctrl.Result, error) {
@@ -314,23 +472,43 @@ func (r *YtsaurusReconciler) Sync(ctx context.Context, resource *ytv1.Ytsaurus)
 			return ctrl.Result{Requeue: true}, err
 
 		case componentManager.needUpdate() != nil:
-			componentNames, blockMsg := chooseUpdateStrategy(ytsaurus.GetResource().Spec, componentManager.needUpdate())
+			meta, blockMsg := chooseUpdateStrategy(ytsaurus.GetResource().Spec, componentManager.needUpdate())
 			if blockMsg != "" {
 				logger.Info(blockMsg)
 				return ctrl.Result{}, nil
 			}
-			logger.Info("Ytsaurus needs components update", "components", componentNames)
-			err := ytsaurus.SaveUpdatingClusterState(ctx, componentNames)
-			return ctrl.Result{Requeue: true}, err
+			logger.Info("Ytsaurus needs components update",
+				"components", meta.componentNames,
+				"strategy", meta.strategy,
+			)
+			err = ytsaurus.SaveUpdatingClusterState(ctx, meta.strategy, meta.componentNames)
+			if err != nil {
+				return ctrl.Result{}, err
+			}
+			return ctrl.Result{Requeue: true}, nil
 		}
 
 	case ytv1.ClusterStateUpdating:
 		var result *ctrl.Result
 		var err error
-		if ytsaurus.GetLocalUpdatingComponents() != nil {
-			result, err = r.handleUpdatingStateLocalMode(ctx, ytsaurus, componentManager)
-		} else {
-			result, err = r.handleUpdatingStateFullMode(ctx, ytsaurus, componentManager)
+
+		switch ytsaurus.GetUpdateStrategy() {
+		case ytv1.UpdateStrategyFull:
+			result, err = r.handleFullStrategy(ctx, ytsaurus, componentManager)
+		case ytv1.UpdateStrategyStatelessOnly:
+			result, err = r.handleStatelessStrategy(ctx, ytsaurus, componentManager)
+		case ytv1.UpdateStrategyMasterOnly:
+			result, err = r.handleMasterOnlyStrategy(ctx, ytsaurus, componentManager)
+		case ytv1.UpdateStrategyTabletNodesOnly:
+			result, err = r.handleTabletNodesOnlyStrategy(ctx, ytsaurus, componentManager)
+		default:
+			// The strategy may be empty for an update whose status was saved without it;
+			// dispatch such updates by the updating components list, as was done before.
+			if ytsaurus.GetLocalUpdatingComponents() != nil {
+				result, err = r.handleStatelessStrategy(ctx, ytsaurus, componentManager)
+			} else {
+				result, err = r.handleFullStrategy(ctx, ytsaurus, componentManager)
+			}
 		}
 
 		if result != nil {
diff --git a/pkg/apiproxy/ytsaurus.go b/pkg/apiproxy/ytsaurus.go
index 933f7cb3..7b828a00 100644
--- a/pkg/apiproxy/ytsaurus.go
+++ b/pkg/apiproxy/ytsaurus.go
@@ -58,6 +58,11 @@ func (c *Ytsaurus) GetLocalUpdatingComponents() []string {
 	return c.ytsaurus.Status.UpdateStatus.Components
 }
 
+// GetUpdateStrategy returns the update strategy stored in the resource's update status.
+func (c *Ytsaurus) GetUpdateStrategy() ytv1.UpdateStrategy {
+	return c.ytsaurus.Status.UpdateStatus.Strategy
+}
+
 func (c *Ytsaurus) IsUpdateStatusConditionTrue(condition string) bool {
 	return meta.IsStatusConditionTrue(c.ytsaurus.Status.UpdateStatus.Conditions, condition)
 }
@@ -73,6 +78,7 @@ func (c *Ytsaurus) ClearUpdateStatus(ctx context.Context) error {
 	c.ytsaurus.Status.UpdateStatus.TabletCellBundles = make([]ytv1.TabletCellBundleInfo, 0)
 	c.ytsaurus.Status.UpdateStatus.MasterMonitoringPaths = make([]string, 0)
 	c.ytsaurus.Status.UpdateStatus.Components = nil
+	c.ytsaurus.Status.UpdateStatus.Strategy = ytv1.UpdateStrategyNone
 	return c.apiProxy.UpdateStatus(ctx)
 }
 
@@ -82,9 +88,10 @@ func (c *Ytsaurus) LogUpdate(ctx context.Context, message string) {
 	logger.Info(fmt.Sprintf("Ytsaurus update: %s", message))
 }
 
-func (c *Ytsaurus) SaveUpdatingClusterState(ctx context.Context, components []string) error {
+func (c *Ytsaurus) SaveUpdatingClusterState(ctx context.Context, strategy ytv1.UpdateStrategy, components []string) error {
 	logger := log.FromContext(ctx)
 	c.ytsaurus.Status.State = ytv1.ClusterStateUpdating
+	c.ytsaurus.Status.UpdateStatus.Strategy = strategy
 	c.ytsaurus.Status.UpdateStatus.Components = components
 
 	if err := c.apiProxy.UpdateStatus(ctx); err != nil {