diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go
index 5aa123e32..b3c62c1f5 100644
--- a/pkg/cluster/cluster.go
+++ b/pkg/cluster/cluster.go
@@ -1733,12 +1733,51 @@ func (c *Cluster) GetStatus() *ClusterStatus {
 
 // Switchover does a switchover (via Patroni) to a candidate pod
+// Outside of a maintenance window it is not executed right away but scheduled
+// in Patroni for the earliest upcoming window start (UTC)
 func (c *Cluster) Switchover(curMaster *v1.Pod, candidate spec.NamespacedName) error {
-
 	var err error
 	c.logger.Debugf("switching over from %q to %q", curMaster.Name, candidate)
 
 	if !isInMaintenanceWindow(c.Spec.MaintenanceWindows) {
-		c.logger.Infof("skipping switchover, not in maintenance window")
+		c.logger.Infof("postponing switchover, not in maintenance window")
+
+		var possibleSwitchover, schedule time.Time
+
+		now := time.Now().UTC()
+		for _, window := range c.Spec.MaintenanceWindows {
+			// in the best case the switchover is still possible today
+			possibleSwitchover = time.Date(now.Year(), now.Month(), now.Day(), window.StartTime.Hour(), window.StartTime.Minute(), 0, 0, time.UTC)
+			if window.Everyday {
+				if now.After(possibleSwitchover) {
+					// we are already past the time for today, try tomorrow
+					possibleSwitchover = possibleSwitchover.AddDate(0, 0, 1)
+				}
+			} else if now.Weekday() != window.Weekday {
+				// get closest possible time for this window
+				possibleSwitchover = possibleSwitchover.AddDate(0, 0, int((7+window.Weekday-now.Weekday())%7))
+			} else if now.After(possibleSwitchover) {
+				// right weekday, but the window start has already passed: try next week
+				possibleSwitchover = possibleSwitchover.AddDate(0, 0, 7)
+			}
+
+			// keep the earliest candidate across all windows
+			if schedule.IsZero() || possibleSwitchover.Before(schedule) {
+				schedule = possibleSwitchover
+			}
+		}
+
+		if schedule.IsZero() {
+			// no window yielded a candidate; never send the zero date to Patroni
+			return fmt.Errorf("could not schedule switchover: no maintenance window defined")
+		}
+
+		// schedule is in UTC, "+00" in the layout is emitted literally as its offset
+		scheduledAt := schedule.Format("2006-01-02T15:04+00")
+		c.logger.Debugf("switching over at %s", scheduledAt)
+		if err := c.patroni.Switchover(curMaster, candidate.Name, scheduledAt); err != nil {
+			return fmt.Errorf("could not schedule switchover: %v", err)
+		}
+		c.logger.Infof("switchover is scheduled at %s", scheduledAt)
 		return nil
 	}
 
@@ -1748,7 +1787,7 @@ func (c *Cluster) Switchover(curMaster *v1.Pod, candidate spec.NamespacedName) e
 	defer c.unregisterPodSubscriber(candidate)
 	defer close(stopCh)
 
-	if err = c.patroni.Switchover(curMaster, candidate.Name); err == nil {
+	if err = c.patroni.Switchover(curMaster, candidate.Name, ""); err == nil {
 		c.logger.Debugf("successfully switched over from %q to %q", curMaster.Name, candidate)
 		c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeNormal, "Switchover", "Successfully switched over from %q to %q", curMaster.Name, candidate)
 		_, err = c.waitForPodLabel(ch, stopCh, nil)
diff --git a/pkg/cluster/resources.go b/pkg/cluster/resources.go
index 3f47328ee..85711dbd1 100644
--- a/pkg/cluster/resources.go
+++ b/pkg/cluster/resources.go
@@ -162,8 +162,8 @@ func (c *Cluster) preScaleDown(newStatefulSet *appsv1.StatefulSet) error {
 		return fmt.Errorf("pod %q does not belong to cluster", podName)
 	}
 
-	if err := c.patroni.Switchover(&masterPod[0], masterCandidatePod.Name); err != nil {
-		return fmt.Errorf("could not failover: %v", err)
+	if err := c.patroni.Switchover(&masterPod[0], masterCandidatePod.Name, ""); err != nil {
+		return fmt.Errorf("could not switchover: %v", err)
 	}
 
 	return nil
diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go
index d06fdf477..a0ea0ff27 100644
--- a/pkg/cluster/sync.go
+++ b/pkg/cluster/sync.go
@@ -659,7 +659,7 @@ func (c *Cluster) syncStatefulSet() error {
 	// statefulset or those that got their configuration from the outdated statefulset)
 	if len(podsToRecreate) > 0 {
 		if !isInMaintenanceWindow(c.Spec.MaintenanceWindows) {
-			c.logger.Infof("skipping pod recreation, not in maintenance window")
+			c.logger.Infof("postpone pod recreation - not in maintenance window")
 		} else if isSafeToRecreatePods {
 			c.logger.Info("performing rolling update")
 			c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "Performing rolling update")
diff --git a/pkg/util/patroni/patroni.go b/pkg/util/patroni/patroni.go
index 4d580f1c2..2129f1acc 100644
--- a/pkg/util/patroni/patroni.go
+++ b/pkg/util/patroni/patroni.go
@@ -20,19 +20,21 @@ import (
 )
 
 const (
-	failoverPath = "/failover"
-	configPath   = "/config"
-	clusterPath  = "/cluster"
-	statusPath   = "/patroni"
-	restartPath  = "/restart"
-	ApiPort      = 8008
-	timeout      = 30 * time.Second
+	switchoverPath = "/switchover"
+	configPath     = "/config"
+	clusterPath    = "/cluster"
+	statusPath     = "/patroni"
+	restartPath    = "/restart"
+	ApiPort        = 8008
+	timeout        = 30 * time.Second
 )
 
 // Interface describe patroni methods
 type Interface interface {
 	GetClusterMembers(master *v1.Pod) ([]ClusterMember, error)
-	Switchover(master *v1.Pod, candidate string) error
+	// Switchover posts a switchover request from master to candidate; a non-empty
+	// scheduledAt is passed on as "scheduled_at", an empty one adds no schedule
+	Switchover(master *v1.Pod, candidate string, scheduledAt string) error
 	SetPostgresParameters(server *v1.Pod, options map[string]string) error
 	SetStandbyClusterParameters(server *v1.Pod, options map[string]interface{}) error
 	GetMemberData(server *v1.Pod) (MemberData, error)
@@ -103,7 +105,8 @@ func (p *Patroni) httpPostOrPatch(method string, url string, body *bytes.Buffer)
 		}
 	}()
 
-	if resp.StatusCode != http.StatusOK {
+	// every 2xx status counts as success, not only 200 OK
+	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
 		bodyBytes, err := io.ReadAll(resp.Body)
 		if err != nil {
 			return fmt.Errorf("could not read response: %v", err)
@@ -128,7 +131,8 @@ func (p *Patroni) httpGet(url string) (string, error) {
 		return "", fmt.Errorf("could not read response: %v", err)
 	}
 
-	if response.StatusCode != http.StatusOK {
+	// every 2xx status counts as success, not only 200 OK
+	if response.StatusCode < http.StatusOK || response.StatusCode >= http.StatusMultipleChoices {
 		return string(bodyBytes), fmt.Errorf("patroni returned '%d'", response.StatusCode)
 	}
 
@@ -136,9 +140,18 @@ func (p *Patroni) httpGet(url string) (string, error) {
 }
 
 // Switchover by calling Patroni REST API
+// The request names master as "leader" and candidate as "member"; scheduledAt,
+// when non-empty, is added as "scheduled_at"
-func (p *Patroni) Switchover(master *v1.Pod, candidate string) error {
+func (p *Patroni) Switchover(master *v1.Pod, candidate string, scheduledAt string) error {
+	payload := map[string]string{"leader": master.Name, "member": candidate}
+	if scheduledAt != "" {
+		// only a scheduled switchover carries the extra key, so an immediate
+		// one sends exactly the request body it sent before
+		payload["scheduled_at"] = scheduledAt
+	}
+
 	buf := &bytes.Buffer{}
-	err := json.NewEncoder(buf).Encode(map[string]string{"leader": master.Name, "member": candidate})
+	err := json.NewEncoder(buf).Encode(payload)
 	if err != nil {
 		return fmt.Errorf("could not encode json: %v", err)
 	}
@@ -146,7 +159,7 @@ func (p *Patroni) Switchover(master *v1.Pod, candidate string) error {
 	if err != nil {
 		return err
 	}
-	return p.httpPostOrPatch(http.MethodPost, apiURLString+failoverPath, buf)
+	return p.httpPostOrPatch(http.MethodPost, apiURLString+switchoverPath, buf)
 }
 
 //TODO: add an option call /patroni to check if it is necessary to restart the server