Skip to content

Commit

Permalink
Optimization: failed task retry backoff (#94)
Browse files Browse the repository at this point in the history
* optimize task retry, use exponential backoff policy, add failure retry mechanism
* optimize log, set multiple log levels
  • Loading branch information
powerfooI authored Nov 10, 2023
1 parent 10c14d4 commit 270ba63
Show file tree
Hide file tree
Showing 64 changed files with 1,084 additions and 505 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,5 @@ deploy/*secret*

# Kubernetes Generated files - skip generated files, except for vendored files
# !vendor/**/zz_generated.*

.vscode
18 changes: 10 additions & 8 deletions api/constants/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,16 @@ const (
)

const (
BackupPolicyStatusPreparing types.BackupPolicyStatusType = "PREPARING"
BackupPolicyStatusPrepared types.BackupPolicyStatusType = "PREPARED"
BackupPolicyStatusRunning types.BackupPolicyStatusType = "RUNNING"
BackupPolicyStatusFailed types.BackupPolicyStatusType = "FAILED"
BackupPolicyStatusPausing types.BackupPolicyStatusType = "PAUSING"
BackupPolicyStatusPaused types.BackupPolicyStatusType = "PAUSED"
BackupPolicyStatusStopped types.BackupPolicyStatusType = "STOPPED"
BackupPolicyStatusResuming types.BackupPolicyStatusType = "RESUMING"
BackupPolicyStatusPreparing types.BackupPolicyStatusType = "PREPARING"
BackupPolicyStatusPrepared types.BackupPolicyStatusType = "PREPARED"
BackupPolicyStatusRunning types.BackupPolicyStatusType = "RUNNING"
BackupPolicyStatusFailed types.BackupPolicyStatusType = "FAILED"
BackupPolicyStatusPausing types.BackupPolicyStatusType = "PAUSING"
BackupPolicyStatusPaused types.BackupPolicyStatusType = "PAUSED"
BackupPolicyStatusStopped types.BackupPolicyStatusType = "STOPPED"
BackupPolicyStatusResuming types.BackupPolicyStatusType = "RESUMING"
BackupPolicyStatusDeleting types.BackupPolicyStatusType = "DELETING"
BackupPolicyStatusMaintaining types.BackupPolicyStatusType = "MAINTAINING"
)

const (
Expand Down
1 change: 1 addition & 0 deletions api/v1alpha1/obtenantbackuppolicy_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ type OBTenantBackupPolicyStatus struct {
Status apitypes.BackupPolicyStatusType `json:"status"`
OperationContext *OperationContext `json:"operationContext,omitempty"`

ObservedGeneration int64 `json:"observedGeneration,omitempty"`
NextFull string `json:"nextFull,omitempty"`
NextIncremental string `json:"nextIncremental,omitempty"`
TenantCR *OBTenant `json:"tenantCR,omitempty"`
Expand Down
2 changes: 2 additions & 0 deletions api/v1alpha1/obzone_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ type OBZoneStatus struct {

//+kubebuilder:object:root=true
//+kubebuilder:subresource:status
//+kubebuilder:printcolumn:name="Status",type="string",JSONPath=".status.status"
//+kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"

// OBZone is the Schema for the obzones API
type OBZone struct {
Expand Down
2 changes: 1 addition & 1 deletion charts/oceanbase-cluster/templates/NOTES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ You can use the following command to wait for the OBCluster to be ready.

After that, the cluster is ready to handle connections stably. Example command is following:

> mysql -A -h$(kubectl get pods -l ref-obcluster={{ .Release.Name }} -o jsonpath='{.items[0].status.podIP}') -P2881 -uroot -p$(kubectl get secret -n {{ .Release.Namespace }} {{ .Values.userSecrets.root }} -o jsonpath='{.data.password}' | base64 -d)
> mysql -A -h$(kubectl get pods -n {{ .Release.Namespace }} -l ref-obcluster={{ .Release.Name }} -o jsonpath='{.items[0].status.podIP}') -P2881 -uroot -p$(kubectl get secret -n {{ .Release.Namespace }} {{ .Values.userSecrets.root }} -o jsonpath='{.data.password}' | base64 -d)
2 changes: 1 addition & 1 deletion charts/oceanbase-cluster/values.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
clusterName: obcluster
clusterId: 1025
clusterId: 1

storageClass: local-path

Expand Down
41 changes: 24 additions & 17 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (

// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
// to ensure that exec-entrypoint and run can make use of them.

"go.uber.org/zap/zapcore"
_ "k8s.io/client-go/plugin/pkg/client/auth"

"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -57,17 +59,20 @@ func main() {
var metricsAddr string
var enableLeaderElection bool
var probeAddr string
var logVerbosity int
flag.StringVar(&namespace, "namespace", "", "The namespace to run oceanbase, default value is empty means all.")
flag.StringVar(&managerNamespace, "manager-namespace", "oceanbase-system", "The namespace to run manager tools.")
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
"Enable leader election for controller manager. "+
"Enabling this will ensure there is only one active controller manager.")
flag.IntVar(&logVerbosity, "log-verbosity", 0, "Log verbosity level, 0 is info, 1 is debug, 2 is trace")
opts := zap.Options{
Development: true,
Development: logVerbosity > 0,
Level: zapcore.Level(-logVerbosity),
}
opts.BindFlags(flag.CommandLine)
// opts.BindFlags(flag.CommandLine)
flag.Parse()

ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
Expand Down Expand Up @@ -170,10 +175,6 @@ func main() {
setupLog.Error(err, "unable to create controller", "controller", "OBTenantBackupPolicy")
os.Exit(1)
}
if err = (&v1alpha1.OBTenantBackupPolicy{}).SetupWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "OBTenantBackupPolicy")
os.Exit(1)
}
if err = (&controller.OBTenantOperationReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Expand All @@ -182,17 +183,23 @@ func main() {
setupLog.Error(err, "unable to create controller", "controller", "OBTenantOperation")
os.Exit(1)
}
if err = (&v1alpha1.OBTenant{}).SetupWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "OBTenant")
os.Exit(1)
}
if err = (&v1alpha1.OBTenantOperation{}).SetupWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "OBTenantOperation")
os.Exit(1)
}
if err = (&v1alpha1.OBCluster{}).SetupWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "OBCluster")
os.Exit(1)
if os.Getenv("DISABLE_WEBHOOKS") != "true" {
if err = (&v1alpha1.OBTenantBackupPolicy{}).SetupWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "OBTenantBackupPolicy")
os.Exit(1)
}
if err = (&v1alpha1.OBTenant{}).SetupWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "OBTenant")
os.Exit(1)
}
if err = (&v1alpha1.OBTenantOperation{}).SetupWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "OBTenantOperation")
os.Exit(1)
}
if err = (&v1alpha1.OBCluster{}).SetupWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "OBCluster")
os.Exit(1)
}
}
//+kubebuilder:scaffold:builder

Expand Down
4 changes: 4 additions & 0 deletions config/crd/bases/oceanbase.oceanbase.com_obclusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2736,6 +2736,10 @@ spec:
type: string
failureStrategy:
type: string
maxRetry:
type: integer
retryCount:
type: integer
required:
- failureStatus
- failureStrategy
Expand Down
4 changes: 4 additions & 0 deletions config/crd/bases/oceanbase.oceanbase.com_obparameters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ spec:
type: string
failureStrategy:
type: string
maxRetry:
type: integer
retryCount:
type: integer
required:
- failureStatus
- failureStrategy
Expand Down
4 changes: 4 additions & 0 deletions config/crd/bases/oceanbase.oceanbase.com_observers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2643,6 +2643,10 @@ spec:
type: string
failureStrategy:
type: string
maxRetry:
type: integer
retryCount:
type: integer
required:
- failureStatus
- failureStrategy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,9 @@ spec:
type: string
nextIncremental:
type: string
observedGeneration:
format: int64
type: integer
operationContext:
properties:
failureRule:
Expand All @@ -390,6 +393,10 @@ spec:
type: string
failureStrategy:
type: string
maxRetry:
type: integer
retryCount:
type: integer
required:
- failureStatus
- failureStrategy
Expand Down Expand Up @@ -621,6 +628,10 @@ spec:
type: string
failureStrategy:
type: string
maxRetry:
type: integer
retryCount:
type: integer
required:
- failureStatus
- failureStrategy
Expand Down Expand Up @@ -765,6 +776,10 @@ spec:
type: string
failureStrategy:
type: string
maxRetry:
type: integer
retryCount:
type: integer
required:
- failureStatus
- failureStrategy
Expand Down
4 changes: 4 additions & 0 deletions config/crd/bases/oceanbase.oceanbase.com_obtenantbackups.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,10 @@ spec:
type: string
failureStrategy:
type: string
maxRetry:
type: integer
retryCount:
type: integer
required:
- failureStatus
- failureStrategy
Expand Down
20 changes: 20 additions & 0 deletions config/crd/bases/oceanbase.oceanbase.com_obtenantoperations.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ spec:
type: string
failureStrategy:
type: string
maxRetry:
type: integer
retryCount:
type: integer
required:
- failureStatus
- failureStrategy
Expand Down Expand Up @@ -326,6 +330,10 @@ spec:
type: string
failureStrategy:
type: string
maxRetry:
type: integer
retryCount:
type: integer
required:
- failureStatus
- failureStrategy
Expand Down Expand Up @@ -470,6 +478,10 @@ spec:
type: string
failureStrategy:
type: string
maxRetry:
type: integer
retryCount:
type: integer
required:
- failureStatus
- failureStrategy
Expand Down Expand Up @@ -838,6 +850,10 @@ spec:
type: string
failureStrategy:
type: string
maxRetry:
type: integer
retryCount:
type: integer
required:
- failureStatus
- failureStrategy
Expand Down Expand Up @@ -982,6 +998,10 @@ spec:
type: string
failureStrategy:
type: string
maxRetry:
type: integer
retryCount:
type: integer
required:
- failureStatus
- failureStrategy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,10 @@ spec:
type: string
failureStrategy:
type: string
maxRetry:
type: integer
retryCount:
type: integer
required:
- failureStatus
- failureStrategy
Expand Down
8 changes: 8 additions & 0 deletions config/crd/bases/oceanbase.oceanbase.com_obtenants.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,10 @@ spec:
type: string
failureStrategy:
type: string
maxRetry:
type: integer
retryCount:
type: integer
required:
- failureStatus
- failureStrategy
Expand Down Expand Up @@ -390,6 +394,10 @@ spec:
type: string
failureStrategy:
type: string
maxRetry:
type: integer
retryCount:
type: integer
required:
- failureStatus
- failureStrategy
Expand Down
13 changes: 12 additions & 1 deletion config/crd/bases/oceanbase.oceanbase.com_obzones.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,14 @@ spec:
singular: obzone
scope: Namespaced
versions:
- name: v1alpha1
- additionalPrinterColumns:
- jsonPath: .status.status
name: Status
type: string
- jsonPath: .metadata.creationTimestamp
name: Age
type: date
name: v1alpha1
schema:
openAPIV3Schema:
description: OBZone is the Schema for the obzones API
Expand Down Expand Up @@ -2685,6 +2692,10 @@ spec:
type: string
failureStrategy:
type: string
maxRetry:
type: integer
retryCount:
type: integer
required:
- failureStatus
- failureStrategy
Expand Down
1 change: 1 addition & 0 deletions config/default/manager_auth_proxy_patch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,4 @@ spec:
- "--metrics-bind-address=127.0.0.1:8080"
- "--leader-elect"
- "--manager-namespace=oceanbase-system"
- "--log-verbosity=2"
2 changes: 1 addition & 1 deletion config/manager/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ kind: Kustomization
images:
- name: controller
newName: oceanbasedev/ob-operator
newTag: 2.1.0-alpha.1
newTag: 2.1.0-alpha.2
10 changes: 3 additions & 7 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,6 @@ spec:
operator: In
values:
- linux
- key: kubernetes.io/hostname
operator: In
values:
- sqaappnoxdnv62s2011161204053.sa128
- key: node-role.kubernetes.io/master
operator: Exists
securityContext:
runAsNonRoot: true
# TODO(user): For common cases that do not require escalating privileges
Expand All @@ -81,8 +75,10 @@ spec:
env:
- name: TELEMETRY_DEBUG
value: "true"
- name: DISABLE_TELEMETRY
value: "true"
- name: TELEMETRY_REPORT_HOST
value: "http://openwebapi.test.alipay.net"
value: "https://openwebapi.oceanbase.com"
securityContext:
allowPrivilegeEscalation: false
capabilities:
Expand Down
Loading

0 comments on commit 270ba63

Please sign in to comment.