From 19d46483adaefa9419113474aa1afb6bf170f18e Mon Sep 17 00:00:00 2001 From: yuyi Date: Mon, 25 Sep 2023 20:30:48 +0800 Subject: [PATCH] feat: restore tenant from backup, activate or replay --- Makefile | 18 +- api/v1alpha1/obtenantrestore_types.go | 5 + ...anbase.oceanbase.com_obtenantrestores.yaml | 18 +- config/default/manager_auth_proxy_patch.yaml | 1 + config/manager/kustomization.yaml | 2 +- deploy/operator.yaml | 692 ++++++++++++++---- deploy/tenant_restore.yaml | 4 +- distribution/oceanbase/build.sh | 2 +- go.mod | 14 +- go.sum | 28 +- .../status/tenantstatus/obtenant_status.go | 1 + pkg/controller/obtenantrestore_controller.go | 35 +- pkg/oceanbase/const/sql/restore.go | 2 +- pkg/oceanbase/operation/restore.go | 2 +- pkg/oceanbase/test/restore_test.go | 104 +++ pkg/resource/obtenant_manager.go | 1 + pkg/resource/obtenantrestore_manager.go | 26 +- pkg/resource/obtenantrestore_task.go | 4 + pkg/task/obtenant_flow.go | 2 +- pkg/task/restore_flow.go | 4 +- 20 files changed, 783 insertions(+), 182 deletions(-) diff --git a/Makefile b/Makefile index 68f5dcf6d..c60ec3b95 100644 --- a/Makefile +++ b/Makefile @@ -205,4 +205,20 @@ commit-hook: $(GOLANGCI_LINT) ## Install commit hook. 
touch .git/hooks/pre-commit chmod +x .git/hooks/pre-commit echo "#!/bin/sh" > .git/hooks/pre-commit - echo "make lint" >> .git/hooks/pre-commit \ No newline at end of file + echo "make lint" >> .git/hooks/pre-commit + +.PHONY: connect +connect: +ifdef TENANT + mysql -h$(shell kubectl get pods -o jsonpath='{.items[0].status.podIP}') -P2881 -A -uroot@${TENANT} +else + mysql -h$(shell kubectl get pods -o jsonpath='{.items[0].status.podIP}') -P2881 -A -uroot -p +endif + +.PHONY: connectob +connectob: +ifdef TENANT + mysql -h$(shell kubectl get pods -o jsonpath='{.items[0].status.podIP}') -P2881 -A -uroot@${TENANT} -Doceanbase +else + mysql -h$(shell kubectl get pods -o jsonpath='{.items[0].status.podIP}') -P2881 -A -uroot -p -Doceanbase +endif diff --git a/api/v1alpha1/obtenantrestore_types.go b/api/v1alpha1/obtenantrestore_types.go index 069a11776..2b35bb9a5 100644 --- a/api/v1alpha1/obtenantrestore_types.go +++ b/api/v1alpha1/obtenantrestore_types.go @@ -60,6 +60,11 @@ func (in *OBTenantRestoreStatus) DeepCopyInto(out *OBTenantRestoreStatus) { //+kubebuilder:object:root=true //+kubebuilder:subresource:status +//+kubebuilder:printcolumn:name="Status",type=string,JSONPath=`.status.status` +//+kubebuilder:printcolumn:name="TargetTenant",type=string,JSONPath=`.spec.targetTenant` +//+kubebuilder:printcolumn:name="TargetCluster",type=string,JSONPath=`.spec.targetCluster` +//+kubebuilder:printcolumn:name="RestoreRole",type=string,JSONPath=`.spec.restoreRole` +//+kubebuilder:printcolumn:name="StatusInDB",type=string,JSONPath=`.status.restoreProgress.status` // OBTenantRestore is the Schema for the obtenantrestores API // An instance of OBTenantRestore stands for a tenant restore job diff --git a/config/crd/bases/oceanbase.oceanbase.com_obtenantrestores.yaml b/config/crd/bases/oceanbase.oceanbase.com_obtenantrestores.yaml index 80b49e04f..14c4e31f3 100644 --- a/config/crd/bases/oceanbase.oceanbase.com_obtenantrestores.yaml +++
b/config/crd/bases/oceanbase.oceanbase.com_obtenantrestores.yaml @@ -14,7 +14,23 @@ spec: singular: obtenantrestore scope: Namespaced versions: - - name: v1alpha1 + - additionalPrinterColumns: + - jsonPath: .status.status + name: Status + type: string + - jsonPath: .spec.targetTenant + name: TargetTenant + type: string + - jsonPath: .spec.targetCluster + name: TargetCluster + type: string + - jsonPath: .spec.restoreRole + name: RestoreRole + type: string + - jsonPath: .status.restoreProgress.status + name: StatusInDB + type: string + name: v1alpha1 schema: openAPIV3Schema: description: OBTenantRestore is the Schema for the obtenantrestores API An diff --git a/config/default/manager_auth_proxy_patch.yaml b/config/default/manager_auth_proxy_patch.yaml index b75126616..f06445fee 100644 --- a/config/default/manager_auth_proxy_patch.yaml +++ b/config/default/manager_auth_proxy_patch.yaml @@ -53,3 +53,4 @@ spec: - "--health-probe-bind-address=:8081" - "--metrics-bind-address=127.0.0.1:8080" - "--leader-elect" + - "--manager-namespace=oceanbase-system" diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 0b5addd19..7d3d6f7d7 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -5,4 +5,4 @@ kind: Kustomization images: - name: controller newName: oceanbasedev/ob-operator - newTag: 2.0.0-alpha.8 + newTag: 2.0.0 diff --git a/deploy/operator.yaml b/deploy/operator.yaml index 10d49e26e..369051976 100644 --- a/deploy/operator.yaml +++ b/deploy/operator.yaml @@ -4188,7 +4188,6 @@ spec: - piece_switch_interval - round_id - start_scn - - start_scn_display - status - tenant_id - used_piece_id @@ -4626,7 +4625,6 @@ spec: - piece_switch_interval - round_id - start_scn - - start_scn_display - status - tenant_id - used_piece_id @@ -4789,6 +4787,128 @@ spec: --- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition +metadata: + annotations: + cert-manager.io/inject-ca-from: 
oceanbase-system/oceanbase-serving-cert + controller-gen.kubebuilder.io/version: v0.13.0 + name: obtenantoperations.oceanbase.oceanbase.com +spec: + group: oceanbase.oceanbase.com + names: + kind: OBTenantOperation + listKind: OBTenantOperationList + plural: obtenantoperations + singular: obtenantoperation + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: OBTenantOperation is the Schema for the obtenantoperations API + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: OBTenantOperationSpec defines the desired state of OBTenantOperation + properties: + changePwd: + properties: + secretRef: + type: string + tenant: + type: string + required: + - secretRef + - tenant + type: object + failover: + properties: + standbyTenant: + type: string + required: + - standbyTenant + type: object + switchover: + properties: + primaryTenant: + type: string + standbyTenant: + type: string + required: + - primaryTenant + - standbyTenant + type: object + type: + type: string + required: + - type + type: object + status: + description: OBTenantOperationStatus defines the observed state of OBTenantOperation + properties: + operationContext: + properties: + failureRule: + properties: + failureStatus: + type: string + failureStrategy: + type: string + required: + - failureStatus + - failureStrategy + type: object + idx: + type: integer + name: + type: string + targetStatus: + type: string + task: + type: string + taskId: + type: string + taskStatus: + type: string + tasks: + items: + type: string + type: array + required: + - idx + - name + - targetStatus + - task + - taskId + - taskStatus + - tasks + type: object + status: + description: 'INSERT ADDITIONAL STATUS FIELD - define observed state + of cluster Important: Run "make" to regenerate code after modifying + this file' + type: string + required: + - status + type: object + type: object + served: true + storage: true + subresources: + status: {} +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition metadata: annotations: cert-manager.io/inject-ca-from: oceanbase-system/oceanbase-serving-cert @@ -4824,146 +4944,54 @@ spec: spec: description: OBTenantRestoreSpec defines the desired state of OBTenantRestore properties: - obClusterName: - type: string - restoreTenantName: + restoreOption: type: string - 
sourceUri: + restoreRole: type: string - type: + source: + properties: + cancel: + type: boolean + description: + type: string + replayLogUntil: + properties: + scn: + type: string + timestamp: + type: string + unlimited: + type: boolean + type: object + sourceUri: + type: string + until: + properties: + scn: + type: string + timestamp: + type: string + unlimited: + type: boolean + type: object + required: + - sourceUri + - until + type: object + targetCluster: type: string - until: + targetTenant: type: string required: - - obClusterName - - restoreTenantName - - sourceUri - - type + - restoreOption + - restoreRole + - source + - targetCluster + - targetTenant type: object status: description: OBTenantRestoreStatus defines the observed state of OBTenantRestore properties: - jobStatus: - description: JobStatus represents the current state of a Job. - properties: - active: - description: The number of pending and running pods. - format: int32 - type: integer - completedIndexes: - description: completedIndexes holds the completed indexes when - .spec.completionMode = "Indexed" in a text format. The indexes - are represented as decimal integers separated by commas. The - numbers are listed in increasing order. Three or more consecutive - numbers are compressed and represented by the first and last - element of the series, separated by a hyphen. For example, if - the completed indexes are 1, 3, 4, 5 and 7, they are represented - as "1,3-5,7". - type: string - completionTime: - description: Represents time when the job was completed. It is - not guaranteed to be set in happens-before order across separate - operations. It is represented in RFC3339 form and is in UTC. - The completion time is only set when the job finishes successfully. - format: date-time - type: string - conditions: - description: 'The latest available observations of an object''s - current state. When a Job fails, one of the conditions will - have type "Failed" and status true. 
When a Job is suspended, - one of the conditions will have type "Suspended" and status - true; when the Job is resumed, the status of this condition - will become false. When a Job is completed, one of the conditions - will have type "Complete" and status true. More info: https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/' - items: - description: JobCondition describes current state of a job. - properties: - lastProbeTime: - description: Last time the condition was checked. - format: date-time - type: string - lastTransitionTime: - description: Last time the condition transit from one status - to another. - format: date-time - type: string - message: - description: Human readable message indicating details about - last transition. - type: string - reason: - description: (brief) reason for the condition's last transition. - type: string - status: - description: Status of the condition, one of True, False, - Unknown. - type: string - type: - description: Type of job condition, Complete or Failed. - type: string - required: - - status - - type - type: object - type: array - x-kubernetes-list-type: atomic - failed: - description: The number of pods which reached phase Failed. - format: int32 - type: integer - ready: - description: "The number of pods which have a Ready condition. - \n This field is beta-level. The job controller populates the - field when the feature gate JobReadyPods is enabled (enabled - by default)." - format: int32 - type: integer - startTime: - description: Represents time when the job controller started processing - a job. When a Job is created in the suspended state, this field - is not set until the first time it is resumed. This field is - reset every time a Job is resumed from suspension. It is represented - in RFC3339 form and is in UTC. - format: date-time - type: string - succeeded: - description: The number of pods which reached phase Succeeded. 
- format: int32 - type: integer - uncountedTerminatedPods: - description: "uncountedTerminatedPods holds the UIDs of Pods that - have terminated but the job controller hasn't yet accounted - for in the status counters. \n The job controller creates pods - with a finalizer. When a pod terminates (succeeded or failed), - the controller does three steps to account for it in the job - status: \n 1. Add the pod UID to the arrays in this field. 2. - Remove the pod finalizer. 3. Remove the pod UID from the arrays - while increasing the corresponding counter. \n Old jobs might - not be tracked using this field, in which case the field remains - null." - properties: - failed: - description: failed holds UIDs of failed Pods. - items: - description: UID is a type that holds unique ID values, - including UUIDs. Because we don't ONLY use UUIDs, this - is an alias to string. Being a type captures intent and - helps make sure that UIDs and names do not get conflated. - type: string - type: array - x-kubernetes-list-type: set - succeeded: - description: succeeded holds UIDs of succeeded Pods. - items: - description: UID is a type that holds unique ID values, - including UUIDs. Because we don't ONLY use UUIDs, this - is an alias to string. Being a type captures intent and - helps make sure that UIDs and names do not get conflated. 
- type: string - type: array - x-kubernetes-list-type: set - type: object - type: object operationContext: properties: failureRule: @@ -5001,16 +5029,102 @@ spec: - taskStatus - tasks type: object - progress: - type: string + restoreProgress: + description: RestoreHistory is the history of restore job, matches + view CDB_OB_RESTORE_HISTORY + properties: + backup_cluster_name: + type: string + backup_cluster_version: + type: string + backup_dest: + type: string + backup_piece_list: + type: string + backup_set_list: + type: string + backup_tenant_id: + format: int64 + type: integer + backup_tenant_name: + type: string + description: + type: string + finish_bytes: + format: int64 + type: integer + finish_bytes_display: + type: string + finish_ls_count: + format: int64 + type: integer + finish_tablet_count: + format: int64 + type: integer + finish_timestamp: + type: string + job_id: + format: int64 + type: integer + ls_count: + format: int64 + type: integer + restore_option: + type: string + restore_scn: + format: int64 + type: integer + restore_scn_display: + type: string + restore_tenant_id: + format: int64 + type: integer + restore_tenant_name: + type: string + start_timestamp: + type: string + status: + type: string + tablet_count: + format: int64 + type: integer + tenant_id: + format: int64 + type: integer + total_bytes: + format: int64 + type: integer + total_bytes_display: + type: string + required: + - backup_cluster_name + - backup_cluster_version + - backup_dest + - backup_piece_list + - backup_set_list + - backup_tenant_id + - backup_tenant_name + - finish_ls_count + - finish_tablet_count + - finish_timestamp + - job_id + - ls_count + - restore_option + - restore_scn + - restore_scn_display + - restore_tenant_id + - restore_tenant_name + - start_timestamp + - status + - tablet_count + - tenant_id + type: object status: description: 'INSERT ADDITIONAL STATUS FIELD - define observed state of cluster Important: Run "make" to regenerate code after modifying this 
file' type: string required: - - jobStatus - - progress - status type: object type: object @@ -5039,7 +5153,10 @@ spec: - jsonPath: .status.status name: status type: string - - jsonPath: .spec.clusterName + - jsonPath: .spec.tenantName + name: tenantName + type: string + - jsonPath: .spec.obcluster name: clusterName type: string - jsonPath: .metadata.creationTimestamp @@ -5089,6 +5206,13 @@ spec: connectWhiteList: default: '%' type: string + credentials: + properties: + root: + type: string + standbyRo: + type: string + type: object forceDelete: default: false type: boolean @@ -5162,8 +5286,47 @@ spec: - zone type: object type: array + source: + description: Source for restoring or creating standby + properties: + restore: + properties: + cancel: + type: boolean + description: + type: string + replayLogUntil: + properties: + scn: + type: string + timestamp: + type: string + unlimited: + type: boolean + type: object + sourceUri: + type: string + until: + properties: + scn: + type: string + timestamp: + type: string + unlimited: + type: boolean + type: object + required: + - sourceUri + - until + type: object + tenant: + type: string + type: object tenantName: type: string + tenantRole: + default: PRIMARY + type: string unitNum: type: integer required: @@ -5313,6 +5476,150 @@ spec: - zoneList type: object type: array + source: + properties: + restore: + description: OBTenantRestoreStatus defines the observed state + of OBTenantRestore + properties: + operationContext: + properties: + failureRule: + properties: + failureStatus: + type: string + failureStrategy: + type: string + required: + - failureStatus + - failureStrategy + type: object + idx: + type: integer + name: + type: string + targetStatus: + type: string + task: + type: string + taskId: + type: string + taskStatus: + type: string + tasks: + items: + type: string + type: array + required: + - idx + - name + - targetStatus + - task + - taskId + - taskStatus + - tasks + type: object + restoreProgress: + 
description: RestoreHistory is the history of restore job, + matches view CDB_OB_RESTORE_HISTORY + properties: + backup_cluster_name: + type: string + backup_cluster_version: + type: string + backup_dest: + type: string + backup_piece_list: + type: string + backup_set_list: + type: string + backup_tenant_id: + format: int64 + type: integer + backup_tenant_name: + type: string + description: + type: string + finish_bytes: + format: int64 + type: integer + finish_bytes_display: + type: string + finish_ls_count: + format: int64 + type: integer + finish_tablet_count: + format: int64 + type: integer + finish_timestamp: + type: string + job_id: + format: int64 + type: integer + ls_count: + format: int64 + type: integer + restore_option: + type: string + restore_scn: + format: int64 + type: integer + restore_scn_display: + type: string + restore_tenant_id: + format: int64 + type: integer + restore_tenant_name: + type: string + start_timestamp: + type: string + status: + type: string + tablet_count: + format: int64 + type: integer + tenant_id: + format: int64 + type: integer + total_bytes: + format: int64 + type: integer + total_bytes_display: + type: string + required: + - backup_cluster_name + - backup_cluster_version + - backup_dest + - backup_piece_list + - backup_set_list + - backup_tenant_id + - backup_tenant_name + - finish_ls_count + - finish_tablet_count + - finish_timestamp + - job_id + - ls_count + - restore_option + - restore_scn + - restore_scn_display + - restore_tenant_id + - restore_tenant_name + - start_timestamp + - status + - tablet_count + - tenant_id + type: object + status: + description: 'INSERT ADDITIONAL STATUS FIELD - define observed + state of cluster Important: Run "make" to regenerate code + after modifying this file' + type: string + required: + - status + type: object + tenant: + type: string + type: object status: description: 'INSERT ADDITIONAL STATUS FIELD - define observed state of cluster Important: Run "make" to regenerate code after 
modifying @@ -5344,6 +5651,8 @@ spec: - primaryZone - tenantID type: object + tenantRole: + type: string required: - resourcePool - status @@ -7589,6 +7898,26 @@ rules: - get - patch - update +- apiGroups: + - oceanbase.oceanbase.com + resources: + - obtenant + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - oceanbase.oceanbase.com + resources: + - obtenant/status + verbs: + - get + - patch + - update - apiGroups: - oceanbase.oceanbase.com resources: @@ -7661,6 +7990,52 @@ rules: - get - patch - update +- apiGroups: + - oceanbase.oceanbase.com + resources: + - obtenantoperations + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - oceanbase.oceanbase.com + resources: + - obtenantoperations/finalizers + verbs: + - update +- apiGroups: + - oceanbase.oceanbase.com + resources: + - obtenantoperations/status + verbs: + - get + - patch + - update +- apiGroups: + - oceanbase.oceanbase.com + resources: + - obtenantrestore + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - oceanbase.oceanbase.com + resources: + - obtenantrestore/status + verbs: + - get + - patch + - update - apiGroups: - oceanbase.oceanbase.com resources: @@ -7957,6 +8332,7 @@ spec: - --health-probe-bind-address=:8081 - --metrics-bind-address=127.0.0.1:8080 - --leader-elect + - --manager-namespace=oceanbase-system command: - /manager image: oceanbasedev/ob-operator:2.0.0 @@ -8076,6 +8452,26 @@ metadata: app.kubernetes.io/part-of: ob-operator-generate name: oceanbase-mutating-webhook-configuration webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: oceanbase-webhook-service + namespace: oceanbase-system + path: /mutate-oceanbase-oceanbase-com-v1alpha1-obtenant + failurePolicy: Fail + name: mobtenant.kb.io + rules: + - apiGroups: + - oceanbase.oceanbase.com + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - obtenants 
+ sideEffects: None - admissionReviewVersions: - v1 clientConfig: @@ -8111,6 +8507,26 @@ metadata: app.kubernetes.io/part-of: ob-operator-generate name: oceanbase-validating-webhook-configuration webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: oceanbase-webhook-service + namespace: oceanbase-system + path: /validate-oceanbase-oceanbase-com-v1alpha1-obtenant + failurePolicy: Fail + name: vobtenant.kb.io + rules: + - apiGroups: + - oceanbase.oceanbase.com + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - obtenants + sideEffects: None - admissionReviewVersions: - v1 clientConfig: diff --git a/deploy/tenant_restore.yaml b/deploy/tenant_restore.yaml index 33be89f8b..4a1ea9873 100644 --- a/deploy/tenant_restore.yaml +++ b/deploy/tenant_restore.yaml @@ -13,7 +13,7 @@ spec: tenantRole: PRIMARY source: restore: - sourceUri: "file:///ob-backup/t1/backup,file://ob-backup/t1/archive" + sourceUri: "file:///ob-backup/t1/data_backup_custom1,file:///ob-backup/t1/log_archive_custom1" until: unlimited: true replayLogUntil: @@ -21,7 +21,7 @@ spec: pools: - zone: zone1 type: - name: FUll + name: Full replica: 1 isActive: true resource: diff --git a/distribution/oceanbase/build.sh b/distribution/oceanbase/build.sh index 9ff6e687c..2f9547be8 100755 --- a/distribution/oceanbase/build.sh +++ b/distribution/oceanbase/build.sh @@ -1,2 +1,2 @@ #!/bin/bash - docker build -t $1 --build-arg GOPROXY=${GOPROXY} --build-arg VERSION=$2 . +docker build -t $1 --build-arg GOPROXY=$(go env GOPROXY) --build-arg VERSION=$2 . 
diff --git a/go.mod b/go.mod index c967649f3..9a0a6abd3 100644 --- a/go.mod +++ b/go.mod @@ -7,8 +7,8 @@ require ( github.com/go-sql-driver/mysql v1.7.1 github.com/google/uuid v1.3.0 github.com/jmoiron/sqlx v1.3.5 - github.com/onsi/ginkgo/v2 v2.9.5 - github.com/onsi/gomega v1.27.7 + github.com/onsi/ginkgo/v2 v2.11.0 + github.com/onsi/gomega v1.27.10 github.com/pkg/errors v0.9.1 github.com/robfig/cron/v3 v3.0.1 github.com/stretchr/testify v1.8.1 @@ -55,13 +55,13 @@ require ( go.uber.org/atomic v1.7.0 // indirect go.uber.org/multierr v1.6.0 // indirect go.uber.org/zap v1.24.0 // indirect - golang.org/x/net v0.10.0 // indirect + golang.org/x/net v0.12.0 // indirect golang.org/x/oauth2 v0.5.0 // indirect - golang.org/x/sys v0.8.0 // indirect - golang.org/x/term v0.8.0 // indirect - golang.org/x/text v0.9.0 // indirect + golang.org/x/sys v0.10.0 // indirect + golang.org/x/term v0.10.0 // indirect + golang.org/x/text v0.11.0 // indirect golang.org/x/time v0.3.0 // indirect - golang.org/x/tools v0.9.1 // indirect + golang.org/x/tools v0.9.3 // indirect gomodules.xyz/jsonpatch/v2 v2.3.0 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/protobuf v1.30.0 // indirect diff --git a/go.sum b/go.sum index 32da7d867..db5198d41 100644 --- a/go.sum +++ b/go.sum @@ -111,10 +111,10 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/onsi/ginkgo/v2 v2.9.5 h1:+6Hr4uxzP4XIUyAkg61dWBw8lb/gc4/X5luuxN/EC+Q= -github.com/onsi/ginkgo/v2 v2.9.5/go.mod h1:tvAoo1QUJwNEU2ITftXTpR7R1RbCzoZUOs3RonqW57k= -github.com/onsi/gomega v1.27.7 h1:fVih9JD6ogIiHUN6ePK7HJidyEDpWGVB5mzM7cWNXoU= -github.com/onsi/gomega 
v1.27.7/go.mod h1:1p8OOlwo2iUUDsHnOrjE5UKYJ+e3W8eQ3qSlRahPmr4= +github.com/onsi/ginkgo/v2 v2.11.0 h1:WgqUCUt/lT6yXoQ8Wef0fsNn5cAuMK7+KT9UFRz2tcU= +github.com/onsi/ginkgo/v2 v2.11.0/go.mod h1:ZhrRA5XmEE3x3rhlzamx/JJvujdZoJ2uvgI7kR0iZvM= +github.com/onsi/gomega v1.27.10 h1:naR28SdDFlqrG6kScpT8VWpu1xWY5nJRCF3XaYyBjhI= +github.com/onsi/gomega v1.27.10/go.mod h1:RsS8tutOdbdgzbPtzzATp12yT7kM5I5aElG3evPbQ0M= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -179,8 +179,8 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= -golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M= -golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.12.0 h1:cfawfvKITfUsFCeJIHJrbSxpeu/E81khclypR0GVT50= +golang.org/x/net v0.12.0/go.mod h1:zEVYFnQC7m/vmpQFELhcD1EWkZlX69l4oqgmer6hfKA= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.5.0 h1:HuArIo48skDwlrvM3sEdHXElYslAMsf3KwRkkW4MC4s= golang.org/x/oauth2 v0.5.0/go.mod h1:9/XBHVqLaWO3/BRHs5jbpYCnOZVjj5V0ndyaAM7KB4I= @@ -200,16 +200,16 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys 
v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= -golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA= +golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.8.0 h1:n5xxQn2i3PC0yLAbjTpNT85q/Kgzcr2gIoX9OrJUols= -golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.10.0 h1:3R7pNqamzBraeqj/Tj8qt1aQ2HpmlC+Cx/qL/7hn4/c= +golang.org/x/term v0.10.0/go.mod h1:lpqdcUyK/oCiQxvxVrppt5ggO2KCZ5QblwqPnfZ6d5o= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= -golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.11.0 h1:LAntKIrcmeSKERyiOh0XMV39LXS8IE9UL2yP7+f5ij4= +golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -221,8 +221,8 @@ golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtn golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.5/go.mod 
h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo= -golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= +golang.org/x/tools v0.9.3 h1:Gn1I8+64MsuTb/HpH+LmQtNas23LhUVr3rYZ0eKuaMM= +golang.org/x/tools v0.9.3/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/pkg/const/status/tenantstatus/obtenant_status.go b/pkg/const/status/tenantstatus/obtenant_status.go index d84943dc4..553665b0c 100644 --- a/pkg/const/status/tenantstatus/obtenant_status.go +++ b/pkg/const/status/tenantstatus/obtenant_status.go @@ -31,4 +31,5 @@ const ( SwitchingRole = "switching role" RestoreCanceled = "restore canceled" CancelingRestore = "canceling restore" + RestoreFailed = "restore failed" ) diff --git a/pkg/controller/obtenantrestore_controller.go b/pkg/controller/obtenantrestore_controller.go index 1077742a9..5acd83ded 100644 --- a/pkg/controller/obtenantrestore_controller.go +++ b/pkg/controller/obtenantrestore_controller.go @@ -18,6 +18,7 @@ package controller import ( "context" + "time" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/record" @@ -26,6 +27,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" v1alpha1 "github.com/oceanbase/ob-operator/api/v1alpha1" + "github.com/oceanbase/ob-operator/pkg/resource" ) // OBTenantRestoreReconciler reconciles a OBTenantRestore object @@ -48,11 +50,38 @@ type OBTenantRestoreReconciler struct { // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.14.4/pkg/reconcile func (r *OBTenantRestoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { _ = req - _ = 
log.FromContext(ctx) - // TODO(user): your logic here + logger := log.FromContext(ctx) + restore := &v1alpha1.OBTenantRestore{} + err := r.Client.Get(ctx, req.NamespacedName, restore) + if err != nil { + return ctrl.Result{}, client.IgnoreNotFound(err) + } - return ctrl.Result{}, nil + // finalizerName := "obtenantrestore.finalizers.oceanbase.com" + // // examine DeletionTimestamp to determine if the policy is under deletion + // if restore.ObjectMeta.DeletionTimestamp.IsZero() { + // if !controllerutil.ContainsFinalizer(restore, finalizerName) { + // controllerutil.AddFinalizer(restore, finalizerName) + // if err := r.Update(ctx, restore); err != nil { + // return ctrl.Result{}, err + // } + // } + // } + + mgr := &resource.ObTenantRestoreManager{ + Ctx: ctx, + Resource: restore, + Client: r.Client, + Recorder: r.Recorder, + Logger: &logger, + } + + coordinator := resource.NewCoordinator(mgr, &logger) + _, err = coordinator.Coordinate() + return ctrl.Result{ + RequeueAfter: 10 * time.Second, + }, err } // SetupWithManager sets up the controller with the Manager. diff --git a/pkg/oceanbase/const/sql/restore.go b/pkg/oceanbase/const/sql/restore.go index f7c87b18a..d99c60fd6 100644 --- a/pkg/oceanbase/const/sql/restore.go +++ b/pkg/oceanbase/const/sql/restore.go @@ -23,7 +23,7 @@ const ( StartRestoreWithLimit = "ALTER SYSTEM RESTORE %s FROM ? UNTIL %s=? WITH ?" // tenant_name, uri, restore_option StartRestoreUnlimited = "ALTER SYSTEM RESTORE %s FROM ? WITH ?" - CancelRestore = "ALTER SYSTEM CANCEL RESTORE ?" + CancelRestore = "ALTER SYSTEM CANCEL RESTORE %s" ReplayStandbyLog = "ALTER SYSTEM RECOVER STANDBY TENANT ? UNTIL %s" ActivateStandby = "ALTER SYSTEM ACTIVATE STANDBY TENANT ?" 
QueryRestoreProgress = "SELECT " + restoreProgressFields + " FROM CDB_OB_RESTORE_PROGRESS" diff --git a/pkg/oceanbase/operation/restore.go b/pkg/oceanbase/operation/restore.go index 35408bfba..a809e502e 100644 --- a/pkg/oceanbase/operation/restore.go +++ b/pkg/oceanbase/operation/restore.go @@ -51,7 +51,7 @@ func (m *OceanbaseOperationManager) StartRestoreUnlimited(tenantName, uri, resto } func (m *OceanbaseOperationManager) CancelRestoreOfTenant(tenantName string) error { - err := m.ExecWithDefaultTimeout(sql.CancelRestore, tenantName) + err := m.ExecWithDefaultTimeout(fmt.Sprintf(sql.CancelRestore, tenantName)) if err != nil { m.Logger.Error(err, "Got exception when cancel restore of tenant") return errors.Wrap(err, "Cancel restore of tenant") diff --git a/pkg/oceanbase/test/restore_test.go b/pkg/oceanbase/test/restore_test.go index 2da23302c..700d285fd 100644 --- a/pkg/oceanbase/test/restore_test.go +++ b/pkg/oceanbase/test/restore_test.go @@ -333,3 +333,107 @@ var _ = Describe("Test Restore Operation", Serial, Label("restore"), func() { } }) }) + +var _ = Describe("Test canceling restore", Serial, Label("canceling"), func() { + var con *operation.OceanbaseOperationManager + var standbyName string + var _ = BeforeEach(func() { + var err error + logger := logr.Discard() + ds := connector.NewOceanBaseDataSource(host, port, sysUser, "sys", sysPassword, database) + con, err = operation.GetOceanbaseOperationManager(ds) + Expect(err).To(BeNil()) + con.Logger = &logger + standbyName = tenant + "_standby" + }) + It("Create units", func() { + By("Create unit") + unitList, err := con.GetUnitConfigV4List() + Expect(err).To(BeNil()) + exists := false + for _, unit := range unitList { + if unit.Name == "unit_test" { + exists = true + break + } + } + if !exists { + err = con.AddUnitConfigV4(&model.UnitConfigV4SQLParam{ + UnitConfigName: "unit_test", + MinCPU: 2, + MaxCPU: 2, + MemorySize: 2147483648, + MaxIops: 1024, + LogDiskSize: 2147483648, + MinIops: 1024, + }) + 
Expect(err).To(BeNil()) + } + + }) + It("Start and cancel the restore", func() { + By("Check target tenant's existence") + exists, err := con.CheckTenantExistByName(standbyName) + Expect(err).To(BeNil()) + if exists { + Skip("Target standby tenant exists") + } + + By("Create resource pool") + poolList, err := con.GetPoolList() + Expect(err).To(BeNil()) + exists = false + for _, pool := range poolList { + if pool.Name == "pool_test_standby1" { + exists = true + break + } + } + if !exists { + for _, v := range []int{1, 2, 3} { + err = con.AddPool(model.PoolSQLParam{ + UnitNum: 1, + PoolName: fmt.Sprintf("pool_test_standby%d", v), + ZoneList: fmt.Sprintf("zone%d", v), + UnitName: "unit_test", + }) + Expect(err).To(BeNil()) + } + } + + By("Trigger restoration of standby tenant") + backupDest := "file:///ob-backup/" + tenant + "/data_backup_custom1" + archiveDest := "file:///ob-backup/" + tenant + "/log_archive_custom1" + err = con.StartRestoreUnlimited(standbyName, strings.Join([]string{backupDest, archiveDest}, ","), "pool_list=pool_test_standby1,pool_test_standby2,pool_test_standby3") + Expect(err).To(BeNil()) + + By("Cancel restoration of tenant") + err = con.CancelRestoreOfTenant(standbyName) + Expect(err).To(BeNil()) + }) + + It("Delete Tenants", Label("delete_tenants"), func() { + By("Deleting primary tenant") + exists, err := con.CheckTenantExistByName(tenant) + Expect(err).To(BeNil()) + if exists { + Expect(con.DeleteTenant(tenant, true)).To(BeNil()) + } + + By("Deleting standby tenants") + exists, err = con.CheckTenantExistByName(standbyName) + Expect(err).To(BeNil()) + if exists { + Expect(con.DeleteTenant(standbyName, true)).To(BeNil()) + } + + By("Deleting resource pools") + for _, pool := range []string{"pool_test1", "pool_test2", "pool_test3", "pool_test_standby1", "pool_test_standby2", "pool_test_standby3"} { + exists, err = con.CheckPoolExistByName(pool) + Expect(err).To(BeNil()) + if exists { + Expect(con.DeletePool(pool)).To(BeNil()) + } + } + }) +}) 
diff --git a/pkg/resource/obtenant_manager.go b/pkg/resource/obtenant_manager.go index 9ddd4659e..9fe2bf9db 100644 --- a/pkg/resource/obtenant_manager.go +++ b/pkg/resource/obtenant_manager.go @@ -141,6 +141,7 @@ func (m *OBTenantManager) UpdateStatus() error { m.OBTenant.Spec.Source != nil && m.OBTenant.Spec.Source.Restore != nil && m.OBTenant.Spec.Source.Restore.Cancel { + m.OBTenant.Status.OperationContext = nil m.OBTenant.Status.Status = tenantstatus.CancelingRestore } else if m.OBTenant.Status.Status != tenantstatus.Running { m.Logger.Info(fmt.Sprintf("OBTenant status is %s (not running), skip compare", m.OBTenant.Status.Status)) diff --git a/pkg/resource/obtenantrestore_manager.go b/pkg/resource/obtenantrestore_manager.go index 7c5fb92e6..4c3e390e7 100644 --- a/pkg/resource/obtenantrestore_manager.go +++ b/pkg/resource/obtenantrestore_manager.go @@ -16,6 +16,7 @@ import ( "context" "github.com/go-logr/logr" + "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" @@ -112,6 +113,9 @@ func (m *ObTenantRestoreManager) checkRestoreProgress() error { } else { m.Resource.Status.Status = constants.RestoreJobStatusActivating } + } else if restoreJob.Status == "FAIL" { + m.Recorder.Event(m.Resource, corev1.EventTypeWarning, "Restore job is failed", "Restore job is failed") + m.Resource.Status.Status = constants.RestoreJobFailed } } if restoreJob == nil { @@ -127,6 +131,9 @@ func (m *ObTenantRestoreManager) checkRestoreProgress() error { } else { m.Resource.Status.Status = constants.RestoreJobStatusActivating } + } else if restoreHistory != nil && restoreHistory.Status == "FAIL" { + m.Recorder.Event(m.Resource, corev1.EventTypeWarning, "Restore job is failed", "Restore job is failed") + m.Resource.Status.Status = constants.RestoreJobFailed } } return nil @@ -151,8 +158,9 @@ func (m ObTenantRestoreManager) GetTaskFunc(name string) (func() error, error) { return m.StartLogReplay, nil case 
taskname.ActivateStandby: return m.ActivateStandby, nil + default: + return nil, errors.New("Task name not registered") } - return nil, nil } func (m ObTenantRestoreManager) GetTaskFlow() (*task.TaskFlow, error) { @@ -201,15 +209,15 @@ func (m ObTenantRestoreManager) PrintErrEvent(err error) { } func (m *ObTenantRestoreManager) retryUpdateStatus() error { - resource := &v1alpha1.OBTenantRestore{} - err := m.Client.Get(m.Ctx, types.NamespacedName{ - Namespace: m.Resource.GetNamespace(), - Name: m.Resource.GetName(), - }, resource) - if err != nil { - return client.IgnoreNotFound(err) - } return retry.RetryOnConflict(retry.DefaultRetry, func() error { + resource := &v1alpha1.OBTenantRestore{} + err := m.Client.Get(m.Ctx, types.NamespacedName{ + Namespace: m.Resource.GetNamespace(), + Name: m.Resource.GetName(), + }, resource) + if err != nil { + return client.IgnoreNotFound(err) + } resource.Status = m.Resource.Status return m.Client.Status().Update(m.Ctx, resource) }) diff --git a/pkg/resource/obtenantrestore_task.go b/pkg/resource/obtenantrestore_task.go index 9701b9544..e466d9be2 100644 --- a/pkg/resource/obtenantrestore_task.go +++ b/pkg/resource/obtenantrestore_task.go @@ -141,6 +141,10 @@ func (m *OBTenantManager) CancelTenantRestoreJob() error { m.Logger.Error(err, "delete restore job CR") return err } + err = m.Client.Delete(m.Ctx, m.OBTenant) + if err != nil { + m.Logger.Error(err, "delete tenant CR") + } return nil } diff --git a/pkg/task/obtenant_flow.go b/pkg/task/obtenant_flow.go index 5d4a04e5d..93edee6d9 100644 --- a/pkg/task/obtenant_flow.go +++ b/pkg/task/obtenant_flow.go @@ -143,7 +143,7 @@ func RestoreTenant() *TaskFlow { }, TargetStatus: tenantstatus.Running, OnFailure: strategy.FailureRule{ - NextTryStatus: tenantstatus.Restoring, + NextTryStatus: tenantstatus.RestoreFailed, }, }, } diff --git a/pkg/task/restore_flow.go b/pkg/task/restore_flow.go index 9fef7b9e0..133a3b711 100644 --- a/pkg/task/restore_flow.go +++ b/pkg/task/restore_flow.go @@ 
-24,10 +24,10 @@ func StartRestoreJob() *TaskFlow { return &TaskFlow{ OperationContext: &v1alpha1.OperationContext{ Name: flowname.StartRestoreFlow, - Tasks: []string{taskname.StartBackupJob}, + Tasks: []string{taskname.StartRestoreJob}, TargetStatus: string(constants.RestoreJobRunning), OnFailure: strategy.FailureRule{ - NextTryStatus: string(constants.RestoreJobStarting), + NextTryStatus: string(constants.RestoreJobFailed), }, }, }