From 8087c7f13a85370356d4b5a89e03ab6d3d0a7408 Mon Sep 17 00:00:00 2001 From: Lyndon-Li Date: Wed, 4 Dec 2024 10:28:50 +0800 Subject: [PATCH 1/3] add diagnostic for data mover exposer Signed-off-by: Lyndon-Li --- pkg/controller/data_download_controller.go | 2 + .../data_download_controller_test.go | 4 ++ pkg/controller/data_upload_controller.go | 2 + pkg/controller/data_upload_controller_test.go | 8 +++ pkg/exposer/csi_snapshot.go | 61 +++++++++++++++++++ pkg/exposer/generic_restore.go | 48 +++++++++++++++ ...ic_restore.go => GenericRestoreExposer.go} | 18 ++++++ pkg/exposer/snapshot.go | 4 ++ pkg/nodeagent/node_agent.go | 18 +++++- pkg/util/csi/volume_snapshot.go | 42 +++++++++++++ pkg/util/kube/pod.go | 10 +++ pkg/util/kube/pvc_pv.go | 17 ++++++ pkg/util/kube/pvc_pv_test.go | 3 + 13 files changed, 235 insertions(+), 2 deletions(-) rename pkg/exposer/mocks/{generic_restore.go => GenericRestoreExposer.go} (89%) diff --git a/pkg/controller/data_download_controller.go b/pkg/controller/data_download_controller.go index d607b13b06..95a08ef5c9 100644 --- a/pkg/controller/data_download_controller.go +++ b/pkg/controller/data_download_controller.go @@ -689,6 +689,8 @@ func (r *DataDownloadReconciler) onPrepareTimeout(ctx context.Context, dd *veler return } + log.Warn(r.restoreExposer.DiagnoseExpose(ctx, getDataDownloadOwnerObject(dd))) + r.restoreExposer.CleanUp(ctx, getDataDownloadOwnerObject(dd)) log.Info("Dataupload has been cleaned up") diff --git a/pkg/controller/data_download_controller_test.go b/pkg/controller/data_download_controller_test.go index f7d35ff661..498e379edc 100644 --- a/pkg/controller/data_download_controller_test.go +++ b/pkg/controller/data_download_controller_test.go @@ -971,6 +971,10 @@ func (dt *ddResumeTestHelper) PeekExposed(context.Context, corev1.ObjectReferenc return nil } +func (dt *ddResumeTestHelper) DiagnoseExpose(context.Context, corev1.ObjectReference) string { + return "" +} + func (dt *ddResumeTestHelper) RebindVolume(context.Context, corev1.ObjectReference, string, string, time.Duration) error { return nil } diff --git a/pkg/controller/data_upload_controller.go b/pkg/controller/data_upload_controller.go index 5723e6323b..f58aec1fbc 100644 --- a/pkg/controller/data_upload_controller.go +++ b/pkg/controller/data_upload_controller.go @@ -758,6 +758,8 @@ func (r *DataUploadReconciler) onPrepareTimeout(ctx context.Context, du *velerov volumeSnapshotName = du.Spec.CSISnapshot.VolumeSnapshot } + log.Warn(ep.DiagnoseExpose(ctx, getOwnerObject(du))) + ep.CleanUp(ctx, getOwnerObject(du), volumeSnapshotName, du.Spec.SourceNamespace) log.Info("Dataupload has been cleaned up") diff --git a/pkg/controller/data_upload_controller_test.go b/pkg/controller/data_upload_controller_test.go index c6cdfa0915..c7f110e88e 100644 --- a/pkg/controller/data_upload_controller_test.go +++ b/pkg/controller/data_upload_controller_test.go @@ -300,6 +300,10 @@ func (f *fakeSnapshotExposer) PeekExposed(ctx context.Context, ownerObject corev return f.peekErr } +func (f *fakeSnapshotExposer) DiagnoseExpose(context.Context, corev1.ObjectReference) string { + return "" +} + func (f *fakeSnapshotExposer) CleanUp(context.Context, corev1.ObjectReference, string, string) { } @@ -1043,6 +1047,10 @@ func (dt *duResumeTestHelper) PeekExposed(context.Context, corev1.ObjectReferenc return nil } +func (dt *duResumeTestHelper) DiagnoseExpose(context.Context, corev1.ObjectReference) string { + return "" +} + func (dt *duResumeTestHelper) CleanUp(context.Context, corev1.ObjectReference, string, string) {} 
func (dt *duResumeTestHelper) newMicroServiceBRWatcher(kbclient.Client, kubernetes.Interface, manager.Manager, string, string, string, string, string, string, diff --git a/pkg/exposer/csi_snapshot.go b/pkg/exposer/csi_snapshot.go index 59aabf59e0..c84b441f0d 100644 --- a/pkg/exposer/csi_snapshot.go +++ b/pkg/exposer/csi_snapshot.go @@ -308,6 +308,67 @@ func (e *csiSnapshotExposer) PeekExposed(ctx context.Context, ownerObject corev1 return nil } +func (e *csiSnapshotExposer) DiagnoseExpose(ctx context.Context, ownerObject corev1.ObjectReference) string { + backupPodName := ownerObject.Name + backupPVCName := ownerObject.Name + backupVSName := ownerObject.Name + + diag := fmt.Sprintf("***************************begin diagnose CSI exposer[%s/%s]***************************\n", ownerObject.Namespace, ownerObject.Name) + + pod, err := e.kubeClient.CoreV1().Pods(ownerObject.Namespace).Get(ctx, backupPodName, metav1.GetOptions{}) + if err != nil { + diag += fmt.Sprintf("error getting backup pod %s, err: %v\n", backupPodName, err) + } + + pvc, err := e.kubeClient.CoreV1().PersistentVolumeClaims(ownerObject.Namespace).Get(ctx, backupPVCName, metav1.GetOptions{}) + if err != nil { + diag += fmt.Sprintf("error getting backup pvc %s, err: %v\n", backupPVCName, err) + } + + vs, err := e.csiSnapshotClient.VolumeSnapshots(ownerObject.Namespace).Get(ctx, backupVSName, metav1.GetOptions{}) + if err != nil { + diag += fmt.Sprintf("error getting backup vs %s, err: %v\n", backupVSName, err) + } + + if pod != nil { + diag += kube.DiagnosePod(pod) + + if pod.Spec.NodeName != "" { + if err := nodeagent.KbClientIsRunningInNode(ctx, ownerObject.Namespace, pod.Spec.NodeName, e.kubeClient); err != nil { + diag += fmt.Sprintf("node-agent is not running in node %s\n", pod.Spec.NodeName) + } + } + } + + if pvc != nil { + diag += kube.DiagnosePVC(pvc) + + if pvc.Spec.VolumeName != "" { + if pv, err := e.kubeClient.CoreV1().PersistentVolumes().Get(ctx, pvc.Spec.VolumeName, metav1.GetOptions{}); err != nil { + diag += fmt.Sprintf("error getting backup pv %s, err: %v\n", pvc.Spec.VolumeName, err) + } else { + diag += kube.DiagnosePV(pv) + } + } + } + + if vs != nil { + diag += csi.DiagnoseVS(vs) + + if vs.Status.BoundVolumeSnapshotContentName != nil && *vs.Status.BoundVolumeSnapshotContentName != "" { + if vsc, err := e.csiSnapshotClient.VolumeSnapshotContents().Get(ctx, *vs.Status.BoundVolumeSnapshotContentName, metav1.GetOptions{}); err != nil { + diag += fmt.Sprintf("error getting backup vsc %s, err: %v\n", *vs.Status.BoundVolumeSnapshotContentName, err) + } else { + diag += csi.DiagnoseVSC(vsc) + } + } + } + + diag += fmt.Sprintf("***************************end diagnose CSI exposer[%s/%s]***************************\n", ownerObject.Namespace, ownerObject.Name) + + return diag +} + const cleanUpTimeout = time.Minute func (e *csiSnapshotExposer) CleanUp(ctx context.Context, ownerObject corev1.ObjectReference, vsName string, sourceNamespace string) { diff --git a/pkg/exposer/generic_restore.go b/pkg/exposer/generic_restore.go index d498470a77..8d5ae9a795 100644 --- a/pkg/exposer/generic_restore.go +++ b/pkg/exposer/generic_restore.go @@ -30,6 +30,7 @@ import ( "k8s.io/client-go/kubernetes" "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/vmware-tanzu/velero/pkg/nodeagent" "github.com/vmware-tanzu/velero/pkg/util/boolptr" "github.com/vmware-tanzu/velero/pkg/util/kube" ) @@ -49,6 +50,10 @@ type GenericRestoreExposer interface { // Otherwise, it returns nil immediately. 
PeekExposed(context.Context, corev1.ObjectReference) error
+ // DiagnoseExpose generates the diagnostic info when the expose is not finished for a long time.
+ // If it finds any problem, it returns a string about the problem.
+ DiagnoseExpose(context.Context, corev1.ObjectReference) string
+
// RebindVolume unexposes the restored PV and rebind it to the target PVC
RebindVolume(context.Context, corev1.ObjectReference, string, string, time.Duration) error
@@ -195,6 +200,49 @@ func (e *genericRestoreExposer) PeekExposed(ctx context.Context, ownerObject cor
return nil
}
+func (e *genericRestoreExposer) DiagnoseExpose(ctx context.Context, ownerObject corev1.ObjectReference) string {
+ restorePodName := ownerObject.Name
+ restorePVCName := ownerObject.Name
+
+ diag := fmt.Sprintf("***************************begin diagnose restore exposer[%s/%s]***************************\n", ownerObject.Namespace, ownerObject.Name)
+
+ pod, err := e.kubeClient.CoreV1().Pods(ownerObject.Namespace).Get(ctx, restorePodName, metav1.GetOptions{})
+ if err != nil {
+ diag += fmt.Sprintf("error to get restore pod %s, err: %v\n", restorePodName, err)
+ }
+
+ pvc, err := e.kubeClient.CoreV1().PersistentVolumeClaims(ownerObject.Namespace).Get(ctx, restorePVCName, metav1.GetOptions{})
+ if err != nil {
+ diag += fmt.Sprintf("error to get restore pvc %s, err: %v\n", restorePVCName, err)
+ }
+
+ if pod != nil {
+ diag += kube.DiagnosePod(pod)
+
+ if pod.Spec.NodeName != "" {
+ if err := nodeagent.KbClientIsRunningInNode(ctx, ownerObject.Namespace, pod.Spec.NodeName, e.kubeClient); err != nil {
+ diag += fmt.Sprintf("node-agent is not running in node %s\n", pod.Spec.NodeName)
+ }
+ }
+ }
+
+ if pvc != nil {
+ diag += kube.DiagnosePVC(pvc)
+
+ if pvc.Spec.VolumeName != "" {
+ if pv, err := e.kubeClient.CoreV1().PersistentVolumes().Get(ctx, pvc.Spec.VolumeName, metav1.GetOptions{}); err != nil {
+ diag += fmt.Sprintf("error getting backup pv %s, err: %v\n", pvc.Spec.VolumeName, err)
+ } else {
+ diag += kube.DiagnosePV(pv)
+ }
+ }
+ }
+
+ diag += fmt.Sprintf("***************************end diagnose restore exposer[%s/%s]***************************\n", ownerObject.Namespace, ownerObject.Name)
+
+ return diag
+}
+
func (e *genericRestoreExposer) CleanUp(ctx context.Context, ownerObject corev1.ObjectReference) {
restorePodName := ownerObject.Name
restorePVCName := ownerObject.Name
diff --git a/pkg/exposer/mocks/generic_restore.go b/pkg/exposer/mocks/GenericRestoreExposer.go
similarity index 89%
rename from pkg/exposer/mocks/generic_restore.go
rename to pkg/exposer/mocks/GenericRestoreExposer.go
index e0b76d6e75..83a9789af6 100644
--- a/pkg/exposer/mocks/generic_restore.go
+++ b/pkg/exposer/mocks/GenericRestoreExposer.go
@@ -26,6 +26,24 @@ func (_m *GenericRestoreExposer) CleanUp(_a0 context.Context, _a1 v1.ObjectRefer
_m.Called(_a0, _a1)
}
+// DiagnoseExpose provides a mock function with given fields: _a0, _a1
+func (_m *GenericRestoreExposer) DiagnoseExpose(_a0 context.Context, _a1 v1.ObjectReference) string {
+ ret := _m.Called(_a0, _a1)
+
+ if len(ret) == 0 {
+ panic("no return value specified for DiagnoseExpose")
+ }
+
+ var r0 string
+ if rf, ok := ret.Get(0).(func(context.Context, v1.ObjectReference) string); ok {
+ r0 = rf(_a0, _a1)
+ } else {
+ r0 = ret.Get(0).(string)
+ }
+
+ return r0
+}
+
// Expose provides a mock function with given fields: _a0, _a1, _a2, _a3, _a4, _a5, _a6
func (_m *GenericRestoreExposer) Expose(_a0 context.Context, _a1 v1.ObjectReference, _a2 string, _a3 string, _a4 map[string]string, _a5
v1.ResourceRequirements, _a6 time.Duration) error { ret := _m.Called(_a0, _a1, _a2, _a3, _a4, _a5, _a6) diff --git a/pkg/exposer/snapshot.go b/pkg/exposer/snapshot.go index 63fee5e3a9..a4a6bd7df2 100644 --- a/pkg/exposer/snapshot.go +++ b/pkg/exposer/snapshot.go @@ -37,6 +37,10 @@ type SnapshotExposer interface { // Otherwise, it returns nil immediately. PeekExposed(context.Context, corev1.ObjectReference) error + // DiagnoseExpose generate the diagnostic info when the expose is not finished for a long time. + // If it finds any problem, it returns an string about the problem. + DiagnoseExpose(context.Context, corev1.ObjectReference) string + // CleanUp cleans up any objects generated during the snapshot expose CleanUp(context.Context, corev1.ObjectReference, string, string) } diff --git a/pkg/nodeagent/node_agent.go b/pkg/nodeagent/node_agent.go index ff5d011eca..a57379f37f 100644 --- a/pkg/nodeagent/node_agent.go +++ b/pkg/nodeagent/node_agent.go @@ -100,8 +100,17 @@ func IsRunning(ctx context.Context, kubeClient kubernetes.Interface, namespace s } } -// IsRunningInNode checks if the node agent pod is running properly in a specified node. If not, return the error found +// KbClientIsRunningInNode checks if the node agent pod is running properly in a specified node through kube client. If not, return the error found +func KbClientIsRunningInNode(ctx context.Context, namespace string, nodeName string, kubeClient kubernetes.Interface) error { + return isRunningInNode(ctx, namespace, nodeName, nil, kubeClient) +} + +// IsRunningInNode checks if the node agent pod is running properly in a specified node through controller client. If not, return the error found func IsRunningInNode(ctx context.Context, namespace string, nodeName string, crClient ctrlclient.Client) error { + return isRunningInNode(ctx, namespace, nodeName, crClient, nil) +} + +func isRunningInNode(ctx context.Context, namespace string, nodeName string, crClient ctrlclient.Client, kubeClient kubernetes.Interface) error { if nodeName == "" { return errors.New("node name is empty") } @@ -112,7 +121,12 @@ func IsRunningInNode(ctx context.Context, namespace string, nodeName string, crC return errors.Wrap(err, "fail to parse selector") } - err = crClient.List(ctx, pods, &ctrlclient.ListOptions{LabelSelector: parsedSelector}) + if crClient != nil { + err = crClient.List(ctx, pods, &ctrlclient.ListOptions{LabelSelector: parsedSelector}) + } else { + pods, err = kubeClient.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{LabelSelector: parsedSelector.String()}) + } + if err != nil { return errors.Wrap(err, "failed to list daemonset pods") } diff --git a/pkg/util/csi/volume_snapshot.go b/pkg/util/csi/volume_snapshot.go index 76a4d59fa5..fcd6836388 100644 --- a/pkg/util/csi/volume_snapshot.go +++ b/pkg/util/csi/volume_snapshot.go @@ -773,3 +773,45 @@ func WaitUntilVSCHandleIsReady( return vsc, nil } + +func DiagnoseVS(vs *snapshotv1api.VolumeSnapshot) string { + vscName := "" + if vs.Status.BoundVolumeSnapshotContentName != nil { + vscName = *vs.Status.BoundVolumeSnapshotContentName + } + + readyToUse := false + if vs.Status.ReadyToUse != nil { + readyToUse = *vs.Status.ReadyToUse + } + + errMessage := "" + if vs.Status.Error != nil && vs.Status.Error.Message != nil { + errMessage = *vs.Status.Error.Message + } + + diag := fmt.Sprintf("VS %s/%s, bind to %s, readToUse %v, errMessage %s\n", vs.Namespace, vs.Name, vscName, readyToUse, errMessage) + + return diag +} + +func DiagnoseVSC(vsc *snapshotv1api.VolumeSnapshotContent) string { + 
handle := "" + if vsc.Status.SnapshotHandle != nil { + handle = *vsc.Status.SnapshotHandle + } + + readyToUse := false + if vsc.Status.ReadyToUse != nil { + readyToUse = *vsc.Status.ReadyToUse + } + + errMessage := "" + if vsc.Status.Error != nil && vsc.Status.Error.Message != nil { + errMessage = *vsc.Status.Error.Message + } + + diag := fmt.Sprintf("VSC %s, readToUse %v, errMessage %s, handle %s\n", vsc.Name, readyToUse, errMessage, handle) + + return diag +} diff --git a/pkg/util/kube/pod.go b/pkg/util/kube/pod.go index 593d1541f2..9f126a71a1 100644 --- a/pkg/util/kube/pod.go +++ b/pkg/util/kube/pod.go @@ -257,3 +257,13 @@ func ToSystemAffinity(loadAffinities []*LoadAffinity) *corev1api.Affinity { return nil } + +func DiagnosePod(pod *corev1api.Pod) string { + diag := fmt.Sprintf("Pod %s/%s, phase %s, node name %s\n", pod.Namespace, pod.Name, pod.Status.Phase, pod.Spec.NodeName) + + for _, condition := range pod.Status.Conditions { + diag += fmt.Sprintf("Pod condition %s, reason %s, message %s\n", condition.Type, condition.Reason, condition.Message) + } + + return diag +} diff --git a/pkg/util/kube/pvc_pv.go b/pkg/util/kube/pvc_pv.go index 1811a2c1df..ac7d15fbfd 100644 --- a/pkg/util/kube/pvc_pv.go +++ b/pkg/util/kube/pvc_pv.go @@ -412,3 +412,20 @@ func GetPVCForPodVolume(vol *corev1api.Volume, pod *corev1api.Pod, crClient crcl return pvc, nil } + +func DiagnosePVC(pvc *corev1api.PersistentVolumeClaim) string { + diag := fmt.Sprintf("PVC %s/%s, phase %s\n", pvc.Namespace, pvc.Name, pvc.Status.Phase) + + for _, condition := range pvc.Status.Conditions { + diag += fmt.Sprintf("PVC condition %s, reason %s, message %s\n", condition.Type, condition.Reason, condition.Message) + } + + diag += fmt.Sprintf("PVC is binding to %s\n", pvc.Spec.VolumeName) + + return diag +} + +func DiagnosePV(pv *corev1api.PersistentVolume) string { + diag := fmt.Sprintf("PV %s, phase %s, reason %s, message %s\n", pv.Name, pv.Status.Phase, pv.Status.Reason, pv.Status.Message) + return diag +} diff --git a/pkg/util/kube/pvc_pv_test.go b/pkg/util/kube/pvc_pv_test.go index 5cbe02dc06..00c3962c5c 100644 --- a/pkg/util/kube/pvc_pv_test.go +++ b/pkg/util/kube/pvc_pv_test.go @@ -1463,3 +1463,6 @@ func TestMakePodPVCAttachment(t *testing.T) { }) } } + +func TestDiagnosePVC(t *testing.T) { +} From 1e2ef374d651984503ee3bed1ba67dd739b674dd Mon Sep 17 00:00:00 2001 From: Lyndon-Li Date: Wed, 4 Dec 2024 14:30:54 +0800 Subject: [PATCH 2/3] add diagnostic for data mover exposer Signed-off-by: Lyndon-Li --- changelogs/unreleased/8482-Lyndon-Li | 1 + pkg/exposer/csi_snapshot.go | 2 +- pkg/exposer/csi_snapshot_test.go | 400 +++++++++++++++++++++++++++ pkg/exposer/generic_restore.go | 6 +- pkg/exposer/generic_restore_test.go | 262 ++++++++++++++++++ pkg/util/csi/volume_snapshot.go | 50 ++-- pkg/util/csi/volume_snapshot_test.go | 194 +++++++++++++ pkg/util/kube/pod.go | 2 +- pkg/util/kube/pod_test.go | 46 +++ pkg/util/kube/pvc_pv.go | 10 +- pkg/util/kube/pvc_pv_test.go | 59 ++++ 11 files changed, 996 insertions(+), 36 deletions(-) create mode 100644 changelogs/unreleased/8482-Lyndon-Li diff --git a/changelogs/unreleased/8482-Lyndon-Li b/changelogs/unreleased/8482-Lyndon-Li new file mode 100644 index 0000000000..c1cec7e241 --- /dev/null +++ b/changelogs/unreleased/8482-Lyndon-Li @@ -0,0 +1 @@ +Fix issue #8125, add diagnostic info for data mover exposers when expose timeout \ No newline at end of file diff --git a/pkg/exposer/csi_snapshot.go b/pkg/exposer/csi_snapshot.go index c84b441f0d..64fe7230f3 100644 --- 
a/pkg/exposer/csi_snapshot.go +++ b/pkg/exposer/csi_snapshot.go @@ -355,7 +355,7 @@ func (e *csiSnapshotExposer) DiagnoseExpose(ctx context.Context, ownerObject cor if vs != nil { diag += csi.DiagnoseVS(vs) - if vs.Status.BoundVolumeSnapshotContentName != nil && *vs.Status.BoundVolumeSnapshotContentName != "" { + if vs.Status != nil && vs.Status.BoundVolumeSnapshotContentName != nil && *vs.Status.BoundVolumeSnapshotContentName != "" { if vsc, err := e.csiSnapshotClient.VolumeSnapshotContents().Get(ctx, *vs.Status.BoundVolumeSnapshotContentName, metav1.GetOptions{}); err != nil { diag += fmt.Sprintf("error getting backup vsc %s, err: %v\n", *vs.Status.BoundVolumeSnapshotContentName, err) } else { diff --git a/pkg/exposer/csi_snapshot_test.go b/pkg/exposer/csi_snapshot_test.go index 0cbe65ffb4..d7e4a768ed 100644 --- a/pkg/exposer/csi_snapshot_test.go +++ b/pkg/exposer/csi_snapshot_test.go @@ -959,3 +959,403 @@ func Test_csiSnapshotExposer_createBackupPVC(t *testing.T) { }) } } + +func Test_csiSnapshotExposer_DiagnoseExpose(t *testing.T) { + backup := &velerov1.Backup{ + TypeMeta: metav1.TypeMeta{ + APIVersion: velerov1.SchemeGroupVersion.String(), + Kind: "Backup", + }, + ObjectMeta: metav1.ObjectMeta{ + Namespace: velerov1.DefaultNamespace, + Name: "fake-backup", + UID: "fake-uid", + }, + } + + backupPodWithoutNodeName := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: velerov1.DefaultNamespace, + Name: "fake-backup", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: backup.APIVersion, + Kind: backup.Kind, + Name: backup.Name, + UID: backup.UID, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodPending, + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodInitialized, + Status: corev1.ConditionTrue, + Message: "fake-pod-message", + }, + }, + }, + } + + backupPodWithNodeName := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: velerov1.DefaultNamespace, + Name: "fake-backup", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: backup.APIVersion, + Kind: backup.Kind, + Name: backup.Name, + UID: backup.UID, + }, + }, + }, + Spec: corev1.PodSpec{ + NodeName: "fake-node", + }, + Status: corev1.PodStatus{ + Phase: corev1.PodPending, + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodInitialized, + Status: corev1.ConditionTrue, + Message: "fake-pod-message", + }, + }, + }, + } + + backupPVCWithoutVolumeName := corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: velerov1.DefaultNamespace, + Name: "fake-backup", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: backup.APIVersion, + Kind: backup.Kind, + Name: backup.Name, + UID: backup.UID, + }, + }, + }, + Status: corev1.PersistentVolumeClaimStatus{ + Phase: corev1.ClaimPending, + }, + } + + backupPVCWithVolumeName := corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: velerov1.DefaultNamespace, + Name: "fake-backup", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: backup.APIVersion, + Kind: backup.Kind, + Name: backup.Name, + UID: backup.UID, + }, + }, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + VolumeName: "fake-pv", + }, + Status: corev1.PersistentVolumeClaimStatus{ + Phase: corev1.ClaimPending, + }, + } + + backupPV := corev1.PersistentVolume{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-pv", + }, + Status: corev1.PersistentVolumeStatus{ + Phase: corev1.VolumePending, + Message: "fake-pv-message", + }, + } + + readyToUse := false + vscMessage := "fake-vsc-message" + backupVSC := 
snapshotv1api.VolumeSnapshotContent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-vsc", + }, + Status: &snapshotv1api.VolumeSnapshotContentStatus{ + ReadyToUse: &readyToUse, + Error: &snapshotv1api.VolumeSnapshotError{ + Message: &vscMessage, + }, + }, + } + + backupVSWithoutStatus := snapshotv1api.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: velerov1.DefaultNamespace, + Name: "fake-backup", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: backup.APIVersion, + Kind: backup.Kind, + Name: backup.Name, + UID: backup.UID, + }, + }, + }, + } + + backupVSWithoutVSC := snapshotv1api.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: velerov1.DefaultNamespace, + Name: "fake-backup", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: backup.APIVersion, + Kind: backup.Kind, + Name: backup.Name, + UID: backup.UID, + }, + }, + }, + Status: &snapshotv1api.VolumeSnapshotStatus{}, + } + + vsMessage := "fake-vs-message" + backupVSWithVSC := snapshotv1api.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: velerov1.DefaultNamespace, + Name: "fake-backup", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: backup.APIVersion, + Kind: backup.Kind, + Name: backup.Name, + UID: backup.UID, + }, + }, + }, + Status: &snapshotv1api.VolumeSnapshotStatus{ + BoundVolumeSnapshotContentName: &backupVSC.Name, + Error: &snapshotv1api.VolumeSnapshotError{ + Message: &vsMessage, + }, + }, + } + + nodeAgentPod := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: velerov1.DefaultNamespace, + Name: "node-agent-pod-1", + Labels: map[string]string{"name": "node-agent"}, + }, + Spec: corev1.PodSpec{ + NodeName: "fake-node", + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + }, + } + + tests := []struct { + name string + ownerBackup *velerov1.Backup + kubeClientObj []runtime.Object + snapshotClientObj []runtime.Object + expected string + }{ + { + name: "no pod, pvc, vs", + ownerBackup: backup, + expected: `***************************begin diagnose CSI exposer[velero/fake-backup]*************************** +error getting backup pod fake-backup, err: pods "fake-backup" not found +error getting backup pvc fake-backup, err: persistentvolumeclaims "fake-backup" not found +error getting backup vs fake-backup, err: volumesnapshots.snapshot.storage.k8s.io "fake-backup" not found +***************************end diagnose CSI exposer[velero/fake-backup]*************************** +`, + }, + { + name: "pod without node name, pvc without volume name, vs without status", + ownerBackup: backup, + kubeClientObj: []runtime.Object{ + &backupPodWithoutNodeName, + &backupPVCWithoutVolumeName, + }, + snapshotClientObj: []runtime.Object{ + &backupVSWithoutStatus, + }, + expected: `***************************begin diagnose CSI exposer[velero/fake-backup]*************************** +Pod velero/fake-backup, phase Pending, node name +Pod condition Initialized, status True, reason , message fake-pod-message +PVC velero/fake-backup, phase Pending, binding to +VS velero/fake-backup, bind to , readyToUse false, errMessage +***************************end diagnose CSI exposer[velero/fake-backup]*************************** +`, + }, + { + name: "pod without node name, pvc without volume name, vs without VSC", + ownerBackup: backup, + kubeClientObj: []runtime.Object{ + &backupPodWithoutNodeName, + &backupPVCWithoutVolumeName, + }, + snapshotClientObj: []runtime.Object{ + &backupVSWithoutVSC, + }, + expected: `***************************begin diagnose CSI 
exposer[velero/fake-backup]*************************** +Pod velero/fake-backup, phase Pending, node name +Pod condition Initialized, status True, reason , message fake-pod-message +PVC velero/fake-backup, phase Pending, binding to +VS velero/fake-backup, bind to , readyToUse false, errMessage +***************************end diagnose CSI exposer[velero/fake-backup]*************************** +`, + }, + { + name: "pod with node name, no node agent", + ownerBackup: backup, + kubeClientObj: []runtime.Object{ + &backupPodWithNodeName, + &backupPVCWithoutVolumeName, + }, + snapshotClientObj: []runtime.Object{ + &backupVSWithoutVSC, + }, + expected: `***************************begin diagnose CSI exposer[velero/fake-backup]*************************** +Pod velero/fake-backup, phase Pending, node name fake-node +Pod condition Initialized, status True, reason , message fake-pod-message +node-agent is not running in node fake-node +PVC velero/fake-backup, phase Pending, binding to +VS velero/fake-backup, bind to , readyToUse false, errMessage +***************************end diagnose CSI exposer[velero/fake-backup]*************************** +`, + }, + { + name: "pod with node name, node agent is running", + ownerBackup: backup, + kubeClientObj: []runtime.Object{ + &backupPodWithNodeName, + &backupPVCWithoutVolumeName, + &nodeAgentPod, + }, + snapshotClientObj: []runtime.Object{ + &backupVSWithoutVSC, + }, + expected: `***************************begin diagnose CSI exposer[velero/fake-backup]*************************** +Pod velero/fake-backup, phase Pending, node name fake-node +Pod condition Initialized, status True, reason , message fake-pod-message +PVC velero/fake-backup, phase Pending, binding to +VS velero/fake-backup, bind to , readyToUse false, errMessage +***************************end diagnose CSI exposer[velero/fake-backup]*************************** +`, + }, + { + name: "pvc with volume name, no pv", + ownerBackup: backup, + kubeClientObj: []runtime.Object{ + &backupPodWithNodeName, + &backupPVCWithVolumeName, + &nodeAgentPod, + }, + snapshotClientObj: []runtime.Object{ + &backupVSWithoutVSC, + }, + expected: `***************************begin diagnose CSI exposer[velero/fake-backup]*************************** +Pod velero/fake-backup, phase Pending, node name fake-node +Pod condition Initialized, status True, reason , message fake-pod-message +PVC velero/fake-backup, phase Pending, binding to fake-pv +error getting backup pv fake-pv, err: persistentvolumes "fake-pv" not found +VS velero/fake-backup, bind to , readyToUse false, errMessage +***************************end diagnose CSI exposer[velero/fake-backup]*************************** +`, + }, + { + name: "pvc with volume name, pv exists", + ownerBackup: backup, + kubeClientObj: []runtime.Object{ + &backupPodWithNodeName, + &backupPVCWithVolumeName, + &backupPV, + &nodeAgentPod, + }, + snapshotClientObj: []runtime.Object{ + &backupVSWithoutVSC, + }, + expected: `***************************begin diagnose CSI exposer[velero/fake-backup]*************************** +Pod velero/fake-backup, phase Pending, node name fake-node +Pod condition Initialized, status True, reason , message fake-pod-message +PVC velero/fake-backup, phase Pending, binding to fake-pv +PV fake-pv, phase Pending, reason , message fake-pv-message +VS velero/fake-backup, bind to , readyToUse false, errMessage +***************************end diagnose CSI exposer[velero/fake-backup]*************************** +`, + }, + { + name: "vs with vsc, vsc doesn't exist", + ownerBackup: 
backup, + kubeClientObj: []runtime.Object{ + &backupPodWithNodeName, + &backupPVCWithVolumeName, + &backupPV, + &nodeAgentPod, + }, + snapshotClientObj: []runtime.Object{ + &backupVSWithVSC, + }, + expected: `***************************begin diagnose CSI exposer[velero/fake-backup]*************************** +Pod velero/fake-backup, phase Pending, node name fake-node +Pod condition Initialized, status True, reason , message fake-pod-message +PVC velero/fake-backup, phase Pending, binding to fake-pv +PV fake-pv, phase Pending, reason , message fake-pv-message +VS velero/fake-backup, bind to fake-vsc, readyToUse false, errMessage fake-vs-message +error getting backup vsc fake-vsc, err: volumesnapshotcontents.snapshot.storage.k8s.io "fake-vsc" not found +***************************end diagnose CSI exposer[velero/fake-backup]*************************** +`, + }, + { + name: "vs with vsc, vsc exists", + ownerBackup: backup, + kubeClientObj: []runtime.Object{ + &backupPodWithNodeName, + &backupPVCWithVolumeName, + &backupPV, + &nodeAgentPod, + }, + snapshotClientObj: []runtime.Object{ + &backupVSWithVSC, + &backupVSC, + }, + expected: `***************************begin diagnose CSI exposer[velero/fake-backup]*************************** +Pod velero/fake-backup, phase Pending, node name fake-node +Pod condition Initialized, status True, reason , message fake-pod-message +PVC velero/fake-backup, phase Pending, binding to fake-pv +PV fake-pv, phase Pending, reason , message fake-pv-message +VS velero/fake-backup, bind to fake-vsc, readyToUse false, errMessage fake-vs-message +VSC fake-vsc, readyToUse false, errMessage fake-vsc-message, handle +***************************end diagnose CSI exposer[velero/fake-backup]*************************** +`, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fakeKubeClient := fake.NewSimpleClientset(tt.kubeClientObj...) + fakeSnapshotClient := snapshotFake.NewSimpleClientset(tt.snapshotClientObj...) 
+ e := &csiSnapshotExposer{ + kubeClient: fakeKubeClient, + csiSnapshotClient: fakeSnapshotClient.SnapshotV1(), + log: velerotest.NewLogger(), + } + var ownerObject corev1.ObjectReference + if tt.ownerBackup != nil { + ownerObject = corev1.ObjectReference{ + Kind: tt.ownerBackup.Kind, + Namespace: tt.ownerBackup.Namespace, + Name: tt.ownerBackup.Name, + UID: tt.ownerBackup.UID, + APIVersion: tt.ownerBackup.APIVersion, + } + } + + diag := e.DiagnoseExpose(context.Background(), ownerObject) + assert.Equal(t, tt.expected, diag) + }) + } +} diff --git a/pkg/exposer/generic_restore.go b/pkg/exposer/generic_restore.go index 8d5ae9a795..b33a60484e 100644 --- a/pkg/exposer/generic_restore.go +++ b/pkg/exposer/generic_restore.go @@ -208,12 +208,12 @@ func (e *genericRestoreExposer) DiagnoseExpose(ctx context.Context, ownerObject pod, err := e.kubeClient.CoreV1().Pods(ownerObject.Namespace).Get(ctx, restorePodName, metav1.GetOptions{}) if err != nil { - diag += fmt.Sprintf("error to get restore pod %s, err: %v\n", restorePodName, err) + diag += fmt.Sprintf("error getting restore pod %s, err: %v\n", restorePodName, err) } pvc, err := e.kubeClient.CoreV1().PersistentVolumeClaims(ownerObject.Namespace).Get(ctx, restorePVCName, metav1.GetOptions{}) if err != nil { - diag += fmt.Sprintf("error to get restore pvc %s, err: %v\n", restorePVCName, err) + diag += fmt.Sprintf("error getting restore pvc %s, err: %v\n", restorePVCName, err) } if pod != nil { @@ -231,7 +231,7 @@ func (e *genericRestoreExposer) DiagnoseExpose(ctx context.Context, ownerObject if pvc.Spec.VolumeName != "" { if pv, err := e.kubeClient.CoreV1().PersistentVolumes().Get(ctx, pvc.Spec.VolumeName, metav1.GetOptions{}); err != nil { - diag += fmt.Sprintf("error getting backup pv %s, err: %v\n", pvc.Spec.VolumeName, err) + diag += fmt.Sprintf("error getting restore pv %s, err: %v\n", pvc.Spec.VolumeName, err) } else { diag += kube.DiagnosePV(pv) } diff --git a/pkg/exposer/generic_restore_test.go b/pkg/exposer/generic_restore_test.go index 4c3221b5ca..bcc78b9810 100644 --- a/pkg/exposer/generic_restore_test.go +++ b/pkg/exposer/generic_restore_test.go @@ -507,3 +507,265 @@ func TestRestorePeekExpose(t *testing.T) { }) } } + +func Test_ReastoreDiagnoseExpose(t *testing.T) { + restore := &velerov1.Restore{ + TypeMeta: metav1.TypeMeta{ + APIVersion: velerov1.SchemeGroupVersion.String(), + Kind: "Restore", + }, + ObjectMeta: metav1.ObjectMeta{ + Namespace: velerov1.DefaultNamespace, + Name: "fake-restore", + UID: "fake-uid", + }, + } + + restorePodWithoutNodeName := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: velerov1.DefaultNamespace, + Name: "fake-restore", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: restore.APIVersion, + Kind: restore.Kind, + Name: restore.Name, + UID: restore.UID, + }, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodPending, + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodInitialized, + Status: corev1.ConditionTrue, + Message: "fake-pod-message", + }, + }, + }, + } + + restorePodWithNodeName := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: velerov1.DefaultNamespace, + Name: "fake-restore", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: restore.APIVersion, + Kind: restore.Kind, + Name: restore.Name, + UID: restore.UID, + }, + }, + }, + Spec: corev1.PodSpec{ + NodeName: "fake-node", + }, + Status: corev1.PodStatus{ + Phase: corev1.PodPending, + Conditions: []corev1.PodCondition{ + { + Type: corev1.PodInitialized, + Status: corev1.ConditionTrue, + 
Message: "fake-pod-message", + }, + }, + }, + } + + restorePVCWithoutVolumeName := corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: velerov1.DefaultNamespace, + Name: "fake-restore", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: restore.APIVersion, + Kind: restore.Kind, + Name: restore.Name, + UID: restore.UID, + }, + }, + }, + Status: corev1.PersistentVolumeClaimStatus{ + Phase: corev1.ClaimPending, + }, + } + + restorePVCWithVolumeName := corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: velerov1.DefaultNamespace, + Name: "fake-restore", + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: restore.APIVersion, + Kind: restore.Kind, + Name: restore.Name, + UID: restore.UID, + }, + }, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + VolumeName: "fake-pv", + }, + Status: corev1.PersistentVolumeClaimStatus{ + Phase: corev1.ClaimPending, + }, + } + + restorePV := corev1.PersistentVolume{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-pv", + }, + Status: corev1.PersistentVolumeStatus{ + Phase: corev1.VolumePending, + Message: "fake-pv-message", + }, + } + + nodeAgentPod := corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: velerov1.DefaultNamespace, + Name: "node-agent-pod-1", + Labels: map[string]string{"name": "node-agent"}, + }, + Spec: corev1.PodSpec{ + NodeName: "fake-node", + }, + Status: corev1.PodStatus{ + Phase: corev1.PodRunning, + }, + } + + tests := []struct { + name string + ownerRestore *velerov1.Restore + kubeClientObj []runtime.Object + expected string + }{ + { + name: "no pod, pvc", + ownerRestore: restore, + expected: `***************************begin diagnose restore exposer[velero/fake-restore]*************************** +error getting restore pod fake-restore, err: pods "fake-restore" not found +error getting restore pvc fake-restore, err: persistentvolumeclaims "fake-restore" not found +***************************end diagnose restore exposer[velero/fake-restore]*************************** +`, + }, + { + name: "pod without node name, pvc without volume name, vs without status", + ownerRestore: restore, + kubeClientObj: []runtime.Object{ + &restorePodWithoutNodeName, + &restorePVCWithoutVolumeName, + }, + expected: `***************************begin diagnose restore exposer[velero/fake-restore]*************************** +Pod velero/fake-restore, phase Pending, node name +Pod condition Initialized, status True, reason , message fake-pod-message +PVC velero/fake-restore, phase Pending, binding to +***************************end diagnose restore exposer[velero/fake-restore]*************************** +`, + }, + { + name: "pod without node name, pvc without volume name", + ownerRestore: restore, + kubeClientObj: []runtime.Object{ + &restorePodWithoutNodeName, + &restorePVCWithoutVolumeName, + }, + expected: `***************************begin diagnose restore exposer[velero/fake-restore]*************************** +Pod velero/fake-restore, phase Pending, node name +Pod condition Initialized, status True, reason , message fake-pod-message +PVC velero/fake-restore, phase Pending, binding to +***************************end diagnose restore exposer[velero/fake-restore]*************************** +`, + }, + { + name: "pod with node name, no node agent", + ownerRestore: restore, + kubeClientObj: []runtime.Object{ + &restorePodWithNodeName, + &restorePVCWithoutVolumeName, + }, + expected: `***************************begin diagnose restore exposer[velero/fake-restore]*************************** +Pod 
velero/fake-restore, phase Pending, node name fake-node +Pod condition Initialized, status True, reason , message fake-pod-message +node-agent is not running in node fake-node +PVC velero/fake-restore, phase Pending, binding to +***************************end diagnose restore exposer[velero/fake-restore]*************************** +`, + }, + { + name: "pod with node name, node agent is running", + ownerRestore: restore, + kubeClientObj: []runtime.Object{ + &restorePodWithNodeName, + &restorePVCWithoutVolumeName, + &nodeAgentPod, + }, + expected: `***************************begin diagnose restore exposer[velero/fake-restore]*************************** +Pod velero/fake-restore, phase Pending, node name fake-node +Pod condition Initialized, status True, reason , message fake-pod-message +PVC velero/fake-restore, phase Pending, binding to +***************************end diagnose restore exposer[velero/fake-restore]*************************** +`, + }, + { + name: "pvc with volume name, no pv", + ownerRestore: restore, + kubeClientObj: []runtime.Object{ + &restorePodWithNodeName, + &restorePVCWithVolumeName, + &nodeAgentPod, + }, + expected: `***************************begin diagnose restore exposer[velero/fake-restore]*************************** +Pod velero/fake-restore, phase Pending, node name fake-node +Pod condition Initialized, status True, reason , message fake-pod-message +PVC velero/fake-restore, phase Pending, binding to fake-pv +error getting restore pv fake-pv, err: persistentvolumes "fake-pv" not found +***************************end diagnose restore exposer[velero/fake-restore]*************************** +`, + }, + { + name: "pvc with volume name, pv exists", + ownerRestore: restore, + kubeClientObj: []runtime.Object{ + &restorePodWithNodeName, + &restorePVCWithVolumeName, + &restorePV, + &nodeAgentPod, + }, + expected: `***************************begin diagnose restore exposer[velero/fake-restore]*************************** +Pod velero/fake-restore, phase Pending, node name fake-node +Pod condition Initialized, status True, reason , message fake-pod-message +PVC velero/fake-restore, phase Pending, binding to fake-pv +PV fake-pv, phase Pending, reason , message fake-pv-message +***************************end diagnose restore exposer[velero/fake-restore]*************************** +`, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + fakeKubeClient := fake.NewSimpleClientset(test.kubeClientObj...) 
+ + e := genericRestoreExposer{ + kubeClient: fakeKubeClient, + log: velerotest.NewLogger(), + } + + var ownerObject corev1api.ObjectReference + if test.ownerRestore != nil { + ownerObject = corev1api.ObjectReference{ + Kind: test.ownerRestore.Kind, + Namespace: test.ownerRestore.Namespace, + Name: test.ownerRestore.Name, + UID: test.ownerRestore.UID, + APIVersion: test.ownerRestore.APIVersion, + } + } + + diag := e.DiagnoseExpose(context.Background(), ownerObject) + assert.Equal(t, test.expected, diag) + }) + } +} diff --git a/pkg/util/csi/volume_snapshot.go b/pkg/util/csi/volume_snapshot.go index fcd6836388..738bd91481 100644 --- a/pkg/util/csi/volume_snapshot.go +++ b/pkg/util/csi/volume_snapshot.go @@ -776,42 +776,48 @@ func WaitUntilVSCHandleIsReady( func DiagnoseVS(vs *snapshotv1api.VolumeSnapshot) string { vscName := "" - if vs.Status.BoundVolumeSnapshotContentName != nil { - vscName = *vs.Status.BoundVolumeSnapshotContentName - } - readyToUse := false - if vs.Status.ReadyToUse != nil { - readyToUse = *vs.Status.ReadyToUse - } - errMessage := "" - if vs.Status.Error != nil && vs.Status.Error.Message != nil { - errMessage = *vs.Status.Error.Message + + if vs.Status != nil { + if vs.Status.BoundVolumeSnapshotContentName != nil { + vscName = *vs.Status.BoundVolumeSnapshotContentName + } + + if vs.Status.ReadyToUse != nil { + readyToUse = *vs.Status.ReadyToUse + } + + if vs.Status.Error != nil && vs.Status.Error.Message != nil { + errMessage = *vs.Status.Error.Message + } } - diag := fmt.Sprintf("VS %s/%s, bind to %s, readToUse %v, errMessage %s\n", vs.Namespace, vs.Name, vscName, readyToUse, errMessage) + diag := fmt.Sprintf("VS %s/%s, bind to %s, readyToUse %v, errMessage %s\n", vs.Namespace, vs.Name, vscName, readyToUse, errMessage) return diag } func DiagnoseVSC(vsc *snapshotv1api.VolumeSnapshotContent) string { handle := "" - if vsc.Status.SnapshotHandle != nil { - handle = *vsc.Status.SnapshotHandle - } - readyToUse := false - if vsc.Status.ReadyToUse != nil { - readyToUse = *vsc.Status.ReadyToUse - } - errMessage := "" - if vsc.Status.Error != nil && vsc.Status.Error.Message != nil { - errMessage = *vsc.Status.Error.Message + + if vsc.Status != nil { + if vsc.Status.SnapshotHandle != nil { + handle = *vsc.Status.SnapshotHandle + } + + if vsc.Status.ReadyToUse != nil { + readyToUse = *vsc.Status.ReadyToUse + } + + if vsc.Status.Error != nil && vsc.Status.Error.Message != nil { + errMessage = *vsc.Status.Error.Message + } } - diag := fmt.Sprintf("VSC %s, readToUse %v, errMessage %s, handle %s\n", vsc.Name, readyToUse, errMessage, handle) + diag := fmt.Sprintf("VSC %s, readyToUse %v, errMessage %s, handle %s\n", vsc.Name, readyToUse, errMessage, handle) return diag } diff --git a/pkg/util/csi/volume_snapshot_test.go b/pkg/util/csi/volume_snapshot_test.go index 3876d96edb..79adaf0025 100644 --- a/pkg/util/csi/volume_snapshot_test.go +++ b/pkg/util/csi/volume_snapshot_test.go @@ -1655,3 +1655,197 @@ func TestWaitUntilVSCHandleIsReady(t *testing.T) { }) } } + +func TestDiagnoseVS(t *testing.T) { + vscName := "fake-vsc" + readyToUse := true + message := "fake-message" + + testCases := []struct { + name string + vs *snapshotv1api.VolumeSnapshot + expected string + }{ + { + name: "VS with no status", + vs: &snapshotv1api.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-vs", + Namespace: "fake-ns", + }, + }, + expected: "VS fake-ns/fake-vs, bind to , readyToUse false, errMessage \n", + }, + { + name: "VS with empty status", + vs: &snapshotv1api.VolumeSnapshot{ + ObjectMeta: 
metav1.ObjectMeta{ + Name: "fake-vs", + Namespace: "fake-ns", + }, + Status: &snapshotv1api.VolumeSnapshotStatus{}, + }, + expected: "VS fake-ns/fake-vs, bind to , readyToUse false, errMessage \n", + }, + { + name: "VS with VSC name", + vs: &snapshotv1api.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-vs", + Namespace: "fake-ns", + }, + Status: &snapshotv1api.VolumeSnapshotStatus{ + BoundVolumeSnapshotContentName: &vscName, + }, + }, + expected: "VS fake-ns/fake-vs, bind to fake-vsc, readyToUse false, errMessage \n", + }, + { + name: "VS with VSC name+ready", + vs: &snapshotv1api.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-vs", + Namespace: "fake-ns", + }, + Status: &snapshotv1api.VolumeSnapshotStatus{ + BoundVolumeSnapshotContentName: &vscName, + ReadyToUse: &readyToUse, + }, + }, + expected: "VS fake-ns/fake-vs, bind to fake-vsc, readyToUse true, errMessage \n", + }, + { + name: "VS with VSC name+ready+empty error", + vs: &snapshotv1api.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-vs", + Namespace: "fake-ns", + }, + Status: &snapshotv1api.VolumeSnapshotStatus{ + BoundVolumeSnapshotContentName: &vscName, + ReadyToUse: &readyToUse, + Error: &snapshotv1api.VolumeSnapshotError{}, + }, + }, + expected: "VS fake-ns/fake-vs, bind to fake-vsc, readyToUse true, errMessage \n", + }, + { + name: "VS with VSC name+ready+error", + vs: &snapshotv1api.VolumeSnapshot{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-vs", + Namespace: "fake-ns", + }, + Status: &snapshotv1api.VolumeSnapshotStatus{ + BoundVolumeSnapshotContentName: &vscName, + ReadyToUse: &readyToUse, + Error: &snapshotv1api.VolumeSnapshotError{ + Message: &message, + }, + }, + }, + expected: "VS fake-ns/fake-vs, bind to fake-vsc, readyToUse true, errMessage fake-message\n", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + diag := DiagnoseVS(tc.vs) + assert.Equal(t, tc.expected, diag) + }) + } +} + +func TestDiagnoseVSC(t *testing.T) { + readyToUse := true + message := "fake-message" + handle := "fake-handle" + + testCases := []struct { + name string + vsc *snapshotv1api.VolumeSnapshotContent + expected string + }{ + { + name: "VS with no status", + vsc: &snapshotv1api.VolumeSnapshotContent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-vsc", + }, + }, + expected: "VSC fake-vsc, readyToUse false, errMessage , handle \n", + }, + { + name: "VSC with empty status", + vsc: &snapshotv1api.VolumeSnapshotContent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-vsc", + }, + Status: &snapshotv1api.VolumeSnapshotContentStatus{}, + }, + expected: "VSC fake-vsc, readyToUse false, errMessage , handle \n", + }, + { + name: "VSC with ready", + vsc: &snapshotv1api.VolumeSnapshotContent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-vsc", + }, + Status: &snapshotv1api.VolumeSnapshotContentStatus{ + ReadyToUse: &readyToUse, + }, + }, + expected: "VSC fake-vsc, readyToUse true, errMessage , handle \n", + }, + { + name: "VSC with ready+handle", + vsc: &snapshotv1api.VolumeSnapshotContent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-vsc", + }, + Status: &snapshotv1api.VolumeSnapshotContentStatus{ + ReadyToUse: &readyToUse, + SnapshotHandle: &handle, + }, + }, + expected: "VSC fake-vsc, readyToUse true, errMessage , handle fake-handle\n", + }, + { + name: "VSC with ready+handle+empty error", + vsc: &snapshotv1api.VolumeSnapshotContent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-vsc", + }, + Status: &snapshotv1api.VolumeSnapshotContentStatus{ + ReadyToUse: &readyToUse, 
+ SnapshotHandle: &handle, + Error: &snapshotv1api.VolumeSnapshotError{}, + }, + }, + expected: "VSC fake-vsc, readyToUse true, errMessage , handle fake-handle\n", + }, + { + name: "VSC with ready+handle+error", + vsc: &snapshotv1api.VolumeSnapshotContent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-vsc", + }, + Status: &snapshotv1api.VolumeSnapshotContentStatus{ + ReadyToUse: &readyToUse, + SnapshotHandle: &handle, + Error: &snapshotv1api.VolumeSnapshotError{ + Message: &message, + }, + }, + }, + expected: "VSC fake-vsc, readyToUse true, errMessage fake-message, handle fake-handle\n", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + diag := DiagnoseVSC(tc.vsc) + assert.Equal(t, tc.expected, diag) + }) + } +} diff --git a/pkg/util/kube/pod.go b/pkg/util/kube/pod.go index 9f126a71a1..bd8bda01d5 100644 --- a/pkg/util/kube/pod.go +++ b/pkg/util/kube/pod.go @@ -262,7 +262,7 @@ func DiagnosePod(pod *corev1api.Pod) string { diag := fmt.Sprintf("Pod %s/%s, phase %s, node name %s\n", pod.Namespace, pod.Name, pod.Status.Phase, pod.Spec.NodeName) for _, condition := range pod.Status.Conditions { - diag += fmt.Sprintf("Pod condition %s, reason %s, message %s\n", condition.Type, condition.Reason, condition.Message) + diag += fmt.Sprintf("Pod condition %s, status %s, reason %s, message %s\n", condition.Type, condition.Status, condition.Reason, condition.Message) } return diag diff --git a/pkg/util/kube/pod_test.go b/pkg/util/kube/pod_test.go index 0e76899a5e..387c29d140 100644 --- a/pkg/util/kube/pod_test.go +++ b/pkg/util/kube/pod_test.go @@ -846,3 +846,49 @@ func TestToSystemAffinity(t *testing.T) { }) } } + +func TestDiagnosePod(t *testing.T) { + testCases := []struct { + name string + pod *corev1api.Pod + expected string + }{ + { + name: "pod with all info", + pod: &corev1api.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fake-pod", + Namespace: "fake-ns", + }, + Spec: corev1api.PodSpec{ + NodeName: "fake-node", + }, + Status: corev1api.PodStatus{ + Phase: corev1api.PodPending, + Conditions: []corev1api.PodCondition{ + { + Type: corev1api.PodInitialized, + Status: corev1api.ConditionTrue, + Reason: "fake-reason-1", + Message: "fake-message-1", + }, + { + Type: corev1api.PodScheduled, + Status: corev1api.ConditionFalse, + Reason: "fake-reason-2", + Message: "fake-message-2", + }, + }, + }, + }, + expected: "Pod fake-ns/fake-pod, phase Pending, node name fake-node\nPod condition Initialized, status True, reason fake-reason-1, message fake-message-1\nPod condition PodScheduled, status False, reason fake-reason-2, message fake-message-2\n", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + diag := DiagnosePod(tc.pod) + assert.Equal(t, tc.expected, diag) + }) + } +} diff --git a/pkg/util/kube/pvc_pv.go b/pkg/util/kube/pvc_pv.go index ac7d15fbfd..da9dfb2c74 100644 --- a/pkg/util/kube/pvc_pv.go +++ b/pkg/util/kube/pvc_pv.go @@ -414,15 +414,7 @@ func GetPVCForPodVolume(vol *corev1api.Volume, pod *corev1api.Pod, crClient crcl } func DiagnosePVC(pvc *corev1api.PersistentVolumeClaim) string { - diag := fmt.Sprintf("PVC %s/%s, phase %s\n", pvc.Namespace, pvc.Name, pvc.Status.Phase) - - for _, condition := range pvc.Status.Conditions { - diag += fmt.Sprintf("PVC condition %s, reason %s, message %s\n", condition.Type, condition.Reason, condition.Message) - } - - diag += fmt.Sprintf("PVC is binding to %s\n", pvc.Spec.VolumeName) - - return diag + return fmt.Sprintf("PVC %s/%s, phase %s, binding to %s\n", pvc.Namespace, pvc.Name, 
pvc.Status.Phase, pvc.Spec.VolumeName)
 }
 
 func DiagnosePV(pv *corev1api.PersistentVolume) string {
diff --git a/pkg/util/kube/pvc_pv_test.go b/pkg/util/kube/pvc_pv_test.go
index 00c3962c5c..8304ebf3be 100644
--- a/pkg/util/kube/pvc_pv_test.go
+++ b/pkg/util/kube/pvc_pv_test.go
@@ -1465,4 +1465,63 @@ func TestMakePodPVCAttachment(t *testing.T) {
 }
 
 func TestDiagnosePVC(t *testing.T) {
+	testCases := []struct {
+		name     string
+		pvc      *corev1api.PersistentVolumeClaim
+		expected string
+	}{
+		{
+			name: "pvc with all info",
+			pvc: &corev1api.PersistentVolumeClaim{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "fake-pvc",
+					Namespace: "fake-ns",
+				},
+				Spec: corev1api.PersistentVolumeClaimSpec{
+					VolumeName: "fake-pv",
+				},
+				Status: corev1api.PersistentVolumeClaimStatus{
+					Phase: corev1api.ClaimPending,
+				},
+			},
+			expected: "PVC fake-ns/fake-pvc, phase Pending, binding to fake-pv\n",
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			diag := DiagnosePVC(tc.pvc)
+			assert.Equal(t, tc.expected, diag)
+		})
+	}
+}
+
+func TestDiagnosePV(t *testing.T) {
+	testCases := []struct {
+		name     string
+		pv       *corev1api.PersistentVolume
+		expected string
+	}{
+		{
+			name: "pv with all info",
+			pv: &corev1api.PersistentVolume{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: "fake-pv",
+				},
+				Status: corev1api.PersistentVolumeStatus{
+					Phase:   corev1api.VolumePending,
+					Message: "fake-message",
+					Reason:  "fake-reason",
+				},
+			},
+			expected: "PV fake-pv, phase Pending, reason fake-reason, message fake-message\n",
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			diag := DiagnosePV(tc.pv)
+			assert.Equal(t, tc.expected, diag)
+		})
+	}
 }

From a217e332210ecf7289e9ac1b301e8d678418a4bf Mon Sep 17 00:00:00 2001
From: Lyndon-Li
Date: Thu, 5 Dec 2024 15:03:33 +0800
Subject: [PATCH 3/3] add diagnostic for data mover exposer

Signed-off-by: Lyndon-Li
---
 changelogs/unreleased/8482-Lyndon-Li        |  1 -
 changelogs/unreleased/8511-Lyndon-Li        |  1 +
 pkg/controller/data_download_controller.go  |  6 ++-
 pkg/controller/data_upload_controller.go    |  6 ++-
 pkg/exposer/csi_snapshot.go                 |  9 +++--
 pkg/exposer/csi_snapshot_test.go            | 47 +++++++++-------------
 pkg/exposer/generic_restore.go              |  8 ++--
 pkg/exposer/generic_restore_test.go         | 37 +++++++----------
 8 files changed, 56 insertions(+), 59 deletions(-)
 delete mode 100644 changelogs/unreleased/8482-Lyndon-Li
 create mode 100644 changelogs/unreleased/8511-Lyndon-Li

diff --git a/changelogs/unreleased/8482-Lyndon-Li b/changelogs/unreleased/8482-Lyndon-Li
deleted file mode 100644
index c1cec7e241..0000000000
--- a/changelogs/unreleased/8482-Lyndon-Li
+++ /dev/null
@@ -1 +0,0 @@
-Fix issue #8125, add diagnostic info for data mover exposers when expose timeout
\ No newline at end of file
diff --git a/changelogs/unreleased/8511-Lyndon-Li b/changelogs/unreleased/8511-Lyndon-Li
new file mode 100644
index 0000000000..33be17e0af
--- /dev/null
+++ b/changelogs/unreleased/8511-Lyndon-Li
@@ -0,0 +1 @@
+Fix issue #8125, log diagnostic info for data mover exposers when expose timeout
\ No newline at end of file
diff --git a/pkg/controller/data_download_controller.go b/pkg/controller/data_download_controller.go
index 95a08ef5c9..c784811ab5 100644
--- a/pkg/controller/data_download_controller.go
+++ b/pkg/controller/data_download_controller.go
@@ -19,6 +19,7 @@ package controller
 import (
 	"context"
 	"fmt"
+	"strings"
 	"time"
 
 	"github.com/pkg/errors"
@@ -689,7 +690,10 @@ func (r *DataDownloadReconciler) onPrepareTimeout(ctx context.Context, dd *veler
 		return
 	}
 
-	log.Warn(r.restoreExposer.DiagnoseExpose(ctx, getDataDownloadOwnerObject(dd)))
+	diags := strings.Split(r.restoreExposer.DiagnoseExpose(ctx, getDataDownloadOwnerObject(dd)), "\n")
+	for _, diag := range diags {
+		log.Warnf("[Diagnose DD expose]%s", diag)
+	}
 
 	r.restoreExposer.CleanUp(ctx, getDataDownloadOwnerObject(dd))
 
diff --git a/pkg/controller/data_upload_controller.go b/pkg/controller/data_upload_controller.go
index f58aec1fbc..fb96300746 100644
--- a/pkg/controller/data_upload_controller.go
+++ b/pkg/controller/data_upload_controller.go
@@ -19,6 +19,7 @@ package controller
 import (
 	"context"
 	"fmt"
+	"strings"
 	"time"
 
 	snapshotter "github.com/kubernetes-csi/external-snapshotter/client/v7/clientset/versioned/typed/volumesnapshot/v1"
@@ -758,7 +759,10 @@ func (r *DataUploadReconciler) onPrepareTimeout(ctx context.Context, du *velerov
 		volumeSnapshotName = du.Spec.CSISnapshot.VolumeSnapshot
 	}
 
-	log.Warn(ep.DiagnoseExpose(ctx, getOwnerObject(du)))
+	diags := strings.Split(ep.DiagnoseExpose(ctx, getOwnerObject(du)), "\n")
+	for _, diag := range diags {
+		log.Warnf("[Diagnose DU expose]%s", diag)
+	}
 
 	ep.CleanUp(ctx, getOwnerObject(du), volumeSnapshotName, du.Spec.SourceNamespace)
 
diff --git a/pkg/exposer/csi_snapshot.go b/pkg/exposer/csi_snapshot.go
index 64fe7230f3..871d54cec8 100644
--- a/pkg/exposer/csi_snapshot.go
+++ b/pkg/exposer/csi_snapshot.go
@@ -313,20 +313,23 @@ func (e *csiSnapshotExposer) DiagnoseExpose(ctx context.Context, ownerObject cor
 	backupPVCName := ownerObject.Name
 	backupVSName := ownerObject.Name
 
-	diag := fmt.Sprintf("***************************begin diagnose CSI exposer[%s/%s]***************************\n", ownerObject.Namespace, ownerObject.Name)
+	diag := "begin diagnose CSI exposer\n"
 
 	pod, err := e.kubeClient.CoreV1().Pods(ownerObject.Namespace).Get(ctx, backupPodName, metav1.GetOptions{})
 	if err != nil {
+		pod = nil
 		diag += fmt.Sprintf("error getting backup pod %s, err: %v\n", backupPodName, err)
 	}
 
 	pvc, err := e.kubeClient.CoreV1().PersistentVolumeClaims(ownerObject.Namespace).Get(ctx, backupPVCName, metav1.GetOptions{})
 	if err != nil {
+		pvc = nil
 		diag += fmt.Sprintf("error getting backup pvc %s, err: %v\n", backupPVCName, err)
 	}
 
 	vs, err := e.csiSnapshotClient.VolumeSnapshots(ownerObject.Namespace).Get(ctx, backupVSName, metav1.GetOptions{})
 	if err != nil {
+		vs = nil
 		diag += fmt.Sprintf("error getting backup vs %s, err: %v\n", backupVSName, err)
 	}
 
@@ -335,7 +338,7 @@ func (e *csiSnapshotExposer) DiagnoseExpose(ctx context.Context, ownerObject cor
 
 		if pod.Spec.NodeName != "" {
 			if err := nodeagent.KbClientIsRunningInNode(ctx, ownerObject.Namespace, pod.Spec.NodeName, e.kubeClient); err != nil {
-				diag += fmt.Sprintf("node-agent is not running in node %s\n", pod.Spec.NodeName)
+				diag += fmt.Sprintf("node-agent is not running in node %s, err: %v\n", pod.Spec.NodeName, err)
 			}
 		}
 	}
@@ -364,7 +367,7 @@ func (e *csiSnapshotExposer) DiagnoseExpose(ctx context.Context, ownerObject cor
 		}
 	}
 
-	diag += fmt.Sprintf("***************************end diagnose CSI exposer[%s/%s]***************************\n", ownerObject.Namespace, ownerObject.Name)
+	diag += "end diagnose CSI exposer"
 
 	return diag
 }
diff --git a/pkg/exposer/csi_snapshot_test.go b/pkg/exposer/csi_snapshot_test.go
index d7e4a768ed..77d7926356 100644
--- a/pkg/exposer/csi_snapshot_test.go
+++ b/pkg/exposer/csi_snapshot_test.go
@@ -1166,12 +1166,11 @@ func Test_csiSnapshotExposer_DiagnoseExpose(t *testing.T) {
 		{
 			name: "no pod, pvc, vs",
 			ownerBackup: backup,
-			expected: `***************************begin diagnose CSI exposer[velero/fake-backup]***************************
+			expected: `begin diagnose CSI exposer
 error getting backup pod fake-backup, err: pods "fake-backup" not found
 error getting backup pvc fake-backup, err: persistentvolumeclaims "fake-backup" not found
 error getting backup vs fake-backup, err: volumesnapshots.snapshot.storage.k8s.io "fake-backup" not found
-***************************end diagnose CSI exposer[velero/fake-backup]***************************
-`,
+end diagnose CSI exposer`,
 		},
 		{
 			name: "pod without node name, pvc without volume name, vs without status",
@@ -1183,13 +1182,12 @@ error getting backup vs fake-backup, err: volumesnapshots.snapshot.storage.k8s.i
 			snapshotClientObj: []runtime.Object{
 				&backupVSWithoutStatus,
 			},
-			expected: `***************************begin diagnose CSI exposer[velero/fake-backup]***************************
+			expected: `begin diagnose CSI exposer
 Pod velero/fake-backup, phase Pending, node name 
 Pod condition Initialized, status True, reason , message fake-pod-message
 PVC velero/fake-backup, phase Pending, binding to 
 VS velero/fake-backup, bind to , readyToUse false, errMessage 
-***************************end diagnose CSI exposer[velero/fake-backup]***************************
-`,
+end diagnose CSI exposer`,
 		},
 		{
 			name: "pod without node name, pvc without volume name, vs without VSC",
@@ -1201,13 +1199,12 @@ VS velero/fake-backup, bind to , readyToUse false, errMessage
 			snapshotClientObj: []runtime.Object{
 				&backupVSWithoutVSC,
 			},
-			expected: `***************************begin diagnose CSI exposer[velero/fake-backup]***************************
+			expected: `begin diagnose CSI exposer
 Pod velero/fake-backup, phase Pending, node name 
 Pod condition Initialized, status True, reason , message fake-pod-message
 PVC velero/fake-backup, phase Pending, binding to 
 VS velero/fake-backup, bind to , readyToUse false, errMessage 
-***************************end diagnose CSI exposer[velero/fake-backup]***************************
-`,
+end diagnose CSI exposer`,
 		},
 		{
 			name: "pod with node name, no node agent",
@@ -1219,14 +1216,13 @@ VS velero/fake-backup, bind to , readyToUse false, errMessage
 			snapshotClientObj: []runtime.Object{
 				&backupVSWithoutVSC,
 			},
-			expected: `***************************begin diagnose CSI exposer[velero/fake-backup]***************************
+			expected: `begin diagnose CSI exposer
 Pod velero/fake-backup, phase Pending, node name fake-node
 Pod condition Initialized, status True, reason , message fake-pod-message
-node-agent is not running in node fake-node
+node-agent is not running in node fake-node, err: daemonset pod not found in running state in node fake-node
 PVC velero/fake-backup, phase Pending, binding to 
 VS velero/fake-backup, bind to , readyToUse false, errMessage 
-***************************end diagnose CSI exposer[velero/fake-backup]***************************
-`,
+end diagnose CSI exposer`,
 		},
 		{
 			name: "pod with node name, node agent is running",
@@ -1239,13 +1235,12 @@ VS velero/fake-backup, bind to , readyToUse false, errMessage
 			snapshotClientObj: []runtime.Object{
 				&backupVSWithoutVSC,
 			},
-			expected: `***************************begin diagnose CSI exposer[velero/fake-backup]***************************
+			expected: `begin diagnose CSI exposer
 Pod velero/fake-backup, phase Pending, node name fake-node
 Pod condition Initialized, status True, reason , message fake-pod-message
 PVC velero/fake-backup, phase Pending, binding to 
 VS velero/fake-backup, bind to , readyToUse false, errMessage 
-***************************end diagnose CSI exposer[velero/fake-backup]***************************
-`,
+end diagnose CSI exposer`,
 		},
 		{
 			name: "pvc with volume name, no pv",
@@ -1258,14 +1253,13 @@ VS velero/fake-backup, bind to , readyToUse false, errMessage
 			snapshotClientObj: []runtime.Object{
 				&backupVSWithoutVSC,
 			},
-			expected: `***************************begin diagnose CSI exposer[velero/fake-backup]***************************
+			expected: `begin diagnose CSI exposer
 Pod velero/fake-backup, phase Pending, node name fake-node
 Pod condition Initialized, status True, reason , message fake-pod-message
 PVC velero/fake-backup, phase Pending, binding to fake-pv
 error getting backup pv fake-pv, err: persistentvolumes "fake-pv" not found
 VS velero/fake-backup, bind to , readyToUse false, errMessage 
-***************************end diagnose CSI exposer[velero/fake-backup]***************************
-`,
+end diagnose CSI exposer`,
 		},
 		{
 			name: "pvc with volume name, pv exists",
@@ -1279,14 +1273,13 @@ VS velero/fake-backup, bind to , readyToUse false, errMessage
 			snapshotClientObj: []runtime.Object{
 				&backupVSWithoutVSC,
 			},
-			expected: `***************************begin diagnose CSI exposer[velero/fake-backup]***************************
+			expected: `begin diagnose CSI exposer
 Pod velero/fake-backup, phase Pending, node name fake-node
 Pod condition Initialized, status True, reason , message fake-pod-message
 PVC velero/fake-backup, phase Pending, binding to fake-pv
 PV fake-pv, phase Pending, reason , message fake-pv-message
 VS velero/fake-backup, bind to , readyToUse false, errMessage 
-***************************end diagnose CSI exposer[velero/fake-backup]***************************
-`,
+end diagnose CSI exposer`,
 		},
 		{
 			name: "vs with vsc, vsc doesn't exist",
@@ -1300,15 +1293,14 @@ VS velero/fake-backup, bind to , readyToUse false, errMessage
 			snapshotClientObj: []runtime.Object{
 				&backupVSWithVSC,
 			},
-			expected: `***************************begin diagnose CSI exposer[velero/fake-backup]***************************
+			expected: `begin diagnose CSI exposer
 Pod velero/fake-backup, phase Pending, node name fake-node
 Pod condition Initialized, status True, reason , message fake-pod-message
 PVC velero/fake-backup, phase Pending, binding to fake-pv
 PV fake-pv, phase Pending, reason , message fake-pv-message
 VS velero/fake-backup, bind to fake-vsc, readyToUse false, errMessage fake-vs-message
 error getting backup vsc fake-vsc, err: volumesnapshotcontents.snapshot.storage.k8s.io "fake-vsc" not found
-***************************end diagnose CSI exposer[velero/fake-backup]***************************
-`,
+end diagnose CSI exposer`,
 		},
 		{
 			name: "vs with vsc, vsc exists",
@@ -1323,15 +1315,14 @@ error getting backup vsc fake-vsc, err: volumesnapshotcontents.snapshot.storage.
 				&backupVSWithVSC,
 				&backupVSC,
 			},
-			expected: `***************************begin diagnose CSI exposer[velero/fake-backup]***************************
+			expected: `begin diagnose CSI exposer
 Pod velero/fake-backup, phase Pending, node name fake-node
 Pod condition Initialized, status True, reason , message fake-pod-message
 PVC velero/fake-backup, phase Pending, binding to fake-pv
 PV fake-pv, phase Pending, reason , message fake-pv-message
 VS velero/fake-backup, bind to fake-vsc, readyToUse false, errMessage fake-vs-message
 VSC fake-vsc, readyToUse false, errMessage fake-vsc-message, handle 
-***************************end diagnose CSI exposer[velero/fake-backup]***************************
-`,
+end diagnose CSI exposer`,
 		},
 	}
 	for _, tt := range tests {
diff --git a/pkg/exposer/generic_restore.go b/pkg/exposer/generic_restore.go
index b33a60484e..af61ed438f 100644
--- a/pkg/exposer/generic_restore.go
+++ b/pkg/exposer/generic_restore.go
@@ -204,15 +204,17 @@ func (e *genericRestoreExposer) DiagnoseExpose(ctx context.Context, ownerObject
 	restorePodName := ownerObject.Name
 	restorePVCName := ownerObject.Name
 
-	diag := fmt.Sprintf("***************************begin diagnose restore exposer[%s/%s]***************************\n", ownerObject.Namespace, ownerObject.Name)
+	diag := "begin diagnose restore exposer\n"
 
 	pod, err := e.kubeClient.CoreV1().Pods(ownerObject.Namespace).Get(ctx, restorePodName, metav1.GetOptions{})
 	if err != nil {
+		pod = nil
 		diag += fmt.Sprintf("error getting restore pod %s, err: %v\n", restorePodName, err)
 	}
 
 	pvc, err := e.kubeClient.CoreV1().PersistentVolumeClaims(ownerObject.Namespace).Get(ctx, restorePVCName, metav1.GetOptions{})
 	if err != nil {
+		pvc = nil
 		diag += fmt.Sprintf("error getting restore pvc %s, err: %v\n", restorePVCName, err)
 	}
 
@@ -221,7 +223,7 @@ func (e *genericRestoreExposer) DiagnoseExpose(ctx context.Context, ownerObject
 
 		if pod.Spec.NodeName != "" {
 			if err := nodeagent.KbClientIsRunningInNode(ctx, ownerObject.Namespace, pod.Spec.NodeName, e.kubeClient); err != nil {
-				diag += fmt.Sprintf("node-agent is not running in node %s\n", pod.Spec.NodeName)
+				diag += fmt.Sprintf("node-agent is not running in node %s, err: %v\n", pod.Spec.NodeName, err)
 			}
 		}
 	}
@@ -238,7 +240,7 @@ func (e *genericRestoreExposer) DiagnoseExpose(ctx context.Context, ownerObject
 		}
 	}
 
-	diag += fmt.Sprintf("***************************end diagnose restore exposer[%s/%s]***************************\n", ownerObject.Namespace, ownerObject.Name)
+	diag += "end diagnose restore exposer"
 
 	return diag
 }
diff --git a/pkg/exposer/generic_restore_test.go b/pkg/exposer/generic_restore_test.go
index bcc78b9810..2eec0ce182 100644
--- a/pkg/exposer/generic_restore_test.go
+++ b/pkg/exposer/generic_restore_test.go
@@ -646,11 +646,10 @@ func Test_ReastoreDiagnoseExpose(t *testing.T) {
 		{
 			name: "no pod, pvc",
 			ownerRestore: restore,
-			expected: `***************************begin diagnose restore exposer[velero/fake-restore]***************************
+			expected: `begin diagnose restore exposer
 error getting restore pod fake-restore, err: pods "fake-restore" not found
 error getting restore pvc fake-restore, err: persistentvolumeclaims "fake-restore" not found
-***************************end diagnose restore exposer[velero/fake-restore]***************************
-`,
+end diagnose restore exposer`,
 		},
 		{
 			name: "pod without node name, pvc without volume name, vs without status",
@@ -659,12 +658,11 @@ error getting restore pvc fake-restore, err: persistentvolumeclaims "fake-restor
 				&restorePodWithoutNodeName,
 				&restorePVCWithoutVolumeName,
 			},
-			expected: `***************************begin diagnose restore exposer[velero/fake-restore]***************************
+			expected: `begin diagnose restore exposer
 Pod velero/fake-restore, phase Pending, node name 
 Pod condition Initialized, status True, reason , message fake-pod-message
 PVC velero/fake-restore, phase Pending, binding to 
-***************************end diagnose restore exposer[velero/fake-restore]***************************
-`,
+end diagnose restore exposer`,
 		},
 		{
 			name: "pod without node name, pvc without volume name",
@@ -673,12 +671,11 @@ PVC velero/fake-restore, phase Pending, binding to
 				&restorePodWithoutNodeName,
 				&restorePVCWithoutVolumeName,
 			},
-			expected: `***************************begin diagnose restore exposer[velero/fake-restore]***************************
+			expected: `begin diagnose restore exposer
 Pod velero/fake-restore, phase Pending, node name 
 Pod condition Initialized, status True, reason , message fake-pod-message
 PVC velero/fake-restore, phase Pending, binding to 
-***************************end diagnose restore exposer[velero/fake-restore]***************************
-`,
+end diagnose restore exposer`,
 		},
 		{
 			name: "pod with node name, no node agent",
@@ -687,13 +684,12 @@ PVC velero/fake-restore, phase Pending, binding to
 				&restorePodWithNodeName,
 				&restorePVCWithoutVolumeName,
 			},
-			expected: `***************************begin diagnose restore exposer[velero/fake-restore]***************************
+			expected: `begin diagnose restore exposer
 Pod velero/fake-restore, phase Pending, node name fake-node
 Pod condition Initialized, status True, reason , message fake-pod-message
-node-agent is not running in node fake-node
+node-agent is not running in node fake-node, err: daemonset pod not found in running state in node fake-node
 PVC velero/fake-restore, phase Pending, binding to 
-***************************end diagnose restore exposer[velero/fake-restore]***************************
-`,
+end diagnose restore exposer`,
 		},
 		{
 			name: "pod with node name, node agent is running",
@@ -703,12 +699,11 @@ PVC velero/fake-restore, phase Pending, binding to
 				&restorePVCWithoutVolumeName,
 				&nodeAgentPod,
 			},
-			expected: `***************************begin diagnose restore exposer[velero/fake-restore]***************************
+			expected: `begin diagnose restore exposer
 Pod velero/fake-restore, phase Pending, node name fake-node
 Pod condition Initialized, status True, reason , message fake-pod-message
 PVC velero/fake-restore, phase Pending, binding to 
-***************************end diagnose restore exposer[velero/fake-restore]***************************
-`,
+end diagnose restore exposer`,
 		},
 		{
 			name: "pvc with volume name, no pv",
@@ -718,13 +713,12 @@ PVC velero/fake-restore, phase Pending, binding to
 				&restorePVCWithVolumeName,
 				&nodeAgentPod,
 			},
-			expected: `***************************begin diagnose restore exposer[velero/fake-restore]***************************
+			expected: `begin diagnose restore exposer
 Pod velero/fake-restore, phase Pending, node name fake-node
 Pod condition Initialized, status True, reason , message fake-pod-message
 PVC velero/fake-restore, phase Pending, binding to fake-pv
 error getting restore pv fake-pv, err: persistentvolumes "fake-pv" not found
-***************************end diagnose restore exposer[velero/fake-restore]***************************
-`,
+end diagnose restore exposer`,
 		},
 		{
 			name: "pvc with volume name, pv exists",
@@ -735,13 +729,12 @@ error getting restore pv fake-pv, err: persistentvolumes "fake-pv" not found
 				&restorePV,
 				&nodeAgentPod,
 			},
-			expected: `***************************begin diagnose restore exposer[velero/fake-restore]***************************
+			expected: `begin diagnose restore exposer
 Pod velero/fake-restore, phase Pending, node name fake-node
 Pod condition Initialized, status True, reason , message fake-pod-message
 PVC velero/fake-restore, phase Pending, binding to fake-pv
 PV fake-pv, phase Pending, reason , message fake-pv-message
-***************************end diagnose restore exposer[velero/fake-restore]***************************
-`,
+end diagnose restore exposer`,
 		},
 	}
 	for _, test := range tests {