Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fs-backup for windows nodes #8518

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelogs/unreleased/8518-Lyndon-Li
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Make fs-backup work on linux nodes with the new Velero deployment and disable fs-backup if the source/target pod is running in non-linux node (#8424)
19 changes: 15 additions & 4 deletions pkg/cmd/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
"github.com/vmware-tanzu/velero/pkg/restore"
"github.com/vmware-tanzu/velero/pkg/uploader"
"github.com/vmware-tanzu/velero/pkg/util/filesystem"
"github.com/vmware-tanzu/velero/pkg/util/kube"
"github.com/vmware-tanzu/velero/pkg/util/logging"
)

Expand Down Expand Up @@ -454,10 +455,20 @@

func (s *server) checkNodeAgent() {
// warn if node agent does not exist
if err := nodeagent.IsRunning(s.ctx, s.kubeClient, s.namespace); err == nodeagent.ErrDaemonSetNotFound {
s.logger.Warn("Velero node agent not found; pod volume backups/restores will not work until it's created")
} else if err != nil {
s.logger.WithError(errors.WithStack(err)).Warn("Error checking for existence of velero node agent")
if kube.WithLinuxNode(s.ctx, s.crClient, s.logger) {
if err := nodeagent.IsRunningOnLinux(s.ctx, s.kubeClient, s.namespace); err == nodeagent.ErrDaemonSetNotFound {
s.logger.Warn("Velero node agent not found for linux nodes; pod volume backups/restores and data mover backups/restores will not work until it's created")
} else if err != nil {
s.logger.WithError(errors.WithStack(err)).Warn("Error checking for existence of velero node agent for linux nodes")
}

Check warning on line 463 in pkg/cmd/server/server.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/server.go#L458-L463

Added lines #L458 - L463 were not covered by tests
}

if kube.WithWindowsNode(s.ctx, s.crClient, s.logger) {
if err := nodeagent.IsRunningOnWindows(s.ctx, s.kubeClient, s.namespace); err == nodeagent.ErrDaemonSetNotFound {
s.logger.Warn("Velero node agent not found for Windows nodes; pod volume backups/restores and data mover backups/restores will not work until it's created")
} else if err != nil {
s.logger.WithError(errors.WithStack(err)).Warn("Error checking for existence of velero node agent for Windows nodes")
}

Check warning on line 471 in pkg/cmd/server/server.go

View check run for this annotation

Codecov / codecov/patch

pkg/cmd/server/server.go#L466-L471

Added lines #L466 - L471 were not covered by tests
}
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/exposer/csi_snapshot_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1146,7 +1146,7 @@ func Test_csiSnapshotExposer_DiagnoseExpose(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{
Namespace: velerov1.DefaultNamespace,
Name: "node-agent-pod-1",
Labels: map[string]string{"name": "node-agent"},
Labels: map[string]string{"role": "node-agent"},
},
Spec: corev1.PodSpec{
NodeName: "fake-node",
Expand Down
2 changes: 1 addition & 1 deletion pkg/exposer/generic_restore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,7 @@ func Test_ReastoreDiagnoseExpose(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{
Namespace: velerov1.DefaultNamespace,
Name: "node-agent-pod-1",
Labels: map[string]string{"name": "node-agent"},
Labels: map[string]string{"role": "node-agent"},
},
Spec: corev1.PodSpec{
NodeName: "fake-node",
Expand Down
25 changes: 19 additions & 6 deletions pkg/nodeagent/node_agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,14 @@
)

const (
// daemonSet is the name of the Velero node agent daemonset.
// daemonSet is the name of the Velero node agent daemonset on linux nodes.
daemonSet = "node-agent"

// daemonsetWindows is the name of the Velero node agent daemonset on Windows nodes.
daemonsetWindows = "node-agent-windows"

// nodeAgentRole marks pods with node-agent role on all nodes.
nodeAgentRole = "node-agent"
)

var (
Expand Down Expand Up @@ -89,9 +95,16 @@
PodResources *kube.PodResources `json:"podResources,omitempty"`
}

// IsRunning checks if the node agent daemonset is running properly. If not, return the error found
func IsRunning(ctx context.Context, kubeClient kubernetes.Interface, namespace string) error {
if _, err := kubeClient.AppsV1().DaemonSets(namespace).Get(ctx, daemonSet, metav1.GetOptions{}); apierrors.IsNotFound(err) {
func IsRunningOnLinux(ctx context.Context, kubeClient kubernetes.Interface, namespace string) error {
return isRunning(ctx, kubeClient, namespace, daemonSet)

Check warning on line 99 in pkg/nodeagent/node_agent.go

View check run for this annotation

Codecov / codecov/patch

pkg/nodeagent/node_agent.go#L98-L99

Added lines #L98 - L99 were not covered by tests
}

func IsRunningOnWindows(ctx context.Context, kubeClient kubernetes.Interface, namespace string) error {
return isRunning(ctx, kubeClient, namespace, daemonsetWindows)

Check warning on line 103 in pkg/nodeagent/node_agent.go

View check run for this annotation

Codecov / codecov/patch

pkg/nodeagent/node_agent.go#L102-L103

Added lines #L102 - L103 were not covered by tests
}

func isRunning(ctx context.Context, kubeClient kubernetes.Interface, namespace string, daemonset string) error {
if _, err := kubeClient.AppsV1().DaemonSets(namespace).Get(ctx, daemonset, metav1.GetOptions{}); apierrors.IsNotFound(err) {
return ErrDaemonSetNotFound
} else if err != nil {
return err
Expand All @@ -116,7 +129,7 @@
}

pods := new(v1.PodList)
parsedSelector, err := labels.Parse(fmt.Sprintf("name=%s", daemonSet))
parsedSelector, err := labels.Parse(fmt.Sprintf("role=%s", nodeAgentRole))
if err != nil {
return errors.Wrap(err, "fail to parse selector")
}
Expand All @@ -128,7 +141,7 @@
}

if err != nil {
return errors.Wrap(err, "failed to list daemonset pods")
return errors.Wrap(err, "failed to list node-agent pods")

Check warning on line 144 in pkg/nodeagent/node_agent.go

View check run for this annotation

Codecov / codecov/patch

pkg/nodeagent/node_agent.go#L144

Added line #L144 was not covered by tests
}

for i := range pods.Items {
Expand Down
14 changes: 7 additions & 7 deletions pkg/nodeagent/node_agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ type reactor struct {
}

func TestIsRunning(t *testing.T) {
daemonSet := &appsv1.DaemonSet{
ds := &appsv1.DaemonSet{
ObjectMeta: metav1.ObjectMeta{
Namespace: "fake-ns",
Name: "node-agent",
Expand Down Expand Up @@ -80,7 +80,7 @@ func TestIsRunning(t *testing.T) {
name: "succeed",
namespace: "fake-ns",
kubeClientObj: []runtime.Object{
daemonSet,
ds,
},
},
}
Expand All @@ -93,7 +93,7 @@ func TestIsRunning(t *testing.T) {
fakeKubeClient.Fake.PrependReactor(reactor.verb, reactor.resource, reactor.reactorFunc)
}

err := IsRunning(context.TODO(), fakeKubeClient, test.namespace)
err := isRunning(context.TODO(), fakeKubeClient, test.namespace, daemonSet)
if test.expectErr == "" {
assert.NoError(t, err)
} else {
Expand All @@ -108,11 +108,11 @@ func TestIsRunningInNode(t *testing.T) {
corev1.AddToScheme(scheme)

nonNodeAgentPod := builder.ForPod("fake-ns", "fake-pod").Result()
nodeAgentPodNotRunning := builder.ForPod("fake-ns", "fake-pod").Labels(map[string]string{"name": "node-agent"}).Result()
nodeAgentPodRunning1 := builder.ForPod("fake-ns", "fake-pod-1").Labels(map[string]string{"name": "node-agent"}).Phase(corev1.PodRunning).Result()
nodeAgentPodRunning2 := builder.ForPod("fake-ns", "fake-pod-2").Labels(map[string]string{"name": "node-agent"}).Phase(corev1.PodRunning).Result()
nodeAgentPodNotRunning := builder.ForPod("fake-ns", "fake-pod").Labels(map[string]string{"role": "node-agent"}).Result()
nodeAgentPodRunning1 := builder.ForPod("fake-ns", "fake-pod-1").Labels(map[string]string{"role": "node-agent"}).Phase(corev1.PodRunning).Result()
nodeAgentPodRunning2 := builder.ForPod("fake-ns", "fake-pod-2").Labels(map[string]string{"role": "node-agent"}).Phase(corev1.PodRunning).Result()
nodeAgentPodRunning3 := builder.ForPod("fake-ns", "fake-pod-3").
Labels(map[string]string{"name": "node-agent"}).
Labels(map[string]string{"role": "node-agent"}).
Phase(corev1.PodRunning).
NodeName("fake-node").
Result()
Expand Down
6 changes: 6 additions & 0 deletions pkg/podvolume/backupper.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,12 @@
return nil, pvcSummary, nil
}

if err := kube.IsLinuxNode(b.ctx, pod.Spec.NodeName, b.crClient); err != nil {
err := errors.Wrapf(err, "Pod %s/%s is not running in linux node(%s), skip", pod.Namespace, pod.Name, pod.Spec.NodeName)
skipAllPodVolumes(pod, volumesToBackup, err, pvcSummary, log)
return nil, pvcSummary, []error{err}
}

Check warning on line 213 in pkg/podvolume/backupper.go

View check run for this annotation

Codecov / codecov/patch

pkg/podvolume/backupper.go#L210-L213

Added lines #L210 - L213 were not covered by tests

err := nodeagent.IsRunningInNode(b.ctx, backup.Namespace, pod.Spec.NodeName, b.crClient)
if err != nil {
skipAllPodVolumes(pod, volumesToBackup, err, pvcSummary, log)
Expand Down
46 changes: 42 additions & 4 deletions pkg/podvolume/backupper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ func createPodObj(running bool, withVolume bool, withVolumeMounted bool, volumeN

func createNodeAgentPodObj(running bool) *corev1api.Pod {
podObj := builder.ForPod(velerov1api.DefaultNamespace, "fake-node-agent").Result()
podObj.Labels = map[string]string{"name": "node-agent"}
podObj.Labels = map[string]string{"role": "node-agent"}

if running {
podObj.Status.Phase = corev1api.PodRunning
Expand Down Expand Up @@ -303,6 +303,14 @@ func createPVBObj(fail bool, withSnapshot bool, index int, uploaderType string)
return pvbObj
}

func createNodeObj() *corev1api.Node {
return builder.ForNode("fake-node-name").Labels(map[string]string{"kubernetes.io/os": "linux"}).Result()
}

func createWindowsNodeObj() *corev1api.Node {
return builder.ForNode("fake-node-name").Labels(map[string]string{"kubernetes.io/os": "windows"}).Result()
}

func TestBackupPodVolumes(t *testing.T) {
scheme := runtime.NewScheme()
velerov1api.AddToScheme(scheme)
Expand Down Expand Up @@ -358,13 +366,32 @@ func TestBackupPodVolumes(t *testing.T) {
uploaderType: "kopia",
bsl: "fake-bsl",
},
{
name: "pod is not running on Linux node",
volumes: []string{
"fake-volume-1",
"fake-volume-2",
},
kubeClientObj: []runtime.Object{
createNodeAgentPodObj(true),
createWindowsNodeObj(),
},
sourcePod: createPodObj(false, false, false, 2),
uploaderType: "kopia",
errs: []string{
"Pod fake-ns/fake-pod is not running in linux node(fake-node-name), skip",
},
},
{
name: "node-agent pod is not running in node",
volumes: []string{
"fake-volume-1",
"fake-volume-2",
},
sourcePod: createPodObj(true, false, false, 2),
sourcePod: createPodObj(true, false, false, 2),
kubeClientObj: []runtime.Object{
createNodeObj(),
},
uploaderType: "kopia",
errs: []string{
"daemonset pod not found in running state in node fake-node-name",
Expand All @@ -379,6 +406,7 @@ func TestBackupPodVolumes(t *testing.T) {
sourcePod: createPodObj(true, false, false, 2),
kubeClientObj: []runtime.Object{
createNodeAgentPodObj(true),
createNodeObj(),
},
uploaderType: "kopia",
mockGetRepositoryType: true,
Expand All @@ -395,6 +423,7 @@ func TestBackupPodVolumes(t *testing.T) {
sourcePod: createPodObj(true, false, false, 2),
kubeClientObj: []runtime.Object{
createNodeAgentPodObj(true),
createNodeObj(),
},
uploaderType: "kopia",
errs: []string{
Expand All @@ -410,6 +439,7 @@ func TestBackupPodVolumes(t *testing.T) {
sourcePod: createPodObj(true, false, false, 2),
kubeClientObj: []runtime.Object{
createNodeAgentPodObj(true),
createNodeObj(),
},
ctlClientObj: []runtime.Object{
createBackupRepoObj(),
Expand All @@ -427,6 +457,7 @@ func TestBackupPodVolumes(t *testing.T) {
sourcePod: createPodObj(true, true, false, 2),
kubeClientObj: []runtime.Object{
createNodeAgentPodObj(true),
createNodeObj(),
},
ctlClientObj: []runtime.Object{
createBackupRepoObj(),
Expand All @@ -448,6 +479,7 @@ func TestBackupPodVolumes(t *testing.T) {
sourcePod: createPodObj(true, true, false, 2),
kubeClientObj: []runtime.Object{
createNodeAgentPodObj(true),
createNodeObj(),
createPVCObj(1),
createPVCObj(2),
},
Expand All @@ -471,6 +503,7 @@ func TestBackupPodVolumes(t *testing.T) {
sourcePod: createPodObj(true, true, false, 2),
kubeClientObj: []runtime.Object{
createNodeAgentPodObj(true),
createNodeObj(),
createPVCObj(1),
createPVCObj(2),
createPVObj(1, true),
Expand All @@ -482,6 +515,7 @@ func TestBackupPodVolumes(t *testing.T) {
runtimeScheme: scheme,
uploaderType: "kopia",
bsl: "fake-bsl",
errs: []string{},
},
{
name: "volume not mounted by pod should be skipped",
Expand All @@ -492,6 +526,7 @@ func TestBackupPodVolumes(t *testing.T) {
sourcePod: createPodObj(true, true, false, 2),
kubeClientObj: []runtime.Object{
createNodeAgentPodObj(true),
createNodeObj(),
createPVCObj(1),
createPVCObj(2),
createPVObj(1, false),
Expand All @@ -503,6 +538,7 @@ func TestBackupPodVolumes(t *testing.T) {
runtimeScheme: scheme,
uploaderType: "kopia",
bsl: "fake-bsl",
errs: []string{},
},
{
name: "return completed pvbs",
Expand All @@ -512,6 +548,7 @@ func TestBackupPodVolumes(t *testing.T) {
sourcePod: createPodObj(true, true, true, 1),
kubeClientObj: []runtime.Object{
createNodeAgentPodObj(true),
createNodeObj(),
createPVCObj(1),
createPVObj(1, false),
},
Expand All @@ -522,6 +559,7 @@ func TestBackupPodVolumes(t *testing.T) {
uploaderType: "kopia",
bsl: "fake-bsl",
pvbs: 1,
errs: []string{},
},
}
// TODO add more verification around PVCBackupSummary returned by "BackupPodVolumes"
Expand Down Expand Up @@ -568,8 +606,8 @@ func TestBackupPodVolumes(t *testing.T) {

pvbs, _, errs := bp.BackupPodVolumes(backupObj, test.sourcePod, test.volumes, nil, velerotest.NewLogger())

if errs == nil {
assert.Nil(t, test.errs)
if test.errs == nil {
assert.NoError(t, err)
} else {
for i := 0; i < len(errs); i++ {
assert.EqualError(t, errs[i], test.errs[i])
Expand Down
8 changes: 7 additions & 1 deletion pkg/podvolume/restorer.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ func (r *restorer) RestorePodVolumes(data RestoreData, tracker *volume.RestoreVo
return nil
}

if err := nodeagent.IsRunning(r.ctx, r.kubeClient, data.Restore.Namespace); err != nil {
if err := nodeagent.IsRunningOnLinux(r.ctx, r.kubeClient, data.Restore.Namespace); err != nil {
return []error{errors.Wrapf(err, "error to check node agent status")}
}

Expand Down Expand Up @@ -213,6 +213,12 @@ func (r *restorer) RestorePodVolumes(data RestoreData, tracker *volume.RestoreVo
} else if err != nil {
r.log.WithError(err).Error("Failed to check node-agent pod status, disengage")
} else {
if err := kube.IsLinuxNode(checkCtx, nodeName, r.crClient); err != nil {
r.log.WithField("node", nodeName).WithError(err).Error("Restored pod is not running in linux node")
r.nodeAgentCheck <- errors.Wrapf(err, "restored pod %s/%s is not running in linux node(%s)", data.Pod.Namespace, data.Pod.Name, nodeName)
return
}

err = nodeagent.IsRunningInNode(checkCtx, data.Restore.Namespace, nodeName, r.crClient)
if err != nil {
r.log.WithField("node", nodeName).WithError(err).Error("node-agent pod is not running in node, abort the restore")
Expand Down
Loading
Loading