diff --git a/runsc/boot/BUILD b/runsc/boot/BUILD
index ebe27a137d..2b7f293d13 100644
--- a/runsc/boot/BUILD
+++ b/runsc/boot/BUILD
@@ -132,6 +132,7 @@ go_library(
         "//runsc/profile",
         "//runsc/specutils",
         "//runsc/specutils/seccomp",
+        "//runsc/version",
         "@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
         "@com_github_syndtr_gocapability//capability:go_default_library",
         "@org_golang_google_protobuf//proto:go_default_library",
diff --git a/runsc/boot/autosave.go b/runsc/boot/autosave.go
index d2c5e89073..87cfb463da 100644
--- a/runsc/boot/autosave.go
+++ b/runsc/boot/autosave.go
@@ -56,6 +56,7 @@ func getSaveOpts(l *Loader, k *kernel.Kernel, isResume bool) state.SaveOpts {
 
 func getTargetForSaveResume(l *Loader) func(k *kernel.Kernel) {
 	return func(k *kernel.Kernel) {
+		l.addVersionToCheckpoint()
 		l.addContainerSpecsToCheckpoint()
 		saveOpts := getSaveOpts(l, k, true /* isResume */)
 		// Store the state file contents in a buffer for save-resume.
@@ -75,6 +76,7 @@ func getTargetForSaveRestore(l *Loader, files []*fd.FD) func(k *kernel.Kernel) {
 	var once sync.Once
 	return func(k *kernel.Kernel) {
 		once.Do(func() {
+			l.addVersionToCheckpoint()
 			l.addContainerSpecsToCheckpoint()
 			saveOpts := getSaveOpts(l, k, false /* isResume */)
 			saveOpts.Destination = files[0]
diff --git a/runsc/boot/loader.go b/runsc/boot/loader.go
index 97ecbd4eb8..5ba8f3ff2c 100644
--- a/runsc/boot/loader.go
+++ b/runsc/boot/loader.go
@@ -82,6 +82,7 @@ import (
 	"gvisor.dev/gvisor/runsc/profile"
 	"gvisor.dev/gvisor/runsc/specutils"
 	"gvisor.dev/gvisor/runsc/specutils/seccomp"
+	"gvisor.dev/gvisor/runsc/version"
 
 	// Top-level inet providers.
 	"gvisor.dev/gvisor/pkg/sentry/socket/hostinet"
@@ -369,6 +370,10 @@ const (
 	// containerSpecsKey is the key used to add and pop the container specs to the
 	// kernel during save/restore.
 	containerSpecsKey = "container_specs"
+
+	// versionKey is the key used to add and pop runsc version to the kernel
+	// during save/restore.
+	versionKey = "runsc_version"
 )
 
 func getRootCredentials(spec *specs.Spec, conf *config.Config, userNs *auth.UserNamespace) *auth.Credentials {
@@ -1989,3 +1994,19 @@ func popContainerSpecsFromCheckpoint(k *kernel.Kernel) (map[string]*specs.Spec,
 	}
 	return oldSpecs, nil
 }
+
+// addVersionToCheckpoint adds the runsc version to the kernel's checkpoint
+// state under versionKey.
+func (l *Loader) addVersionToCheckpoint() {
+	l.k.AddStateToCheckpoint(versionKey, version.Version())
+}
+
+// popVersionFromCheckpoint pops the runsc version stored under versionKey from
+// the kernel. It returns the empty string when the popped value is not a
+// string, so the caller can report a version mismatch instead of panicking.
+func popVersionFromCheckpoint(k *kernel.Kernel) string {
+	// Comma-ok assertion: a non-string value (presumably what is popped when the
+	// checkpoint carries no version entry) must not crash the restore.
+	v, _ := k.PopCheckpointState(versionKey).(string)
+	return v
+}
diff --git a/runsc/boot/restore.go b/runsc/boot/restore.go
index eec2cff994..7aa9537c65 100644
--- a/runsc/boot/restore.go
+++ b/runsc/boot/restore.go
@@ -18,7 +18,11 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"reflect"
+	"slices"
+	"sort"
 	"strconv"
+	"strings"
 	time2 "time"
 
 	specs "github.com/opencontainers/runtime-spec/specs-go"
@@ -43,6 +47,7 @@ import (
 	"gvisor.dev/gvisor/pkg/tcpip/stack"
 	"gvisor.dev/gvisor/runsc/boot/pprof"
 	"gvisor.dev/gvisor/runsc/config"
+	"gvisor.dev/gvisor/runsc/version"
 )
 
 const (
@@ -141,13 +146,252 @@ func createNetworkStackForRestore(l *Loader) (*stack.Stack, inet.Stack) {
 	return nil, hostinet.NewStack()
 }
 
+// validateErrorWithMsg returns the mismatch error for field of container cName,
+// reporting the checkpoint value oldV, the restore value newV and the detail msg.
+func validateErrorWithMsg(field, cName string, oldV, newV any, msg string) error {
+	return fmt.Errorf("%v does not match across checkpoint restore for container: %v, checkpoint %v restore %v, got error %v", field, cName, oldV, newV, msg)
+}
+
+// validateError returns the mismatch error for field of container cName,
+// reporting the checkpoint value oldV and the restore value newV.
+func validateError(field, cName string, oldV, newV any) error {
+	return fmt.Errorf("%v does not match across checkpoint restore for container: %v, checkpoint %v restore %v", field, cName, oldV, newV)
+}
+
+// validateMounts validates the mounts in the checkpoint and restore spec.
+// Duplicate mounts are allowed iff all the fields in the mount are same.
+// Mounts are matched by destination: the type and the (order-insensitive)
+// options must agree, while the source may differ between checkpoint and restore.
+func validateMounts(field, cName string, o, n []specs.Mount) error {
+	// canonical keeps only Destination, Type and Source of m and attaches a
+	// sorted private copy of its options, so that option order is irrelevant
+	// and the caller's slices are never reordered.
+	canonical := func(m specs.Mount) specs.Mount {
+		opts := make([]string, len(m.Options))
+		copy(opts, m.Options)
+		sort.Strings(opts)
+		return specs.Mount{
+			Destination: m.Destination,
+			Type:        m.Type,
+			Source:      m.Source,
+			Options:     opts,
+		}
+	}
+
+	// Index the checkpointed mounts by destination.
+	checkpointed := make(map[string]specs.Mount)
+	for _, m := range o {
+		c := canonical(m)
+		checkpointed[c.Destination] = c
+	}
+
+	restored := make(map[string]specs.Mount)
+	for _, m := range n {
+		cur := canonical(m)
+
+		// The source is deliberately left out of this comparison.
+		prev, found := checkpointed[cur.Destination]
+		if !found || prev.Destination != cur.Destination || prev.Type != cur.Type || !slices.Equal(prev.Options, cur.Options) {
+			return validateError(field, cName, o, n)
+		}
+
+		// A destination repeated in the restore spec is accepted only when the
+		// repeated entry is identical, source included.
+		if seen, dup := restored[cur.Destination]; dup {
+			if !reflect.DeepEqual(seen, cur) {
+				return validateErrorWithMsg(field, cName, o, n, "invalid mount in the restore spec")
+			}
+			continue
+		}
+		restored[cur.Destination] = cur
+	}
+
+	// Both specs must cover the same number of distinct destinations.
+	if len(restored) != len(checkpointed) {
+		return validateError(field, cName, o, n)
+	}
+	return nil
+}
+
+// validateDevices checks that the checkpoint devices o and the restore devices
+// n name the same set of (Path, Type) pairs. A pair repeated in o is an error.
+func validateDevices(field, cName string, o, n []specs.LinuxDevice) error {
+	switch {
+	case len(o) != len(n):
+		return validateErrorWithMsg(field, cName, o, n, "length mismatch")
+	case len(o) == 0:
+		return nil
+	}
+
+	// Devices are identified by Path and Type only; every other field is
+	// allowed to change across restore.
+	pending := make(map[specs.LinuxDevice]struct{})
+	for i := range o {
+		id := specs.LinuxDevice{Path: o[i].Path, Type: o[i].Type}
+		if _, dup := pending[id]; dup {
+			return fmt.Errorf("duplicate device found in the spec %v before checkpoint for container %v", o, cName)
+		}
+		pending[id] = struct{}{}
+	}
+
+	// Tick off each restore device; whatever is left was not matched.
+	for i := range n {
+		id := specs.LinuxDevice{Path: n[i].Path, Type: n[i].Type}
+		if _, found := pending[id]; !found {
+			return validateError(field, cName, o, n)
+		}
+		delete(pending, id)
+	}
+	if len(pending) > 0 {
+		return validateError(field, cName, o, n)
+	}
+	return nil
+}
+
+// validateAnnotations checks that the "dev.gvisor" annotations of the two specs
+// are equal, ignoring keys under "dev.gvisor.internal" and
+// "dev.gvisor.spec.mount.source". Annotations without the "dev.gvisor" prefix
+// are not compared.
+func validateAnnotations(cName string, oldMap, newMap map[string]string) error {
+	const (
+		gvisorPrefix   = "dev.gvisor"
+		internalPrefix = "dev.gvisor.internal"
+		mountSrcPrefix = "dev.gvisor.spec.mount.source"
+	)
+	// relevant returns the subset of all that takes part in the comparison.
+	relevant := func(all map[string]string) map[string]string {
+		kept := make(map[string]string)
+		for k, v := range all {
+			if !strings.HasPrefix(k, gvisorPrefix) {
+				continue
+			}
+			if strings.HasPrefix(k, internalPrefix) || strings.HasPrefix(k, mountSrcPrefix) {
+				continue
+			}
+			kept[k] = v
+		}
+		return kept
+	}
+
+	oldKept, newKept := relevant(oldMap), relevant(newMap)
+	if reflect.DeepEqual(oldKept, newKept) {
+		return nil
+	}
+	return validateError("Annotations", cName, oldKept, newKept)
+}
+
+// validateArray checks that two slices hold the same set of elements. Elements
+// are used as map keys, so they are compared with ==. Note that this method:
+// * does not allow duplicates in the arrays.
+// * does not depend on the order of the elements in the arrays.
+// T is constrained to comparable because the elements are used as map keys; an
+// unhashable element type is rejected at compile time instead of panicking.
+func validateArray[T comparable](field, cName string, oldArr, newArr []T) error {
+	if len(oldArr) != len(newArr) {
+		return validateErrorWithMsg(field, cName, oldArr, newArr, "length mismatch")
+	}
+	if len(oldArr) == 0 {
+		return nil
+	}
+	oldSet := make(map[T]struct{}, len(oldArr))
+	newSet := make(map[T]struct{}, len(newArr))
+	// The lengths are equal here, so one index walks both slices.
+	for i := range oldArr {
+		if _, dup := oldSet[oldArr[i]]; dup {
+			return validateErrorWithMsg(field, cName, oldArr, newArr, "duplicate value")
+		}
+		oldSet[oldArr[i]] = struct{}{}
+
+		if _, dup := newSet[newArr[i]]; dup {
+			return validateErrorWithMsg(field, cName, oldArr, newArr, "duplicate value")
+		}
+		newSet[newArr[i]] = struct{}{}
+	}
+	if !reflect.DeepEqual(oldSet, newSet) {
+		return validateError(field, cName, oldArr, newArr)
+	}
+
+	return nil
+}
+
+// validateStruct reports a mismatch for field of container cName unless oldS
+// and newS are equal according to reflect.DeepEqual.
+func validateStruct(field, cName string, oldS, newS any) error {
+	if !reflect.DeepEqual(oldS, newS) {
+		return validateError(field, cName, oldS, newS)
+	}
+	return nil
+}
+
+// ifNil returns v when it is non-nil and a pointer to a zero T otherwise, so
+// callers can read fields without a nil check.
+func ifNil[T any](v *T) *T {
+	if v != nil {
+		return v
+	}
+	var t T
+	return &t
+}
+
+// validateSpecForContainer compares the checkpointed spec oldSpec with the
+// restore spec newSpec of container cName and returns an error naming the first
+// field found to differ. Nil Linux, Process and Root sections are treated as
+// empty ones.
+func validateSpecForContainer(oldSpec, newSpec *specs.Spec, cName string) error {
+	oldLinux, newLinux := ifNil(oldSpec.Linux), ifNil(newSpec.Linux)
+	oldProcess, newProcess := ifNil(oldSpec.Process), ifNil(newSpec.Process)
+	oldRoot, newRoot := ifNil(oldSpec.Root), ifNil(newSpec.Root)
+
+	if oldSpec.Version != newSpec.Version {
+		return validateError("OCI Version", cName, oldSpec.Version, newSpec.Version)
+	}
+	if err := validateMounts("Mounts", cName, oldSpec.Mounts, newSpec.Mounts); err != nil {
+		return err
+	}
+
+	// Validate specs.Process.
+	if oldProcess.Terminal != newProcess.Terminal {
+		return validateError("Terminal", cName, oldProcess.Terminal, newProcess.Terminal)
+	}
+	if oldProcess.Cwd != newProcess.Cwd {
+		return validateError("Cwd", cName, oldProcess.Cwd, newProcess.Cwd)
+	}
+	if ok := slices.Equal(oldProcess.Args, newProcess.Args); !ok {
+		return validateError("Args", cName, oldProcess.Args, newProcess.Args)
+	}
+
+	// Validate specs.Linux.
+	if oldLinux.CgroupsPath != newLinux.CgroupsPath {
+		return validateError("CgroupsPath", cName, oldLinux.CgroupsPath, newLinux.CgroupsPath)
+	}
+	if err := validateDevices("Devices", cName, oldLinux.Devices, newLinux.Devices); err != nil {
+		return err
+	}
+	if err := validateArray("UIDMappings", cName, oldLinux.UIDMappings, newLinux.UIDMappings); err != nil {
+		return err
+	}
+	if err := validateArray("GIDMappings", cName, oldLinux.GIDMappings, newLinux.GIDMappings); err != nil {
+		return err
+	}
+	if err := validateArray("Namespace", cName, oldLinux.Namespaces, newLinux.Namespaces); err != nil {
+		return err
+	}
+
+	// Fields compared as a whole with reflect.DeepEqual. A slice, unlike a map,
+	// is walked in a fixed order, so the same pair of specs always reports the
+	// same field first.
+	structChecks := []struct {
+		field      string
+		oldV, newV any
+	}{
+		{"Root", oldRoot, newRoot},
+		{"User", oldProcess.User, newProcess.User},
+		{"Rlimits", oldProcess.Rlimits, newProcess.Rlimits},
+		{"Sysctl", oldLinux.Sysctl, newLinux.Sysctl},
+		{"Seccomp", oldLinux.Seccomp, newLinux.Seccomp},
+	}
+	for _, c := range structChecks {
+		if err := validateStruct(c.field, cName, c.oldV, c.newV); err != nil {
+			return err
+		}
+	}
+
+	if err := validateAnnotations(cName, oldSpec.Annotations, newSpec.Annotations); err != nil {
+		return err
+	}
+
+	// TODO(b/359591006): Validate Linux.Resources, Process.Capabilities and Annotations.
+	// TODO(b/359591006): Check other remaining fields for equality.
+	return nil
+}
+
 // Validate OCI specs before restoring the containers.
+// Every container in newSpecs must have a spec in oldSpecs, and each such pair
+// must pass validateSpecForContainer; the first failure is returned.
 func validateSpecs(oldSpecs, newSpecs map[string]*specs.Spec) error {
-	for name := range newSpecs {
-		if _, ok := oldSpecs[name]; !ok {
-			return fmt.Errorf("checkpoint image does not contain spec for container: %q", name)
+	for cName, newSpec := range newSpecs {
+		oldSpec, ok := oldSpecs[cName]
+		if !ok {
+			return fmt.Errorf("checkpoint image does not contain spec for container: %q", cName)
+		}
+		// Return only on failure so that every container in the restore spec
+		// is validated, not just the first one the map iteration visits.
+		if err := validateSpecForContainer(oldSpec, newSpec, cName); err != nil {
+			return err
 		}
 	}
+
 	return nil
 }
 
@@ -247,6 +487,12 @@ func (r *restorer) restore(l *Loader) error {
 		return err
 	}
 
+	checkpointVersion := popVersionFromCheckpoint(l.k)
+	restoreVersion := version.Version()
+	if checkpointVersion != restoreVersion {
+		return fmt.Errorf("runsc version does not match across checkpoint restore, checkpoint: %v restore: %v", checkpointVersion, restoreVersion)
+	}
+
 	oldSpecs, err := popContainerSpecsFromCheckpoint(l.k)
 	if err != nil {
 		return err
@@ -361,6 +607,9 @@ func (l *Loader) save(o *control.SaveOpts) (err error) {
 	}
 	o.Metadata["container_count"] = strconv.Itoa(l.containerCount())
 
+	// Save runsc version.
+	l.addVersionToCheckpoint()
+
 	// Save container specs.
 	l.addContainerSpecsToCheckpoint()
 
diff --git a/runsc/container/container_test.go b/runsc/container/container_test.go
index 19f7e8b486..0e6c3ab738 100644
--- a/runsc/container/container_test.go
+++ b/runsc/container/container_test.go
@@ -3653,3 +3653,215 @@ func TestLookupEROFS(t *testing.T) {
 		}
 	}
 }
+
+// TestSpecValidation checkpoints a container started from one spec and restores
+// it from a mutated spec, checking that restore fails with the expected
+// validation error (or succeeds when wantErr is empty).
+func TestSpecValidation(t *testing.T) {
+	// TODO(b/359591006): Add more tests.
+	tests := []struct {
+		name    string
+		mutate  func(spec, restoreSpec *specs.Spec)
+		wantErr string
+	}{
+		{
+			name: "Terminal",
+			mutate: func(_, restoreSpec *specs.Spec) {
+				restoreSpec.Process.Terminal = true
+			},
+			wantErr: "Terminal does not match across checkpoint restore",
+		},
+		{
+			name: "Args",
+			mutate: func(_, restoreSpec *specs.Spec) {
+				restoreSpec.Process.Args = append(restoreSpec.Process.Args, "new arg")
+			},
+			wantErr: "Args does not match across checkpoint restore",
+		},
+		{
+			name: "Device",
+			mutate: func(spec, restoreSpec *specs.Spec) {
+				spec.Linux = &specs.Linux{}
+				restoreSpec.Linux = &specs.Linux{}
+				mode := os.FileMode(0666)
+				dev := specs.LinuxDevice{
+					Path:     "/dev/nvidiactl",
+					Type:     "c",
+					Major:    195, // nvgpu.NV_MAJOR_DEVICE_NUMBER,
+					Minor:    255, // nvgpu.NV_CONTROL_DEVICE_MINOR,
+					FileMode: &mode,
+				}
+				restoreSpec.Linux.Devices = append(restoreSpec.Linux.Devices, dev)
+			},
+			wantErr: "Devices does not match across checkpoint restore",
+		},
+		{
+			name: "Namespace",
+			mutate: func(spec, restoreSpec *specs.Spec) {
+				spec.Linux = &specs.Linux{}
+				restoreSpec.Linux = &specs.Linux{}
+				restoreSpec.Linux.Namespaces = append(restoreSpec.Linux.Namespaces, specs.LinuxNamespace{
+					Type: "network",
+					Path: fmt.Sprintf("/proc/%d/ns/net", os.Getpid()),
+				})
+			},
+			wantErr: "Namespace does not match across checkpoint restore",
+		},
+		{
+			name: "Seccomp",
+			mutate: func(spec, restoreSpec *specs.Spec) {
+				spec.Linux = &specs.Linux{}
+				restoreSpec.Linux = &specs.Linux{}
+				restoreSpec.Linux.Seccomp = &specs.LinuxSeccomp{
+					DefaultAction: specs.ActAllow,
+				}
+			},
+			wantErr: "Seccomp does not match across checkpoint restore",
+		},
+		{
+			name:    "RestoreDupMountsSuccess",
+			mutate:  func(_, _ *specs.Spec) {},
+			wantErr: "",
+		},
+		{
+			name:    "RestoreDupMountsFail",
+			mutate:  func(_, _ *specs.Spec) {},
+			wantErr: "invalid mount",
+		},
+		{
+			name:    "RestoreMountsFail",
+			mutate:  func(_, _ *specs.Spec) {},
+			wantErr: "Mounts does not match across checkpoint restore",
+		},
+		{
+			name: "FlagAnnotations",
+			mutate: func(spec, _ *specs.Spec) {
+				spec.Annotations = make(map[string]string)
+				spec.Annotations["dev.gvisor.net-disconnect-ok"] = strconv.FormatBool(true)
+			},
+			wantErr: "Annotations does not match across checkpoint restore",
+		},
+	}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			spec, _ := sleepSpecConf(t)
+			restoreSpec, _ := sleepSpecConf(t)
+			test.mutate(spec, restoreSpec)
+
+			mountDir, err := os.MkdirTemp(testutil.TmpDir(), "mount-test")
+			if err != nil {
+				t.Fatalf("os.MkdirTemp() failed: %v", err)
+			}
+			if err := os.Chmod(mountDir, 0777); err != nil {
+				t.Fatalf("error chmoding file: %q, %v", mountDir, err)
+			}
+			defer os.RemoveAll(mountDir)
+			mountDest := filepath.Join(mountDir, "/foo-dir")
+			mnt := specs.Mount{
+				Source:      mountDest,
+				Destination: mountDest,
+				Type:        "tmpfs",
+			}
+			spec.Mounts = append(spec.Mounts, mnt)
+
+			conf := testutil.TestConfig(t)
+			_, bundleDir, cleanup, err := testutil.SetupContainer(spec, conf)
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer cleanup()
+
+			args := Args{
+				ID:        testutil.RandomContainerID(),
+				Spec:      spec,
+				BundleDir: bundleDir,
+			}
+			cont, err := New(conf, args)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer cont.Destroy()
+
+			if err := cont.Start(conf); err != nil {
+				t.Fatalf("error starting container: %v", err)
+			}
+
+			// Set the image path, which is where the checkpoint image will be saved.
+			dir, err := os.MkdirTemp(testutil.TmpDir(), "checkpoint")
+			if err != nil {
+				t.Fatalf("os.MkdirTemp failed: %v", err)
+			}
+			defer os.RemoveAll(dir)
+			if err := os.Chmod(dir, 0777); err != nil {
+				t.Fatalf("error chmoding file: %q, %v", dir, err)
+			}
+			// Checkpoint running container; save state into new file.
+			if err := cont.Checkpoint(dir, false /* direct */, statefile.Options{Compression: statefile.CompressionLevelFlateBestSpeed}, pgalloc.SaveOpts{}); err != nil {
+				t.Fatalf("error checkpointing container to empty file: %v", err)
+			}
+
+			restoreDir, err := os.MkdirTemp(testutil.TmpDir(), "restore-test")
+			if err != nil {
+				t.Fatalf("os.MkdirTemp() failed: %v", err)
+			}
+			if err := os.Chmod(restoreDir, 0777); err != nil {
+				t.Fatalf("error chmoding file: %q, %v", restoreDir, err)
+			}
+			defer os.RemoveAll(restoreDir)
+
+			restoreSrc := filepath.Join(restoreDir, "/restore-dir")
+			restoreMnt := specs.Mount{
+				Source:      restoreSrc,
+				Destination: mountDest,
+				Type:        "tmpfs",
+			}
+			switch test.name {
+			case "RestoreDupMountsSuccess":
+				restoreSpec.Mounts = append(restoreSpec.Mounts, restoreMnt)
+			case "RestoreDupMountsFail":
+				restoreMnt1 := specs.Mount{
+					Source:      filepath.Join(restoreDir, "/restore-dir2"),
+					Destination: mountDest,
+					Type:        "tmpfs",
+				}
+				restoreSpec.Mounts = append(restoreSpec.Mounts, restoreMnt1)
+			case "RestoreMountsFail":
+				restoreMnt.Destination = restoreSrc
+			}
+			restoreSpec.Mounts = append(restoreSpec.Mounts, restoreMnt)
+
+			// Change the spec for the validation to fail.
+			_, bundleDir2, cleanup2, err := testutil.SetupContainer(restoreSpec, conf)
+			if err != nil {
+				t.Fatalf("error setting up container: %v", err)
+			}
+			defer cleanup2()
+
+			// Restore into a new container with different ID (e.g. clone). Keep the
+			// initial container running to ensure no conflict with it.
+			args2 := Args{
+				ID:        testutil.RandomContainerID(),
+				Spec:      restoreSpec,
+				BundleDir: bundleDir2,
+			}
+			cont2, err := New(conf, args2)
+			if err != nil {
+				t.Fatalf("error creating container: %v", err)
+			}
+			defer cont2.Destroy()
+
+			err = cont2.Restore(conf, dir, false /* direct */, false /* background */)
+			if err == nil {
+				if test.wantErr == "" {
+					return
+				}
+				t.Fatalf("spec validation failed for test %v, got: nil, want: %v", test.name, test.wantErr)
+			}
+
+			got := err.Error()
+			if !strings.Contains(got, test.wantErr) {
+				t.Fatalf("wrong error message, got: %v, want: %v", got, test.wantErr)
+			}
+		})
+	}
+}