From 7bf087e902e38434f1a6c99bb3b6d20eeac70bbb Mon Sep 17 00:00:00 2001 From: Georgiy Lebedev Date: Wed, 6 Sep 2023 16:31:05 +0300 Subject: [PATCH] Add support for device mapper management In the scope of #794, we will need to manage container snapshots backed by the thin pool device mapper, so we need to introduce a separate module for this. Closes #805 Part of #794 Signed-off-by: Georgiy Lebedev --- .github/workflows/unit_tests.yml | 2 +- devmapper/Makefile | 34 ++++ devmapper/deviceSnapshot.go | 125 +++++++++++++++ devmapper/devicemapper.go | 265 +++++++++++++++++++++++++++++++ devmapper/devicemapper_test.go | 136 ++++++++++++++++ 5 files changed, 561 insertions(+), 1 deletion(-) create mode 100644 devmapper/Makefile create mode 100644 devmapper/deviceSnapshot.go create mode 100644 devmapper/devicemapper.go create mode 100644 devmapper/devicemapper_test.go diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index c4e76d833..eff2f94c9 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -69,7 +69,7 @@ jobs: strategy: fail-fast: false matrix: - module: [ ctriface, ctriface/image ] + module: [ ctriface, ctriface/image, devmapper ] steps: - name: Set up Go 1.19 diff --git a/devmapper/Makefile b/devmapper/Makefile new file mode 100644 index 000000000..7bbea57cf --- /dev/null +++ b/devmapper/Makefile @@ -0,0 +1,34 @@ +# MIT License +# +# Copyright (c) 2023 Georgiy Lebedev, Dmitrii Ustiugov, Plamen Petrov and vHive team +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this 
permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +EXTRAGOARGS:=-v -race -cover +CTRDLOGDIR:=/tmp/ctrd-logs + +test: + ./../scripts/clean_fcctr.sh + sudo mkdir -m777 -p $(CTRDLOGDIR) && sudo env "PATH=$(PATH)" /usr/local/bin/firecracker-containerd --config /etc/firecracker-containerd/config.toml 1>$(CTRDLOGDIR)/ctriface_log.out 2>$(CTRDLOGDIR)/ctriface_log.err & + sudo env "PATH=$(PATH)" go test ./ $(EXTRAGOARGS) + ./../scripts/clean_fcctr.sh +test-man: + echo "Nothing to test manually" + +.PHONY: test test-man diff --git a/devmapper/deviceSnapshot.go b/devmapper/deviceSnapshot.go new file mode 100644 index 000000000..432d3e6d0 --- /dev/null +++ b/devmapper/deviceSnapshot.go @@ -0,0 +1,125 @@ +// MIT License +// +// Copyright (c) 2023 Georgiy Lebedev, Amory Hoste and vHive team +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. 
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+package devmapper
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+	"syscall"
+
+	"github.com/pkg/errors"
+)
+
+// DeviceSnapshot represents a device mapper snapshot and tracks where it is currently mounted.
+// The embedded mutex serializes Mount and UnMount on the same snapshot.
+type DeviceSnapshot struct {
+	sync.Mutex
+	path            string // path of the snapshot block device that gets mounted
+	mountDir        string // directory of the most recent successful Mount; "" when not mounted
+	mountedReadonly bool   // readOnly flag of the most recent successful Mount
+}
+
+// NewDeviceSnapshot initializes a new, not yet mounted, device mapper snapshot for the device at path.
+func NewDeviceSnapshot(path string) *DeviceSnapshot {
+	return &DeviceSnapshot{path: path}
+}
+
+// Mount mounts the snapshot device as an ext4 filesystem on a freshly created temporary directory and returns the
+// path of that directory. Every call creates a new mount point and overwrites the one recorded by a previous call,
+// so each Mount must be paired with an UnMount before the snapshot is mounted again.
+func (dsnp *DeviceSnapshot) Mount(readOnly bool) (string, error) {
+	dsnp.Lock()
+	defer dsnp.Unlock()
+
+	mountDir, err := os.MkdirTemp("", filepath.Base(dsnp.path))
+	if err != nil {
+		return "", err
+	}
+	mountDir = removeTrailingSlash(mountDir)
+
+	if err := mountExt4(dsnp.path, mountDir, readOnly); err != nil {
+		// The mount point is useless without a mount: remove it (best effort, the mount error is the one worth
+		// reporting) so that failed mounts do not leak temporary directories.
+		_ = os.Remove(mountDir)
+		return "", errors.Wrapf(err, "mounting %s at %s", dsnp.path, mountDir)
+	}
+	dsnp.mountDir = mountDir
+	dsnp.mountedReadonly = readOnly
+
+	return dsnp.mountDir, nil
+}
+
+// UnMount unmounts the snapshot from the directory recorded by the last successful Mount and removes that
+// directory.
+func (dsnp *DeviceSnapshot) UnMount() error {
+	dsnp.Lock()
+	defer dsnp.Unlock()
+
+	// Nothing is mounted (never mounted, or already unmounted): report that explicitly instead of handing an
+	// empty path to umount(2).
+	if dsnp.mountDir == "" {
+		return errors.Errorf("unmounting %s: snapshot is not mounted", dsnp.path)
+	}
+
+	if err := unMountExt4(dsnp.mountDir); err != nil {
+		return errors.Wrapf(err, "unmounting %s", dsnp.mountDir)
+	}
+
+	if err := os.RemoveAll(dsnp.mountDir); err != nil {
+		return errors.Wrapf(err, "removing %s", dsnp.mountDir)
+	}
+
+	// Forget the mount so that the recorded state matches reality.
+	dsnp.mountDir = ""
+	dsnp.mountedReadonly = false
+
+	return nil
+}
+
+// mountExt4 mounts a snapshot device available at devicePath at the specified mountPath. When readOnly is set the
+// filesystem is mounted read-only with the ext4 "noload" option.
+func mountExt4(devicePath, mountPath string, readOnly bool) error {
+	// Specify flags for faster mounting and performance:
+	// * Do not update access times for (all types of) files on this filesystem.
+	// * Do not allow access to devices (special files) on this filesystem.
+	// * Do not allow programs to be executed from this filesystem.
+	// * Do not honor set-user-ID and set-group-ID bits or file capabilities when executing programs from this filesystem.
+	// * Suppress the display of certain (printk()) warning messages in the kernel log.
+	var flags uintptr = syscall.MS_NOATIME | syscall.MS_NODEV | syscall.MS_NOEXEC | syscall.MS_NOSUID | syscall.MS_SILENT
+
+	// Filesystem-specific options, passed to mount(2) as a comma-separated list.
+	var options []string
+
+	if readOnly {
+		// Mount filesystem read-only.
+		flags |= syscall.MS_RDONLY
+		options = append(options, "noload")
+	}
+
+	return syscall.Mount(devicePath, mountPath, "ext4", flags, strings.Join(options, ","))
+}
+
+// unMountExt4 unmounts a snapshot device mounted at mountPath. The unmount is requested with MNT_DETACH.
+func unMountExt4(mountPath string) error {
+	return syscall.Unmount(mountPath, syscall.MNT_DETACH)
+}
+
+// removeTrailingSlash returns a path with the trailing slash removed.
+func removeTrailingSlash(path string) string {
+	// TrimSuffix strips at most one trailing "/" and returns path unchanged when there is none.
+	return strings.TrimSuffix(path, "/")
+}
diff --git a/devmapper/devicemapper.go b/devmapper/devicemapper.go
new file mode 100644
index 000000000..56ffc3b6c
--- /dev/null
+++ b/devmapper/devicemapper.go
@@ -0,0 +1,265 @@
+// MIT License
+//
+// Copyright (c) 2023 Georgiy Lebedev, Amory Hoste and vHive team
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+package devmapper
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+	"strings"
+	"sync"
+
+	"github.com/containerd/containerd"
+	"github.com/containerd/containerd/leases"
+	"github.com/containerd/containerd/snapshots"
+	"github.com/opencontainers/image-spec/identity"
+	"github.com/pkg/errors"
+)
+
+// DeviceMapper creates and manages device snapshots used to store container images.
+type DeviceMapper struct {
+	sync.Mutex
+
+	// snapDevices maps a revision snapshot key to the snapshot device known for it.
+	snapDevices map[string]*DeviceSnapshot
+	// snapshotService is the containerd snapshotter used to interact with the device mapper.
+	snapshotService snapshots.Snapshotter
+
+	// Leases keep manually created snapshots from being garbage collected. containerd does this automatically for
+	// snapshots it creates itself (e.g. container.create). leases is keyed by snapshot key.
+	leaseManager leases.Manager
+	leases       map[string]*leases.Lease
+}
+
+// NewDeviceMapper returns a DeviceMapper with empty snapshot and lease tables that talks to the "devmapper"
+// snapshotter and the lease service of the given containerd client.
+func NewDeviceMapper(client *containerd.Client) *DeviceMapper {
+	return &DeviceMapper{
+		snapDevices:     make(map[string]*DeviceSnapshot),
+		snapshotService: client.SnapshotService("devmapper"),
+		leaseManager:    client.LeasesService(),
+		leases:          make(map[string]*leases.Lease),
+	}
+}
+
+// getImageKey returns the key used in containerd to identify the snapshot of the given image: the chain ID
+// computed over the diff IDs of the image's root filesystem.
+func getImageKey(image containerd.Image, ctx context.Context) (string, error) {
+	layers, err := image.RootFS(ctx)
+	if err != nil {
+		return "", err
+	}
+
+	chainID := identity.ChainID(layers)
+	return chainID.String(), nil
+}
+
+// CreateDeviceSnapshotFromImage creates a new device mapper snapshot, identified by snapshotKey, on top of the
+// snapshot of the given image.
+func (dmpr *DeviceMapper) CreateDeviceSnapshotFromImage(ctx context.Context, snapshotKey string, image containerd.Image) error {
+	imageKey, err := getImageKey(image, ctx)
+	if err == nil {
+		return dmpr.CreateDeviceSnapshot(ctx, snapshotKey, imageKey)
+	}
+	return err
+}
+
+// CreateDeviceSnapshot creates a new device mapper snapshot from the given parent snapshot.
+func (dmpr *DeviceMapper) CreateDeviceSnapshot(ctx context.Context, snapKey, parentKey string) error {
+	// Create lease to avoid garbage collection
+	lease, err := dmpr.leaseManager.Create(ctx, leases.WithID(snapKey))
+	if err != nil {
+		return err
+	}
+
+	// Create snapshot from parent
+	leasedCtx := leases.WithLease(ctx, lease.ID)
+	mounts, err := dmpr.snapshotService.Prepare(leasedCtx, snapKey, parentKey)
+	if err != nil {
+		// Nothing will ever reference this lease: release it (best effort, the Prepare error is the one worth
+		// reporting) instead of leaking it.
+		_ = dmpr.leaseManager.Delete(ctx, lease)
+		return err
+	}
+
+	// Devmapper always only has a single mount /dev/mapper/fc-thinpool-snap-x; guard against an empty list so
+	// that an unexpected snapshotter answer becomes an error rather than a panic.
+	if len(mounts) == 0 {
+		// Best-effort rollback of what was created above.
+		_ = dmpr.snapshotService.Remove(ctx, snapKey)
+		_ = dmpr.leaseManager.Delete(ctx, lease)
+		return errors.Errorf("creating device snapshot: snapshot %s has no mounts", snapKey)
+	}
+	devSnapPath := mounts[0].Source
+
+	dmpr.Lock()
+	defer dmpr.Unlock()
+	dmpr.snapDevices[snapKey] = NewDeviceSnapshot(devSnapPath)
+	dmpr.leases[snapKey] = &lease
+	return nil
+}
+
+// RemoveDeviceSnapshot removes the device mapper snapshot identified by the given snapKey. This is only necessary for
+// snapshots created through CreateDeviceSnapshot since other snapshots are managed by containerd. The locking here
+// also assumes this function is only used to remove snapshots that are a child and are only used by a single container.
+//
+// The bookkeeping for snapKey is dropped before containerd is contacted, and the lease is released even when
+// removing the snapshot fails, so that a failed removal does not leave an unreachable lease behind. The snapshot
+// removal error takes precedence over the lease deletion error.
+func (dmpr *DeviceMapper) RemoveDeviceSnapshot(ctx context.Context, snapKey string) error {
+	dmpr.Lock()
+
+	lease, present := dmpr.leases[snapKey]
+	if !present {
+		dmpr.Unlock()
+		return errors.Errorf("Delete device snapshot: lease for key %s does not exist", snapKey)
+	}
+
+	if _, present := dmpr.snapDevices[snapKey]; !present {
+		dmpr.Unlock()
+		return errors.Errorf("Delete device snapshot: device for key %s does not exist", snapKey)
+	}
+	delete(dmpr.snapDevices, snapKey)
+	delete(dmpr.leases, snapKey)
+	dmpr.Unlock()
+
+	// Not only deactivates but also deletes device
+	removeErr := dmpr.snapshotService.Remove(ctx, snapKey)
+
+	// The lease is no longer tracked by this DeviceMapper, so this is the only chance to release it.
+	leaseErr := dmpr.leaseManager.Delete(ctx, *lease)
+
+	if removeErr != nil {
+		return removeErr
+	}
+	return leaseErr
+}
+
+// GetImageSnapshot retrieves the device mapper snapshot for a given image.
+func (dmpr *DeviceMapper) GetImageSnapshot(ctx context.Context, image containerd.Image) (*DeviceSnapshot, error) {
+	imageSnapKey, err := getImageKey(image, ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	return dmpr.GetDeviceSnapshot(ctx, imageSnapKey)
+}
+
+// GetDeviceSnapshot returns the device mapper snapshot identified by the given snapKey. Snapshots that are not yet
+// known to this DeviceMapper are looked up in containerd and remembered for later calls.
+func (dmpr *DeviceMapper) GetDeviceSnapshot(ctx context.Context, snapKey string) (*DeviceSnapshot, error) {
+	dmpr.Lock()
+	defer dmpr.Unlock()
+
+	if dsnp, present := dmpr.snapDevices[snapKey]; present {
+		return dsnp, nil
+	}
+
+	// Get snapshot from containerd if not yet stored by vHive devicemapper
+	mounts, err := dmpr.snapshotService.Mounts(ctx, snapKey)
+	if err != nil {
+		return nil, err
+	}
+
+	// Devmapper always only has a single mount /dev/mapper/fc-thinpool-snap-x; guard against an empty list so
+	// that an unexpected snapshotter answer becomes an error rather than a panic.
+	if len(mounts) == 0 {
+		return nil, errors.Errorf("getting device snapshot: snapshot %s has no mounts", snapKey)
+	}
+
+	dsnp := NewDeviceSnapshot(mounts[0].Source)
+	dmpr.snapDevices[snapKey] = dsnp
+	return dsnp, nil
+}
+
+// addTrailingSlash adds a trailing slash to a path if it is not present yet.
+func addTrailingSlash(path string) string {
+	if strings.HasSuffix(path, "/") {
+		return path
+	}
+	return path + "/"
+}
+
+// CreatePatch creates a patch file storing the file differences between an image and the changes applied
+// by the container using rsync. Note that this is a different approach than using thin_delta which is able to
+// extract blocks directly by leveraging the metadata stored by the device mapper.
+//
+// Both snapshots are mounted read-only for the duration of the call. The patch is written to patchPath and its
+// mode is then changed to 777.
+func (dmpr *DeviceMapper) CreatePatch(ctx context.Context, patchPath, containerSnapKey string, image containerd.Image) error {
+	containerSnap, err := dmpr.GetDeviceSnapshot(ctx, containerSnapKey)
+	if err != nil {
+		return err
+	}
+
+	imageSnap, err := dmpr.GetImageSnapshot(ctx, image)
+	if err != nil {
+		return err
+	}
+
+	// 1. Mount original and snapshot image
+	imageMountPath, err := imageSnap.Mount(true)
+	if err != nil {
+		return err
+	}
+	// Unmounting is best effort: a failure here must not mask the result of the patch creation.
+	defer func() { _ = imageSnap.UnMount() }()
+
+	containerMountPath, err := containerSnap.Mount(true)
+	if err != nil {
+		return err
+	}
+	defer func() { _ = containerSnap.UnMount() }()
+
+	// 2. Save changes to file
+	if err := extractPatch(imageMountPath, containerMountPath, patchPath); err != nil {
+		return err
+	}
+
+	// 3. Change the rights of patch file to enable upload to local storage. The command has to be run
+	// explicitly: exec.Command only builds it.
+	var errb bytes.Buffer
+	chmod := exec.Command("sudo", "chmod", "777", patchPath)
+	chmod.Stderr = &errb
+	if err := chmod.Run(); err != nil {
+		return errors.Wrapf(err, "changing permissions of %s: %s", patchPath, errb.String())
+	}
+
+	return nil
+}
+
+// extractPatch extracts the file differences between the file systems mounted at the supplied paths using rsync and
+// writes the differences to the supplied patchPath.
+func extractPatch(imageMountPath, containerMountPath, patchPath string) error {
+	patchArg := fmt.Sprintf("--only-write-batch=%s", patchPath)
+
+	var errb bytes.Buffer
+	cmd := exec.Command("sudo", "rsync", "-ar", patchArg, addTrailingSlash(imageMountPath), addTrailingSlash(containerMountPath))
+	cmd.Stderr = &errb
+	err := cmd.Run()
+
+	if err != nil {
+		return errors.Wrapf(err, "creating patch between %s and %s at %s: %s", imageMountPath, containerMountPath, patchPath, errb.String())
+	}
+
+	err = os.Remove(patchPath + ".sh") // Remove unnecessary script output
+	if err != nil {
+		return errors.Wrapf(err, "removing %s", patchPath+".sh")
+	}
+	return nil
+}
+
+// RestorePatch applies the file changes stored in the supplied patch file on top of the given container snapshot.
+// The snapshot is mounted read-write for the duration of the call.
+func (dmpr *DeviceMapper) RestorePatch(ctx context.Context, containerSnapKey, patchPath string) error {
+	containerSnap, err := dmpr.GetDeviceSnapshot(ctx, containerSnapKey)
+	if err != nil {
+		return err
+	}
+
+	// 1. Mount container snapshot device
+	containerMountPath, err := containerSnap.Mount(false)
+	if err != nil {
+		return err
+	}
+	// Unmounting is best effort: a failure here must not mask the result of applying the patch.
+	defer func() { _ = containerSnap.UnMount() }()
+
+	// 2. Apply changes to container mounted file system
+	return applyPatch(containerMountPath, patchPath)
+}
+
+// applyPatch applies the file changes stored in the supplied patch file to the filesystem mounted at the supplied
+// path. As in extractPatch, the stderr output of rsync is included in the returned error.
+func applyPatch(containerMountPath, patchPath string) error {
+	patchArg := fmt.Sprintf("--read-batch=%s", patchPath)
+
+	var errb bytes.Buffer
+	cmd := exec.Command("sudo", "rsync", "-ar", patchArg, addTrailingSlash(containerMountPath))
+	cmd.Stderr = &errb
+	if err := cmd.Run(); err != nil {
+		return errors.Wrapf(err, "applying %s at %s: %s", patchPath, containerMountPath, errb.String())
+	}
+	return nil
+}
diff --git a/devmapper/devicemapper_test.go b/devmapper/devicemapper_test.go
new file mode 100644
index 000000000..d4b033a45
--- /dev/null
+++ b/devmapper/devicemapper_test.go
@@ -0,0 +1,136 @@
+// MIT License
+//
+// Copyright (c) 2023 Georgiy Lebedev, Plamen Petrov, Amory Hoste and vHive team
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+package devmapper_test
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"sync"
+	"testing"
+	"time"
+
+	"github.com/containerd/containerd"
+	ctrdlog "github.com/containerd/containerd/log"
+	"github.com/containerd/containerd/namespaces"
+	log "github.com/sirupsen/logrus"
+	"github.com/stretchr/testify/require"
+	"github.com/vhive-serverless/vhive/ctriface/image"
+	"github.com/vhive-serverless/vhive/devmapper"
+)
+
+const (
+	containerdAddress = "/run/firecracker-containerd/containerd.sock"
+	NamespaceName     = "containerd"
+	TestImageName     = "ghcr.io/ease-lab/helloworld:var_workload"
+)
+
+// getAllImages returns the images used by the concurrent test, keyed by a short workload name.
+func getAllImages() map[string]string {
+	return map[string]string{
+		"helloworld":   "ghcr.io/ease-lab/helloworld:var_workload",
+		"chameleon":    "ghcr.io/ease-lab/chameleon:var_workload",
+		"pyaes":        "ghcr.io/ease-lab/pyaes:var_workload",
+		"image_rotate": "ghcr.io/ease-lab/image_rotate:var_workload",
+		"lr_training":  "ghcr.io/ease-lab/lr_training:var_workload",
+	}
+}
+
+// TestMain configures logging for the whole test binary before running the tests.
+func TestMain(m *testing.M) {
+	// call flag.Parse() here if TestMain uses flags
+
+	log.SetFormatter(&log.TextFormatter{
+		TimestampFormat: ctrdlog.RFC3339NanoFixed,
+		FullTimestamp:   true,
+	})
+
+	log.SetOutput(os.Stdout)
+
+	log.SetLevel(log.InfoLevel)
+
+	os.Exit(m.Run())
+}
+
+// testDevmapper pulls imageName, creates a device snapshot identified by snapKey on top of it, fetches the snapshot
+// back and finally removes it again.
+func testDevmapper(t *testing.T, mgr *image.ImageManager, dmpr *devmapper.DeviceMapper, snapKey, imageName string) {
+	// Pull image
+	testTimeout := 120 * time.Second
+	ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), NamespaceName), testTimeout)
+	defer cancel()
+
+	img, err := mgr.GetImage(ctx, imageName)
+	require.NoError(t, err, fmt.Sprintf("Failed to pull image %s", imageName))
+
+	// Test devmapper
+	err = dmpr.CreateDeviceSnapshotFromImage(ctx, snapKey, *img)
+	require.NoError(t, err, fmt.Sprintf("Failed to create snapshot from image %s", imageName))
+
+	_, err = dmpr.GetDeviceSnapshot(ctx, snapKey)
+	if err != nil {
+		// Clean up the snapshot created above before failing the test.
+		_ = dmpr.RemoveDeviceSnapshot(ctx, snapKey)
+	}
+	require.NoError(t, err, fmt.Sprintf("Failed to fetch previously created snapshot %s", snapKey))
+
+	err = dmpr.RemoveDeviceSnapshot(ctx, snapKey)
+	require.NoError(t, err, fmt.Sprintf("Failed to remove snapshot %s", snapKey))
+}
+
+// TestDevmapper runs the snapshot create/fetch/remove cycle for a single image.
+func TestDevmapper(t *testing.T) {
+	snapKey := "testsnap-1"
+
+	// Create containerd client. Only defer Close once the client is known to be valid: on error there is no
+	// client to close.
+	client, err := containerd.New(containerdAddress)
+	require.NoError(t, err, "Containerd client creation returned error")
+	defer func() { _ = client.Close() }()
+
+	// Create image manager
+	mgr := image.NewImageManager(client, "devmapper")
+
+	// Create devmapper
+	dmpr := devmapper.NewDeviceMapper(client)
+
+	testDevmapper(t, mgr, dmpr, snapKey, TestImageName)
+}
+
+// TestDevmapperConcurrent runs the snapshot create/fetch/remove cycle for all test images concurrently on a shared
+// DeviceMapper.
+func TestDevmapperConcurrent(t *testing.T) {
+	// Create containerd client. Only defer Close once the client is known to be valid: on error there is no
+	// client to close.
+	client, err := containerd.New(containerdAddress)
+	require.NoError(t, err, "Containerd client creation returned error")
+	defer func() { _ = client.Close() }()
+
+	// Create image manager
+	mgr := image.NewImageManager(client, "devmapper")
+
+	// Create devmapper
+	dmpr := devmapper.NewDeviceMapper(client)
+
+	// Test concurrent devmapper
+	images := getAllImages()
+
+	var wg sync.WaitGroup
+	wg.Add(len(images))
+
+	for _, imgName := range images {
+		go func(imgName string) {
+			// Deferred so that the wait group is released even when a require failure terminates this
+			// goroutine early; otherwise wg.Wait below would block forever.
+			defer wg.Done()
+
+			snapKey := fmt.Sprintf("testsnap-%s", imgName)
+			testDevmapper(t, mgr, dmpr, snapKey, imgName)
+		}(imgName)
+	}
+	wg.Wait()
+}