From bf541c6740fbb4f351547cccf40a85cc548dbadc Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Wed, 5 Jun 2024 18:16:54 -0400 Subject: [PATCH] machine/linux: Support virtiofs mounts (retain 9p default) I'm hitting a bug with 9p when trying to transfer large files. In RHEL at least 9p isn't supported because it's known to have a lot of design flaws; virtiofsd is the supported and recommended way to share files between a host and guest. Add a new hidden `PODMAN_MACHINE_VIRTFS` environment variable that can be set to `virtiofs` to switch to virtiofsd. Signed-off-by: Colin Walters --- contrib/cirrus/setup_environment.sh | 7 +- pkg/machine/apple/apple.go | 2 +- pkg/machine/e2e/basic_test.go | 27 +++++ pkg/machine/provider/platform.go | 2 +- pkg/machine/qemu/command/qemu_command_test.go | 2 + pkg/machine/qemu/machine.go | 21 +++- pkg/machine/qemu/stubber.go | 78 +++++++++++++-- pkg/machine/qemu/virtiofsd.go | 99 +++++++++++++++++++ pkg/machine/volumes.go | 5 + 9 files changed, 229 insertions(+), 14 deletions(-) create mode 100644 pkg/machine/qemu/virtiofsd.go diff --git a/contrib/cirrus/setup_environment.sh b/contrib/cirrus/setup_environment.sh index de16f88a22..605196cb17 100755 --- a/contrib/cirrus/setup_environment.sh +++ b/contrib/cirrus/setup_environment.sh @@ -410,7 +410,12 @@ case "$TEST_FLAVOR" in install_test_configs ;; machine-linux) - showrun dnf install -y podman-gvproxy* + showrun dnf install -y podman-gvproxy* virtiofsd + # Bootstrap this link if it isn't yet in the package; xref + # https://github.com/containers/podman/pull/22920 + if ! test -L /usr/libexec/podman/virtiofsd; then + showrun ln -sfr /usr/libexec/virtiofsd /usr/libexec/podman/virtiofsd + fi remove_packaged_podman_files showrun make install PREFIX=/usr ETCDIR=/etc install_test_configs diff --git a/pkg/machine/apple/apple.go b/pkg/machine/apple/apple.go index 93201407e9..7e0733437f 100644 --- a/pkg/machine/apple/apple.go +++ b/pkg/machine/apple/apple.go @@ -87,7 +87,7 @@ func GenerateSystemDFilesForVirtiofsMounts(mounts []machine.VirtIoFs) ([]ignitio mountUnit.Add("Mount", "What", "%s") mountUnit.Add("Mount", "Where", "%s") mountUnit.Add("Mount", "Type", "virtiofs") - mountUnit.Add("Mount", "Options", "context=\"system_u:object_r:nfs_t:s0\"") + mountUnit.Add("Mount", "Options", fmt.Sprintf("context=\"%s\"", machine.NFSSELinuxContext)) mountUnit.Add("Install", "WantedBy", "multi-user.target") mountUnitFile, err := mountUnit.ToString() if err != nil { diff --git a/pkg/machine/e2e/basic_test.go b/pkg/machine/e2e/basic_test.go index f1826a403b..2ddee67547 100644 --- a/pkg/machine/e2e/basic_test.go +++ b/pkg/machine/e2e/basic_test.go @@ -101,6 +101,33 @@ var _ = Describe("run basic podman commands", func() { Expect(runAlp).To(Exit(0)) }) + It("Volume should be virtiofs", func() { + // In theory this could run on MacOS too, but we know virtiofs works for that now, + // this is just testing linux + skipIfNotVmtype(define.QemuVirt, "This is just adding coverage for virtiofs on linux") + + tDir, err := filepath.Abs(GinkgoT().TempDir()) + Expect(err).ToNot(HaveOccurred()) + + err = os.WriteFile(filepath.Join(tDir, "testfile"), []byte("some test contents"), 0o644) + Expect(err).ToNot(HaveOccurred()) + + name := randomString() + i := new(initMachine).withImage(mb.imagePath).withNow() + + // Ensure that this is a volume, it may not be automatically on qemu + i.withVolume(tDir) + session, err := mb.setName(name).setCmd(i).run() + Expect(err).ToNot(HaveOccurred()) + Expect(session).To(Exit(0)) + + ssh := new(sshMachine).withSSHCommand([]string{"findmnt", "-no", "FSTYPE", tDir}) + findmnt, err := mb.setName(name).setCmd(ssh).run() + Expect(err).ToNot(HaveOccurred()) + Expect(findmnt).To(Exit(0)) + Expect(findmnt.outputToString()).To(ContainSubstring("virtiofs")) + }) + It("Podman ops with port forwarding and gvproxy", func() { name := randomString() i := new(initMachine) diff --git a/pkg/machine/provider/platform.go b/pkg/machine/provider/platform.go index b9cca9ca7a..2eb40072e1 100644 --- a/pkg/machine/provider/platform.go +++ b/pkg/machine/provider/platform.go @@ -32,7 +32,7 @@ func Get() (vmconfigs.VMProvider, error) { logrus.Debugf("Using Podman machine with `%s` virtualization provider", resolvedVMType.String()) switch resolvedVMType { case define.QemuVirt: - return new(qemu.QEMUStubber), nil + return qemu.NewStubber() default: return nil, fmt.Errorf("unsupported virtualization provider: `%s`", resolvedVMType.String()) } diff --git a/pkg/machine/qemu/command/qemu_command_test.go b/pkg/machine/qemu/command/qemu_command_test.go index ee329db22d..4860dacaf7 100644 --- a/pkg/machine/qemu/command/qemu_command_test.go +++ b/pkg/machine/qemu/command/qemu_command_test.go @@ -53,6 +53,8 @@ func TestQemuCmd(t *testing.T) { expected := []string{ "/usr/bin/qemu-system-x86_64", + "-object", + "memory-backend-memfd,id=mem,size=2048M,share=on", "-m", "2048", "-smp", "4", "-fw_cfg", fmt.Sprintf("name=opt/com.coreos/config,file=%s", ignPath), diff --git a/pkg/machine/qemu/machine.go b/pkg/machine/qemu/machine.go index 5cb0878a37..be33d627f3 100644 --- a/pkg/machine/qemu/machine.go +++ b/pkg/machine/qemu/machine.go @@ -25,9 +25,28 @@ import ( ) const ( - MountType9p = "9p" + MountType9p = "9p" + MountTypeVirtiofs = "virtiofs" ) +func NewStubber() (*QEMUStubber, error) { + var mountType string + if v, ok := os.LookupEnv("PODMAN_MACHINE_VIRTFS"); ok { + logrus.Debugf("using virtiofs %q", v) + switch v { + case MountType9p, MountTypeVirtiofs: + mountType = v + default: + return nil, fmt.Errorf("failed to parse PODMAN_MACHINE_VIRTFS=%s", v) + } + } else { + mountType = MountType9p + } + return &QEMUStubber{ + mountType: mountType, + }, nil +} + // qemuPid returns -1 or the PID of the running QEMU instance. func qemuPid(pidFile *define.VMFile) (int, error) { pidData, err := os.ReadFile(pidFile.GetPath()) diff --git a/pkg/machine/qemu/stubber.go b/pkg/machine/qemu/stubber.go index 26da286921..0a0aa15bf6 100644 --- a/pkg/machine/qemu/stubber.go +++ b/pkg/machine/qemu/stubber.go @@ -31,6 +31,11 @@ type QEMUStubber struct { vmconfigs.QEMUConfig // Command describes the final QEMU command line Command command.QemuCmd + + // mountType is one of virtiofs or 9p + mountType string + // virtiofsHelpers are virtiofsd child processes + virtiofsHelpers []virtiofsdHelperCmd } var ( @@ -83,13 +88,6 @@ func (q *QEMUStubber) setQEMUCommandLine(mc *vmconfigs.MachineConfig) error { } q.Command.SetSerialPort(*readySocket, *mc.QEMUHypervisor.QEMUPidPath, mc.Name) - // Add volumes to qemu command line - for _, mount := range mc.Mounts { - // the index provided in this case is thrown away - _, _, _, _, securityModel := vmconfigs.SplitVolume(0, mount.OriginalInput) - q.Command.SetVirtfsMount(mount.Source, mount.Tag, securityModel, mount.ReadOnly) - } - q.Command.SetUSBHostPassthrough(mc.Resources.USBs) return nil @@ -168,6 +166,33 @@ func (q *QEMUStubber) StartVM(mc *vmconfigs.MachineConfig) (func() error, func() defer dnr.Close() defer dnw.Close() + if q.mountType == MountTypeVirtiofs { + runtime, err := mc.RuntimeDir() + if err != nil { + return nil, nil, err + } + spawner, err := newVirtiofsdSpawner(runtime) + if err != nil { + return nil, nil, err + } + + for _, hostmnt := range mc.Mounts { + qemuArgs, virtiofsdHelper, err := spawner.spawnForMount(hostmnt) + if err != nil { + return nil, nil, fmt.Errorf("failed to init virtiofsd for mount %s: %w", hostmnt.Source, err) + } + q.Command = append(q.Command, qemuArgs...) + q.virtiofsHelpers = append(q.virtiofsHelpers, *virtiofsdHelper) + } + } else { + // Add volumes to qemu command line + for _, mount := range mc.Mounts { + // the index provided in this case is thrown away + _, _, _, _, securityModel := vmconfigs.SplitVolume(0, mount.OriginalInput) + q.Command.SetVirtfsMount(mount.Source, mount.Tag, securityModel, mount.ReadOnly) + } + } + cmdLine := q.Command // Disable graphic window when not in debug mode @@ -198,8 +223,20 @@ func (q *QEMUStubber) StartVM(mc *vmconfigs.MachineConfig) (func() error, func() return waitForReady(readySocket, cmd.Process.Pid, stderrBuf) } + releaseFunc := func() error { + if err := cmd.Process.Release(); err != nil { + return err + } + for _, virtiofsdCmd := range q.virtiofsHelpers { + if err := virtiofsdCmd.command.Process.Release(); err != nil { + return err + } + } + return nil + } + // if this is not the last line in the func, make it a defer - return cmd.Process.Release, readyFunc, nil + return releaseFunc, readyFunc, nil } func waitForReady(readySocket *define.VMFile, pid int, stdErrBuffer *bytes.Buffer) error { @@ -325,7 +362,9 @@ func (q *QEMUStubber) MountVolumesToVM(mc *vmconfigs.MachineConfig, quiet bool) if err != nil { return err } - switch mount.Type { + // NOTE: We ignore q.Type here (and it should eventually be dropped from being serialized); we + // want the mount type to be dynamic, not static. + switch q.mountType { case MountType9p: mountOptions := []string{"-t", "9p"} mountOptions = append(mountOptions, []string{"-o", "trans=virtio", mount.Tag, mount.Target}...) @@ -337,6 +376,18 @@ func (q *QEMUStubber) MountVolumesToVM(mc *vmconfigs.MachineConfig, quiet bool) if err != nil { return err } + case MountTypeVirtiofs: + mountOptions := []string{"-t", "virtiofs"} + mountOptions = append(mountOptions, []string{mount.Tag, mount.Target}...) + mountFlags := fmt.Sprintf("context=\"%s\"", machine.NFSSELinuxContext) + if mount.ReadOnly { + mountFlags += ",ro" + } + mountOptions = append(mountOptions, "-o", mountFlags) + err = machine.CommonSSH(mc.SSH.RemoteUsername, mc.SSH.IdentityPath, mc.Name, mc.SSH.Port, append([]string{"sudo", "mount"}, mountOptions...)) + if err != nil { + return err + } default: return fmt.Errorf("unknown mount type: %s", mount.Type) } @@ -345,7 +396,14 @@ func (q *QEMUStubber) MountVolumesToVM(mc *vmconfigs.MachineConfig, quiet bool) } func (q *QEMUStubber) MountType() vmconfigs.VolumeMountType { - return vmconfigs.NineP + switch q.mountType { + case MountType9p: + return vmconfigs.NineP + case MountTypeVirtiofs: + return vmconfigs.VirtIOFS + default: + panic(fmt.Errorf("unexpected mount type %q", q.mountType)) + } } func (q *QEMUStubber) PostStartNetworking(mc *vmconfigs.MachineConfig, noInfo bool) error { diff --git a/pkg/machine/qemu/virtiofsd.go b/pkg/machine/qemu/virtiofsd.go new file mode 100644 index 0000000000..fe2c697006 --- /dev/null +++ b/pkg/machine/qemu/virtiofsd.go @@ -0,0 +1,99 @@ +//go:build linux || freebsd + +package qemu + +import ( + "fmt" + "os" + "os/exec" + "time" + + "github.com/containers/common/pkg/config" + "github.com/containers/podman/v5/pkg/machine/define" + "github.com/containers/podman/v5/pkg/machine/vmconfigs" + "github.com/containers/storage/pkg/fileutils" + "github.com/sirupsen/logrus" +) + +// VirtiofsdSpawner spawns an instance of virtiofsd +type virtiofsdSpawner struct { + runtimeDir *define.VMFile + binaryPath string + mountCount uint +} + +func newVirtiofsdSpawner(runtimeDir *define.VMFile) (*virtiofsdSpawner, error) { + var binaryPath string + cfg, err := config.Default() + if err != nil { + return nil, err + } + binaryPath, err = cfg.FindHelperBinary("virtiofsd", true) + if err != nil { + return nil, fmt.Errorf("failed to find virtiofsd: %w", err) + } + return &virtiofsdSpawner{binaryPath: binaryPath, runtimeDir: runtimeDir}, nil +} + +// createVirtiofsCmd returns a new command instance configured to launch virtiofsd. +func (v *virtiofsdSpawner) createVirtiofsCmd(directory, socketPath string) *exec.Cmd { + args := []string{"--sandbox", "none", "--socket-path", socketPath, "--shared-dir", "."} + // We don't need seccomp filtering; we trust our workloads. This incidentally + // works around issues like https://gitlab.com/virtio-fs/virtiofsd/-/merge_requests/200. + args = append(args, "--seccomp=none") + cmd := exec.Command(v.binaryPath, args...) + // This sets things up so that the `.` we passed in the arguments is the target directory + cmd.Dir = directory + // Quiet the daemon by default + cmd.Env = append(cmd.Environ(), "RUST_LOG=ERROR") + cmd.Stdin = nil + cmd.Stdout = nil + if logrus.IsLevelEnabled(logrus.DebugLevel) { + cmd.Stderr = os.Stderr + } + return cmd +} + +type virtiofsdHelperCmd struct { + command exec.Cmd + socket *define.VMFile +} + +// spawnForMount returns on success a combination of qemu commandline and child process for virtiofsd +func (v *virtiofsdSpawner) spawnForMount(hostmnt *vmconfigs.Mount) ([]string, *virtiofsdHelperCmd, error) { + logrus.Debugf("Initializing virtiofsd mount for %s", hostmnt.Source) + // By far the most common failure to spawn virtiofsd will be a typo'd source directory, + // so let's synchronously check that ourselves here. + if err := fileutils.Exists(hostmnt.Source); err != nil { + return nil, nil, fmt.Errorf("failed to access virtiofs source directory %s", hostmnt.Source) + } + virtiofsChar := fmt.Sprintf("virtiofschar%d", v.mountCount) + virtiofsCharPath, err := v.runtimeDir.AppendToNewVMFile(virtiofsChar, nil) + if err != nil { + return nil, nil, err + } + + qemuCommand := []string{} + + qemuCommand = append(qemuCommand, "-chardev", fmt.Sprintf("socket,id=%s,path=%s", virtiofsChar, virtiofsCharPath.Path)) + qemuCommand = append(qemuCommand, "-device", fmt.Sprintf("vhost-user-fs-pci,queue-size=1024,chardev=%s,tag=%s", virtiofsChar, hostmnt.Tag)) + // TODO: Honor hostmnt.readonly somehow here (add an option to virtiofsd) + virtiofsdCmd := v.createVirtiofsCmd(hostmnt.Source, virtiofsCharPath.Path) + if err := virtiofsdCmd.Start(); err != nil { + return nil, nil, fmt.Errorf("failed to start virtiofsd") + } + // Wait for the socket + for { + if err := fileutils.Exists(virtiofsCharPath.Path); err == nil { + break + } + logrus.Debugf("waiting for virtiofsd socket %q", virtiofsCharPath.Path) + time.Sleep(time.Millisecond * 100) + } + // Increment our count of mounts which are used to create unique names for the devices + v.mountCount += 1 + return qemuCommand, &virtiofsdHelperCmd{ + command: *virtiofsdCmd, + socket: virtiofsCharPath, + }, nil +} diff --git a/pkg/machine/volumes.go b/pkg/machine/volumes.go index 8fda9155e3..241361eb3a 100644 --- a/pkg/machine/volumes.go +++ b/pkg/machine/volumes.go @@ -7,6 +7,11 @@ import ( "github.com/containers/podman/v5/pkg/machine/vmconfigs" ) +// NFSSELinuxContext is what is used by NFS mounts, which is allowed +// access by container_t. We need to fix the Fedora selinux policy +// to just allow access to virtiofs_t. +const NFSSELinuxContext = "system_u:object_r:nfs_t:s0" + type Volume interface { Kind() VolumeKind }