From 292070826c604ee83bd1ecfa4cfc2aeb99ac8221 Mon Sep 17 00:00:00 2001 From: Sergio Lopez Date: Fri, 9 Aug 2024 10:32:55 +0200 Subject: [PATCH] virtio/fs/macos: implement DAX support Bring virtiofs DAX support into macOS relying on the mechanisms added for supporting virtio-gpu blobs, which allow us to request HVF the injection of memory regions. Signed-off-by: Sergio Lopez --- src/devices/src/virtio/fs/device.rs | 15 ++ src/devices/src/virtio/fs/filesystem.rs | 7 + .../src/virtio/fs/macos/passthrough.rs | 139 ++++++++++++++++++ src/devices/src/virtio/fs/server.rs | 42 +++++- src/devices/src/virtio/fs/worker.rs | 18 ++- src/vmm/src/builder.rs | 15 +- 6 files changed, 227 insertions(+), 9 deletions(-) diff --git a/src/devices/src/virtio/fs/device.rs b/src/devices/src/virtio/fs/device.rs index a2c17967..71ccd806 100644 --- a/src/devices/src/virtio/fs/device.rs +++ b/src/devices/src/virtio/fs/device.rs @@ -1,9 +1,13 @@ +#[cfg(target_os = "macos")] +use crossbeam_channel::Sender; use std::cmp; use std::io::Write; use std::sync::atomic::AtomicUsize; use std::sync::{Arc, Mutex}; use std::thread::JoinHandle; +#[cfg(target_os = "macos")] +use hvf::MemoryMapping; use utils::eventfd::{EventFd, EFD_NONBLOCK}; use virtio_bindings::{virtio_config::VIRTIO_F_VERSION_1, virtio_ring::VIRTIO_RING_F_EVENT_IDX}; use vm_memory::{ByteValued, GuestMemoryMmap}; @@ -49,6 +53,8 @@ pub struct Fs { passthrough_cfg: passthrough::Config, worker_thread: Option>, worker_stopfd: EventFd, + #[cfg(target_os = "macos")] + map_sender: Option>, } impl Fs { @@ -90,6 +96,8 @@ impl Fs { passthrough_cfg: fs_cfg, worker_thread: None, worker_stopfd: EventFd::new(EFD_NONBLOCK).map_err(FsError::EventFd)?, + #[cfg(target_os = "macos")] + map_sender: None, }) } @@ -112,6 +120,11 @@ impl Fs { pub fn set_shm_region(&mut self, shm_region: VirtioShmRegion) { self.shm_region = Some(shm_region); } + + #[cfg(target_os = "macos")] + pub fn set_map_sender(&mut self, map_sender: Sender) { + self.map_sender = 
Some(map_sender); + } } impl VirtioDevice for Fs { @@ -202,6 +215,8 @@ impl VirtioDevice for Fs { self.shm_region.clone(), self.passthrough_cfg.clone(), self.worker_stopfd.try_clone().unwrap(), + #[cfg(target_os = "macos")] + self.map_sender.clone(), ); self.worker_thread = Some(worker.run()); diff --git a/src/devices/src/virtio/fs/filesystem.rs b/src/devices/src/virtio/fs/filesystem.rs index 80ebdb09..6b4e6305 100644 --- a/src/devices/src/virtio/fs/filesystem.rs +++ b/src/devices/src/virtio/fs/filesystem.rs @@ -2,6 +2,11 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +#[cfg(target_os = "macos")] +use crossbeam_channel::Sender; +#[cfg(target_os = "macos")] +use hvf::MemoryMapping; + use std::convert::TryInto; use std::ffi::{CStr, CString}; use std::fs::File; @@ -1121,6 +1126,7 @@ pub trait FileSystem { moffset: u64, host_shm_base: u64, shm_size: u64, + #[cfg(target_os = "macos")] map_sender: &Option>, ) -> io::Result<()> { Err(io::Error::from_raw_os_error(libc::ENOSYS)) } @@ -1131,6 +1137,7 @@ pub trait FileSystem { requests: Vec, host_shm_base: u64, shm_size: u64, + #[cfg(target_os = "macos")] map_sender: &Option>, ) -> io::Result<()> { Err(io::Error::from_raw_os_error(libc::ENOSYS)) } diff --git a/src/devices/src/virtio/fs/macos/passthrough.rs b/src/devices/src/virtio/fs/macos/passthrough.rs index 414e11e2..23cd0cda 100644 --- a/src/devices/src/virtio/fs/macos/passthrough.rs +++ b/src/devices/src/virtio/fs/macos/passthrough.rs @@ -4,6 +4,7 @@ use std::collections::btree_map; use std::collections::BTreeMap; +use std::collections::HashMap; use std::ffi::{CStr, CString}; use std::fs::File; use std::io; @@ -11,11 +12,14 @@ use std::io; use std::mem; use std::mem::MaybeUninit; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; +use std::ptr::null_mut; use std::str::FromStr; use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; use std::sync::{Arc, Mutex, RwLock}; use std::time::Duration; +use 
crossbeam_channel::{unbounded, Sender};
+use hvf::MemoryMapping;
 use vm_memory::ByteValued;
 
 use crate::virtio::fs::filesystem::SecContext;
@@ -411,6 +415,9 @@ pub struct PassthroughFs {
     next_handle: AtomicU64,
     init_handle: u64,
 
+    // DAX windows currently mapped, keyed by guest address and holding the host mmap address.
+    map_windows: Mutex<HashMap<u64, u64>>,
+
     // Whether writeback caching is enabled for this directory. This will only be true when
     // `cfg.writeback` is true and `init` was called with `FsOptions::WRITEBACK_CACHE`.
     writeback: AtomicBool,
@@ -446,6 +453,8 @@ impl PassthroughFs {
             next_handle: AtomicU64::new(1),
             init_handle: 0,
 
+            map_windows: Mutex::new(HashMap::new()),
+
             writeback: AtomicBool::new(false),
             cfg,
         })
@@ -1802,4 +1811,179 @@ impl FileSystem for PassthroughFs {
             Ok(res as u64)
         }
     }
+
+    /// Maps `len` bytes of the file behind `inode`, starting at `foffset`, into the DAX window.
+    ///
+    /// The range is mmap'ed on the host and HVF is asked, through `map_sender`, to expose it to
+    /// the guest at `guest_shm_base + moffset`. On success the window is recorded in
+    /// `map_windows`, keyed by its guest address.
+    ///
+    /// # Errors
+    ///
+    /// Fails with `ENOSYS` when `map_sender` is `None`, and with `EINVAL` when the window does
+    /// not fit in the `shm_size` bytes of the region or HVF could not add the mapping. Failures
+    /// of `open_inode` and `mmap` are reported to the caller as well.
+    fn setupmapping(
+        &self,
+        _ctx: Context,
+        inode: Inode,
+        _handle: Handle,
+        foffset: u64,
+        len: u64,
+        flags: u64,
+        moffset: u64,
+        guest_shm_base: u64,
+        shm_size: u64,
+        map_sender: &Option<Sender<MemoryMapping>>,
+    ) -> io::Result<()> {
+        let sender = map_sender
+            .as_ref()
+            .ok_or_else(|| linux_error(io::Error::from_raw_os_error(libc::ENOSYS)))?;
+
+        let prot_flags = if (flags & fuse::SetupmappingFlags::WRITE.bits()) != 0 {
+            libc::PROT_READ | libc::PROT_WRITE
+        } else {
+            libc::PROT_READ
+        };
+
+        // `moffset` and `len` are provided by the guest, so the arithmetic is checked: a sum that
+        // wraps around must not be able to slip past the bounds check.
+        let guest_addr = match moffset.checked_add(len) {
+            Some(end) if end <= shm_size => guest_shm_base.checked_add(moffset),
+            _ => None,
+        }
+        .ok_or_else(|| linux_error(io::Error::from_raw_os_error(libc::EINVAL)))?;
+
+        debug!(
+            "setupmapping: ino {:?} guest_addr={:x} len={}",
+            inode, guest_addr, len
+        );
+
+        let file = self.open_inode(inode, libc::O_RDWR)?;
+
+        // SAFETY: the kernel picks the address of the new mapping, so no existing memory is
+        // affected, and the result is checked against MAP_FAILED before it's used.
+        let host_addr = unsafe {
+            libc::mmap(
+                null_mut(),
+                len as usize,
+                prot_flags,
+                libc::MAP_SHARED,
+                file.as_raw_fd(),
+                foffset as libc::off_t,
+            )
+        };
+        if host_addr == libc::MAP_FAILED {
+            return Err(linux_error(io::Error::last_os_error()));
+        }
+
+        // The mapping stays valid after its descriptor is closed, so release the file right away.
+        // Dropping it is the only close required: calling libc::close() on the fd as well would
+        // close the descriptor twice and might hit an fd reused by another thread.
+        // NOTE(review): assumes open_inode() returns an owning handle such as `File` - confirm.
+        drop(file);
+
+        let (reply_sender, reply_receiver) = unbounded();
+        // A disconnected channel is treated like a rejected request instead of panicking.
+        let added = sender
+            .send(MemoryMapping::AddMapping(
+                reply_sender,
+                host_addr as u64,
+                guest_addr,
+                len,
+            ))
+            .is_ok()
+            && reply_receiver.recv().unwrap_or(false);
+        if !added {
+            error!("Error requesting HVF the addition of a DAX window");
+            // SAFETY: `host_addr` and `len` describe the mapping created above, which has not
+            // been handed to anybody else.
+            unsafe { libc::munmap(host_addr, len as usize) };
+            return Err(linux_error(io::Error::from_raw_os_error(libc::EINVAL)));
+        }
+
+        self.map_windows
+            .lock()
+            .unwrap()
+            .insert(guest_addr, host_addr as u64);
+
+        Ok(())
+    }
+
+    /// Tears down the DAX windows listed in `requests`.
+    ///
+    /// For each request, HVF is asked through `map_sender` to remove the guest mapping at
+    /// `guest_shm_base + req.moffset`, and then the host mapping recorded in `map_windows` for
+    /// that address is unmapped. Processing stops at the first request that fails.
+    ///
+    /// # Errors
+    ///
+    /// Fails with `ENOSYS` when `map_sender` is `None`, and with `EINVAL` when a request does
+    /// not fit in the `shm_size` bytes of the region, refers to an address with no recorded
+    /// window, or HVF could not remove the mapping. A failing `munmap` is reported as well.
+    fn removemapping(
+        &self,
+        _ctx: Context,
+        requests: Vec<fuse::RemovemappingOne>,
+        guest_shm_base: u64,
+        shm_size: u64,
+        map_sender: &Option<Sender<MemoryMapping>>,
+    ) -> io::Result<()> {
+        let sender = map_sender
+            .as_ref()
+            .ok_or_else(|| linux_error(io::Error::from_raw_os_error(libc::ENOSYS)))?;
+
+        for req in requests {
+            // Guest-provided values: checked arithmetic keeps a wrapping sum from passing the
+            // bounds check.
+            let guest_addr = match req.moffset.checked_add(req.len) {
+                Some(end) if end <= shm_size => guest_shm_base.checked_add(req.moffset),
+                _ => None,
+            }
+            .ok_or_else(|| linux_error(io::Error::from_raw_os_error(libc::EINVAL)))?;
+
+            // Only look the window up here. It's forgotten once HVF has removed the guest
+            // mapping, so a failed request can still be retried.
+            let host_addr = self
+                .map_windows
+                .lock()
+                .unwrap()
+                .get(&guest_addr)
+                .copied()
+                .ok_or_else(|| linux_error(io::Error::from_raw_os_error(libc::EINVAL)))?;
+            debug!(
+                "removemapping: guest_addr={:x} len={:?}",
+                guest_addr, req.len
+            );
+
+            let (reply_sender, reply_receiver) = unbounded();
+            // A disconnected channel is treated like a rejected request instead of panicking.
+            let removed = sender
+                .send(MemoryMapping::RemoveMapping(
+                    reply_sender,
+                    guest_addr,
+                    req.len,
+                ))
+                .is_ok()
+                && reply_receiver.recv().unwrap_or(false);
+            if !removed {
+                error!("Error requesting HVF the removal of a DAX window");
+                return Err(linux_error(io::Error::from_raw_os_error(libc::EINVAL)));
+            }
+
+            self.map_windows.lock().unwrap().remove(&guest_addr);
+
+            // NOTE(review): the length comes from the request, not from the original mapping;
+            // confirm the guest always removes windows with the length it set them up with.
+            // SAFETY: `host_addr` was returned by mmap() in setupmapping() and the guest mapping
+            // that referenced it has just been removed.
+            let ret = unsafe { libc::munmap(host_addr as *mut libc::c_void, req.len as usize) };
+            if ret == -1 {
+                error!("Error unmapping DAX window");
+                return Err(linux_error(io::Error::last_os_error()));
+            }
+        }
+
+        Ok(())
+    }
 }
diff --git a/src/devices/src/virtio/fs/server.rs b/src/devices/src/virtio/fs/server.rs
index 39d6b7b4..95ddf731 100644
--- 
a/src/devices/src/virtio/fs/server.rs +++ b/src/devices/src/virtio/fs/server.rs @@ -2,6 +2,11 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +#[cfg(target_os = "macos")] +use crossbeam_channel::Sender; +#[cfg(target_os = "macos")] +use hvf::MemoryMapping; + use std::convert::TryInto; use std::ffi::{CStr, CString}; use std::fs::File; @@ -78,6 +83,7 @@ impl Server { mut r: Reader, w: Writer, shm_region: &Option, + #[cfg(target_os = "macos")] map_sender: &Option>, ) -> Result { let in_header: InHeader = r.read_obj().map_err(Error::DecodeMessage)?; @@ -141,7 +147,15 @@ impl Server { let shm_base_addr = shm.host_addr; #[cfg(target_os = "macos")] let shm_base_addr = shm.guest_addr; - self.setupmapping(in_header, r, w, shm_base_addr, shm.size as u64) + self.setupmapping( + in_header, + r, + w, + shm_base_addr, + shm.size as u64, + #[cfg(target_os = "macos")] + map_sender, + ) } x if (x == Opcode::RemoveMapping as u32) && shm_region.is_some() => { let shm = shm_region.as_ref().unwrap(); @@ -149,7 +163,15 @@ impl Server { let shm_base_addr = shm.host_addr; #[cfg(target_os = "macos")] let shm_base_addr = shm.guest_addr; - self.removemapping(in_header, r, w, shm_base_addr, shm.size as u64) + self.removemapping( + in_header, + r, + w, + shm_base_addr, + shm.size as u64, + #[cfg(target_os = "macos")] + map_sender, + ) } _ => reply_error( linux_error(io::Error::from_raw_os_error(libc::ENOSYS)), @@ -1309,6 +1331,7 @@ impl Server { w: Writer, host_shm_base: u64, shm_size: u64, + #[cfg(target_os = "macos")] map_sender: &Option>, ) -> Result { let SetupmappingIn { fh, @@ -1328,6 +1351,8 @@ impl Server { moffset, host_shm_base, shm_size, + #[cfg(target_os = "macos")] + map_sender, ) { Ok(()) => reply_ok(None::, None, in_header.unique, w), Err(e) => reply_error(e, in_header.unique, w), @@ -1341,6 +1366,7 @@ impl Server { w: Writer, host_shm_base: u64, shm_size: u64, + #[cfg(target_os = "macos")] map_sender: &Option>, ) -> 
Result { let RemovemappingIn { count } = r.read_obj().map_err(Error::DecodeMessage)?; @@ -1368,10 +1394,14 @@ impl Server { ); } - match self - .fs - .removemapping(Context::from(in_header), requests, host_shm_base, shm_size) - { + match self.fs.removemapping( + Context::from(in_header), + requests, + host_shm_base, + shm_size, + #[cfg(target_os = "macos")] + map_sender, + ) { Ok(()) => reply_ok(None::, None, in_header.unique, w), Err(e) => reply_error(e, in_header.unique, w), } diff --git a/src/devices/src/virtio/fs/worker.rs b/src/devices/src/virtio/fs/worker.rs index 7a5c428d..475133da 100644 --- a/src/devices/src/virtio/fs/worker.rs +++ b/src/devices/src/virtio/fs/worker.rs @@ -1,3 +1,8 @@ +#[cfg(target_os = "macos")] +use crossbeam_channel::Sender; +#[cfg(target_os = "macos")] +use hvf::MemoryMapping; + use std::os::fd::AsRawFd; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Mutex}; @@ -27,6 +32,8 @@ pub struct FsWorker { shm_region: Option, server: Server, stop_fd: EventFd, + #[cfg(target_os = "macos")] + map_sender: Option>, } impl FsWorker { @@ -42,6 +49,7 @@ impl FsWorker { shm_region: Option, passthrough_cfg: passthrough::Config, stop_fd: EventFd, + #[cfg(target_os = "macos")] map_sender: Option>, ) -> Self { Self { queues, @@ -55,6 +63,8 @@ impl FsWorker { shm_region, server: Server::new(PassthroughFs::new(passthrough_cfg).unwrap()), stop_fd, + #[cfg(target_os = "macos")] + map_sender, } } @@ -153,7 +163,13 @@ impl FsWorker { .map_err(FsError::QueueWriter) .unwrap(); - if let Err(e) = self.server.handle_message(reader, writer, &self.shm_region) { + if let Err(e) = self.server.handle_message( + reader, + writer, + &self.shm_region, + #[cfg(target_os = "macos")] + &self.map_sender, + ) { error!("error handling message: {:?}", e); } diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index f360c3b7..d1b950d8 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -642,11 +642,18 @@ pub fn build_microvm( 
intc.clone(), virgl_flags, #[cfg(target_os = "macos")] - _map_sender, + _map_sender.clone(), )?; } #[cfg(not(feature = "tee"))] - attach_fs_devices(&mut vmm, &vm_resources.fs, &mut _shm_manager, intc.clone())?; + attach_fs_devices( + &mut vmm, + &vm_resources.fs, + &mut _shm_manager, + intc.clone(), + #[cfg(target_os = "macos")] + _map_sender, + )?; #[cfg(feature = "blk")] attach_block_devices(&mut vmm, &vm_resources.block, intc.clone())?; if let Some(vsock) = vm_resources.vsock.get() { @@ -1173,6 +1180,7 @@ fn attach_fs_devices( fs_devs: &FsBuilder, shm_manager: &mut ShmManager, intc: Option>>, + #[cfg(target_os = "macos")] map_sender: Sender, ) -> std::result::Result<(), StartMicrovmError> { use self::StartMicrovmError::*; @@ -1194,6 +1202,9 @@ fn attach_fs_devices( }); } + #[cfg(target_os = "macos")] + fs.lock().unwrap().set_map_sender(map_sender.clone()); + // The device mutex mustn't be locked here otherwise it will deadlock. attach_mmio_device( vmm,