From 8ed3bd33888c896d0c750037693af497cdff29e1 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Tue, 24 Oct 2023 14:08:10 +0200 Subject: [PATCH 1/2] [Linux] Make krun_start_enter error when root directory does not exist Previously krun_start_enter would succeed and the guest kernel would just panic. The root filesystem directory was opened lazily when the guest kernel used fuse init opcode. This commit changes it so the root directory is opened when creating the fs device. This only fixes it on Linux, but not in the macOS implementation. Signed-off-by: Matej Hrica --- src/devices/src/virtio/fs/device.rs | 2 +- .../src/virtio/fs/linux/passthrough.rs | 82 ++++++++++--------- src/devices/src/virtio/fs/mod.rs | 2 + src/libkrun/src/lib.rs | 4 +- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/src/devices/src/virtio/fs/device.rs b/src/devices/src/virtio/fs/device.rs index b1e62ff2..1d69c86b 100644 --- a/src/devices/src/virtio/fs/device.rs +++ b/src/devices/src/virtio/fs/device.rs @@ -95,7 +95,7 @@ impl Fs { device_state: DeviceState::Inactive, config, shm_region: None, - server: Server::new(PassthroughFs::new(fs_cfg).unwrap()), + server: Server::new(PassthroughFs::new(fs_cfg).map_err(FsError::FailedToMount)?), intc: None, irq_line: None, }) diff --git a/src/devices/src/virtio/fs/linux/passthrough.rs b/src/devices/src/virtio/fs/linux/passthrough.rs index f9711f90..5d6cd657 100644 --- a/src/devices/src/virtio/fs/linux/passthrough.rs +++ b/src/devices/src/virtio/fs/linux/passthrough.rs @@ -266,6 +266,47 @@ impl Default for Config { } } +fn insert_root_dir( + root_dir: &str, + inodes: &mut MultikeyBTreeMap>, +) -> io::Result<()> { + let root = CString::new(root_dir).expect("CString::new failed"); + + // Safe because this doesn't modify any memory and we check the return value. + // We use `O_PATH` because we just want this for traversing the directory tree + // and not for actually reading the contents. + let fd = unsafe { + libc::openat( + libc::AT_FDCWD, + root.as_ptr(), + libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC, + ) + }; + if fd < 0 { + return Err(io::Error::last_os_error()); + } + + // Safe because we just opened this fd above. + let f = unsafe { File::from_raw_fd(fd) }; + + let st = stat(&f)?; + + // Not sure why the root inode gets a refcount of 2 but that's what libfuse does. + inodes.insert( + fuse::ROOT_ID, + InodeAltKey { + ino: st.st_ino, + dev: st.st_dev, + }, + Arc::new(InodeData { + inode: fuse::ROOT_ID, + file: f, + refcount: AtomicU64::new(2), + }), + ); + Ok(()) +} + /// A file system that simply "passes through" all requests it receives to the underlying file /// system. To keep the implementation simple it servers the contents of its root directory. Users /// that wish to serve only a specific directory should set up the environment so that that @@ -324,9 +365,11 @@ impl PassthroughFs { // Safe because we just opened this fd or it was provided by our caller. let proc_self_fd = unsafe { File::from_raw_fd(fd) }; + let mut inodes = MultikeyBTreeMap::new(); + insert_root_dir(&cfg.root_dir, &mut inodes)?; Ok(PassthroughFs { - inodes: RwLock::new(MultikeyBTreeMap::new()), + inodes: RwLock::new(inodes), next_inode: AtomicU64::new(fuse::ROOT_ID + 2), init_inode: fuse::ROOT_ID + 1, @@ -683,48 +726,11 @@ impl FileSystem for PassthroughFs { type Handle = Handle; fn init(&self, capable: FsOptions) -> io::Result { - let root = CString::new(self.cfg.root_dir.as_str()).expect("CString::new failed"); - - // Safe because this doesn't modify any memory and we check the return value. - // We use `O_PATH` because we just want this for traversing the directory tree - // and not for actually reading the contents. - let fd = unsafe { - libc::openat( - libc::AT_FDCWD, - root.as_ptr(), - libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC, - ) - }; - if fd < 0 { - return Err(io::Error::last_os_error()); - } - - // Safe because we just opened this fd above. - let f = unsafe { File::from_raw_fd(fd) }; - - let st = stat(&f)?; - // Safe because this doesn't modify any memory and there is no need to check the return // value because this system call always succeeds. We need to clear the umask here because // we want the client to be able to set all the bits in the mode. unsafe { libc::umask(0o000) }; - let mut inodes = self.inodes.write().unwrap(); - - // Not sure why the root inode gets a refcount of 2 but that's what libfuse does. - inodes.insert( - fuse::ROOT_ID, - InodeAltKey { - ino: st.st_ino, - dev: st.st_dev, - }, - Arc::new(InodeData { - inode: fuse::ROOT_ID, - file: f, - refcount: AtomicU64::new(2), - }), - ); - let mut opts = FsOptions::DO_READDIRPLUS | FsOptions::READDIRPLUS_AUTO; if self.cfg.writeback && capable.contains(FsOptions::WRITEBACK_CACHE) { opts |= FsOptions::WRITEBACK_CACHE; diff --git a/src/devices/src/virtio/fs/mod.rs b/src/devices/src/virtio/fs/mod.rs index 7e307303..ad491704 100644 --- a/src/devices/src/virtio/fs/mod.rs +++ b/src/devices/src/virtio/fs/mod.rs @@ -59,6 +59,8 @@ pub enum FsError { InvalidXattrSize((u32, usize)), QueueReader(DescriptorError), QueueWriter(DescriptorError), + /// Could not open the root directory or mapped volumes (they do not exist, permission error...) + FailedToMount(io::Error), } type Result = std::result::Result; diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index cd814588..68d97297 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -770,8 +770,8 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { #[cfg(not(feature = "tee"))] if let Some(fs_cfg) = ctx_cfg.get_fs_cfg() { - if ctx_cfg.vmr.set_fs_device(fs_cfg).is_err() { - error!("Error configuring virtio-fs"); + if let Err(e) = ctx_cfg.vmr.set_fs_device(fs_cfg) { + error!("Error configuring virtio-fs {e:?}"); return -libc::EINVAL; } } From 0ddaf3c6ec2dc89ae199d27ccb384c7baae629c8 Mon Sep 17 00:00:00 2001 From: Matej Hrica Date: Wed, 1 Nov 2023 13:58:45 +0100 Subject: [PATCH 2/2] [macOS] Make krun_start_enter error when root dir or volumes do not exist Signed-off-by: Matej Hrica --- .../src/virtio/fs/macos/passthrough.rs | 227 ++++++++++-------- 1 file changed, 127 insertions(+), 100 deletions(-) diff --git a/src/devices/src/virtio/fs/macos/passthrough.rs b/src/devices/src/virtio/fs/macos/passthrough.rs index 402b8fa5..2e1d83e7 100644 --- a/src/devices/src/virtio/fs/macos/passthrough.rs +++ b/src/devices/src/virtio/fs/macos/passthrough.rs @@ -72,6 +72,7 @@ struct LinuxDirent64 { d_reclen: libc::c_ushort, d_ty: libc::c_uchar, } + unsafe impl ByteValued for LinuxDirent64 {} fn ebadf() -> io::Error { @@ -522,13 +523,135 @@ fn read_rosetta_data() -> io::Result> { Ok(data) } +fn insert_root_dir( + root_dir: &str, + inodes: &mut MultikeyBTreeMap>, + path_cache: &mut BTreeMap>, +) -> io::Result<()> { + let root_dir_cstr = CString::new(root_dir).expect("CString::new failed"); + + // Safe because this doesn't modify any memory and we check the return value. + let fd = unsafe { + libc::openat( + libc::AT_FDCWD, + root_dir_cstr.as_ptr(), + libc::O_NOFOLLOW | libc::O_CLOEXEC, + ) + }; + if fd < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + // Safe because we just opened this fd above. + let f = unsafe { File::from_raw_fd(fd) }; + + let st = fstat(&f)?; + + // Safe because this doesn't modify any memory and there is no need to check the return + // value because this system call always succeeds. We need to clear the umask here because + // we want the client to be able to set all the bits in the mode. + unsafe { libc::umask(0o000) }; + + // Not sure why the root inode gets a refcount of 2 but that's what libfuse does. + inodes.insert( + fuse::ROOT_ID, + InodeAltKey { + ino: st.st_ino, + dev: st.st_dev, + }, + Arc::new(InodeData { + inode: fuse::ROOT_ID, + linkdata: CString::new("").unwrap(), + refcount: AtomicU64::new(2), + }), + ); + add_path(path_cache, fuse::ROOT_ID, root_dir.to_owned()); + Ok(()) +} + +fn insert_mapped_volumes( + mapped_volumes: &[(PathBuf, PathBuf)], + inodes: &mut MultikeyBTreeMap>, + next_inode: &mut u64, + path_cache: &mut BTreeMap>, + host_volumes: &mut HashMap, +) -> io::Result<()> { + for (host_vol, guest_vol) in mapped_volumes { + assert!(host_vol.is_absolute()); + assert!(guest_vol.is_absolute()); + assert_eq!(guest_vol.components().count(), 2); + + let guest_vol_str = guest_vol + .file_name() + .unwrap() + .to_str() + .expect("Couldn't parse guest volume as String"); + let host_vol_str = host_vol + .to_str() + .expect("Couldn't parse host volume as String"); + let path = CString::new(host_vol_str).expect("Couldn't parse volume as CString"); + // Safe because this doesn't modify any memory and we check the return value. + let fd = unsafe { libc::open(path.as_ptr(), libc::O_NOFOLLOW | libc::O_CLOEXEC) }; + if fd < 0 { + error!( + "Error setting up mapped volume: {:?}:{:?}: {:?}", + host_vol, + guest_vol, + io::Error::last_os_error(), + ); + continue; + } + + // Safe because we just opened this fd above. + let f = unsafe { File::from_raw_fd(fd) }; + + let st = fstat(&f)?; + let inode = *next_inode; + *next_inode += 1; + + inodes.insert( + inode, + InodeAltKey { + ino: st.st_ino, + dev: st.st_dev, + }, + Arc::new(InodeData { + inode, + linkdata: CString::new("").unwrap(), + refcount: AtomicU64::new(1), + }), + ); + add_path(path_cache, inode, host_vol_str.to_string()); + host_volumes.insert(guest_vol_str.to_string(), inode); + } + + Ok(()) +} + impl PassthroughFs { pub fn new(cfg: Config) -> io::Result { + let mut inodes = MultikeyBTreeMap::new(); + let mut next_inode = fuse::ROOT_ID + 2; + let mut path_cache = BTreeMap::new(); + let mut host_volumes = HashMap::new(); + + insert_root_dir(&cfg.root_dir, &mut inodes, &mut path_cache)?; + + if let Some(mapped_volumes) = &cfg.mapped_volumes { + insert_mapped_volumes( + mapped_volumes, + &mut inodes, + &mut next_inode, + &mut path_cache, + &mut host_volumes, + )?; + } + Ok(PassthroughFs { - inodes: RwLock::new(MultikeyBTreeMap::new()), - next_inode: AtomicU64::new(fuse::ROOT_ID + 2), + inodes: RwLock::new(inodes), + next_inode: AtomicU64::new(next_inode), init_inode: fuse::ROOT_ID + 1, - path_cache: Mutex::new(BTreeMap::new()), + path_cache: Mutex::new(path_cache), file_cache: Mutex::new(LruCache::new(NonZeroUsize::new(256).unwrap())), pinned_files: Mutex::new(BTreeMap::new()), @@ -536,7 +659,7 @@ impl PassthroughFs { next_handle: AtomicU64::new(1), init_handle: 0, - host_volumes: RwLock::new(HashMap::new()), + host_volumes: RwLock::new(host_volumes), writeback: AtomicBool::new(false), rosetta_data: read_rosetta_data().ok(), @@ -1021,102 +1144,6 @@ impl FileSystem for PassthroughFs { type Handle = Handle; fn init(&self, capable: FsOptions) -> io::Result { - let root = CString::new(self.cfg.root_dir.as_str()).expect("CString::new failed"); - - // Safe because this doesn't modify any memory and we check the return value. - let fd = unsafe { - libc::openat( - libc::AT_FDCWD, - root.as_ptr(), - libc::O_NOFOLLOW | libc::O_CLOEXEC, - ) - }; - if fd < 0 { - return Err(linux_error(io::Error::last_os_error())); - } - - // Safe because we just opened this fd above. - let f = unsafe { File::from_raw_fd(fd) }; - - let st = fstat(&f)?; - - // Safe because this doesn't modify any memory and there is no need to check the return - // value because this system call always succeeds. We need to clear the umask here because - // we want the client to be able to set all the bits in the mode. - unsafe { libc::umask(0o000) }; - - let mut inodes = self.inodes.write().unwrap(); - - // Not sure why the root inode gets a refcount of 2 but that's what libfuse does. - inodes.insert( - fuse::ROOT_ID, - InodeAltKey { - ino: st.st_ino, - dev: st.st_dev, - }, - Arc::new(InodeData { - inode: fuse::ROOT_ID, - linkdata: CString::new("").unwrap(), - refcount: AtomicU64::new(2), - }), - ); - - let mut path_cache = self.path_cache.lock().unwrap(); - add_path(&mut path_cache, fuse::ROOT_ID, self.cfg.root_dir.clone()); - - if let Some(mapped_volumes) = &self.cfg.mapped_volumes { - for (host_vol, guest_vol) in mapped_volumes.iter() { - assert!(host_vol.is_absolute()); - assert!(guest_vol.is_absolute()); - assert_eq!(guest_vol.components().count(), 2); - - let guest_vol_str = guest_vol - .file_name() - .unwrap() - .to_str() - .expect("Couldn't parse guest volume as String"); - let host_vol_str = host_vol - .to_str() - .expect("Couldn't parse host volume as String"); - let path = CString::new(host_vol_str).expect("Couldn't parse volume as CString"); - // Safe because this doesn't modify any memory and we check the return value. - let fd = unsafe { libc::open(path.as_ptr(), libc::O_NOFOLLOW | libc::O_CLOEXEC) }; - if fd < 0 { - error!( - "Error setting up mapped volume: {:?}:{:?}: {:?}", - host_vol, - guest_vol, - io::Error::last_os_error(), - ); - continue; - } - - // Safe because we just opened this fd above. - let f = unsafe { File::from_raw_fd(fd) }; - - let st = fstat(&f)?; - let inode = self.next_inode.fetch_add(1, Ordering::Relaxed); - - inodes.insert( - inode, - InodeAltKey { - ino: st.st_ino, - dev: st.st_dev, - }, - Arc::new(InodeData { - inode, - linkdata: CString::new("").unwrap(), - refcount: AtomicU64::new(1), - }), - ); - add_path(&mut path_cache, inode, host_vol_str.to_string()); - self.host_volumes - .write() - .unwrap() - .insert(guest_vol_str.to_string(), inode); - } - } - let mut opts = FsOptions::empty(); if self.cfg.writeback && capable.contains(FsOptions::WRITEBACK_CACHE) { opts |= FsOptions::WRITEBACK_CACHE;