From c49f7741c39fad1c04910315285643e92be70242 Mon Sep 17 00:00:00 2001 From: Sergio Lopez Date: Wed, 7 Aug 2024 15:45:06 +0200 Subject: [PATCH] arch: implement SHM region management We have multiple devices (fs and gpu) that can use SHM regions. So far, we were creating a single SHM region so only one device could make use of it. In this commit, we implement SHM region management so multiple devices can request their own regions. For the moment, we're hardcoding the SHM region sizes for gpu and fs. A future commit will extend the API so users can configure those sizes as desired. Signed-off-by: Sergio Lopez --- src/arch/src/aarch64/mod.rs | 17 +-- src/arch/src/lib.rs | 14 ++- src/arch/src/x86_64/mod.rs | 21 ++-- src/devices/src/virtio/fs/server.rs | 12 +- src/devices/src/virtio/gpu/worker.rs | 3 +- src/vmm/src/builder.rs | 164 +++++++++++++++++++-------- src/vmm/src/device_manager/mod.rs | 3 + src/vmm/src/device_manager/shm.rs | 90 +++++++++++++++ src/vmm/src/lib.rs | 4 +- src/vmm/src/linux/vstate.rs | 14 ++- src/vmm/src/macos/vstate.rs | 1 - 11 files changed, 261 insertions(+), 82 deletions(-) create mode 100644 src/vmm/src/device_manager/shm.rs diff --git a/src/arch/src/aarch64/mod.rs b/src/arch/src/aarch64/mod.rs index 9450b94d..c76d99a9 100644 --- a/src/arch/src/aarch64/mod.rs +++ b/src/arch/src/aarch64/mod.rs @@ -16,12 +16,11 @@ pub mod macos; #[cfg(target_os = "macos")] pub use self::macos::*; -use std::cmp::min; use std::collections::HashMap; use std::fmt::Debug; use self::gic::GICDevice; -use crate::ArchMemoryInfo; +use crate::{round_up, ArchMemoryInfo}; use vm_memory::{Address, GuestAddress, GuestMemory, GuestMemoryMmap}; #[cfg(feature = "efi")] @@ -42,8 +41,6 @@ pub enum Error { /// The start of the memory area reserved for MMIO devices. pub const MMIO_MEM_START: u64 = layout::MAPPED_IO_START; -/// The size of the MMIO shared memory area used by virtio-fs DAX. 
-pub const MMIO_SHM_SIZE: u64 = 1 << 33; pub use self::fdt::DeviceInfoForFDT; use crate::DeviceType; @@ -51,26 +48,24 @@ use crate::DeviceType; /// Returns a Vec of the valid memory addresses for aarch64. /// See [`layout`](layout) module for a drawing of the specific memory model for this platform. pub fn arch_memory_regions(size: usize) -> (ArchMemoryInfo, Vec<(GuestAddress, usize)>) { - let dram_size = min(size as u64, layout::DRAM_MEM_MAX_SIZE) as usize; + let page_size: usize = unsafe { libc::sysconf(libc::_SC_PAGESIZE).try_into().unwrap() }; + let dram_size = round_up(size, page_size); let ram_last_addr = layout::DRAM_MEM_START + (dram_size as u64); let shm_start_addr = ((ram_last_addr / 0x4000_0000) + 1) * 0x4000_0000; + let info = ArchMemoryInfo { ram_last_addr, shm_start_addr, - shm_size: MMIO_SHM_SIZE, + page_size, }; let regions = if cfg!(feature = "efi") { vec![ // Space for loading EDK2 and its variables (GuestAddress(0u64), 0x800_0000), (GuestAddress(layout::DRAM_MEM_START), dram_size), - (GuestAddress(shm_start_addr), MMIO_SHM_SIZE as usize), ] } else { - vec![ - (GuestAddress(layout::DRAM_MEM_START), dram_size), - (GuestAddress(shm_start_addr), MMIO_SHM_SIZE as usize), - ] + vec![(GuestAddress(layout::DRAM_MEM_START), dram_size)] }; (info, regions) diff --git a/src/arch/src/lib.rs b/src/arch/src/lib.rs index f09be292..87046e80 100644 --- a/src/arch/src/lib.rs +++ b/src/arch/src/lib.rs @@ -11,7 +11,7 @@ use std::result; pub struct ArchMemoryInfo { pub ram_last_addr: u64, pub shm_start_addr: u64, - pub shm_size: u64, + pub page_size: usize, } /// Module for aarch64 related functionality. @@ -22,7 +22,6 @@ pub mod aarch64; pub use aarch64::{ arch_memory_regions, configure_system, get_kernel_start, initrd_load_addr, layout::CMDLINE_MAX_SIZE, layout::IRQ_BASE, layout::IRQ_MAX, Error, MMIO_MEM_START, - MMIO_SHM_SIZE, }; /// Module for x86_64 related functionality. 
@@ -33,7 +32,7 @@ pub mod x86_64; pub use crate::x86_64::{ arch_memory_regions, configure_system, get_kernel_start, initrd_load_addr, layout::CMDLINE_MAX_SIZE, layout::IRQ_BASE, layout::IRQ_MAX, Error, BIOS_SIZE, BIOS_START, - MMIO_MEM_START, MMIO_SHM_SIZE, RESET_VECTOR, + MMIO_MEM_START, RESET_VECTOR, }; /// Type for returning public functions outcome. @@ -66,6 +65,15 @@ pub struct InitrdConfig { /// Default (smallest) memory page size for the supported architectures. pub const PAGE_SIZE: usize = 4096; +pub fn round_up(size: usize, align: usize) -> usize { + let page_mask = align - 1; + (size + page_mask) & !page_mask +} +pub fn round_down(size: usize, align: usize) -> usize { + let page_mask = !(align - 1); + size & page_mask +} + impl fmt::Display for DeviceType { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{self:?}") diff --git a/src/arch/src/x86_64/mod.rs b/src/arch/src/x86_64/mod.rs index 64dc6113..e31046e8 100644 --- a/src/arch/src/x86_64/mod.rs +++ b/src/arch/src/x86_64/mod.rs @@ -17,8 +17,7 @@ pub mod msr; /// Logic for configuring x86_64 registers. pub mod regs; -use crate::ArchMemoryInfo; -use crate::InitrdConfig; +use crate::{round_up, ArchMemoryInfo, InitrdConfig}; use arch_gen::x86::bootparam::{boot_params, E820_RAM}; use vm_memory::Bytes; use vm_memory::{ @@ -60,8 +59,6 @@ const FIRST_ADDR_PAST_32BITS: u64 = 1 << 32; const MEM_32BIT_GAP_SIZE: u64 = 768 << 20; /// The start of the memory area reserved for MMIO devices. pub const MMIO_MEM_START: u64 = FIRST_ADDR_PAST_32BITS - MEM_32BIT_GAP_SIZE; -/// The size of the MMIO shared memory area used by virtio-fs DAX. -pub const MMIO_SHM_SIZE: u64 = 1 << 33; /// Returns a Vec of the valid memory addresses. /// These should be used to configure the GuestMemoryMmap structure for the platform. 
@@ -73,6 +70,9 @@ pub fn arch_memory_regions( kernel_load_addr: u64, kernel_size: usize, ) -> (ArchMemoryInfo, Vec<(GuestAddress, usize)>) { + let page_size: usize = unsafe { libc::sysconf(libc::_SC_PAGESIZE).try_into().unwrap() }; + + let size = round_up(size, page_size); if size < (kernel_load_addr + kernel_size as u64) as usize { panic!("Kernel doesn't fit in RAM"); } @@ -90,7 +90,6 @@ pub fn arch_memory_regions( vec![ (GuestAddress(0), kernel_load_addr as usize), (GuestAddress(kernel_load_addr + kernel_size as u64), size), - (GuestAddress(FIRST_ADDR_PAST_32BITS), MMIO_SHM_SIZE as usize), ], ) } @@ -108,7 +107,6 @@ pub fn arch_memory_regions( (MMIO_MEM_START - (kernel_load_addr + kernel_size as u64)) as usize, ), (GuestAddress(FIRST_ADDR_PAST_32BITS), remaining), - (GuestAddress(shm_start_addr), MMIO_SHM_SIZE as usize), ], ) } @@ -116,7 +114,7 @@ pub fn arch_memory_regions( let info = ArchMemoryInfo { ram_last_addr, shm_start_addr, - shm_size: MMIO_SHM_SIZE, + page_size, }; (info, regions) } @@ -132,6 +130,9 @@ pub fn arch_memory_regions( kernel_load_addr: u64, kernel_size: usize, ) -> (ArchMemoryInfo, Vec<(GuestAddress, usize)>) { + let page_size: usize = unsafe { libc::sysconf(libc::_SC_PAGESIZE).try_into().unwrap() }; + + let size = round_up(size, page_size); if size < (kernel_load_addr + kernel_size as u64) as usize { panic!("Kernel doesn't fit in RAM"); } @@ -170,7 +171,7 @@ pub fn arch_memory_regions( let info = ArchMemoryInfo { ram_last_addr, shm_start_addr, - shm_size: 0, + page_size, }; (info, regions) } @@ -319,7 +320,7 @@ mod tests { #[test] fn regions_lt_4gb() { let (_info, regions) = arch_memory_regions(1usize << 29, KERNEL_LOAD_ADDR, KERNEL_SIZE); - assert_eq!(3, regions.len()); + assert_eq!(2, regions.len()); assert_eq!(GuestAddress(0), regions[0].0); assert_eq!(KERNEL_LOAD_ADDR as usize, regions[0].1); assert_eq!( @@ -333,7 +334,7 @@ mod tests { fn regions_gt_4gb() { let (_info, regions) = arch_memory_regions((1usize << 32) + 0x8000, 
KERNEL_LOAD_ADDR, KERNEL_SIZE); - assert_eq!(4, regions.len()); + assert_eq!(3, regions.len()); assert_eq!(GuestAddress(0), regions[0].0); assert_eq!(KERNEL_LOAD_ADDR as usize, regions[0].1); assert_eq!( diff --git a/src/devices/src/virtio/fs/server.rs b/src/devices/src/virtio/fs/server.rs index 8f91e693..b97f6641 100644 --- a/src/devices/src/virtio/fs/server.rs +++ b/src/devices/src/virtio/fs/server.rs @@ -137,11 +137,19 @@ impl Server { x if x == Opcode::CopyFileRange as u32 => self.copyfilerange(in_header, r, w), x if (x == Opcode::SetupMapping as u32) && shm_region.is_some() => { let shm = shm_region.unwrap(); - self.setupmapping(in_header, r, w, shm.host_addr, shm.size as u64) + #[cfg(target_os = "linux")] + let shm_base_addr = shm.host_addr; + #[cfg(target_os = "macos")] + let shm_base_addr = shm.guest_addr; + self.setupmapping(in_header, r, w, shm_base_addr, shm.size as u64) } x if (x == Opcode::RemoveMapping as u32) && shm_region.is_some() => { let shm = shm_region.unwrap(); - self.removemapping(in_header, r, w, shm.host_addr, shm.size as u64) + #[cfg(target_os = "linux")] + let shm_base_addr = shm.host_addr; + #[cfg(target_os = "macos")] + let shm_base_addr = shm.guest_addr; + self.removemapping(in_header, r, w, shm_base_addr, shm.size as u64) } _ => reply_error( linux_error(io::Error::from_raw_os_error(libc::ENOSYS)), diff --git a/src/devices/src/virtio/gpu/worker.rs b/src/devices/src/virtio/gpu/worker.rs index 5597d055..16c15ca2 100644 --- a/src/devices/src/virtio/gpu/worker.rs +++ b/src/devices/src/virtio/gpu/worker.rs @@ -16,7 +16,7 @@ use utils::eventfd::EventFd; use vm_memory::{GuestAddress, GuestMemoryMmap}; use super::super::descriptor_utils::{Reader, Writer}; -use super::super::{GpuError, Queue as VirtQueue, VirtioShmRegion, VIRTIO_MMIO_INT_VRING}; +use super::super::{GpuError, Queue as VirtQueue, VIRTIO_MMIO_INT_VRING}; use super::protocol::{ virtio_gpu_ctrl_hdr, virtio_gpu_mem_entry, GpuCommand, GpuResponse, VirtioGpuResult, }; @@ -24,6 +24,7 @@ 
use super::virtio_gpu::VirtioGpu; use crate::legacy::Gic; use crate::virtio::gpu::protocol::{VIRTIO_GPU_FLAG_FENCE, VIRTIO_GPU_FLAG_INFO_RING_IDX}; use crate::virtio::gpu::virtio_gpu::VirtioGpuRing; +use crate::virtio::VirtioShmRegion; use crate::Error as DeviceError; pub struct Worker { diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 8e517625..f360c3b7 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -18,13 +18,12 @@ use super::{Error, Vmm}; #[cfg(target_arch = "x86_64")] use crate::device_manager::legacy::PortIODeviceManager; use crate::device_manager::mmio::MMIODeviceManager; +use crate::resources::VmResources; #[cfg(target_os = "macos")] use devices::legacy::VcpuList; use devices::legacy::{Gic, Serial}; #[cfg(feature = "net")] use devices::virtio::Net; -#[cfg(not(feature = "tee"))] -use devices::virtio::VirtioShmRegion; use devices::virtio::{port_io, MmioTransport, PortDescription, Vsock}; #[cfg(target_os = "macos")] use hvf::MemoryMapping; @@ -45,8 +44,6 @@ use crate::vmm_config::block::BlockBuilder; use crate::vmm_config::boot_source::DEFAULT_KERNEL_CMDLINE; #[cfg(not(feature = "tee"))] use crate::vmm_config::fs::FsBuilder; -#[cfg(feature = "tee")] -use crate::vmm_config::kernel_bundle::{InitrdBundle, QbootBundle}; #[cfg(target_os = "linux")] use crate::vstate::KvmContext; #[cfg(all(target_os = "linux", feature = "tee"))] @@ -55,6 +52,9 @@ use crate::vstate::{Error as VstateError, Vcpu, VcpuConfig, Vm}; use arch::ArchMemoryInfo; #[cfg(feature = "tee")] use arch::InitrdConfig; +use device_manager::shm::ShmManager; +#[cfg(not(feature = "tee"))] +use devices::virtio::VirtioShmRegion; #[cfg(feature = "tee")] use kvm_bindings::KVM_MAX_CPUID_ENTRIES; use libc::{STDERR_FILENO, STDIN_FILENO, STDOUT_FILENO}; @@ -63,6 +63,8 @@ use polly::event_manager::{Error as EventManagerError, EventManager}; use utils::eventfd::EventFd; #[cfg(not(feature = "efi"))] use vm_memory::mmap::MmapRegion; +#[cfg(not(feature = "tee"))] +use 
vm_memory::Address; #[cfg(any(target_arch = "aarch64", feature = "tee"))] use vm_memory::Bytes; #[cfg(all(target_arch = "x86_64", not(feature = "tee")))] @@ -130,7 +132,12 @@ pub enum StartMicrovmError { SecureVirtAttest(VstateError), /// Cannot initialize the Secure Virtualization backend. SecureVirtPrepare(VstateError), - + /// Error configuring an SHM region. + ShmConfig(device_manager::shm::Error), + /// Error creating an SHM region. + ShmCreate(device_manager::shm::Error), + /// Error obtaining the host address of an SHM region. + ShmHostAddr(vm_memory::GuestMemoryError), /// The TEE specified is not supported. InvalidTee, } @@ -294,6 +301,27 @@ impl Display for StartMicrovmError { "Cannot initialize the Secure Virtualization backend. {err_msg}" ) } + ShmHostAddr(ref err) => { + let mut err_msg = format!("{:?}", err); + err_msg = err_msg.replace('\"', ""); + + write!( + f, + "Error obtaining the host address of an SHM region. {err_msg}" + ) + } + ShmConfig(ref err) => { + let mut err_msg = format!("{:?}", err); + err_msg = err_msg.replace('\"', ""); + + write!(f, "Error while configuring an SHM region. {err_msg}") + } + ShmCreate(ref err) => { + let mut err_msg = format!("{:?}", err); + err_msg = err_msg.replace('\"', ""); + + write!(f, "Error while creating an SHM region. {err_msg}") + } InvalidTee => { write!(f, "TEE selected is not currently supported") } @@ -310,7 +338,7 @@ enum Payload { #[cfg(feature = "efi")] Efi, #[cfg(feature = "tee")] - Tee(MmapRegion, u64, usize, &QbootBundle, &InitrdBundle), + Tee(MmapRegion, u64, usize, u64, usize, u64, usize), } /// Builds and starts a microVM based on the current Firecracker VmResources configuration. 
@@ -351,8 +379,10 @@ pub fn build_microvm( kernel_region, kernel_bundle.guest_addr, kernel_bundle.size, - qboot_bundle, - initrd_bundle, + qboot_bundle.host_addr, + qboot_bundle.size, + initrd_bundle.host_addr, + initrd_bundle.size, ); #[cfg(all(target_os = "linux", target_arch = "x86_64", not(feature = "tee")))] let payload = Payload::KernelMmap(kernel_region, kernel_bundle.guest_addr, kernel_bundle.size); @@ -361,11 +391,15 @@ pub fn build_microvm( #[cfg(all(target_arch = "aarch64", feature = "efi"))] let payload = Payload::Efi; - let (guest_memory, arch_memory_info) = create_guest_memory( + let (guest_memory, arch_memory_info, mut _shm_manager) = create_guest_memory( vm_resources .vm_config() .mem_size_mib .ok_or(StartMicrovmError::MissingMemSizeConfig)?, + #[cfg(feature = "tee")] + None, + #[cfg(not(feature = "tee"))] + Some(vm_resources), payload, )?; let vcpu_config = vm_resources.vcpu_config(); @@ -576,15 +610,6 @@ pub fn build_microvm( )?; } - #[cfg(not(feature = "tee"))] - let _shm_region = Some(VirtioShmRegion { - host_addr: guest_memory - .get_host_address(GuestAddress(arch_memory_info.shm_start_addr)) - .unwrap() as u64, - guest_addr: arch_memory_info.shm_start_addr, - size: arch_memory_info.shm_size as usize, - }); - let mut vmm = Vmm { guest_memory, arch_memory_info, @@ -613,7 +638,7 @@ pub fn build_microvm( attach_gpu_device( &mut vmm, event_manager, - _shm_region, + &mut _shm_manager, intc.clone(), virgl_flags, #[cfg(target_os = "macos")] @@ -621,7 +646,7 @@ pub fn build_microvm( )?; } #[cfg(not(feature = "tee"))] - attach_fs_devices(&mut vmm, &vm_resources.fs, None, intc.clone())?; + attach_fs_devices(&mut vmm, &vm_resources.fs, &mut _shm_manager, intc.clone())?; #[cfg(feature = "blk")] attach_block_devices(&mut vmm, &vm_resources.block, intc.clone())?; if let Some(vsock) = vm_resources.vsock.get() { @@ -729,7 +754,7 @@ fn load_payload( Ok(guest_mem) } #[cfg(all(target_arch = "x86_64", not(feature = "tee")))] - Payload::KernelMmap(kernel_region, 
kernel_load_addr, kernel_size) => guest_mem + Payload::KernelMmap(kernel_region, kernel_load_addr, _kernel_size) => guest_mem .insert_region(Arc::new( GuestRegionMmap::new(kernel_region, GuestAddress(kernel_load_addr)) .map_err(StartMicrovmError::GuestMemoryMmap)?, @@ -738,23 +763,29 @@ fn load_payload( #[cfg(test)] Payload::Empty => Ok(guest_mem), #[cfg(feature = "tee")] - Payload::Tee(kernel_region, kernel_load_addr, kernel_size, qboot_bundle, initrd_bundle) => { + Payload::Tee( + kernel_region, + kernel_load_addr, + kernel_size, + qboot_host_addr, + qboot_size, + initrd_host_addr, + initrd_size, + ) => { let kernel_data = unsafe { std::slice::from_raw_parts(kernel_region.as_ptr(), kernel_size) }; guest_mem .write(kernel_data, GuestAddress(kernel_load_addr)) .unwrap(); - let qboot_data = unsafe { - std::slice::from_raw_parts(qboot_bundle.host_addr as *mut u8, qboot_bundle.size) - }; + let qboot_data = + unsafe { std::slice::from_raw_parts(qboot_host_addr as *mut u8, qboot_size) }; guest_mem .write(qboot_data, GuestAddress(arch::BIOS_START)) .unwrap(); - let initrd_data = unsafe { - std::slice::from_raw_parts(initrd_bundle.host_addr as *mut u8, initrd_bundle.size) - }; + let initrd_data = + unsafe { std::slice::from_raw_parts(initrd_host_addr as *mut u8, initrd_size) }; guest_mem .write( initrd_data, @@ -773,34 +804,57 @@ fn load_payload( fn create_guest_memory( mem_size: usize, + vm_resources: Option<&VmResources>, payload: Payload, -) -> std::result::Result<(GuestMemoryMmap, ArchMemoryInfo), StartMicrovmError> { +) -> std::result::Result<(GuestMemoryMmap, ArchMemoryInfo, ShmManager), StartMicrovmError> { let mem_size = mem_size << 20; #[cfg(target_arch = "x86_64")] - let (arch_mem_info, arch_mem_regions) = match payload { + let (arch_mem_info, mut arch_mem_regions) = match payload { #[cfg(not(feature = "tee"))] - Payload::KernelMmap(_kernel_region, kernel_load_addr, kernel_size) => { - arch::arch_memory_regions(mem_size, payload, kernel_size) + 
Payload::KernelMmap(ref _kernel_region, kernel_load_addr, kernel_size) => { + arch::arch_memory_regions(mem_size, kernel_load_addr, kernel_size) } #[cfg(feature = "tee")] Payload::Tee( - _kernel_region, + ref _kernel_region, kernel_load_addr, kernel_size, - _qboot_bundle, - _initrd_bundle, - ) => arch::arch_memory_regions(mem_size, payload, kernel_size), + _qboot_host_addr, + _qboot_size, + _initrd_host_addr, + _initrd_size, + ) => arch::arch_memory_regions(mem_size, kernel_load_addr, kernel_size), + #[cfg(test)] + Payload::Empty => arch::arch_memory_regions(mem_size, 0, 0), }; #[cfg(target_arch = "aarch64")] - let (arch_mem_info, arch_mem_regions) = arch::arch_memory_regions(mem_size); + let (arch_mem_info, mut arch_mem_regions) = arch::arch_memory_regions(mem_size); + + let mut shm_manager = ShmManager::new(&arch_mem_info); + + if let Some(vm_resources) = vm_resources { + #[cfg(not(feature = "tee"))] + for (index, _fs) in vm_resources.fs.list.iter().enumerate() { + shm_manager + .create_fs_region(index, 1 << 29) + .map_err(StartMicrovmError::ShmCreate)?; + } + if vm_resources.gpu_virgl_flags.is_some() { + shm_manager + .create_gpu_region(1 << 33) + .map_err(StartMicrovmError::ShmCreate)?; + } + + arch_mem_regions.extend(shm_manager.regions()); + } let guest_mem = GuestMemoryMmap::from_ranges(&arch_mem_regions) .map_err(StartMicrovmError::GuestMemoryMmap)?; let guest_mem = load_payload(guest_mem, payload)?; - Ok((guest_mem, arch_mem_info)) + Ok((guest_mem, arch_mem_info, shm_manager)) } #[cfg(all(target_arch = "x86_64", not(feature = "tee")))] @@ -1117,7 +1171,7 @@ fn attach_mmio_device( fn attach_fs_devices( vmm: &mut Vmm, fs_devs: &FsBuilder, - shm_region: Option, + shm_manager: &mut ShmManager, intc: Option>>, ) -> std::result::Result<(), StartMicrovmError> { use self::StartMicrovmError::*; @@ -1129,8 +1183,15 @@ fn attach_fs_devices( fs.lock().unwrap().set_intc(intc.clone()); } - if let Some(ref shm) = shm_region { - 
fs.lock().unwrap().set_shm_region(shm.clone()); + if let Some(shm_region) = shm_manager.fs_region(i) { + fs.lock().unwrap().set_shm_region(VirtioShmRegion { + host_addr: vmm + .guest_memory + .get_host_address(shm_region.guest_addr) + .map_err(StartMicrovmError::ShmHostAddr)? as u64, + guest_addr: shm_region.guest_addr.raw_value(), + size: shm_region.size, + }); } // The device mutex mustn't be locked here otherwise it will deadlock. @@ -1385,7 +1446,7 @@ fn attach_rng_device( fn attach_gpu_device( vmm: &mut Vmm, event_manager: &mut EventManager, - shm_region: Option, + shm_manager: &mut ShmManager, intc: Option>>, virgl_flags: u32, #[cfg(target_os = "macos")] map_sender: Sender, @@ -1411,8 +1472,15 @@ fn attach_gpu_device( gpu.lock().unwrap().set_intc(intc); } - if let Some(ref shm) = shm_region { - gpu.lock().unwrap().set_shm_region(shm.clone()); + if let Some(shm_region) = shm_manager.gpu_region() { + gpu.lock().unwrap().set_shm_region(VirtioShmRegion { + host_addr: vmm + .guest_memory + .get_host_address(shm_region.guest_addr) + .map_err(StartMicrovmError::ShmHostAddr)? as u64, + guest_addr: shm_region.guest_addr.raw_value(), + size: shm_region.size, + }); } // The device mutex mustn't be locked here otherwise it will deadlock. 
@@ -1449,7 +1517,7 @@ pub mod tests { fn default_guest_memory( mem_size_mib: usize, - ) -> std::result::Result<(GuestMemoryMmap, ArchMemoryInfo), StartMicrovmError> { + ) -> std::result::Result<(GuestMemoryMmap, ArchMemoryInfo, ShmManager), StartMicrovmError> { let kernel_guest_addr: u64 = 0x1000; let kernel_size: usize = 0x1000; let kernel_host_addr: u64 = 0x1000; @@ -1460,6 +1528,7 @@ pub mod tests { create_guest_memory( mem_size_mib, + None, Payload::KernelMmap(kernel_region, kernel_guest_addr, kernel_size), ) } @@ -1469,7 +1538,7 @@ pub mod tests { fn test_create_vcpus_x86_64() { let vcpu_count = 2; - let (guest_memory, _arch_memory_info) = default_guest_memory(128).unwrap(); + let (guest_memory, _arch_memory_info, _shm_manager) = default_guest_memory(128).unwrap(); let mut vm = setup_vm(&guest_memory).unwrap(); setup_interrupt_controller(&mut vm).unwrap(); let vcpu_config = VcpuConfig { @@ -1496,7 +1565,8 @@ pub mod tests { #[test] #[cfg(all(target_arch = "aarch64", target_os = "linux"))] fn test_create_vcpus_aarch64() { - let (guest_memory, _arch_memory_info) = create_guest_memory(128, Payload::Empty).unwrap(); + let (guest_memory, _arch_memory_info, _shm_manager) = + create_guest_memory(128, None, Payload::Empty).unwrap(); let vm = setup_vm(&guest_memory).unwrap(); let vcpu_count = 2; diff --git a/src/vmm/src/device_manager/mod.rs b/src/vmm/src/device_manager/mod.rs index 5e37286d..b73c8efe 100644 --- a/src/vmm/src/device_manager/mod.rs +++ b/src/vmm/src/device_manager/mod.rs @@ -8,6 +8,9 @@ /// Legacy Device Manager. pub mod legacy; +/// Device Shared Memory Region Manager. +pub mod shm; + /// Memory Mapped I/O Manager. 
#[cfg(target_os = "linux")] pub mod kvm; diff --git a/src/vmm/src/device_manager/shm.rs b/src/vmm/src/device_manager/shm.rs new file mode 100644 index 00000000..f8fa4dfa --- /dev/null +++ b/src/vmm/src/device_manager/shm.rs @@ -0,0 +1,90 @@ +use std::collections::HashMap; + +use arch::{round_up, ArchMemoryInfo}; +use vm_memory::GuestAddress; + +#[derive(Debug)] +pub enum Error { + DuplicatedGpuRegion, + OutOfSpace, +} + +#[derive(Clone)] +pub struct ShmRegion { + pub guest_addr: GuestAddress, + pub size: usize, +} + +pub struct ShmManager { + next_guest_addr: u64, + page_size: usize, + fs_regions: HashMap, + gpu_region: Option, +} + +impl ShmManager { + pub fn new(info: &ArchMemoryInfo) -> ShmManager { + Self { + next_guest_addr: info.shm_start_addr, + page_size: info.page_size, + fs_regions: HashMap::new(), + gpu_region: None, + } + } + + pub fn regions(&self) -> Vec<(GuestAddress, usize)> { + let mut regions: Vec<(GuestAddress, usize)> = Vec::new(); + + for region in self.fs_regions.iter() { + regions.push((region.1.guest_addr, region.1.size)); + } + + if let Some(region) = &self.gpu_region { + regions.push((region.guest_addr, region.size)); + } + + regions + } + + #[cfg(not(feature = "tee"))] + pub fn fs_region(&self, index: usize) -> Option<&ShmRegion> { + self.fs_regions.get(&index) + } + + #[cfg(feature = "gpu")] + pub fn gpu_region(&self) -> Option<&ShmRegion> { + self.gpu_region.as_ref() + } + + fn create_region(&mut self, size: usize) -> Result { + let size = round_up(size, self.page_size); + + let region = ShmRegion { + guest_addr: GuestAddress(self.next_guest_addr), + size, + }; + + if let Some(addr) = self.next_guest_addr.checked_add(size as u64) { + self.next_guest_addr = addr; + Ok(region) + } else { + Err(Error::OutOfSpace) + } + } + + pub fn create_gpu_region(&mut self, size: usize) -> Result<(), Error> { + if self.gpu_region.is_some() { + Err(Error::DuplicatedGpuRegion) + } else { + self.gpu_region = Some(self.create_region(size)?); + Ok(()) + } + 
} + + #[cfg(not(feature = "tee"))] + pub fn create_fs_region(&mut self, index: usize, size: usize) -> Result<(), Error> { + let region = self.create_region(size)?; + self.fs_regions.insert(index, region); + Ok(()) + } +} diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index ea3fdb38..0e680a84 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -52,9 +52,7 @@ use crate::terminal::term_set_canonical_mode; use crate::vstate::VcpuEvent; use crate::vstate::{Vcpu, VcpuHandle, VcpuResponse, Vm}; -use arch::ArchMemoryInfo; -use arch::DeviceType; -use arch::InitrdConfig; +use arch::{ArchMemoryInfo, DeviceType, InitrdConfig}; #[cfg(target_os = "macos")] use crossbeam_channel::Sender; use devices::virtio::VmmExitObserver; diff --git a/src/vmm/src/linux/vstate.rs b/src/vmm/src/linux/vstate.rs index d02dad59..77726880 100644 --- a/src/vmm/src/linux/vstate.rs +++ b/src/vmm/src/linux/vstate.rs @@ -34,7 +34,6 @@ use kbs_types::Tee; #[cfg(feature = "tee")] use crate::resources::TeeConfig; use crate::vmm_config::machine_config::CpuFeaturesTemplate; -use arch; #[cfg(target_arch = "aarch64")] use arch::aarch64::gic::GICDevice; #[cfg(target_arch = "x86_64")] @@ -111,6 +110,8 @@ pub enum Error { SetupGIC(arch::aarch64::gic::Error), /// Cannot set the memory regions. SetUserMemoryRegion(kvm_ioctls::Error), + /// Error creating memory map for SHM region. + ShmMmap(io::Error), #[cfg(feature = "amd-sev")] /// Error initializing the Secure Virtualization Backend (SEV). SevSecVirtInit(SevError), @@ -270,6 +271,7 @@ impl Display for Error { "Cannot set the local interruption due to bad configuration: {e:?}" ), SetUserMemoryRegion(e) => write!(f, "Cannot set the memory regions: {e}"), + ShmMmap(e) => write!(f, "Error creating memory map for SHM region: {e}"), #[cfg(feature = "tee")] SevSecVirtInit(e) => { write!( @@ -453,6 +455,7 @@ impl KvmContext { /// A wrapper around creating and using a VM. pub struct Vm { fd: VmFd, + next_mem_slot: u32, // X86 specific fields. 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] @@ -492,6 +495,7 @@ impl Vm { Ok(Vm { fd: vm_fd, + next_mem_slot: 0, #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] supported_cpuid, #[cfg(target_arch = "x86_64")] @@ -524,6 +528,7 @@ impl Vm { Ok(Vm { fd: vm_fd, + next_mem_slot: 0, supported_cpuid, supported_msrs, sev, @@ -553,12 +558,12 @@ impl Vm { if guest_mem.num_regions() > kvm_max_memslots { return Err(Error::NotEnoughMemorySlots); } - for (index, region) in guest_mem.iter().enumerate() { + for region in guest_mem.iter() { // It's safe to unwrap because the guest address is valid. let host_addr = guest_mem.get_host_address(region.start_addr()).unwrap(); - info!("Guest memory starts at {:x?}", host_addr); + debug!("Guest memory starts at {:x?}", host_addr); let memory_region = kvm_userspace_memory_region { - slot: index as u32, + slot: self.next_mem_slot, guest_phys_addr: region.start_addr().raw_value(), memory_size: region.len(), userspace_addr: host_addr as u64, @@ -571,6 +576,7 @@ impl Vm { .set_user_memory_region(memory_region) .map_err(Error::SetUserMemoryRegion)?; }; + self.next_mem_slot += 1; } #[cfg(target_arch = "x86_64")] diff --git a/src/vmm/src/macos/vstate.rs b/src/vmm/src/macos/vstate.rs index 3e89621e..0266b70b 100644 --- a/src/vmm/src/macos/vstate.rs +++ b/src/vmm/src/macos/vstate.rs @@ -17,7 +17,6 @@ use std::time::Duration; use super::super::{FC_EXIT_CODE_GENERIC_ERROR, FC_EXIT_CODE_OK}; use crate::vmm_config::machine_config::CpuFeaturesTemplate; -use arch; use arch::aarch64::gic::GICDevice; use crossbeam_channel::{unbounded, Receiver, RecvTimeoutError, Sender}; use devices::legacy::{Gic, VcpuList};