Changes from all commits (44 commits)
eb06840 cherry-pick gdma changes (justus-camp-microsoft, Apr 23, 2025)
e9e293f some cleanup (justus-camp-microsoft, Apr 24, 2025)
1d5479d unused import (justus-camp-microsoft, Apr 24, 2025)
93dfba9 get rid of crate (justus-camp-microsoft, Apr 24, 2025)
eea14e1 some of feedback (justus-camp-microsoft, May 2, 2025)
d6fc130 remove arm state, re-arm on restore (justus-camp-microsoft, May 2, 2025)
fe7e8bd save on hwc failure, save hwc failure state (justus-camp-microsoft, May 7, 2025)
a48267e move some duplicated code to init function (justus-camp-microsoft, May 7, 2025)
1950a50 Merge branch 'main' into gdma (Jun 25, 2025)
06ac5b2 Merge branch 'main' into gdma (Sep 8, 2025)
87ec2a3 retarget always (Sep 8, 2025)
980f5b0 unmap interrupts on drop (Sep 10, 2025)
054c767 PR feedback (justus-camp-microsoft, Sep 25, 2025)
1baec11 unmap all interrupts at once (justus-camp-microsoft, Sep 25, 2025)
038c2da remove eq_id_msix saving and reconstruct, other minor feedback (justus-camp-microsoft, Sep 25, 2025)
e6fcd99 enable keepalive (justus-camp-microsoft, Sep 11, 2025)
d799d6b triple fault to fix, only calling save and not restoring (justus-camp-microsoft, Sep 22, 2025)
0b4c25b calling save but still destroying everything, triple faults sometimes… (justus-camp-microsoft, Sep 23, 2025)
4dde9aa passing test, enabled by default currently (justus-camp-microsoft, Sep 24, 2025)
3177ddc default off (justus-camp-microsoft, Sep 25, 2025)
c403fc5 run format, make RPC return an option in case device disappears, remo… (justus-camp-microsoft, Sep 25, 2025)
8e30a76 some logging (justus-camp-microsoft, Sep 25, 2025)
6646ca2 merge main (justus-camp-microsoft, Oct 6, 2025)
b3a95cc cleanup from self-review (justus-camp-microsoft, Oct 6, 2025)
60008ad don't have mana keepalive on by default in openvmm RPC (justus-camp-microsoft, Oct 7, 2025)
6b4351e fix some ordering issues, move some tests around (justus-camp-microsoft, Oct 7, 2025)
c53567f Merge branch 'main' into full_enablement (justus-camp-microsoft, Oct 8, 2025)
46ac820 add a comment, put some duplicated code in a helper method (justus-camp-microsoft, Oct 10, 2025)
cdad413 Merge remote-tracking branch 'upstream/main' into full_enablement (justus-camp-microsoft, Oct 21, 2025)
b5a4c16 add an upgrade test (justus-camp-microsoft, Oct 23, 2025)
c7925a8 Merge branch 'main' into full_enablement (justus-camp-microsoft, Oct 23, 2025)
de661a1 logging changes (justus-camp-microsoft, Oct 30, 2025)
ba67c7b bail when keepalive not supported (justus-camp-microsoft, Oct 30, 2025)
d9b0b21 split into helper (justus-camp-microsoft, Oct 30, 2025)
0058602 Merge branch 'main' into full_enablement (justus-camp-microsoft, Oct 30, 2025)
3c2648c self-review (justus-camp-microsoft, Oct 30, 2025)
64d766e clean up error paths (justus-camp-microsoft, Oct 31, 2025)
1150c18 large_enum_variant (justus-camp-microsoft, Oct 31, 2025)
ecef6dc Merge remote-tracking branch 'upstream/main' into full_enablement (justus-camp-microsoft, Nov 5, 2025)
922344a try reverting error path cleanup to see if that's why CI is failing (justus-camp-microsoft, Nov 6, 2025)
93661e1 reapply error path changes (justus-camp-microsoft, Nov 6, 2025)
b7eae79 give the boot_heavy tests some more RAM (justus-camp-microsoft, Nov 6, 2025)
7b6c85b increase it one more time for fun (justus-camp-microsoft, Nov 6, 2025)
46424ea log the e820 map and its regions, also hide some memory from the kernel (justus-camp-microsoft, Nov 7, 2025)
76 changes: 69 additions & 7 deletions openhcl/openhcl_boot/src/main.rs
@@ -355,6 +355,7 @@ fn shim_parameters(shim_params_raw_offset: isize) -> ShimParams {
#[cfg_attr(not(target_arch = "x86_64"), expect(dead_code))]
mod x86_boot {
use crate::PageAlign;
use crate::boot_logger::log;
use crate::memory::AddressSpaceManager;
use crate::single_threaded::OffStackRef;
use crate::single_threaded::off_stack;
@@ -405,18 +406,41 @@ mod x86_boot {
boot_params: &mut boot_params,
ext: &mut E820Ext,
address_space: &AddressSpaceManager,
initrd: Option<Range<u64>>,
) -> Result<bool, BuildE820MapError> {
boot_params.e820_entries = 0;
let mut entries = boot_params
.e820_map
.iter_mut()
.chain(ext.entries.iter_mut());

// Find the largest RAM range and the one containing the initrd.
let mut largest_ram: Option<MemoryRange> = None;
let mut initrd_hosting_ram: Option<MemoryRange> = None;
for (range, typ) in address_space.vtl2_ranges() {
if matches!(typ, MemoryVtlType::VTL2_RAM) {
if largest_ram.is_none_or(|r| range.len() > r.len()) {
largest_ram = Some(range);
}

if let Some(initrd) = &initrd
&& range.start() <= initrd.start
&& range.end() >= initrd.end
{
initrd_hosting_ram = Some(range);
}
}
}

let mut n = 0;
for (range, typ) in address_space.vtl2_ranges() {
match typ {
MemoryVtlType::VTL2_RAM => {
add_e820_entry(entries.next(), range, E820_RAM)?;
if Some(range) == largest_ram || Some(range) == initrd_hosting_ram {
add_e820_entry(entries.next(), range, E820_RAM)?;
} else {
add_e820_entry(entries.next(), range, E820_RESERVED)?;
}
n += 1;
}
MemoryVtlType::VTL2_CONFIG
@@ -438,6 +462,43 @@
}
}

for (range, typ) in address_space.vtl2_ranges() {
let (kind, label) = match typ {
MemoryVtlType::VTL2_RAM => {
if Some(range) == largest_ram {
("RAM", "usable")
} else if Some(range) == initrd_hosting_ram {
("RAM", "initrd")
} else {
("RESERVED", "hidden_ram")
}
}
MemoryVtlType::VTL2_PERSISTED_STATE_HEADER => {
("RESERVED", "persisted_state_header")
}
MemoryVtlType::VTL2_PERSISTED_STATE_PROTOBUF => {
("RESERVED", "persisted_state_payload")
}
MemoryVtlType::VTL2_GPA_POOL => ("RESERVED", "keep_alive_gpa_pool"),
MemoryVtlType::VTL2_BOOTSHIM_LOG_BUFFER => ("RESERVED", "boot_log_buffer"),
MemoryVtlType::VTL2_TDX_PAGE_TABLES => ("RESERVED", "tdx_page_tables"),
MemoryVtlType::VTL2_CONFIG => ("RESERVED", "config"),
MemoryVtlType::VTL2_SIDECAR_IMAGE => ("RESERVED", "sidecar_image"),
MemoryVtlType::VTL2_SIDECAR_NODE => ("RESERVED", "sidecar_node"),
MemoryVtlType::VTL2_RESERVED => ("RESERVED", "generic_reserved"),
_ => ("UNKNOWN", "unexpected"),
};

log!(
"e820 {}: {:#x}-{:#x} len={:#x} {}",
kind,
range.start(),
range.end(),
range.len(),
label
);
}

let base = n.min(boot_params.e820_map.len());
boot_params.e820_entries = base as u8;

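The hunk above changes which VTL2 RAM is reported usable: only the largest RAM range and the range hosting the initrd stay E820_RAM, while every other VTL2 RAM range is now reported E820_RESERVED, hiding it from the kernel. A minimal sketch of that selection rule, assuming a simplified stand-in type (`Span`) instead of the real `MemoryRange`; `pick_usable` is illustrative, not code from this PR:

```rust
// Sketch of the selection rule; Span stands in for MemoryRange.
#[derive(Clone, Copy, PartialEq, Debug)]
struct Span {
    start: u64,
    end: u64,
}

impl Span {
    fn len(&self) -> u64 {
        self.end - self.start
    }
    fn contains(&self, other: &Span) -> bool {
        self.start <= other.start && self.end >= other.end
    }
}

/// Of all RAM spans, pick the ones that remain E820_RAM: the largest
/// span plus the span hosting the initrd (they may be the same one).
fn pick_usable(ram: &[Span], initrd: Option<Span>) -> (Option<Span>, Option<Span>) {
    let largest = ram.iter().copied().max_by_key(|s| s.len());
    let hosting = initrd.and_then(|i| ram.iter().copied().find(|s| s.contains(&i)));
    (largest, hosting)
}

fn main() {
    let ram = [
        Span { start: 0x10_0000, end: 0x20_0000 },
        Span { start: 0x100_0000, end: 0x4000_0000 },
    ];
    // The initrd sits in the smaller span, so both spans stay usable.
    let initrd = Some(Span { start: 0x18_0000, end: 0x1f_0000 });
    let (largest, hosting) = pick_usable(&ram, initrd);
    assert_eq!(largest, Some(ram[1]));
    assert_eq!(hosting, Some(ram[0]));
}
```

With the logging loop above, each range then emits one boot-log line of the form `e820 RAM: 0x1000000-0x40000000 len=0x3f000000 usable` (addresses illustrative, derived from the format string), so the hidden ranges are auditable after boot.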
@@ -474,12 +535,13 @@
boot_params.hdr.ramdisk_image = (initrd.start as u32).into();
boot_params.ext_ramdisk_image = (initrd.start >> 32) as u32;
let initrd_len = initrd.end - initrd.start;
log!("initrd start: {:#x} end: {:#x}", initrd.start, initrd.end);
boot_params.hdr.ramdisk_size = (initrd_len as u32).into();
boot_params.ext_ramdisk_size = (initrd_len >> 32) as u32;

let e820_ext = OffStackRef::leak(off_stack!(E820Ext, zeroed()));

let used_ext = build_e820_map(boot_params, e820_ext, address_space)
let used_ext = build_e820_map(boot_params, e820_ext, address_space, Some(initrd))
.expect("building e820 map must succeed");

if used_ext {
@@ -1128,7 +1190,7 @@ mod test {
None,
);

assert!(build_e820_map(&mut boot_params, &mut ext, &address_space).is_ok());
assert!(build_e820_map(&mut boot_params, &mut ext, &address_space, None).is_ok());

check_e820(
&boot_params,
@@ -1159,7 +1221,7 @@
Some(reclaim),
);

assert!(build_e820_map(&mut boot_params, &mut ext, &address_space).is_ok());
assert!(build_e820_map(&mut boot_params, &mut ext, &address_space, None).is_ok());

check_e820(
&boot_params,
@@ -1195,7 +1257,7 @@
Some(reclaim),
);

assert!(build_e820_map(&mut boot_params, &mut ext, &address_space).is_ok());
assert!(build_e820_map(&mut boot_params, &mut ext, &address_space, None).is_ok());

check_e820(
&boot_params,
@@ -1236,7 +1298,7 @@
Some(reclaim),
);

assert!(build_e820_map(&mut boot_params, &mut ext, &address_space).is_ok());
assert!(build_e820_map(&mut boot_params, &mut ext, &address_space, None).is_ok());

check_e820(
&boot_params,
@@ -1322,7 +1384,7 @@
let mut ext = FromZeros::new_zeroed();
let total_ranges = address_space.vtl2_ranges().count();

let used_ext = build_e820_map(&mut boot_params, &mut ext, &address_space).unwrap();
let used_ext = build_e820_map(&mut boot_params, &mut ext, &address_space, None).unwrap();

// Verify that we used the extension
assert!(used_ext, "should use extension when there are many ranges");
54 changes: 39 additions & 15 deletions openhcl/underhill_core/src/dispatch/mod.rs
@@ -34,6 +34,7 @@ use hyperv_ic_resources::shutdown::ShutdownRpc;
use hyperv_ic_resources::shutdown::ShutdownType;
use igvm_defs::MemoryMapEntryType;
use inspect::Inspect;
use mana_driver::save_restore::ManaSavedState;
use mesh::CancelContext;
use mesh::MeshPayload;
use mesh::error::RemoteError;
@@ -114,6 +115,8 @@ pub trait LoadedVmNetworkSettings: Inspect {
vmbus_server: &Option<VmbusServerHandle>,
dma_client_spawner: DmaClientSpawner,
is_isolated: bool,
save_restore_supported: bool,
mana_state: Option<&ManaSavedState>,
) -> anyhow::Result<RuntimeSavedState>;

/// Callback when network is removed externally.
@@ -127,6 +130,9 @@ pub trait LoadedVmNetworkSettings: Inspect {
&self,
mut params: PacketCaptureParams<Socket>,
) -> anyhow::Result<PacketCaptureParams<Socket>>;

/// Save the network state for restoration after servicing.
async fn save(&mut self) -> Vec<ManaSavedState>;
}
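For a sense of what the new `save` hook asks of implementors, here is a hedged sketch of a possible implementation. Every type below is a stand-in (the real `ManaSavedState` lives in `mana_driver::save_restore`), and `Endpoint`/`save_state` are assumptions; the `Option` return mirrors the "make RPC return an option in case device disappears" commit:

```rust
// Illustrative only: a stand-in implementation of the new trait method.
struct ManaSavedState; // stand-in for mana_driver::save_restore::ManaSavedState

struct Endpoint;

impl Endpoint {
    /// Returns None when the device disappeared during servicing,
    /// matching the Option-returning RPC introduced by this PR.
    async fn save_state(&mut self) -> Option<ManaSavedState> {
        Some(ManaSavedState)
    }
}

struct NetworkSettings {
    endpoints: Vec<Endpoint>,
}

impl NetworkSettings {
    /// Collects saved state from every endpoint that still exists;
    /// vanished devices are simply skipped rather than failing the save.
    async fn save(&mut self) -> Vec<ManaSavedState> {
        let mut saved = Vec::new();
        for ep in &mut self.endpoints {
            if let Some(state) = ep.save_state().await {
                saved.push(state);
            }
        }
        saved
    }
}
```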

/// A VM that has been loaded and can be run.
@@ -187,6 +193,7 @@ pub(crate) struct LoadedVm {
pub _periodic_telemetry_task: Task<()>,

pub nvme_keep_alive: bool,
pub mana_keep_alive: bool,
pub test_configuration: Option<TestScenarioConfig>,
pub dma_manager: OpenhclDmaManager,
}
@@ -298,7 +305,7 @@ impl LoadedVm {
WorkerRpc::Restart(rpc) => {
let state = async {
let running = self.stop().await;
match self.save(None, false).await {
match self.save(None, false, false).await {
Ok(servicing_state) => Some((rpc, servicing_state)),
Err(err) => {
if running {
@@ -363,7 +370,7 @@
UhVmRpc::Save(rpc) => {
rpc.handle_failable(async |()| {
let running = self.stop().await;
let r = self.save(None, false).await;
let r = self.save(None, false, false).await;
if running {
self.start(None).await;
}
@@ -565,6 +572,7 @@ impl LoadedVm {
// NOTE: This is set via the corresponding env arg, as this feature is
// experimental.
let nvme_keepalive = self.nvme_keep_alive && capabilities_flags.enable_nvme_keepalive();
let mana_keepalive = self.mana_keep_alive && capabilities_flags.enable_mana_keepalive();

// Do everything before the log flush under a span.
let r = async {
@@ -579,7 +587,7 @@
anyhow::bail!("cannot service underhill while paused");
}

let mut state = self.save(Some(deadline), nvme_keepalive).await?;
let mut state = self.save(Some(deadline), nvme_keepalive, mana_keepalive).await?;
state.init_state.correlation_id = Some(correlation_id);

// Unload any network devices.
@@ -741,19 +749,33 @@ impl LoadedVm {
async fn save(
&mut self,
_deadline: Option<std::time::Instant>,
vf_keepalive_flag: bool,
nvme_keepalive_flag: bool,
mana_keepalive_flag: bool,
) -> anyhow::Result<ServicingState> {
assert!(!self.state_units.is_running());

let emuplat = (self.emuplat_servicing.save()).context("emuplat save failed")?;

// Only save dma manager state if we are expected to keep VF devices
// alive across save. Otherwise, don't persist the state at all, as
// there should be no live DMA across save.
//
// This has to happen before saving the network state; otherwise its
// allocations are marked as Free and cannot be restored.
let dma_manager_state = if nvme_keepalive_flag || mana_keepalive_flag {
use vmcore::save_restore::SaveRestore;
Some(self.dma_manager.save().context("dma_manager save failed")?)
} else {
None
};

// Only save NVMe state when there are NVMe controllers and keep alive
// was enabled.
let nvme_state = if let Some(n) = &self.nvme_manager {
// DEVNOTE: A subtlety here is that the act of saving the NVMe state also causes the driver
// to enter a state where subsequent teardown operations will noop. There is a STRONG
// correlation between save/restore and keepalive.
n.save(vf_keepalive_flag)
n.save(nvme_keepalive_flag)
.instrument(tracing::info_span!("nvme_manager_save", CVM_ALLOWED))
.await
.map(|s| NvmeSavedState { nvme_state: s })
@@ -762,6 +784,15 @@
};

let units = self.save_units().await.context("state unit save failed")?;

let mana_state = if let Some(network_settings) = &mut self.network_settings
&& mana_keepalive_flag
{
Some(network_settings.save().await)
} else {
None
};

let vmgs = if let Some((vmgs_thin_client, vmgs_disk_metadata, _)) = self.vmgs.as_ref() {
Some((
vmgs_thin_client.save().await.context("vmgs save failed")?,
@@ -771,16 +802,6 @@
None
};

// Only save dma manager state if we are expected to keep VF devices
// alive across save. Otherwise, don't persist the state at all, as
// there should be no live DMA across save.
let dma_manager_state = if vf_keepalive_flag {
use vmcore::save_restore::SaveRestore;
Some(self.dma_manager.save().context("dma_manager save failed")?)
} else {
None
};

let vmbus_client = if let Some(vmbus_client) = &mut self.vmbus_client {
vmbus_client.stop().await;
Some(vmbus_client.save().await)
@@ -800,6 +821,7 @@
nvme_state,
dma_manager_state,
vmbus_client,
mana_state,
},
units,
};
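Taken together, the reworked save path is keepalive-gated and order-sensitive: DMA manager bookkeeping is snapshotted first, while MANA's allocations are still marked live, and the MANA endpoint state is saved afterwards. A compressed sketch of that control flow, with all types stand-ins rather than the real `ServicingState` internals:

```rust
// Ordering sketch only; none of these types mirror the real structs.
struct DmaSnapshot;
struct ManaSnapshot;

struct SavedState {
    dma: Option<DmaSnapshot>,
    mana: Option<Vec<ManaSnapshot>>,
}

fn save(nvme_keepalive: bool, mana_keepalive: bool) -> SavedState {
    // 1. DMA bookkeeping first: if the network state were saved before
    //    this point, its allocations would read as Free and could not
    //    be restored after servicing.
    let dma = (nvme_keepalive || mana_keepalive).then(|| DmaSnapshot);

    // 2. MANA endpoint state second, and only when keepalive is on;
    //    the restored OpenHCL re-attaches to these buffers.
    let mana = mana_keepalive.then(|| vec![ManaSnapshot]);

    SavedState { dma, mana }
}

fn main() {
    let s = save(false, true);
    assert!(s.dma.is_some() && s.mana.is_some());
}
```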
@@ -866,6 +888,8 @@ impl LoadedVm {
&self.vmbus_server,
self.dma_manager.client_spawner(),
self.isolation.is_isolated(),
self.mana_keep_alive,
None, // No existing mana state
)
.await?;
