Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Setup the VMM reservoir in a background task #5124

Merged
merged 35 commits into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
1d0ed8c
Setup the VMM reservoir in a background task
andrewjstone Feb 22, 2024
bbbe079
comments
andrewjstone Feb 22, 2024
bdea2f6
review fixes
andrewjstone Feb 23, 2024
710b0ce
Wait for VMM reservoir allocation when ensuring propolis instances
andrewjstone Feb 26, 2024
233f2d1
Revert "Wait for VMM reservoir allocation when ensuring propolis inst…
andrewjstone Feb 27, 2024
bedf0c1
remove watch from vmm_reservoir
andrewjstone Feb 27, 2024
7bdf2e3
Add sled_agent_get to nexus internal API
andrewjstone Feb 27, 2024
7c4a2de
wip
andrewjstone Feb 28, 2024
58a84fa
wip
andrewjstone Feb 28, 2024
f6116b6
wip
andrewjstone Feb 28, 2024
cf185cf
wip
andrewjstone Feb 28, 2024
7dbcedc
wip
andrewjstone Feb 29, 2024
7429a8d
wip
andrewjstone Feb 29, 2024
c6a396c
wip
andrewjstone Feb 29, 2024
32cd05b
wip
andrewjstone Feb 29, 2024
e7ce5af
wip
andrewjstone Feb 29, 2024
00d248e
wip
andrewjstone Feb 29, 2024
adc29dc
Solid test for sled-agent
andrewjstone Feb 29, 2024
3418a60
Make sled upserts conditional on rcgen
andrewjstone Mar 1, 2024
0488749
extra rcgen check
andrewjstone Mar 1, 2024
8a7d297
plumb through reservoir size changes
andrewjstone Mar 4, 2024
f379afb
typo
andrewjstone Mar 4, 2024
9c130e2
Merge branch 'main' into ajs/vmm-reservoir-bg-thread
andrewjstone Mar 4, 2024
f49208e
Return an result for sled upserts, with error for decommissioned
andrewjstone Mar 5, 2024
5b1914b
fix warnings
andrewjstone Mar 5, 2024
e5795c9
clippy
andrewjstone Mar 5, 2024
8d1ae0c
Merge branch 'main' into ajs/vmm-reservoir-bg-thread
andrewjstone Mar 5, 2024
0f904f4
Give sled-agent its own generation number
andrewjstone Mar 7, 2024
4fd1afd
review fixes
andrewjstone Mar 7, 2024
edbf0a0
Merge branch 'main' into ajs/vmm-reservoir-bg-thread
andrewjstone Mar 7, 2024
5b01ef0
Handle sled decommissioning
andrewjstone Mar 7, 2024
332832e
Merge branch 'main' into ajs/vmm-reservoir-bg-thread
andrewjstone Mar 7, 2024
e9167a8
type fix for Baseboard
andrewjstone Mar 7, 2024
c611a81
COMMENTS
andrewjstone Mar 7, 2024
9027e5b
Merge branch 'main' into ajs/vmm-reservoir-bg-thread
andrewjstone Mar 7, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions sled-agent/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ dpd-client.workspace = true
display-error-chain.workspace = true
dropshot.workspace = true
flate2.workspace = true
flume.workspace = true
futures.workspace = true
glob.workspace = true
hex.workspace = true
Expand Down
112 changes: 11 additions & 101 deletions sled-agent/src/instance_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,24 @@ use crate::params::{
InstanceHardware, InstanceMigrationSourceParams, InstancePutStateResponse,
InstanceStateRequested, InstanceUnregisterResponse,
};
use crate::vmm_reservoir::VmmReservoirManagerHandle;
use crate::zone_bundle::BundleError;
use crate::zone_bundle::ZoneBundler;
use omicron_common::api::external::ByteCount;

use anyhow::anyhow;
use illumos_utils::dladm::Etherstub;
use illumos_utils::link::VnicAllocator;
use illumos_utils::opte::PortManager;
use illumos_utils::running_zone::ZoneBuilderFactory;
use illumos_utils::vmm_reservoir;
use omicron_common::api::external::ByteCount;
use omicron_common::api::internal::nexus::InstanceRuntimeState;
use omicron_common::api::internal::nexus::SledInstanceState;
use omicron_common::api::internal::nexus::VmmRuntimeState;
use sled_storage::manager::StorageHandle;
use slog::Logger;
use std::collections::BTreeMap;
use std::net::SocketAddr;
use std::sync::{Arc, Mutex};
use std::sync::Arc;
use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;

Expand All @@ -48,12 +48,6 @@ pub enum Error {
#[error("OPTE port management error: {0}")]
Opte(#[from] illumos_utils::opte::Error),

#[error("Failed to create reservoir: {0}")]
Reservoir(#[from] vmm_reservoir::Error),

#[error("Invalid reservoir configuration: {0}")]
ReservoirConfig(String),

#[error("Cannot find data link: {0}")]
Underlay(#[from] sled_hardware::underlay::Error),

Expand All @@ -72,12 +66,6 @@ pub enum Error {
RequestDropped(#[from] oneshot::error::RecvError),
}

pub enum ReservoirMode {
None,
Size(u32),
Percentage(u8),
}

pub(crate) struct InstanceManagerServices {
pub nexus_client: NexusClientWithResolver,
pub vnic_allocator: VnicAllocator<Etherstub>,
Expand All @@ -90,14 +78,8 @@ pub(crate) struct InstanceManagerServices {
// Describes the internals of the "InstanceManager", though most of the
// instance manager's state exists within the "InstanceManagerRunner" structure.
struct InstanceManagerInternal {
log: Logger,
tx: mpsc::Sender<InstanceManagerRequest>,
// NOTE: Arguably, this field could be "owned" by the InstanceManagerRunner.
// It was not moved there, and the reservoir functions were not converted to
// use the message-passing interface (see: "InstanceManagerRequest") because
// callers of "get/set reservoir size" are not async, and (in the case of
// getting the size) they also do not expect a "Result" type.
reservoir_size: Mutex<ByteCount>,
vmm_reservoir_manager: VmmReservoirManagerHandle,

#[allow(dead_code)]
runner_handle: tokio::task::JoinHandle<()>,
Expand All @@ -110,6 +92,7 @@ pub struct InstanceManager {

impl InstanceManager {
/// Initializes a new [`InstanceManager`] object.
#[allow(clippy::too_many_arguments)]
pub fn new(
log: Logger,
nexus_client: NexusClientWithResolver,
Expand All @@ -118,6 +101,7 @@ impl InstanceManager {
storage: StorageHandle,
zone_bundler: ZoneBundler,
zone_builder_factory: ZoneBuilderFactory,
vmm_reservoir_manager: VmmReservoirManagerHandle,
) -> Result<InstanceManager, Error> {
let (tx, rx) = mpsc::channel(QUEUE_SIZE);
let (terminate_tx, terminate_rx) = mpsc::unbounded_channel();
Expand All @@ -142,91 +126,13 @@ impl InstanceManager {

Ok(Self {
inner: Arc::new(InstanceManagerInternal {
log,
tx,
// no reservoir size set on startup
reservoir_size: Mutex::new(ByteCount::from_kibibytes_u32(0)),
vmm_reservoir_manager,
runner_handle,
}),
})
}

/// Sets the VMM reservoir to the requested percentage of usable physical
/// RAM or to a size in MiB. Either mode will round down to the nearest
/// aligned size required by the control plane.
pub fn set_reservoir_size(
&self,
hardware: &sled_hardware::HardwareManager,
mode: ReservoirMode,
) -> Result<(), Error> {
let hardware_physical_ram_bytes = hardware.usable_physical_ram_bytes();
let req_bytes = match mode {
ReservoirMode::None => return Ok(()),
ReservoirMode::Size(mb) => {
let bytes = ByteCount::from_mebibytes_u32(mb).to_bytes();
if bytes > hardware_physical_ram_bytes {
return Err(Error::ReservoirConfig(format!(
"cannot specify a reservoir of {bytes} bytes when \
physical memory is {hardware_physical_ram_bytes} bytes",
)));
}
bytes
}
ReservoirMode::Percentage(percent) => {
if !matches!(percent, 1..=99) {
return Err(Error::ReservoirConfig(format!(
"VMM reservoir percentage of {} must be between 0 and \
100",
percent
)));
};
(hardware_physical_ram_bytes as f64 * (percent as f64 / 100.0))
.floor() as u64
}
};

let req_bytes_aligned = vmm_reservoir::align_reservoir_size(req_bytes);

if req_bytes_aligned == 0 {
warn!(
self.inner.log,
"Requested reservoir size of {} bytes < minimum aligned size \
of {} bytes",
req_bytes,
vmm_reservoir::RESERVOIR_SZ_ALIGN
);
return Ok(());
}

// The max ByteCount value is i64::MAX, which is ~8 million TiB.
// As this value is either a percentage of DRAM or a size in MiB
// represented as a u32, constructing this should always work.
let reservoir_size = ByteCount::try_from(req_bytes_aligned).unwrap();
if let ReservoirMode::Percentage(percent) = mode {
info!(
self.inner.log,
"{}% of {} physical ram = {} bytes)",
percent,
hardware_physical_ram_bytes,
req_bytes,
);
}
info!(
self.inner.log,
"Setting reservoir size to {reservoir_size} bytes"
);
vmm_reservoir::ReservoirControl::set(reservoir_size)?;

*self.inner.reservoir_size.lock().unwrap() = reservoir_size;

Ok(())
}

/// Returns the last-set size of the reservoir
pub fn reservoir_size(&self) -> ByteCount {
*self.inner.reservoir_size.lock().unwrap()
}

pub async fn ensure_registered(
&self,
instance_id: Uuid,
Expand Down Expand Up @@ -379,6 +285,10 @@ impl InstanceManager {
.map_err(|_| Error::FailedSendInstanceManagerClosed)?;
rx.await?
}

andrewjstone marked this conversation as resolved.
Show resolved Hide resolved
pub fn reservoir_size(&self) -> ByteCount {
self.inner.vmm_reservoir_manager.reservoir_size()
}
}

// Most requests that can be sent to the "InstanceManagerRunner" task.
Expand Down
1 change: 1 addition & 0 deletions sled-agent/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ mod smf_helper;
mod storage_monitor;
mod swap_device;
mod updates;
mod vmm_reservoir;
mod zone_bundle;

#[cfg(test)]
Expand Down
50 changes: 16 additions & 34 deletions sled-agent/src/sled_agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::bootstrap::early_networking::{
};
use crate::bootstrap::params::{BaseboardId, StartSledAgentRequest};
use crate::config::Config;
use crate::instance_manager::{InstanceManager, ReservoirMode};
use crate::instance_manager::InstanceManager;
use crate::long_running_tasks::LongRunningTaskHandles;
use crate::metrics::MetricsManager;
use crate::nexus::{ConvertInto, NexusClientWithResolver, NexusRequestQueue};
Expand All @@ -25,6 +25,7 @@ use crate::params::{
use crate::services::{self, ServiceManager};
use crate::storage_monitor::UnderlayAccess;
use crate::updates::{ConfigUpdates, UpdateManager};
use crate::vmm_reservoir::{ReservoirMode, VmmReservoirManager};
use crate::zone_bundle;
use crate::zone_bundle::BundleError;
use bootstore::schemes::v0 as bootstore;
Expand Down Expand Up @@ -413,6 +414,19 @@ impl SledAgent {
.map_err(|_| ())
.expect("Failed to send to StorageMonitor");

// Configure the VMM reservoir as either a percentage of DRAM or as an
// exact size in MiB.
let reservoir_mode = ReservoirMode::from_config(
config.vmm_reservoir_percentage,
config.vmm_reservoir_size_mb,
);

let vmm_reservoir_manager = VmmReservoirManager::spawn(
&log,
long_running_task_handles.hardware_manager.clone(),
reservoir_mode,
);

let instances = InstanceManager::new(
parent_log.clone(),
nexus_client.clone(),
Expand All @@ -421,41 +435,9 @@ impl SledAgent {
storage_manager.clone(),
long_running_task_handles.zone_bundler.clone(),
ZoneBuilderFactory::default(),
vmm_reservoir_manager,
)?;

// Configure the VMM reservoir as either a percentage of DRAM or as an
// exact size in MiB.
let reservoir_mode = match (
config.vmm_reservoir_percentage,
config.vmm_reservoir_size_mb,
) {
(None, None) => ReservoirMode::None,
(Some(p), None) => ReservoirMode::Percentage(p),
(None, Some(mb)) => ReservoirMode::Size(mb),
(Some(_), Some(_)) => panic!(
"only one of vmm_reservoir_percentage and \
vmm_reservoir_size_mb is allowed"
),
};

match reservoir_mode {
ReservoirMode::None => warn!(log, "Not using VMM reservoir"),
ReservoirMode::Size(0) | ReservoirMode::Percentage(0) => {
warn!(log, "Not using VMM reservoir (size 0 bytes requested)")
}
_ => {
instances
.set_reservoir_size(
&long_running_task_handles.hardware_manager,
reservoir_mode,
)
.map_err(|e| {
error!(log, "Failed to setup VMM reservoir: {e}");
e
})?;
}
}

let update_config = ConfigUpdates {
zone_artifact_path: Utf8PathBuf::from("/opt/oxide"),
};
Expand Down
Loading
Loading