Skip to content

Send blueprint disks & datasets to simulated sled agent #8358

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions dev-tools/omdb/tests/successes.out
Original file line number Diff line number Diff line change
Expand Up @@ -1441,7 +1441,7 @@ stdout:
blueprint ......<REDACTED_BLUEPRINT_ID>.......
parent: <none>

sled: ..........<REDACTED_UUID>........... (active, config generation 2)
sled: ..........<REDACTED_UUID>........... (active, config generation 4)

physical disks:
-----------------------------------------------------------------
Expand Down Expand Up @@ -1472,7 +1472,7 @@ parent: <none>



sled: ..........<REDACTED_UUID>........... (active, config generation 2)
sled: ..........<REDACTED_UUID>........... (active, config generation 4)

physical disks:
----------------------------------------------------------------
Expand Down Expand Up @@ -1545,7 +1545,7 @@ stdout:
blueprint ......<REDACTED_BLUEPRINT_ID>.......
parent: <none>

sled: ..........<REDACTED_UUID>........... (active, config generation 2)
sled: ..........<REDACTED_UUID>........... (active, config generation 4)

physical disks:
-----------------------------------------------------------------
Expand Down Expand Up @@ -1576,7 +1576,7 @@ parent: <none>



sled: ..........<REDACTED_UUID>........... (active, config generation 2)
sled: ..........<REDACTED_UUID>........... (active, config generation 4)

physical disks:
----------------------------------------------------------------
Expand Down
74 changes: 70 additions & 4 deletions nexus/src/app/background/tasks/blueprint_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,13 @@ impl BackgroundTask for BlueprintPlanner {
#[cfg(test)]
mod test {
use super::*;
use crate::app::background::Activator;
use crate::app::background::tasks::blueprint_execution::BlueprintExecutor;
use crate::app::background::tasks::blueprint_load::TargetBlueprintLoader;
use crate::app::background::tasks::inventory_collection::InventoryCollector;
use nexus_test_utils_macros::nexus_test;
use nexus_types::deployment::PendingMgsUpdates;
use omicron_uuid_kinds::OmicronZoneUuid;

type ControlPlaneTestContext =
nexus_test_utils::ControlPlaneTestContext<crate::Server>;
Expand Down Expand Up @@ -280,7 +284,7 @@ mod test {
cptestctx.logctx.log.clone(),
&[cptestctx.internal_dns.dns_server.local_address()],
)
.unwrap();
.expect("can't start resolver");
let mut collector = InventoryCollector::new(
datastore.clone(),
resolver.clone(),
Expand All @@ -305,7 +309,7 @@ mod test {
let status = serde_json::from_value::<BlueprintPlannerStatus>(
planner.activate(&opctx).await,
)
.unwrap();
.expect("can't activate planner");
let blueprint_id = match status {
BlueprintPlannerStatus::Targeted {
parent_blueprint_id,
Expand All @@ -330,11 +334,73 @@ mod test {
blueprint.diff_since_blueprint(initial_blueprint).has_changes()
);

// Planning again should not change the plan.
// Planning again should not change the plan, because nothing has changed.
let status = serde_json::from_value::<BlueprintPlannerStatus>(
planner.activate(&opctx).await,
)
.unwrap();
.expect("can't re-activate planner");
assert_eq!(
status,
BlueprintPlannerStatus::Unchanged {
parent_blueprint_id: blueprint_id,
}
);

// Enable execution.
let mut target = *target;
target.enabled = true;
datastore
.blueprint_target_set_current_enabled(&opctx, target)
.await
.expect("can't enable execution");

// Ping the loader again so it gets the updated target.
loader.activate(&opctx).await;
let (target, blueprint) = &*rx_loader
.borrow_and_update()
.clone()
.expect("failed to re-load blueprint");
assert_eq!(target.target_id, blueprint.id);
assert_eq!(target.target_id, blueprint_id);
assert!(
blueprint.diff_since_blueprint(initial_blueprint).has_changes()
);

// Trigger an inventory collection.
collector.activate(&opctx).await;

// Execute the plan.
let (dummy_tx, _dummy_rx) = watch::channel(PendingMgsUpdates::new());
let mut executor = BlueprintExecutor::new(
datastore.clone(),
resolver.clone(),
rx_loader.clone(),
OmicronZoneUuid::new_v4(),
Activator::new(),
dummy_tx,
);
let value = executor.activate(&opctx).await;
let value = value.as_object().expect("response is not a JSON object");
assert_eq!(value["target_id"], blueprint.id.to_string());
assert!(value["enabled"].as_bool().expect("enabled should be boolean"));
assert!(value["execution_error"].is_null());
assert!(
!value["event_report"]
.as_object()
.expect("event report should be an object")["Ok"]
.as_object()
.expect("event report is not Ok")["step_events"]
.as_array()
.expect("steps should be an array")
.is_empty()
);

// Planning again should not change the plan, because the execution
// is all fake.
let status = serde_json::from_value::<BlueprintPlannerStatus>(
planner.activate(&opctx).await,
)
.expect("can't re-activate planner");
assert_eq!(
status,
BlueprintPlannerStatus::Unchanged {
Expand Down
85 changes: 36 additions & 49 deletions nexus/test-utils/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ use slog::{Logger, debug, error, o};
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::fmt::Debug;
use std::iter::{once, repeat, zip};
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV6};
use std::sync::Arc;
use std::time::Duration;
Expand Down Expand Up @@ -395,6 +396,8 @@ pub struct ControlPlaneTestContextBuilder<'a, N: NexusServer> {
pub internal_dns: Option<dns_server::TransientServer>,
dns_config: Option<DnsConfigParams>,
initial_blueprint_id: Option<BlueprintUuid>,
blueprint_disks: BTreeMap<SledUuid, IdMap<BlueprintPhysicalDiskConfig>>,
blueprint_datasets: BTreeMap<SledUuid, IdMap<BlueprintDatasetConfig>>,
blueprint_zones: Vec<BlueprintZoneConfig>,

pub silo_name: Option<Name>,
Expand Down Expand Up @@ -443,6 +446,8 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> {
internal_dns: None,
dns_config: None,
initial_blueprint_id: None,
blueprint_disks: BTreeMap::new(),
blueprint_datasets: BTreeMap::new(),
blueprint_zones: Vec::new(),
silo_name: None,
user_name: None,
Expand Down Expand Up @@ -844,7 +849,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> {
self.dns_config = Some(dns_config);
}

// Perform RSS handoff
/// Perform RSS handoff
pub async fn start_nexus_external(
&mut self,
tls_certificates: Vec<Certificate>,
Expand Down Expand Up @@ -882,11 +887,11 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> {
// The first sled agent is the only one that'll have configured
// blueprint zones, but the others all need to have disks.

let maybe_zones = std::iter::once(Some(&self.blueprint_zones))
.chain(std::iter::repeat(None));
let maybe_zones =
once(Some(&self.blueprint_zones)).chain(repeat(None));

for (sled_agent, maybe_zones) in
std::iter::zip(self.sled_agents.iter(), maybe_zones)
zip(self.sled_agents.iter(), maybe_zones)
{
let sled_id = sled_agent.sled_agent_id();

Expand Down Expand Up @@ -948,11 +953,13 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> {
}
}

self.blueprint_disks.insert(sled_id, disks.clone());
self.blueprint_datasets.insert(sled_id, datasets.clone());
blueprint_sleds.insert(
sled_id,
BlueprintSledConfig {
state: SledState::Active,
sled_agent_generation: Generation::new().next(),
sled_agent_generation: 4.into(),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's up with this number? I feel like it could use a constant and comment.

disks,
datasets,
zones,
Expand Down Expand Up @@ -1100,33 +1107,6 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> {
/// tell the other Sled Agents to report they have no zones configured, and
/// write the early network config to all sleds.
pub async fn configure_sled_agents(&mut self) {
let Some(sled_agent) = self.sled_agents.first() else {
panic!("expected sled agent has not been created");
};

let client = sled_agent_client::Client::new(
&format!("http://{}", sled_agent.local_addr()),
self.logctx.log.clone(),
);

client
.omicron_config_put(&OmicronSledConfig {
generation: Generation::new().next(),
// Sending no disks or datasets is probably wrong, but there are
// a lot of inconsistencies with this in nexus-test.
disks: IdMap::default(),
datasets: IdMap::default(),
zones: self
.blueprint_zones
.clone()
.into_iter()
.map(From::from)
.collect(),
remove_mupdate_override: None,
})
.await
.expect("Failed to configure sled agent with our zones");

let early_network_config = EarlyNetworkConfig {
body: EarlyNetworkConfigBody {
ntp_servers: Vec::new(),
Expand All @@ -1143,33 +1123,40 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> {
schema_version: 2,
};

client
.write_network_bootstore_config(&early_network_config)
.await
.expect("Failed to write early networking config to bootstore");
macro_rules! from_clone {
($source: expr) => {
$source.clone().into_iter().map(From::from).collect()
};
}

for sled_agent in self.sled_agents.iter().skip(1) {
let generation = Generation::from_u32(2);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same -- where's this come from?

let zones =
once(from_clone!(self.blueprint_zones)).chain(repeat(vec![]));
Comment on lines +1133 to +1134
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this constructing a structure with non-empty zones for the first sled agent and no zones for the others?

for (sled_agent, sled_zones) in zip(self.sled_agents.iter(), zones) {
let sled_id = sled_agent.sled_agent_id();
let client = sled_agent_client::Client::new(
&format!("http://{}", sled_agent.local_addr()),
self.logctx.log.clone(),
);

client
.omicron_config_put(&OmicronSledConfig {
generation: Generation::new().next(),
// As above, sending no disks or datasets is probably wrong
disks: IdMap::default(),
datasets: IdMap::default(),
zones: IdMap::default(),
generation,
disks: from_clone!(self.blueprint_disks[&sled_id]),
datasets: from_clone!(self.blueprint_datasets[&sled_id]),
zones: sled_zones.into_iter().collect(),
remove_mupdate_override: None,
})
.await
.expect("Failed to configure sled agent with our zones");
.expect("Failed to configure sled agent {sled_id} with zones");

client
.write_network_bootstore_config(&early_network_config)
.await
.expect("Failed to write early networking config to bootstore");
.expect(
"Failed to write early networking config \
to bootstore on sled {sled_id}",
);
}
}

Expand Down Expand Up @@ -1742,7 +1729,7 @@ async fn setup_with_config_impl<N: NexusServer>(
}

// Start expected services: these will all be allocated to the first sled
// agent. Afterwards, configure the first sled agent, and start the rest of
// agent. Afterwards, configure the sled agents and start the rest of the
// the required services.

builder
Expand All @@ -1762,10 +1749,6 @@ async fn setup_with_config_impl<N: NexusServer>(
"populate_internal_dns",
Box::new(|builder| builder.populate_internal_dns().boxed()),
),
(
"configure_sled_agents",
Box::new(|builder| builder.configure_sled_agents().boxed()),
),
(
"start_nexus_external",
Box::new(|builder| {
Expand All @@ -1776,6 +1759,10 @@ async fn setup_with_config_impl<N: NexusServer>(
.boxed()
}),
),
(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious why this had to move to after starting Nexus. This configuration mimics what RSS would normally do, right? And that happens before Nexus starts.

"configure_sled_agents",
Box::new(|builder| builder.configure_sled_agents().boxed()),
),
(
"start_oximeter",
Box::new(|builder| builder.start_oximeter().boxed()),
Expand Down
Loading