diff --git a/.github/buildomat/jobs/a4x2-prepare.sh b/.github/buildomat/jobs/a4x2-prepare.sh index 1438ec06de..daadec27a2 100755 --- a/.github/buildomat/jobs/a4x2-prepare.sh +++ b/.github/buildomat/jobs/a4x2-prepare.sh @@ -3,7 +3,7 @@ #: name = "a4x2-prepare" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "1.78.0" +#: rust_toolchain = true #: output_rules = [ #: "=/out/cargo-bay-ce.tgz", #: "=/out/cargo-bay-cr1.tgz", diff --git a/.github/buildomat/jobs/build-and-test-helios.sh b/.github/buildomat/jobs/build-and-test-helios.sh index b63d2e783f..d3d071cb3e 100755 --- a/.github/buildomat/jobs/build-and-test-helios.sh +++ b/.github/buildomat/jobs/build-and-test-helios.sh @@ -3,7 +3,7 @@ #: name = "build-and-test (helios)" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "1.78.0" +#: rust_toolchain = true #: output_rules = [ #: "%/work/*", #: "%/var/tmp/omicron_tmp/*", diff --git a/.github/buildomat/jobs/build-and-test-linux.sh b/.github/buildomat/jobs/build-and-test-linux.sh index 4a1f86c3e1..c4e99f0f2a 100755 --- a/.github/buildomat/jobs/build-and-test-linux.sh +++ b/.github/buildomat/jobs/build-and-test-linux.sh @@ -3,7 +3,7 @@ #: name = "build-and-test (ubuntu-22.04)" #: variety = "basic" #: target = "ubuntu-22.04" -#: rust_toolchain = "1.78.0" +#: rust_toolchain = true #: output_rules = [ #: "%/work/*", #: "%/var/tmp/omicron_tmp/*", diff --git a/.github/buildomat/jobs/clippy.sh b/.github/buildomat/jobs/clippy.sh index 1f4c578e47..cff9c45a1b 100755 --- a/.github/buildomat/jobs/clippy.sh +++ b/.github/buildomat/jobs/clippy.sh @@ -3,7 +3,7 @@ #: name = "clippy (helios)" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "1.78.0" +#: rust_toolchain = true #: output_rules = [] # Run clippy on illumos (not just other systems) because a bunch of our code diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index a2aac86aec..2dde4286dc 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -238,7 +238,7 @@ infra_ip_last = \"$UPLINK_IP\" /^routes/c\\ routes = \\[{nexthop = \"$GATEWAY_IP\", destination = \"0.0.0.0/0\"}\\] /^addresses/c\\ -addresses = \\[\"$UPLINK_IP/24\"\\] +addresses = \\[{address = \"$UPLINK_IP/24\"} \\] } " pkg/config-rss.toml diff -u pkg/config-rss.toml{~,} || true diff --git a/.github/buildomat/jobs/omicron-common.sh b/.github/buildomat/jobs/omicron-common.sh index 345d99f405..e9e2774cd2 100755 --- a/.github/buildomat/jobs/omicron-common.sh +++ b/.github/buildomat/jobs/omicron-common.sh @@ -3,7 +3,7 @@ #: name = "omicron-common (helios)" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "1.78.0" +#: rust_toolchain = true #: output_rules = [] # Verify that omicron-common builds successfully when used as a dependency diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh index 7099306a97..d9632f39e6 100755 --- a/.github/buildomat/jobs/package.sh +++ b/.github/buildomat/jobs/package.sh @@ -3,7 +3,7 @@ #: name = "helios / package" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "1.78.0" +#: rust_toolchain = true #: output_rules = [ #: "=/work/package.tar.gz", #: ] diff --git a/.github/buildomat/jobs/tuf-repo.sh b/.github/buildomat/jobs/tuf-repo.sh index 5b2d1bd405..47f7df9d9d 100755 --- a/.github/buildomat/jobs/tuf-repo.sh +++ b/.github/buildomat/jobs/tuf-repo.sh @@ -3,7 +3,7 @@ #: name = "helios / build TUF repo" #: variety = "basic" #: target = "helios-2.0" -#: rust_toolchain = "1.78.0" +#: 
rust_toolchain = true #: output_rules = [ #: "=/work/manifest.toml", #: "=/work/repo.zip", diff --git a/Cargo.lock b/Cargo.lock index 94635f4539..3204afe4a5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1064,6 +1064,20 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" +[[package]] +name = "cockroach-admin-client" +version = "0.1.0" +dependencies = [ + "chrono", + "omicron-uuid-kinds", + "omicron-workspace-hack", + "progenitor", + "reqwest", + "schemars", + "serde", + "slog", +] + [[package]] name = "colorchoice" version = "1.0.1" @@ -5375,22 +5389,30 @@ dependencies = [ "clap", "csv", "dropshot 0.10.2-dev", + "expectorate", "http 0.2.12", "illumos-utils", "nexus-test-utils", "omicron-common", "omicron-rpaths", "omicron-test-utils", + "omicron-uuid-kinds", "omicron-workspace-hack", + "once_cell", + "openapi-lint", + "openapiv3", "pq-sys", "schemars", "serde", + "serde_json", "slog", "slog-async", "slog-dtrace", "slog-error-chain", + "subprocess", "thiserror", "tokio", + "tokio-postgres", "toml 0.8.13", "url", ] @@ -5561,6 +5583,7 @@ dependencies = [ "cancel-safe-futures", "chrono", "clap", + "cockroach-admin-client", "criterion", "crucible-agent-client", "crucible-pantry-client", @@ -5770,7 +5793,9 @@ dependencies = [ name = "omicron-passwords" version = "0.1.0" dependencies = [ + "anyhow", "argon2", + "clap", "criterion", "omicron-workspace-hack", "rand 0.8.5", diff --git a/Cargo.toml b/Cargo.toml index e9779cd91c..a6a599e3e2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ members = [ "bootstore", "certificates", "clients/bootstrap-agent-client", + "clients/cockroach-admin-client", "clients/ddm-admin-client", "clients/dns-service-client", "clients/dpd-client", @@ -89,6 +90,7 @@ default-members = [ "bootstore", "certificates", "clients/bootstrap-agent-client", + "clients/cockroach-admin-client", "clients/ddm-admin-client", "clients/dns-service-client", "clients/dpd-client", @@ -239,6 +241,7 @@ ciborium = "0.2.2" cfg-if = "1.0" chrono = { version = "0.4", features = [ "serde" ] } clap = { version = "4.5", features = ["cargo", "derive", "env", "wrap_help"] } +cockroach-admin-client = { path = "clients/cockroach-admin-client" } colored = "2.1" const_format = "0.2.32" cookie = "0.18" diff --git a/clients/cockroach-admin-client/Cargo.toml b/clients/cockroach-admin-client/Cargo.toml new file mode 100644 index 0000000000..cbf81c708f --- /dev/null +++ b/clients/cockroach-admin-client/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "cockroach-admin-client" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +chrono.workspace = true +omicron-uuid-kinds.workspace = true +progenitor.workspace = true +reqwest = { workspace = true, features = [ "json", "rustls-tls", "stream" ] } +schemars.workspace = true +serde.workspace = true +slog.workspace = true +omicron-workspace-hack.workspace = true diff --git a/clients/cockroach-admin-client/src/lib.rs b/clients/cockroach-admin-client/src/lib.rs new file mode 100644 index 0000000000..b7f067b97d --- /dev/null +++ b/clients/cockroach-admin-client/src/lib.rs @@ -0,0 +1,24 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Interface for making API requests to an Omicron CockroachDB admin server + +progenitor::generate_api!( + spec = "../../openapi/cockroach-admin.json", + inner_type = slog::Logger, + pre_hook = (|log: &slog::Logger, request: &reqwest::Request| { + slog::debug!(log, "client request"; + "method" => %request.method(), + "uri" => %request.url(), + "body" => ?&request.body(), + ); + }), + post_hook = (|log: &slog::Logger, result: &Result<_, _>| { + slog::debug!(log, "client response"; "result" => ?result); + }), + derives = [schemars::JsonSchema], + replace = { + TypedUuidForOmicronZoneKind = omicron_uuid_kinds::OmicronZoneUuid, + } +); diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index acf282a1f9..767d8f53d5 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -149,6 +149,49 @@ impl From instance_state: s.instance_state.into(), propolis_id: s.propolis_id, vmm_state: s.vmm_state.into(), + migration_state: s.migration_state.map(Into::into), + } + } +} + +impl From + for types::MigrationRuntimeState +{ + fn from( + s: omicron_common::api::internal::nexus::MigrationRuntimeState, + ) -> Self { + Self { + migration_id: s.migration_id, + role: s.role.into(), + state: s.state.into(), + gen: s.gen, + time_updated: s.time_updated, + } + } +} + +impl From + for types::MigrationRole +{ + fn from(s: omicron_common::api::internal::nexus::MigrationRole) -> Self { + use omicron_common::api::internal::nexus::MigrationRole as Input; + match s { + Input::Source => Self::Source, + Input::Target => Self::Target, + } + } +} + +impl From + for types::MigrationState +{ + fn from(s: omicron_common::api::internal::nexus::MigrationState) -> Self { + use omicron_common::api::internal::nexus::MigrationState as Input; + match s { + Input::Pending => Self::Pending, + Input::InProgress => Self::InProgress, + Input::Completed => Self::Completed, + Input::Failed => Self::Failed, } } } diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 862ae00cc9..aa42429089 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -33,8 +33,9 @@ progenitor::generate_api!( BgpConfig = { derives = [Eq, Hash] }, BgpPeerConfig = { derives = [Eq, Hash] }, OmicronPhysicalDiskConfig = { derives = [Eq, Hash, PartialOrd, Ord] }, - PortConfigV1 = { derives = [Eq, Hash] }, + PortConfigV2 = { derives = [Eq, Hash] }, RouteConfig = { derives = [Eq, Hash] }, + UplinkAddressConfig = { derives = [Eq, Hash] }, VirtualNetworkInterfaceHost = { derives = [Eq, Hash] }, }, crates = { @@ -327,6 +328,47 @@ impl From instance_state: s.instance_state.into(), propolis_id: s.propolis_id, vmm_state: s.vmm_state.into(), + migration_state: s.migration_state.map(Into::into), + } + } +} + +impl From + for omicron_common::api::internal::nexus::MigrationRuntimeState +{ + fn from(s: types::MigrationRuntimeState) -> Self { + Self { + migration_id: s.migration_id, + state: s.state.into(), + role: s.role.into(), + gen: s.gen, + time_updated: s.time_updated, + } + } +} + +impl From + for omicron_common::api::internal::nexus::MigrationRole +{ + fn from(r: types::MigrationRole) -> Self { + use omicron_common::api::internal::nexus::MigrationRole as Output; + match r { + types::MigrationRole::Source => Output::Source, + types::MigrationRole::Target => Output::Target, + } + } +} + +impl From + for omicron_common::api::internal::nexus::MigrationState +{ + fn from(s: types::MigrationState) -> Self { + use 
omicron_common::api::internal::nexus::MigrationState as Output; + match s { + types::MigrationState::Pending => Output::Pending, + types::MigrationState::InProgress => Output::InProgress, + types::MigrationState::Failed => Output::Failed, + types::MigrationState::Completed => Output::Completed, } } } diff --git a/clients/wicketd-client/src/lib.rs b/clients/wicketd-client/src/lib.rs index 8edb797b20..6198c6cf9e 100644 --- a/clients/wicketd-client/src/lib.rs +++ b/clients/wicketd-client/src/lib.rs @@ -24,7 +24,7 @@ progenitor::generate_api!( GetLocationResponse = { derives = [PartialEq, Eq, PartialOrd, Ord] }, ImageVersion = { derives = [PartialEq, Eq, PartialOrd, Ord]}, RackInitId = { derives = [PartialEq, Eq, PartialOrd, Ord] }, - RackNetworkConfigV1 = { derives = [PartialEq, Eq, PartialOrd, Ord] }, + RackNetworkConfigV2 = { derives = [PartialEq, Eq, PartialOrd, Ord] }, RackOperationStatus = { derives = [PartialEq, Eq, PartialOrd, Ord] }, RackResetId = { derives = [PartialEq, Eq, PartialOrd, Ord] }, RackV1Inventory = { derives = [PartialEq, Eq, PartialOrd, Ord]}, @@ -62,7 +62,7 @@ progenitor::generate_api!( Ipv4Range = omicron_common::address::Ipv4Range, Ipv6Range = omicron_common::address::Ipv6Range, M2Slot = installinator_common::M2Slot, - PortConfigV1 = omicron_common::api::internal::shared::PortConfigV1, + PortConfigV2 = omicron_common::api::internal::shared::PortConfigV2, PortFec = omicron_common::api::internal::shared::PortFec, PortSpeed = omicron_common::api::internal::shared::PortSpeed, ProgressEventForGenericSpec = update_engine::events::ProgressEvent, diff --git a/cockroach-admin/Cargo.toml b/cockroach-admin/Cargo.toml index e0c02493c2..49401afb9d 100644 --- a/cockroach-admin/Cargo.toml +++ b/cockroach-admin/Cargo.toml @@ -17,6 +17,8 @@ dropshot.workspace = true http.workspace = true illumos-utils.workspace = true omicron-common.workspace = true +omicron-uuid-kinds.workspace = true +once_cell.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. 
 pq-sys = "*"
 schemars.workspace = true
@@ -27,13 +29,19 @@ slog-error-chain.workspace = true
 serde.workspace = true
 thiserror.workspace = true
 tokio.workspace = true
+tokio-postgres.workspace = true
 toml.workspace = true
 omicron-workspace-hack.workspace = true
 
 [dev-dependencies]
+expectorate.workspace = true
 nexus-test-utils.workspace = true
 omicron-test-utils.workspace = true
+openapi-lint.workspace = true
+openapiv3.workspace = true
+serde_json.workspace = true
+subprocess.workspace = true
 url.workspace = true
 
 [lints]
diff --git a/cockroach-admin/src/bin/cockroach-admin.rs b/cockroach-admin/src/bin/cockroach-admin.rs
index eb28082faa..0399c8bbb0 100644
--- a/cockroach-admin/src/bin/cockroach-admin.rs
+++ b/cockroach-admin/src/bin/cockroach-admin.rs
@@ -12,6 +12,7 @@ use omicron_cockroach_admin::CockroachCli;
 use omicron_cockroach_admin::Config;
 use omicron_common::cmd::fatal;
 use omicron_common::cmd::CmdError;
+use omicron_uuid_kinds::OmicronZoneUuid;
 use std::net::SocketAddr;
 use std::net::SocketAddrV6;
 
@@ -38,6 +39,10 @@ enum Args {
         /// Path to the server config file
         #[clap(long, action)]
         config_file_path: Utf8PathBuf,
+
+        /// ID of the zone within which we're running
+        #[clap(long, action)]
+        zone_id: OmicronZoneUuid,
     },
 }
 
@@ -59,16 +64,20 @@ async fn main_impl() -> Result<(), CmdError> {
             cockroach_address,
             http_address,
             config_file_path,
+            zone_id,
         } => {
             let cockroach_cli =
                 CockroachCli::new(path_to_cockroach_binary, cockroach_address);
             let mut config = Config::from_file(&config_file_path)
                 .map_err(|err| CmdError::Failure(anyhow!(err)))?;
             config.dropshot.bind_address = SocketAddr::V6(http_address);
-            let server =
-                omicron_cockroach_admin::start_server(cockroach_cli, config)
-                    .await
-                    .map_err(|err| CmdError::Failure(anyhow!(err)))?;
+            let server = omicron_cockroach_admin::start_server(
+                zone_id,
+                cockroach_cli,
+                config,
+            )
+            .await
+            .map_err(|err| CmdError::Failure(anyhow!(err)))?;
             server.await.map_err(|err| {
                 CmdError::Failure(anyhow!(
                     "server failed after starting: {err}"
diff --git a/cockroach-admin/src/cockroach_cli.rs b/cockroach-admin/src/cockroach_cli.rs
index 5b3958546f..00478b81a1 100644
--- a/cockroach-admin/src/cockroach_cli.rs
+++ b/cockroach-admin/src/cockroach_cli.rs
@@ -75,6 +75,10 @@ impl CockroachCli {
         Self { path_to_cockroach_binary, cockroach_address }
     }
 
+    pub fn cockroach_address(&self) -> SocketAddrV6 {
+        self.cockroach_address
+    }
+
     pub async fn node_status(
         &self,
     ) -> Result<Vec<NodeStatus>, CockroachCliError> {
diff --git a/cockroach-admin/src/context.rs b/cockroach-admin/src/context.rs
index b3f39f463a..ea281f7b75 100644
--- a/cockroach-admin/src/context.rs
+++ b/cockroach-admin/src/context.rs
@@ -2,8 +2,182 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
+use std::net::SocketAddr;
+
 use crate::CockroachCli;
+use anyhow::bail;
+use anyhow::Context;
+use dropshot::HttpError;
+use omicron_uuid_kinds::OmicronZoneUuid;
+use slog::Logger;
+use slog_error_chain::InlineErrorChain;
+use tokio::sync::OnceCell;
 
 pub struct ServerContext {
-    pub cockroach_cli: CockroachCli,
+    zone_id: OmicronZoneUuid,
+    cockroach_cli: CockroachCli,
+    // Cockroach node IDs never change; we defer contacting our local node to
+    // ask for its ID until we need to, but once we have it we don't need to ask
+    // again.
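As an aside on the comment above: the cache-on-first-use behavior comes from tokio's `OnceCell` with fallible initialization. A minimal, self-contained sketch of the same pattern follows; the `fetch_node_id` helper is a stand-in, not part of this change.

    use tokio::sync::OnceCell;

    struct Cached {
        node_id: OnceCell<String>,
    }

    impl Cached {
        async fn node_id(&self) -> anyhow::Result<&str> {
            // `get_or_try_init` runs the closure only while the cell is empty;
            // on success the value is stored and returned by reference from
            // then on, and on error the cell stays empty so a later call can
            // retry.
            let id = self
                .node_id
                .get_or_try_init(|| async { fetch_node_id().await })
                .await?;
            Ok(id.as_str())
        }
    }

    // Stand-in for the real query against the local CockroachDB node.
    async fn fetch_node_id() -> anyhow::Result<String> {
        Ok("1".to_string())
    }

    #[tokio::main]
    async fn main() -> anyhow::Result<()> {
        let cached = Cached { node_id: OnceCell::new() };
        assert_eq!(cached.node_id().await?, "1");
        // The second call returns the memoized value without re-fetching.
        assert_eq!(cached.node_id().await?, "1");
        Ok(())
    }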
+ node_id: OnceCell, + log: Logger, +} + +impl ServerContext { + pub fn new( + zone_id: OmicronZoneUuid, + cockroach_cli: CockroachCli, + log: Logger, + ) -> Self { + Self { zone_id, cockroach_cli, node_id: OnceCell::new(), log } + } + + pub fn cockroach_cli(&self) -> &CockroachCli { + &self.cockroach_cli + } + + pub fn zone_id(&self) -> OmicronZoneUuid { + self.zone_id + } + + pub async fn node_id(&self) -> Result<&str, HttpError> { + match self + .node_id + .get_or_try_init(|| self.read_node_id_from_cockroach()) + .await + { + Ok(id) => Ok(id.as_str()), + Err(err) => { + let message = format!( + "failed to read node ID from local cockroach instance: \ + {err:#}", + ); + Err(HttpError { + status_code: http::StatusCode::SERVICE_UNAVAILABLE, + error_code: None, + external_message: message.clone(), + internal_message: message, + }) + } + } + } + + async fn read_node_id_from_cockroach(&self) -> anyhow::Result { + let cockroach_address = self.cockroach_cli().cockroach_address(); + // TODO-cleanup This connection string is duplicated in Nexus - maybe we + // should centralize it? I'm not sure where we could put it; + // omicron_common, perhaps? + let connect_url = format!( + "postgresql://root@{cockroach_address}/omicron?sslmode=disable", + ); + let (client, connection) = + tokio_postgres::connect(&connect_url, tokio_postgres::NoTls) + .await + .with_context(|| { + format!("failed to connect to {connect_url}") + })?; + + let log = self.log.clone(); + tokio::spawn(async move { + if let Err(e) = connection.await { + slog::warn!( + log, "connection error reading node ID"; + "err" => InlineErrorChain::new(&e), + ); + } + }); + + // This uses an undocumented internal function - not awesome, but we're + // told this is "unlikely to change for some time". + // https://github.com/cockroachdb/cockroach/issues/124988 requests that + // this be documented (or an alternative be documented / provided). + let row = client + .query_one("SELECT crdb_internal.node_id()::TEXT", &[]) + .await + .context("failed to send node ID query")?; + + let node_id = row + .try_get(0) + .context("failed to read results of node ID query")?; + + // We'll be paranoid: While it seems unlikely we could ever get an + // incorrect node ID from the internal builtin, since it's not + // documented, we don't know for sure if it's possible for our query to + // be forwarded to a different node. Let's also run `NodeStatus`, and + // ensure that this node ID's address matches the address of our local + // crdb instance. + let node_statuses = self + .cockroach_cli() + .node_status() + .await + .context("failed to get node status")?; + + let our_node_status = node_statuses + .iter() + .find(|status| status.node_id == node_id) + .with_context(|| { + format!( + "node status did not include information for our node ID \ + ({node_id}): {node_statuses:?}" + ) + })?; + + if our_node_status.address != SocketAddr::V6(cockroach_address) { + bail!( + "node ID / address mismatch: we fetched node ID {node_id} \ + from our local cockroach at {cockroach_address}, but \ + `node status` reported this node ID at address {}", + our_node_status.address + ) + } + + Ok(node_id) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use nexus_test_utils::db::test_setup_database; + use omicron_test_utils::dev; + use std::net::SocketAddrV6; + use url::Url; + + #[tokio::test] + async fn test_node_id() { + let logctx = dev::test_setup_log("test_node_id"); + let mut db = test_setup_database(&logctx.log).await; + + // Construct a `ServerContext`. 
+ let db_url = db.listen_url().to_string(); + let url: Url = db_url.parse().expect("valid url"); + let cockroach_address: SocketAddrV6 = format!( + "{}:{}", + url.host().expect("url has host"), + url.port().expect("url has port") + ) + .parse() + .expect("valid SocketAddrV6"); + let cli = CockroachCli::new("cockroach".into(), cockroach_address); + let context = ServerContext::new( + OmicronZoneUuid::new_v4(), + cli, + logctx.log.clone(), + ); + + // We should be able to fetch a node id, and it should be `1` (since we + // have a single-node test cockroach instance). + let node_id = + context.node_id().await.expect("successfully read node ID"); + assert_eq!(node_id, "1"); + + // The `OnceCell` should be populated now; even if we shut down the DB, + // we can still fetch the node ID. + db.cleanup().await.unwrap(); + let node_id = + context.node_id().await.expect("successfully read node ID"); + assert_eq!(node_id, "1"); + + logctx.cleanup_successful(); + } } diff --git a/cockroach-admin/src/http_entrypoints.rs b/cockroach-admin/src/http_entrypoints.rs index 24d36c9823..bf12eb933b 100644 --- a/cockroach-admin/src/http_entrypoints.rs +++ b/cockroach-admin/src/http_entrypoints.rs @@ -8,6 +8,7 @@ use dropshot::endpoint; use dropshot::HttpError; use dropshot::HttpResponseOk; use dropshot::RequestContext; +use omicron_uuid_kinds::OmicronZoneUuid; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -17,6 +18,7 @@ type CrdbApiDescription = dropshot::ApiDescription>; pub fn api() -> CrdbApiDescription { fn register_endpoints(api: &mut CrdbApiDescription) -> Result<(), String> { + api.register(node_id)?; api.register(node_status)?; Ok(()) } @@ -44,6 +46,40 @@ async fn node_status( ) -> Result, HttpError> { let ctx = rqctx.context(); let all_nodes = - ctx.cockroach_cli.node_status().await.map_err(HttpError::from)?; + ctx.cockroach_cli().node_status().await.map_err(HttpError::from)?; Ok(HttpResponseOk(ClusterNodeStatus { all_nodes })) } + +/// CockroachDB Node ID +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub struct NodeId { + /// The ID of this Omicron zone. + /// + /// This is included to ensure correctness even if a socket address on a + /// sled is reused for a different zone; if our caller is trying to + /// determine the node ID for a particular Omicron CockroachDB zone, they'll + /// contact us by socket address. We include our zone ID in the response for + /// their confirmation that we are the zone they intended to contact. + pub zone_id: OmicronZoneUuid, + // CockroachDB node IDs are integers, in practice, but our use of them is as + // input and output to the `cockroach` CLI. We use a string which is a bit + // more natural (no need to parse CLI output or stringify an ID to send it + // as input) and leaves open the door for the format to change in the + // future. + pub node_id: String, +} + +/// Get the CockroachDB node ID of the local cockroach instance. 
+#[endpoint { + method = GET, + path = "/node/id", +}] +async fn node_id( + rqctx: RequestContext>, +) -> Result, HttpError> { + let ctx = rqctx.context(); + let node_id = ctx.node_id().await?.to_string(); + let zone_id = ctx.zone_id(); + Ok(HttpResponseOk(NodeId { zone_id, node_id })) +} diff --git a/cockroach-admin/src/lib.rs b/cockroach-admin/src/lib.rs index d6c53c8dc6..f4a32cb6c0 100644 --- a/cockroach-admin/src/lib.rs +++ b/cockroach-admin/src/lib.rs @@ -4,6 +4,7 @@ use context::ServerContext; use omicron_common::FileKv; +use omicron_uuid_kinds::OmicronZoneUuid; use slog::debug; use slog::error; use slog::Drain; @@ -51,6 +52,7 @@ pub type Server = dropshot::HttpServer>; /// Start the dropshot server pub async fn start_server( + zone_id: OmicronZoneUuid, cockroach_cli: CockroachCli, server_config: Config, ) -> Result { @@ -72,7 +74,11 @@ pub async fn start_server( } } - let context = ServerContext { cockroach_cli }; + let context = ServerContext::new( + zone_id, + cockroach_cli, + log.new(slog::o!("component" => "ServerContext")), + ); let http_server_starter = dropshot::HttpServerStarter::new( &server_config.dropshot, http_entrypoints::api(), diff --git a/cockroach-admin/tests/integration_tests/commands.rs b/cockroach-admin/tests/integration_tests/commands.rs new file mode 100644 index 0000000000..875427d948 --- /dev/null +++ b/cockroach-admin/tests/integration_tests/commands.rs @@ -0,0 +1,43 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Tests for the executable commands in this repo. + +use expectorate::assert_contents; +use omicron_test_utils::dev::test_cmds::{ + assert_exit_code, path_to_executable, run_command, EXIT_SUCCESS, +}; +use openapiv3::OpenAPI; +use std::path::PathBuf; +use subprocess::Exec; + +// path to executable +const CMD_COCKROACH_ADMIN: &str = env!("CARGO_BIN_EXE_cockroach-admin"); + +fn path_to_cockroach_admin() -> PathBuf { + path_to_executable(CMD_COCKROACH_ADMIN) +} + +#[test] +fn test_cockroach_admin_openapi() { + let exec = Exec::cmd(path_to_cockroach_admin()).arg("openapi"); + let (exit_status, stdout_text, stderr_text) = run_command(exec); + assert_exit_code(exit_status, EXIT_SUCCESS, &stderr_text); + assert_contents( + "tests/output/cmd-cockroach-admin-openapi-stderr", + &stderr_text, + ); + + let spec: OpenAPI = serde_json::from_str(&stdout_text) + .expect("stdout was not valid OpenAPI"); + + // Check for lint errors. + let errors = openapi_lint::validate(&spec); + assert!(errors.is_empty(), "{}", errors.join("\n\n")); + + // Confirm that the output hasn't changed. It's expected that we'll change + // this file as the API evolves, but pay attention to the diffs to ensure + // that the changes match your expectations. + assert_contents("../openapi/cockroach-admin.json", &stdout_text); +} diff --git a/cockroach-admin/tests/integration_tests/mod.rs b/cockroach-admin/tests/integration_tests/mod.rs new file mode 100644 index 0000000000..1bf43dc00c --- /dev/null +++ b/cockroach-admin/tests/integration_tests/mod.rs @@ -0,0 +1,5 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
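As a reference for consumers of the `GET /node/id` endpoint added above: the response is a small JSON object carrying both identifiers. A sketch of parsing such a body follows; the mirror struct and the UUID value are illustrative only, while the field names and snake_case casing come from the `NodeId` struct.

    use serde::Deserialize;

    // Local mirror of the `NodeId` response body, defined here only for the
    // example; the real type lives in the cockroach-admin server and its
    // generated client.
    #[derive(Debug, Deserialize)]
    struct NodeIdResponse {
        zone_id: String,
        node_id: String,
    }

    fn main() -> Result<(), serde_json::Error> {
        // Example payload; the zone UUID is made up.
        let body = r#"{
            "zone_id": "63373807-ad12-4a52-bb14-1af7cd9c26cb",
            "node_id": "1"
        }"#;
        let resp: NodeIdResponse = serde_json::from_str(body)?;
        assert_eq!(resp.node_id, "1");
        println!("zone {} is CockroachDB node {}", resp.zone_id, resp.node_id);
        Ok(())
    }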
+ +mod commands; diff --git a/cockroach-admin/tests/mod.rs b/cockroach-admin/tests/mod.rs new file mode 100644 index 0000000000..99aeeb8299 --- /dev/null +++ b/cockroach-admin/tests/mod.rs @@ -0,0 +1,5 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +mod integration_tests; diff --git a/cockroach-admin/tests/output/cmd-cockroach-admin-openapi-stderr b/cockroach-admin/tests/output/cmd-cockroach-admin-openapi-stderr new file mode 100644 index 0000000000..e69de29bb2 diff --git a/common/src/address.rs b/common/src/address.rs index b246f8f392..44942a9854 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -31,6 +31,12 @@ pub const MIN_PORT: u16 = u16::MIN; /// Reconfigurator (to know whether to add new Nexus zones) pub const NEXUS_REDUNDANCY: usize = 3; +/// The amount of redundancy for CockroachDb services. +/// +/// This is used by both RSS (to distribute the initial set of services) and the +/// Reconfigurator (to know whether to add new crdb zones) +pub const COCKROACHDB_REDUNDANCY: usize = 5; + /// The amount of redundancy for internal DNS servers. /// /// Must be less than or equal to MAX_DNS_REDUNDANCY. diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 6b171b59fe..0af437bd99 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -2553,6 +2553,9 @@ pub struct SwitchPortAddressConfig { /// The IP address and prefix. pub address: oxnet::IpNet, + /// An optional VLAN ID + pub vlan_id: Option, + /// The interface name this address belongs to. // TODO: https://github.com/oxidecomputer/omicron/issues/3050 // Use `Name` instead of `String` for `interface_name` type diff --git a/common/src/api/internal/nexus.rs b/common/src/api/internal/nexus.rs index b569437f43..4f990c56e1 100644 --- a/common/src/api/internal/nexus.rs +++ b/common/src/api/internal/nexus.rs @@ -16,6 +16,7 @@ use omicron_uuid_kinds::UpstairsSessionKind; use parse_display::{Display, FromStr}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use std::fmt; use std::net::SocketAddr; use std::time::Duration; use strum::{EnumIter, IntoEnumIterator}; @@ -108,6 +109,97 @@ pub struct SledInstanceState { /// The most recent state of the sled's VMM process. pub vmm_state: VmmRuntimeState, + + /// The current state of any in-progress migration for this instance, as + /// understood by this sled. + pub migration_state: Option, +} + +/// An update from a sled regarding the state of a migration, indicating the +/// role of the VMM whose migration state was updated. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct MigrationRuntimeState { + pub migration_id: Uuid, + pub state: MigrationState, + pub role: MigrationRole, + pub gen: Generation, + + /// Timestamp for the migration state update. + pub time_updated: DateTime, +} + +/// The state of an instance's live migration. +#[derive( + Clone, + Copy, + Debug, + Default, + PartialEq, + Eq, + Deserialize, + Serialize, + JsonSchema, +)] +#[serde(rename_all = "snake_case")] +pub enum MigrationState { + /// The migration has not started for this VMM. + #[default] + Pending, + /// The migration is in progress. + InProgress, + /// The migration has failed. + Failed, + /// The migration has completed. 
+ Completed, +} + +impl MigrationState { + pub fn label(&self) -> &'static str { + match self { + Self::Pending => "pending", + Self::InProgress => "in_progress", + Self::Completed => "completed", + Self::Failed => "failed", + } + } + /// Returns `true` if this migration state means that the migration is no + /// longer in progress (it has either succeeded or failed). + #[must_use] + pub fn is_terminal(&self) -> bool { + matches!(self, MigrationState::Completed | MigrationState::Failed) + } +} + +impl fmt::Display for MigrationState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.label()) + } +} + +#[derive( + Clone, Copy, Debug, PartialEq, Eq, Deserialize, Serialize, JsonSchema, +)] +#[serde(rename_all = "snake_case")] +pub enum MigrationRole { + /// This update concerns the source VMM of a migration. + Source, + /// This update concerns the target VMM of a migration. + Target, +} + +impl MigrationRole { + pub fn label(&self) -> &'static str { + match self { + Self::Source => "source", + Self::Target => "target", + } + } +} + +impl fmt::Display for MigrationRole { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.label()) + } } // Oximeter producer/collector objects. diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index 9e3e1a71f5..3d710fc952 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -152,13 +152,17 @@ pub enum SourceNatConfigError { UnalignedPortPair { first_port: u16, last_port: u16 }, } +// We alias [`PortConfig`] to the current version of the protocol, so +// that we can convert between versions as necessary. +pub type PortConfig = PortConfigV2; + // We alias [`RackNetworkConfig`] to the current version of the protocol, so // that we can convert between versions as necessary. -pub type RackNetworkConfig = RackNetworkConfigV1; +pub type RackNetworkConfig = RackNetworkConfigV2; /// Initial network configuration #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] -pub struct RackNetworkConfigV1 { +pub struct RackNetworkConfigV2 { pub rack_subnet: Ipv6Net, // TODO: #3591 Consider making infra-ip ranges implicit for uplinks /// First ip address to be used for configuring network infrastructure @@ -166,7 +170,7 @@ pub struct RackNetworkConfigV1 { /// Last ip address to be used for configuring network infrastructure pub infra_ip_last: Ipv4Addr, /// Uplinks for connecting the rack to external networks - pub ports: Vec, + pub ports: Vec, /// BGP configurations for connecting the rack to external networks pub bgp: Vec, /// BFD configuration for connecting the rack to external networks @@ -299,12 +303,81 @@ pub struct RouteConfig { pub vlan_id: Option, } +#[derive( + Clone, Debug, Deserialize, Serialize, PartialEq, Eq, JsonSchema, Hash, +)] +pub struct UplinkAddressConfig { + pub address: IpNet, + /// The VLAN id (if any) associated with this address. 
+ #[serde(default)] + pub vlan_id: Option, +} + +impl UplinkAddressConfig { + pub fn addr(&self) -> IpAddr { + self.address.addr() + } +} + +impl std::fmt::Display for UplinkAddressConfig { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.vlan_id { + None => write!(f, "{}", self.address), + Some(v) => write!(f, "{};{}", self.address, v), + } + } +} + +#[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] +pub struct UplinkAddressConfigError(String); + +impl std::fmt::Display for UplinkAddressConfigError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "parse switch location error: {}", self.0) + } +} + +/// Convert a string into an UplinkAddressConfig. +/// 192.168.1.1/24 => UplinkAddressConfig { 192.168.1.1/24, None } +/// 192.168.1.1/24;200 => UplinkAddressConfig { 192.168.1.1/24, Some(200) } +impl FromStr for UplinkAddressConfig { + type Err = UplinkAddressConfigError; + + fn from_str(s: &str) -> Result { + let fields: Vec<&str> = s.split(';').collect(); + let (address, vlan_id) = match fields.len() { + 1 => Ok((fields[0], None)), + 2 => Ok((fields[0], Some(fields[1]))), + _ => Err(UplinkAddressConfigError(format!( + "not a valid uplink address: {s}" + ))), + }?; + let address = address.parse().map_err(|_| { + UplinkAddressConfigError(format!( + "not a valid ip address: {address}" + )) + })?; + let vlan_id = match vlan_id { + None => Ok(None), + Some(v) => match v.parse() { + Err(_) => Err(format!("invalid vlan id: {v}")), + Ok(vlan_id) if vlan_id > 1 && vlan_id < 4096 => { + Ok(Some(vlan_id)) + } + Ok(vlan_id) => Err(format!("vlan id out of range: {vlan_id}")), + }, + } + .map_err(|e| UplinkAddressConfigError(e))?; + Ok(UplinkAddressConfig { address, vlan_id }) + } +} + #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq, JsonSchema)] -pub struct PortConfigV1 { +pub struct PortConfigV2 { /// The set of routes associated with this port. pub routes: Vec, - /// This port's addresses. - pub addresses: Vec, + /// This port's addresses and optional vlan IDs + pub addresses: Vec, /// Switch the port belongs to. pub switch: SwitchLocation, /// Nmae of the port this config applies to. @@ -320,46 +393,6 @@ pub struct PortConfigV1 { pub autoneg: bool, } -impl From for PortConfigV1 { - fn from(value: UplinkConfig) -> Self { - PortConfigV1 { - routes: vec![RouteConfig { - destination: "0.0.0.0/0".parse().unwrap(), - nexthop: value.gateway_ip.into(), - vlan_id: None, - }], - addresses: vec![value.uplink_cidr.into()], - switch: value.switch, - port: value.uplink_port, - uplink_port_speed: value.uplink_port_speed, - uplink_port_fec: value.uplink_port_fec, - bgp_peers: vec![], - autoneg: false, - } - } -} - -/// Deprecated, use PortConfigV1 instead. Cannot actually deprecate due to -/// -#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] -pub struct UplinkConfig { - /// Gateway address - pub gateway_ip: Ipv4Addr, - /// Switch to use for uplink - pub switch: SwitchLocation, - /// Switchport to use for external connectivity - pub uplink_port: String, - /// Speed for the Switchport - pub uplink_port_speed: PortSpeed, - /// Forward Error Correction setting for the uplink port - pub uplink_port_fec: PortFec, - /// IP Address and prefix (e.g., `192.168.0.1/16`) to apply to switchport - /// (must be in infra_ip pool) - pub uplink_cidr: Ipv4Net, - /// VLAN id to use for uplink - pub uplink_vid: Option, -} - /// A set of switch uplinks. 
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] pub struct SwitchPorts { @@ -372,12 +405,12 @@ pub struct HostPortConfig { pub port: String, /// IP Address and prefix (e.g., `192.168.0.1/16`) to apply to switchport - /// (must be in infra_ip pool) - pub addrs: Vec, + /// (must be in infra_ip pool). May also include an optional VLAN ID. + pub addrs: Vec, } -impl From for HostPortConfig { - fn from(x: PortConfigV1) -> Self { +impl From for HostPortConfig { + fn from(x: PortConfigV2) -> Self { Self { port: x.port, addrs: x.addresses } } } diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index ccb824cda4..174ffe5e3e 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -43,6 +43,10 @@ task: "blueprint_loader" Loads the current target blueprint from the DB +task: "crdb_node_id_collector" + Collects node IDs of running CockroachDB zones + + task: "dns_config_external" watches external DNS data stored in CockroachDB @@ -163,6 +167,10 @@ task: "blueprint_loader" Loads the current target blueprint from the DB +task: "crdb_node_id_collector" + Collects node IDs of running CockroachDB zones + + task: "dns_config_external" watches external DNS data stored in CockroachDB @@ -270,6 +278,10 @@ task: "blueprint_loader" Loads the current target blueprint from the DB +task: "crdb_node_id_collector" + Collects node IDs of running CockroachDB zones + + task: "dns_config_external" watches external DNS data stored in CockroachDB diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 22d613f838..9f16c6026c 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -244,6 +244,10 @@ task: "blueprint_loader" Loads the current target blueprint from the DB +task: "crdb_node_id_collector" + Collects node IDs of running CockroachDB zones + + task: "dns_config_external" watches external DNS data stored in CockroachDB @@ -426,6 +430,13 @@ task: "bfd_manager" started at (s ago) and ran for ms last completion reported error: failed to resolve addresses for Dendrite services: no record found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN } +task: "crdb_node_id_collector" + configured period: every 10m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + last completion reported error: no blueprint + task: "external_endpoints" configured period: every 1m currently executing: no diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index b5e65249ce..bc212281b2 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -24,6 +24,7 @@ use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::OmicronZoneNic; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledFilter; +use nexus_types::deployment::ZoneKind; use nexus_types::deployment::{Blueprint, UnstableReconfiguratorState}; use nexus_types::internal_api::params::DnsConfigParams; use nexus_types::inventory::Collection; @@ -744,7 +745,8 @@ fn cmd_blueprint_edit( let label = match args.edit_command { BlueprintEditCommands::AddNexus { sled_id } => { - let current = builder.sled_num_nexus_zones(sled_id); + let current = + builder.sled_num_zones_of_kind(sled_id, ZoneKind::Nexus); let added = builder .sled_ensure_zone_multiple_nexus(sled_id, current + 1) .context("failed to add Nexus zone")?; 
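Circling back to the `UplinkAddressConfig` parser added in `common/src/api/internal/shared.rs` above, here is a short usage sketch of the two accepted string forms and the `Display` round-trip, assuming the type is exported from `omicron_common` as shown (the range check accepts VLAN IDs 2 through 4095).

    use omicron_common::api::internal::shared::UplinkAddressConfig;

    fn main() {
        // Bare prefix: no VLAN ID attached.
        let plain: UplinkAddressConfig = "192.168.1.1/24".parse().unwrap();
        assert!(plain.vlan_id.is_none());

        // `<prefix>;<vlan>` attaches a VLAN ID.
        let tagged: UplinkAddressConfig = "192.168.1.1/24;200".parse().unwrap();
        assert_eq!(tagged.vlan_id, Some(200));

        // `Display` writes the same `;`-separated form back out.
        assert_eq!(tagged.to_string(), "192.168.1.1/24;200");

        // Out-of-range VLAN IDs are rejected by the parser.
        assert!("192.168.1.1/24;9999".parse::<UplinkAddressConfig>().is_err());
    }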
diff --git a/dev-tools/xtask/src/download.rs b/dev-tools/xtask/src/download.rs index ccfe8b2bc9..ce227b7c4d 100644 --- a/dev-tools/xtask/src/download.rs +++ b/dev-tools/xtask/src/download.rs @@ -133,7 +133,7 @@ pub async fn run_cmd(args: DownloadArgs) -> Result<()> { Target::TransceiverControl => { downloader.download_transceiver_control().await } - }.context("Failed to download {target:?}")?; + }.context(format!("Failed to download {target:?}"))?; info!(&log, "Download complete"); Ok(()) diff --git a/dev-tools/xtask/src/external.rs b/dev-tools/xtask/src/external.rs index 9c0bc69b55..05e668297d 100644 --- a/dev-tools/xtask/src/external.rs +++ b/dev-tools/xtask/src/external.rs @@ -52,14 +52,17 @@ impl External { self } - pub fn exec(mut self, bin_target: impl AsRef) -> Result<()> { - let error = self - .command - .arg("--bin") - .arg(bin_target) - .arg("--") - .args(self.args) - .exec(); + pub fn exec_example(self, example_target: impl AsRef) -> Result<()> { + self.exec_common("--example", example_target.as_ref()) + } + + pub fn exec_bin(self, bin_target: impl AsRef) -> Result<()> { + self.exec_common("--bin", bin_target.as_ref()) + } + + fn exec_common(mut self, kind: &'static str, target: &OsStr) -> Result<()> { + let error = + self.command.arg(kind).arg(target).arg("--").args(self.args).exec(); Err(error).context("failed to exec `cargo run`") } } diff --git a/dev-tools/xtask/src/main.rs b/dev-tools/xtask/src/main.rs index 22e5a22632..96afdac17d 100644 --- a/dev-tools/xtask/src/main.rs +++ b/dev-tools/xtask/src/main.rs @@ -34,6 +34,9 @@ struct Args { #[derive(Subcommand)] enum Cmds { + /// Run Argon2 hash with specific parameters (quick performance check) + Argon2(external::External), + /// Check that dependencies are not duplicated in any packages in the /// workspace CheckWorkspaceDeps, @@ -69,12 +72,15 @@ enum Cmds { async fn main() -> Result<()> { let args = Args::parse(); match args.cmd { + Cmds::Argon2(external) => { + external.cargo_args(["--release"]).exec_example("argon2") + } Cmds::Clippy(args) => clippy::run_cmd(args), Cmds::CheckWorkspaceDeps => check_workspace_deps::run_cmd(), Cmds::Download(args) => download::run_cmd(args).await, #[cfg(target_os = "illumos")] Cmds::Releng(external) => { - external.cargo_args(["--release"]).exec("omicron-releng") + external.cargo_args(["--release"]).exec_bin("omicron-releng") } #[cfg(target_os = "illumos")] Cmds::VerifyLibraries(args) => verify_libraries::run_cmd(args), diff --git a/dev-tools/xtask/src/virtual_hardware.rs b/dev-tools/xtask/src/virtual_hardware.rs index 0ec9f91492..5384433f55 100644 --- a/dev-tools/xtask/src/virtual_hardware.rs +++ b/dev-tools/xtask/src/virtual_hardware.rs @@ -114,7 +114,7 @@ const ZPOOL: &'static str = "/usr/sbin/zpool"; const ZONEADM: &'static str = "/usr/sbin/zoneadm"; const SIDECAR_LITE_COMMIT: &'static str = - "960f11afe859e0316088e04578aedb700fba6159"; + "de6fab7885a6bbc5327accffd2a872a31e2f1cb6"; const SOFTNPU_COMMIT: &'static str = "3203c51cf4473d30991b522062ac0df2e045c2f2"; const PXA_MAC_DEFAULT: &'static str = "a8:e1:de:01:70:1d"; diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index 50c4b4e174..097467ef04 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -292,7 +292,7 @@ routes = [{nexthop = "192.168.1.199", destination = "0.0.0.0/0"}] # Addresses associated with this port. # For softnpu, an address within the "infra" block above that will be used for # the softnpu uplink port. You can just pick the first address in that pool. 
-addresses = ["192.168.1.30/24"] +addresses = [{address = "192.168.1.30/24"}] # Name of the uplink port. This should always be "qsfp0" when using softnpu. port = "qsfp0" # The speed of this port. diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs index 321064df49..67acb5ec1b 100644 --- a/nexus-config/src/nexus_config.rs +++ b/nexus-config/src/nexus_config.rs @@ -517,6 +517,11 @@ pub struct BlueprintTasksConfig { /// executes the latest target blueprint #[serde_as(as = "DurationSeconds")] pub period_secs_execute: Duration, + + /// period (in seconds) for periodic activations of the background task that + /// collects the node IDs of CockroachDB zones + #[serde_as(as = "DurationSeconds")] + pub period_secs_collect_crdb_node_ids: Duration, } #[serde_as] @@ -792,6 +797,7 @@ mod test { phantom_disks.period_secs = 30 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 + blueprints.period_secs_collect_crdb_node_ids = 180 sync_service_zone_nat.period_secs = 30 switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 @@ -915,7 +921,9 @@ mod test { }, blueprints: BlueprintTasksConfig { period_secs_load: Duration::from_secs(10), - period_secs_execute: Duration::from_secs(60) + period_secs_execute: Duration::from_secs(60), + period_secs_collect_crdb_node_ids: + Duration::from_secs(180), }, sync_service_zone_nat: SyncServiceZoneNatConfig { period_secs: Duration::from_secs(30) @@ -1003,6 +1011,7 @@ mod test { phantom_disks.period_secs = 30 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 + blueprints.period_secs_collect_crdb_node_ids = 180 sync_service_zone_nat.period_secs = 30 switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 58a1e824cb..81cf6499b2 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -22,6 +22,7 @@ camino.workspace = true camino-tempfile.workspace = true clap.workspace = true chrono.workspace = true +cockroach-admin-client.workspace = true crucible-agent-client.workspace = true crucible-pantry-client.workspace = true dns-service-client.workspace = true @@ -118,6 +119,7 @@ hyper-rustls.workspace = true gateway-messages.workspace = true gateway-test-utils.workspace = true hubtools.workspace = true +nexus-db-queries = { workspace = true, features = ["testing"] } nexus-test-utils-macros.workspace = true nexus-test-utils.workspace = true omicron-sled-agent.workspace = true diff --git a/nexus/db-model/src/cockroachdb_node_id.rs b/nexus/db-model/src/cockroachdb_node_id.rs new file mode 100644 index 0000000000..1179b36f0b --- /dev/null +++ b/nexus/db-model/src/cockroachdb_node_id.rs @@ -0,0 +1,16 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Types for mapping CockroachDB Omicron zone IDs to internal-to-CRDB node IDs + +use crate::schema::cockroachdb_zone_id_to_node_id; +use crate::typed_uuid::DbTypedUuid; +use omicron_uuid_kinds::OmicronZoneKind; + +#[derive(Queryable, Insertable, Clone, Debug, Selectable)] +#[diesel(table_name = cockroachdb_zone_id_to_node_id)] +pub struct CockroachZoneIdToNodeId { + pub omicron_zone_id: DbTypedUuid, + pub crdb_node_id: String, +} diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 040882a8f0..30dc82965d 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -17,6 +17,7 @@ mod block_size; mod bootstore; mod bytecount; mod certificate; +mod cockroachdb_node_id; mod collection; mod console_session; mod dataset; @@ -42,6 +43,8 @@ pub mod ipv6; mod ipv6net; mod l4_port_range; mod macaddr; +mod migration; +mod migration_state; mod name; mod network_interface; mod oximeter_info; @@ -126,6 +129,7 @@ pub use block_size::*; pub use bootstore::*; pub use bytecount::*; pub use certificate::*; +pub use cockroachdb_node_id::*; pub use collection::*; pub use console_session::*; pub use dataset::*; @@ -152,6 +156,8 @@ pub use ipv4net::*; pub use ipv6::*; pub use ipv6net::*; pub use l4_port_range::*; +pub use migration::*; +pub use migration_state::*; pub use name::*; pub use network_interface::*; pub use oximeter_info::*; diff --git a/nexus/db-model/src/migration.rs b/nexus/db-model/src/migration.rs new file mode 100644 index 0000000000..5739122a46 --- /dev/null +++ b/nexus/db-model/src/migration.rs @@ -0,0 +1,86 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::Generation; +use crate::schema::migration; +use crate::MigrationState; +use chrono::DateTime; +use chrono::Utc; +use omicron_common::api::internal::nexus; +use serde::Deserialize; +use serde::Serialize; +use uuid::Uuid; + +/// The state of a migration as understood by Nexus. +#[derive( + Clone, + Debug, + Queryable, + Insertable, + Selectable, + Serialize, + Deserialize, + Eq, + PartialEq, +)] +#[diesel(table_name = migration)] +pub struct Migration { + /// The migration's UUID. + /// + /// This is the primary key of the migration table and is referenced by the + /// `instance` table's `migration_id` field. + pub id: Uuid, + + /// The time at which this migration record was created. + pub time_created: DateTime, + + /// The time at which this migration record was deleted, + pub time_deleted: Option>, + + /// The state of the migration source VMM. + pub source_state: MigrationState, + + /// The ID of the migration source VMM. + pub source_propolis_id: Uuid, + + /// The generation number for the source state. + pub source_gen: Generation, + + /// The time the source VMM state was most recently updated. + pub time_source_updated: Option>, + + /// The state of the migration target VMM. + pub target_state: MigrationState, + + /// The ID of the migration target VMM. + pub target_propolis_id: Uuid, + + /// The generation number for the target state. + pub target_gen: Generation, + + /// The time the target VMM state was most recently updated. 
+ pub time_target_updated: Option>, +} + +impl Migration { + pub fn new( + migration_id: Uuid, + source_propolis_id: Uuid, + target_propolis_id: Uuid, + ) -> Self { + Self { + id: migration_id, + time_created: Utc::now(), + time_deleted: None, + source_state: nexus::MigrationState::Pending.into(), + source_propolis_id, + source_gen: Generation::new(), + time_source_updated: None, + target_state: nexus::MigrationState::Pending.into(), + target_propolis_id, + target_gen: Generation::new(), + time_target_updated: None, + } + } +} diff --git a/nexus/db-model/src/migration_state.rs b/nexus/db-model/src/migration_state.rs new file mode 100644 index 0000000000..694198eb56 --- /dev/null +++ b/nexus/db-model/src/migration_state.rs @@ -0,0 +1,49 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Database representation of a migration's state as understood by Nexus. + +use super::impl_enum_wrapper; +use omicron_common::api::internal::nexus; +use serde::Deserialize; +use serde::Serialize; +use std::fmt; +use std::io::Write; + +impl_enum_wrapper!( + #[derive(Clone, SqlType, Debug, QueryId)] + #[diesel(postgres_type(name = "migration_state", schema = "public"))] + pub struct MigrationStateEnum; + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq, Eq)] + #[diesel(sql_type = MigrationStateEnum)] + pub struct MigrationState(pub nexus::MigrationState); + + // Enum values + Pending => b"pending" + InProgress => b"in_progress" + Completed => b"completed" + Failed => b"failed" +); + +impl MigrationState { + /// Returns `true` if this migration state means that the migration is no + /// longer in progress (it has either succeeded or failed). + #[must_use] + pub fn is_terminal(&self) -> bool { + self.0.is_terminal() + } +} + +impl fmt::Display for MigrationState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&self.0, f) + } +} + +impl From for MigrationState { + fn from(s: nexus::MigrationState) -> Self { + Self(s) + } +} diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index dedb0efc62..0587e10ad5 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -322,6 +322,7 @@ table! { rsvd_address_lot_block_id -> Uuid, address -> Inet, interface_name -> Text, + vlan_id -> Nullable, } } @@ -1615,6 +1616,13 @@ table! { } } +table! { + cockroachdb_zone_id_to_node_id (omicron_zone_id, crdb_node_id) { + omicron_zone_id -> Uuid, + crdb_node_id -> Text, + } +} + table! { bootstore_keys (key, generation) { key -> Text, @@ -1758,6 +1766,26 @@ table! { } } +table! 
{ + migration (id) { + id -> Uuid, + time_created -> Timestamptz, + time_deleted -> Nullable, + source_state -> crate::MigrationStateEnum, + source_propolis_id -> Uuid, + source_gen -> Int8, + time_source_updated -> Nullable, + target_state -> crate::MigrationStateEnum, + target_propolis_id -> Uuid, + target_gen -> Int8, + time_target_updated -> Nullable, + } +} + +allow_tables_to_appear_in_same_query!(instance, migration); +allow_tables_to_appear_in_same_query!(migration, vmm); +joinable!(instance -> migration (migration_id)); + allow_tables_to_appear_in_same_query!( ip_pool_range, ip_pool, diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 4e0e9cb233..8f529c80a7 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(72, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(75, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,9 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(75, "add-cockroach-zone-id-to-node-id"), + KnownVersion::new(74, "add-migration-table"), + KnownVersion::new(73, "add-vlan-to-uplink"), KnownVersion::new(72, "fix-provisioning-counters"), KnownVersion::new(71, "add-saga-unwound-vmm-state"), KnownVersion::new(70, "separate-instance-and-vmm-states"), diff --git a/nexus/db-model/src/switch_port.rs b/nexus/db-model/src/switch_port.rs index b10f6ba679..48afd7b52a 100644 --- a/nexus/db-model/src/switch_port.rs +++ b/nexus/db-model/src/switch_port.rs @@ -722,6 +722,7 @@ pub struct SwitchPortAddressConfig { pub rsvd_address_lot_block_id: Uuid, pub address: IpNetwork, pub interface_name: String, + pub vlan_id: Option, } impl SwitchPortAddressConfig { @@ -731,6 +732,7 @@ impl SwitchPortAddressConfig { rsvd_address_lot_block_id: Uuid, address: IpNetwork, interface_name: String, + vlan_id: Option, ) -> Self { Self { port_settings_id, @@ -738,6 +740,7 @@ impl SwitchPortAddressConfig { rsvd_address_lot_block_id, address, interface_name, + vlan_id: vlan_id.map(|x| x.into()), } } } @@ -749,6 +752,7 @@ impl Into for SwitchPortAddressConfig { address_lot_block_id: self.address_lot_block_id, address: self.address.into(), interface_name: self.interface_name, + vlan_id: self.vlan_id.map(|x| x.into()), } } } diff --git a/nexus/db-model/src/vmm.rs b/nexus/db-model/src/vmm.rs index cfa1d43759..ceaef2e709 100644 --- a/nexus/db-model/src/vmm.rs +++ b/nexus/db-model/src/vmm.rs @@ -21,7 +21,14 @@ use uuid::Uuid; /// An individual VMM process that incarnates a specific instance. 
#[derive( - Clone, Queryable, Debug, Selectable, Serialize, Deserialize, Insertable, + Clone, + Queryable, + Debug, + Selectable, + Serialize, + Deserialize, + Insertable, + PartialEq, )] #[diesel(table_name = vmm)] pub struct Vmm { @@ -101,6 +108,7 @@ impl Vmm { Queryable, Serialize, Deserialize, + PartialEq, )] #[diesel(table_name = vmm)] pub struct VmmRuntimeState { diff --git a/nexus/db-queries/src/db/datastore/cockroachdb_node_id.rs b/nexus/db-queries/src/db/datastore/cockroachdb_node_id.rs new file mode 100644 index 0000000000..fee915ab59 --- /dev/null +++ b/nexus/db-queries/src/db/datastore/cockroachdb_node_id.rs @@ -0,0 +1,166 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Datastore methods involving CockroachDB node IDs. + +use super::DataStore; +use crate::db; +use crate::db::error::public_error_from_diesel; +use crate::db::error::ErrorHandler; +use async_bb8_diesel::AsyncRunQueryDsl; +use diesel::ExpressionMethods; +use diesel::OptionalExtension; +use diesel::QueryDsl; +use nexus_auth::authz; +use nexus_auth::context::OpContext; +use nexus_db_model::to_db_typed_uuid; +use nexus_db_model::CockroachZoneIdToNodeId; +use omicron_common::api::external::Error; +use omicron_common::api::external::LookupResult; +use omicron_uuid_kinds::OmicronZoneUuid; + +impl DataStore { + /// Get the CockroachDB node ID of a given Omicron zone ID. + /// + /// Returns `Ok(None)` if no node ID is known. This can occur if the + /// requested zone ID isn't a CockroachDB zone, or if it is a CockroachDB + /// zone but the background task responsible for collecting its node ID has + /// not yet successfully done so. + pub async fn cockroachdb_node_id( + &self, + opctx: &OpContext, + omicron_zone_id: OmicronZoneUuid, + ) -> LookupResult> { + use db::schema::cockroachdb_zone_id_to_node_id::dsl; + + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + let conn = self.pool_connection_authorized(opctx).await?; + + dsl::cockroachdb_zone_id_to_node_id + .select(dsl::crdb_node_id) + .filter(dsl::omicron_zone_id.eq(to_db_typed_uuid(omicron_zone_id))) + .first_async(&*conn) + .await + .optional() + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// Record the CockroachDB node ID of a given Omicron zone ID. + /// + /// This function must only be called with valid CockroachDB zone IDs. It + /// will return an error if the given `omicron_zone_id` already has an entry + /// in this table that does not exactly match `crdb_node_id`. 
+ pub async fn set_cockroachdb_node_id( + &self, + opctx: &OpContext, + omicron_zone_id: OmicronZoneUuid, + crdb_node_id: String, + ) -> Result<(), Error> { + use db::schema::cockroachdb_zone_id_to_node_id::dsl; + + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + let conn = self.pool_connection_authorized(opctx).await?; + + let row = CockroachZoneIdToNodeId { + omicron_zone_id: omicron_zone_id.into(), + crdb_node_id, + }; + + let _nrows = diesel::insert_into(dsl::cockroachdb_zone_id_to_node_id) + .values(row) + .on_conflict((dsl::omicron_zone_id, dsl::crdb_node_id)) + .do_nothing() + .execute_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::db::datastore::test_utils::datastore_test; + use nexus_test_utils::db::test_setup_database; + use omicron_test_utils::dev; + + #[tokio::test] + async fn test_cockroachdb_node_id() { + let logctx = + dev::test_setup_log("test_service_network_interfaces_list"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Make up a CRDB zone id. + let crdb_zone_id = OmicronZoneUuid::new_v4(); + + // We shouldn't have a mapping for it yet. + let node_id = datastore + .cockroachdb_node_id(&opctx, crdb_zone_id) + .await + .expect("looked up node ID"); + assert_eq!(node_id, None); + + // We can assign a mapping. + let fake_node_id = "test-node"; + datastore + .set_cockroachdb_node_id( + &opctx, + crdb_zone_id, + fake_node_id.to_string(), + ) + .await + .expect("set node ID"); + + // We can look up the mapping we created. + let node_id = datastore + .cockroachdb_node_id(&opctx, crdb_zone_id) + .await + .expect("looked up node ID"); + assert_eq!(node_id.as_deref(), Some(fake_node_id)); + + // We can't assign a different node ID to this same zone. + let different_node_id = "test-node-2"; + datastore + .set_cockroachdb_node_id( + &opctx, + crdb_zone_id, + different_node_id.to_string(), + ) + .await + .expect_err("failed to set node ID"); + + // We can't assign the same node ID to a different zone, either. + let different_zone_id = OmicronZoneUuid::new_v4(); + datastore + .set_cockroachdb_node_id( + &opctx, + different_zone_id, + fake_node_id.to_string(), + ) + .await + .expect_err("failed to set node ID"); + + // We can reassign the same node ID (i.e., setting is idempotent). + datastore + .set_cockroachdb_node_id( + &opctx, + crdb_zone_id, + fake_node_id.to_string(), + ) + .await + .expect("set node ID is idempotent"); + + // The mapping should not have changed. 
+ let node_id = datastore + .cockroachdb_node_id(&opctx, crdb_zone_id) + .await + .expect("looked up node ID"); + assert_eq!(node_id.as_deref(), Some(fake_node_id)); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs index b9989fe31c..9fc6c54da7 100644 --- a/nexus/db-queries/src/db/datastore/instance.rs +++ b/nexus/db-queries/src/db/datastore/instance.rs @@ -21,6 +21,7 @@ use crate::db::lookup::LookupPath; use crate::db::model::Generation; use crate::db::model::Instance; use crate::db::model::InstanceRuntimeState; +use crate::db::model::Migration; use crate::db::model::Name; use crate::db::model::Project; use crate::db::model::Sled; @@ -46,6 +47,7 @@ use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; +use omicron_common::api::internal::nexus::MigrationRuntimeState; use omicron_common::bail_unless; use ref_cast::RefCast; use uuid::Uuid; @@ -118,6 +120,26 @@ impl From for omicron_common::api::external::Instance { } } +/// A complete snapshot of the database records describing the current state of +/// an instance: the [`Instance`] record itself, along with its active [`Vmm`], +/// target [`Vmm`], and current [`Migration`], if they exist. +/// +/// This is returned by [`DataStore::instance_fetch_all`]. +#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] +pub struct InstanceSnapshot { + /// The instance record. + pub instance: Instance, + /// The [`Vmm`] record pointed to by the instance's `active_propolis_id`, if + /// it is set. + pub active_vmm: Option, + /// The [`Vmm`] record pointed to by the instance's `target_propolis_id`, if + /// it is set. + pub target_vmm: Option, + /// The [`Migration`] record pointed to by the instance's `migration_id`, if + /// it is set. + pub migration: Option, +} + /// A token which represents that a saga holds the instance-updater lock on a /// particular instance. /// @@ -141,6 +163,25 @@ pub enum UpdaterLockError { Query(#[from] Error), } +/// The result of an [`DataStore::instance_and_vmm_update_runtime`] call, +/// indicating which records were updated. +#[derive(Copy, Clone, Debug)] +pub struct InstanceUpdateResult { + /// `true` if the instance record was updated, `false` otherwise. + pub instance_updated: bool, + /// `true` if the VMM record was updated, `false` otherwise. + pub vmm_updated: bool, + /// Indicates whether a migration record for this instance was updated, if a + /// [`MigrationRuntimeState`] was provided to + /// [`DataStore::instance_and_vmm_update_runtime`]. + /// + /// - `Some(true)` if a migration record was updated + /// - `Some(false)` if a [`MigrationRuntimeState`] was provided, but the + /// migration record was not updated + /// - `None` if no [`MigrationRuntimeState`] was provided + pub migration_updated: Option, +} + impl DataStore { /// Idempotently insert a database record for an Instance /// @@ -310,6 +351,92 @@ impl DataStore { Ok(InstanceAndActiveVmm { instance, vmm }) } + /// Fetches all database records describing the state of the provided + /// instance in a single atomic query. 
+ /// + /// If an instance with the provided UUID exists, this method returns an + /// [`InstanceSnapshot`], which contains the following: + /// + /// - The [`Instance`] record itself, + /// - The instance's active [`Vmm`] record, if the `active_propolis_id` + /// column is not null, + /// - The instance's target [`Vmm`] record, if the `target_propolis_id` + /// column is not null, + /// - The instance's current active [`Migration`], if the `migration_id` + /// column is not null. + pub async fn instance_fetch_all( + &self, + opctx: &OpContext, + authz_instance: &authz::Instance, + ) -> LookupResult { + opctx.authorize(authz::Action::Read, authz_instance).await?; + + use db::schema::instance::dsl as instance_dsl; + use db::schema::migration::dsl as migration_dsl; + use db::schema::vmm; + + // Create a Diesel alias to allow us to LEFT JOIN the `instance` table + // with the `vmm` table twice; once on the `active_propolis_id` and once + // on the `target_propolis_id`. + let (active_vmm, target_vmm) = + diesel::alias!(vmm as active_vmm, vmm as target_vmm); + let vmm_selection = + >::construct_selection(); + + let query = instance_dsl::instance + .filter(instance_dsl::id.eq(authz_instance.id())) + .filter(instance_dsl::time_deleted.is_null()) + .left_join( + active_vmm.on(active_vmm + .field(vmm::id) + .nullable() + .eq(instance_dsl::active_propolis_id) + .and(active_vmm.field(vmm::time_deleted).is_null())), + ) + .left_join( + target_vmm.on(target_vmm + .field(vmm::id) + .nullable() + .eq(instance_dsl::target_propolis_id) + .and(target_vmm.field(vmm::time_deleted).is_null())), + ) + .left_join( + migration_dsl::migration.on(migration_dsl::id + .nullable() + .eq(instance_dsl::migration_id) + .and(migration_dsl::time_deleted.is_null())), + ) + .select(( + Instance::as_select(), + active_vmm.fields(vmm_selection).nullable(), + target_vmm.fields(vmm_selection).nullable(), + Option::::as_select(), + )); + + let (instance, active_vmm, target_vmm, migration) = + query + .first_async::<( + Instance, + Option, + Option, + Option, + )>( + &*self.pool_connection_authorized(opctx).await? + ) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::Instance, + LookupType::ById(authz_instance.id()), + ), + ) + })?; + + Ok(InstanceSnapshot { instance, migration, active_vmm, target_vmm }) + } + // TODO-design It's tempting to return the updated state of the Instance // here because it's convenient for consumers and by using a RETURNING // clause, we could ensure that the "update" and "fetch" are atomic. @@ -372,12 +499,11 @@ impl DataStore { /// /// # Return value /// - /// - `Ok((instance_updated, vmm_updated))` if the query was issued - /// successfully. `instance_updated` and `vmm_updated` are each true if - /// the relevant item was updated and false otherwise. Note that an update - /// can fail because it was inapplicable (i.e. the database has state with - /// a newer generation already) or because the relevant record was not - /// found. + /// - `Ok(`[`InstanceUpdateResult`]`)` if the query was issued + /// successfully. The returned [`InstanceUpdateResult`] indicates which + /// database record(s) were updated. Note that an update can fail because + /// it was inapplicable (i.e. the database has state with a newer + /// generation already) or because the relevant record was not found. /// - `Err` if another error occurred while accessing the database. 
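    // Sketch of how a caller might branch on the returned
    // `InstanceUpdateResult` (the `result` binding is illustrative; it is
    // whatever `instance_and_vmm_update_runtime(...).await?` returned):
    //
    //     if result.instance_updated { /* instance row advanced */ }
    //     if result.vmm_updated { /* VMM row advanced */ }
    //     match result.migration_updated {
    //         // No `MigrationRuntimeState` was supplied with this update.
    //         None => {}
    //         // The migration record was advanced.
    //         Some(true) => {}
    //         // A `MigrationRuntimeState` was supplied, but the record was
    //         // stale (newer generation already present) or not found.
    //         Some(false) => {}
    //     }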
pub async fn instance_and_vmm_update_runtime( &self, @@ -385,12 +511,14 @@ impl DataStore { new_instance: &InstanceRuntimeState, vmm_id: &Uuid, new_vmm: &VmmRuntimeState, - ) -> Result<(bool, bool), Error> { + migration: &Option, + ) -> Result { let query = crate::db::queries::instance::InstanceAndVmmUpdate::new( *instance_id, new_instance.clone(), *vmm_id, new_vmm.clone(), + migration.clone(), ); // The InstanceAndVmmUpdate query handles and indicates failure to find @@ -413,7 +541,22 @@ impl DataStore { None => false, }; - Ok((instance_updated, vmm_updated)) + let migration_updated = if migration.is_some() { + Some(match result.migration_status { + Some(UpdateStatus::Updated) => true, + Some(UpdateStatus::NotUpdatedButExists) => false, + None => false, + }) + } else { + debug_assert_eq!(result.migration_status, None); + None + }; + + Ok(InstanceUpdateResult { + instance_updated, + vmm_updated, + migration_updated, + }) } /// Lists all instances on in-service sleds with active Propolis VMM @@ -785,14 +928,16 @@ mod tests { use super::*; use crate::db::datastore::test_utils::datastore_test; use crate::db::lookup::LookupPath; + use nexus_db_model::InstanceState; use nexus_db_model::Project; + use nexus_db_model::VmmState; use nexus_test_utils::db::test_setup_database; use nexus_types::external_api::params; use omicron_common::api::external::ByteCount; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_test_utils::dev; - async fn test_setup( + async fn create_test_instance( datastore: &DataStore, opctx: &OpContext, ) -> authz::Instance { @@ -816,7 +961,6 @@ mod tests { ) .await .expect("project must be created successfully"); - let _ = datastore .project_create_instance( &opctx, @@ -861,7 +1005,7 @@ mod tests { let (opctx, datastore) = datastore_test(&logctx, &db).await; let saga1 = Uuid::new_v4(); let saga2 = Uuid::new_v4(); - let authz_instance = test_setup(&datastore, &opctx).await; + let authz_instance = create_test_instance(&datastore, &opctx).await; macro_rules! assert_locked { ($id:expr) => {{ @@ -935,7 +1079,7 @@ mod tests { dev::test_setup_log("test_instance_updater_lock_is_idempotent"); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - let authz_instance = test_setup(&datastore, &opctx).await; + let authz_instance = create_test_instance(&datastore, &opctx).await; let saga1 = Uuid::new_v4(); // attempt to lock the instance once. 
@@ -993,7 +1137,7 @@ mod tests { ); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - let authz_instance = test_setup(&datastore, &opctx).await; + let authz_instance = create_test_instance(&datastore, &opctx).await; let saga1 = Uuid::new_v4(); let saga2 = Uuid::new_v4(); @@ -1073,4 +1217,170 @@ mod tests { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + #[tokio::test] + async fn test_instance_fetch_all() { + // Setup + let logctx = dev::test_setup_log("test_instance_fetch_all"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + let authz_instance = create_test_instance(&datastore, &opctx).await; + let snapshot = + dbg!(datastore.instance_fetch_all(&opctx, &authz_instance).await) + .expect("instance fetch must succeed"); + + assert_eq!( + dbg!(snapshot.instance.id()), + dbg!(authz_instance.id()), + "must have fetched the correct instance" + ); + assert_eq!( + dbg!(snapshot.active_vmm), + None, + "instance does not have an active VMM" + ); + assert_eq!( + dbg!(snapshot.target_vmm), + None, + "instance does not have a target VMM" + ); + assert_eq!( + dbg!(snapshot.migration), + None, + "instance does not have a migration" + ); + + let active_vmm = datastore + .vmm_insert( + &opctx, + Vmm { + id: Uuid::new_v4(), + time_created: Utc::now(), + time_deleted: None, + instance_id: authz_instance.id(), + sled_id: Uuid::new_v4(), + propolis_ip: "10.1.9.32".parse().unwrap(), + propolis_port: 420.into(), + runtime: VmmRuntimeState { + time_state_updated: Utc::now(), + gen: Generation::new(), + state: VmmState::Running, + }, + }, + ) + .await + .expect("active VMM should be inserted successfully!"); + datastore + .instance_update_runtime( + &authz_instance.id(), + &InstanceRuntimeState { + time_updated: Utc::now(), + gen: Generation( + snapshot.instance.runtime_state.gen.0.next(), + ), + nexus_state: InstanceState::Vmm, + propolis_id: Some(active_vmm.id), + ..snapshot.instance.runtime_state.clone() + }, + ) + .await + .expect("instance update should work"); + let snapshot = + dbg!(datastore.instance_fetch_all(&opctx, &authz_instance).await) + .expect("instance fetch must succeed"); + + assert_eq!( + dbg!(snapshot.instance.id()), + dbg!(authz_instance.id()), + "must have fetched the correct instance" + ); + assert_eq!( + dbg!(snapshot.active_vmm.map(|vmm| vmm.id)), + Some(dbg!(active_vmm.id)), + "fetched active VMM must be the instance's active VMM" + ); + assert_eq!( + dbg!(snapshot.target_vmm), + None, + "instance does not have a target VMM" + ); + assert_eq!( + dbg!(snapshot.migration), + None, + "instance does not have a migration" + ); + + let target_vmm = datastore + .vmm_insert( + &opctx, + Vmm { + id: Uuid::new_v4(), + time_created: Utc::now(), + time_deleted: None, + instance_id: authz_instance.id(), + sled_id: Uuid::new_v4(), + propolis_ip: "10.1.9.42".parse().unwrap(), + propolis_port: 666.into(), + runtime: VmmRuntimeState { + time_state_updated: Utc::now(), + gen: Generation::new(), + state: VmmState::Running, + }, + }, + ) + .await + .expect("target VMM should be inserted successfully!"); + let migration = datastore + .migration_insert( + &opctx, + Migration::new(Uuid::new_v4(), active_vmm.id, target_vmm.id), + ) + .await + .expect("migration should be inserted successfully!"); + datastore + .instance_update_runtime( + &authz_instance.id(), + &InstanceRuntimeState { + time_updated: Utc::now(), + gen: Generation( + 
snapshot.instance.runtime_state.gen.0.next(), + ), + nexus_state: InstanceState::Vmm, + propolis_id: Some(active_vmm.id), + dst_propolis_id: Some(target_vmm.id), + migration_id: Some(migration.id), + ..snapshot.instance.runtime_state.clone() + }, + ) + .await + .expect("instance update should work"); + let snapshot = + dbg!(datastore.instance_fetch_all(&opctx, &authz_instance).await) + .expect("instance fetch must succeed"); + + assert_eq!( + dbg!(snapshot.instance.id()), + dbg!(authz_instance.id()), + "must have fetched the correct instance" + ); + assert_eq!( + dbg!(snapshot.active_vmm.map(|vmm| vmm.id)), + Some(dbg!(active_vmm.id)), + "fetched active VMM must be the instance's active VMM" + ); + assert_eq!( + dbg!(snapshot.target_vmm.map(|vmm| vmm.id)), + Some(dbg!(target_vmm.id)), + "fetched target VMM must be the instance's target VMM" + ); + assert_eq!( + dbg!(snapshot.migration.map(|m| m.id)), + Some(dbg!(migration.id)), + "fetched migration must be the instance's migration" + ); + + // Clean up. + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/db-queries/src/db/datastore/migration.rs b/nexus/db-queries/src/db/datastore/migration.rs new file mode 100644 index 0000000000..ba8a4e0392 --- /dev/null +++ b/nexus/db-queries/src/db/datastore/migration.rs @@ -0,0 +1,92 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! [`DataStore`] methods on [`Migration`]s. + +use super::DataStore; +use crate::context::OpContext; +use crate::db::error::public_error_from_diesel; +use crate::db::error::ErrorHandler; +use crate::db::model::{Migration, MigrationState}; +use crate::db::schema::migration::dsl; +use crate::db::update_and_check::UpdateAndCheck; +use crate::db::update_and_check::UpdateStatus; +use async_bb8_diesel::AsyncRunQueryDsl; +use chrono::Utc; +use diesel::prelude::*; +use omicron_common::api::external::CreateResult; +use omicron_common::api::external::UpdateResult; +use omicron_common::api::internal::nexus; +use uuid::Uuid; + +impl DataStore { + /// Insert a database record for a migration. + pub async fn migration_insert( + &self, + opctx: &OpContext, + migration: Migration, + ) -> CreateResult { + diesel::insert_into(dsl::migration) + .values(migration) + .on_conflict(dsl::id) + .do_update() + .set(dsl::time_created.eq(dsl::time_created)) + .returning(Migration::as_returning()) + .get_result_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// Marks a migration record as deleted if and only if both sides of the + /// migration are in a terminal state. + pub async fn migration_terminate( + &self, + opctx: &OpContext, + migration_id: Uuid, + ) -> UpdateResult { + const TERMINAL_STATES: &[MigrationState] = &[ + MigrationState(nexus::MigrationState::Completed), + MigrationState(nexus::MigrationState::Failed), + ]; + + diesel::update(dsl::migration) + .filter(dsl::id.eq(migration_id)) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::source_state.eq_any(TERMINAL_STATES)) + .filter(dsl::target_state.eq_any(TERMINAL_STATES)) + .set(dsl::time_deleted.eq(Utc::now())) + .check_if_exists::(migration_id) + .execute_and_check(&*self.pool_connection_authorized(opctx).await?) 
+ .await + .map(|r| match r.status { + UpdateStatus::Updated => true, + UpdateStatus::NotUpdatedButExists => false, + }) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// Unconditionally mark a migration record as deleted. + /// + /// This is distinct from [`DataStore::migration_terminate`], as it will + /// mark a migration as deleted regardless of the states of the source and + /// target VMMs. + pub async fn migration_mark_deleted( + &self, + opctx: &OpContext, + migration_id: Uuid, + ) -> UpdateResult { + diesel::update(dsl::migration) + .filter(dsl::id.eq(migration_id)) + .filter(dsl::time_deleted.is_null()) + .set(dsl::time_deleted.eq(Utc::now())) + .check_if_exists::(migration_id) + .execute_and_check(&*self.pool_connection_authorized(opctx).await?) + .await + .map(|r| match r.status { + UpdateStatus::Updated => true, + UpdateStatus::NotUpdatedButExists => false, + }) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } +} diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 203dab7025..919bf97392 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -54,6 +54,7 @@ mod bfd; mod bgp; mod bootstore; mod certificate; +mod cockroachdb_node_id; mod cockroachdb_settings; mod console_session; mod dataset; @@ -69,6 +70,7 @@ pub mod instance; mod inventory; mod ip_pool; mod ipv4_nat_entry; +mod migration; mod network_interface; mod oximeter; mod physical_disk; diff --git a/nexus/db-queries/src/db/datastore/switch_port.rs b/nexus/db-queries/src/db/datastore/switch_port.rs index edb16e95ac..d3cc6f56ab 100644 --- a/nexus/db-queries/src/db/datastore/switch_port.rs +++ b/nexus/db-queries/src/db/datastore/switch_port.rs @@ -549,6 +549,7 @@ impl DataStore { rsvd_block.id, address.address.into(), interface_name.clone(), + address.vlan_id )); } diff --git a/nexus/db-queries/src/db/pool.rs b/nexus/db-queries/src/db/pool.rs index c6fdebedc1..adf7778e93 100644 --- a/nexus/db-queries/src/db/pool.rs +++ b/nexus/db-queries/src/db/pool.rs @@ -61,7 +61,7 @@ fn make_dns_resolver( bootstrap_dns, DnsResolverConfig { query_interval: tokio::time::Duration::from_secs(10), - hardcoded_ttl: Some(std::time::Duration::from_secs(60)), + hardcoded_ttl: Some(tokio::time::Duration::MAX), ..Default::default() }, )) diff --git a/nexus/db-queries/src/db/queries/instance.rs b/nexus/db-queries/src/db/queries/instance.rs index ea40877450..ed584c6ce6 100644 --- a/nexus/db-queries/src/db/queries/instance.rs +++ b/nexus/db-queries/src/db/queries/instance.rs @@ -12,8 +12,14 @@ use diesel::sql_types::{Nullable, Uuid as SqlUuid}; use diesel::{pg::Pg, query_builder::AstPass}; use diesel::{Column, ExpressionMethods, QueryDsl, RunQueryDsl}; use nexus_db_model::{ - schema::{instance::dsl as instance_dsl, vmm::dsl as vmm_dsl}, - InstanceRuntimeState, VmmRuntimeState, + schema::{ + instance::dsl as instance_dsl, migration::dsl as migration_dsl, + vmm::dsl as vmm_dsl, + }, + Generation, InstanceRuntimeState, MigrationState, VmmRuntimeState, +}; +use omicron_common::api::internal::nexus::{ + MigrationRole, MigrationRuntimeState, }; use uuid::Uuid; @@ -64,6 +70,12 @@ use crate::db::update_and_check::UpdateStatus; // SELECT vmm_result.found, vmm_result.updated, instance_result.found, // instance_result.updated // FROM vmm_result, instance_result; +/// +/// If a [`MigrationRuntimeState`] is provided, similar "found" and "update" +/// clauses are also added to join the `migration` record for the 
instance's +/// active migration, if one exists, and update the migration record. If no +/// migration record is provided, this part of the query is skipped, and the +/// `migration_found` and `migration_updated` portions are always `false`. // // The "wrapper" SELECTs when finding instances and VMMs are used to get a NULL // result in the final output instead of failing the entire query if the target @@ -76,6 +88,12 @@ pub struct InstanceAndVmmUpdate { vmm_find: Box + Send>, instance_update: Box + Send>, vmm_update: Box + Send>, + migration: Option, +} + +struct MigrationUpdate { + find: Box + Send>, + update: Box + Send>, } /// Contains the result of a combined instance-and-VMM update operation. @@ -89,6 +107,11 @@ pub struct InstanceAndVmmUpdateResult { /// `Some(status)` if the target VMM was found; the wrapped `UpdateStatus` /// indicates whether the row was updated. `None` if the VMM was not found. pub vmm_status: Option, + + /// `Some(status)` if the target migration was found; the wrapped `UpdateStatus` + /// indicates whether the row was updated. `None` if the migration was not + /// found, or no migration update was performed. + pub migration_status: Option, } /// Computes the update status to return from the results of queries that find @@ -135,6 +158,7 @@ impl InstanceAndVmmUpdate { new_instance_runtime_state: InstanceRuntimeState, vmm_id: Uuid, new_vmm_runtime_state: VmmRuntimeState, + migration: Option, ) -> Self { let instance_find = Box::new( instance_dsl::instance @@ -165,24 +189,90 @@ impl InstanceAndVmmUpdate { .set(new_vmm_runtime_state), ); - Self { instance_find, vmm_find, instance_update, vmm_update } + let migration = migration.map( + |MigrationRuntimeState { + role, + migration_id, + state, + gen, + time_updated, + }| { + let state = MigrationState::from(state); + let find = Box::new( + migration_dsl::migration + .filter(migration_dsl::id.eq(migration_id)) + .filter(migration_dsl::time_deleted.is_null()) + .select(migration_dsl::id), + ); + let gen = Generation::from(gen); + let update: Box + Send> = match role { + MigrationRole::Target => Box::new( + diesel::update(migration_dsl::migration) + .filter(migration_dsl::id.eq(migration_id)) + .filter( + migration_dsl::target_propolis_id.eq(vmm_id), + ) + .filter(migration_dsl::target_gen.lt(gen)) + .set(( + migration_dsl::target_state.eq(state), + migration_dsl::time_target_updated + .eq(time_updated), + )), + ), + MigrationRole::Source => Box::new( + diesel::update(migration_dsl::migration) + .filter(migration_dsl::id.eq(migration_id)) + .filter( + migration_dsl::source_propolis_id.eq(vmm_id), + ) + .filter(migration_dsl::source_gen.lt(gen)) + .set(( + migration_dsl::source_state.eq(state), + migration_dsl::time_source_updated + .eq(time_updated), + )), + ), + }; + MigrationUpdate { find, update } + }, + ); + + Self { instance_find, vmm_find, instance_update, vmm_update, migration } } pub async fn execute_and_check( self, conn: &(impl async_bb8_diesel::AsyncConnection + Sync), ) -> Result { - let (vmm_found, vmm_updated, instance_found, instance_updated) = - self.get_result_async::<(Option, - Option, - Option, - Option)>(conn).await?; + let ( + vmm_found, + vmm_updated, + instance_found, + instance_updated, + migration_found, + migration_updated, + ) = self + .get_result_async::<( + Option, + Option, + Option, + Option, + Option, + Option, + )>(conn) + .await?; let instance_status = compute_update_status(instance_found, instance_updated); let vmm_status = compute_update_status(vmm_found, vmm_updated); + let 
migration_status = + compute_update_status(migration_found, migration_updated); - Ok(InstanceAndVmmUpdateResult { instance_status, vmm_status }) + Ok(InstanceAndVmmUpdateResult { + instance_status, + vmm_status, + migration_status, + }) } } @@ -197,6 +287,8 @@ impl Query for InstanceAndVmmUpdate { Nullable, Nullable, Nullable, + Nullable, + Nullable, ); } @@ -212,6 +304,12 @@ impl QueryFragment for InstanceAndVmmUpdate { self.vmm_find.walk_ast(out.reborrow())?; out.push_sql(") AS id), "); + if let Some(MigrationUpdate { ref find, .. }) = self.migration { + out.push_sql("migration_found AS (SELECT ("); + find.walk_ast(out.reborrow())?; + out.push_sql(") AS id), "); + } + out.push_sql("instance_updated AS ("); self.instance_update.walk_ast(out.reborrow())?; out.push_sql(" RETURNING id), "); @@ -220,6 +318,12 @@ impl QueryFragment for InstanceAndVmmUpdate { self.vmm_update.walk_ast(out.reborrow())?; out.push_sql(" RETURNING id), "); + if let Some(MigrationUpdate { ref update, .. }) = self.migration { + out.push_sql("migration_updated AS ("); + update.walk_ast(out.reborrow())?; + out.push_sql(" RETURNING id), "); + } + out.push_sql("vmm_result AS ("); out.push_sql("SELECT vmm_found."); out.push_identifier(vmm_dsl::id::NAME)?; @@ -244,11 +348,37 @@ impl QueryFragment for InstanceAndVmmUpdate { out.push_identifier(instance_dsl::id::NAME)?; out.push_sql(" = instance_updated."); out.push_identifier(instance_dsl::id::NAME)?; - out.push_sql(") "); + out.push_sql(")"); + + if self.migration.is_some() { + out.push_sql(", "); + out.push_sql("migration_result AS ("); + out.push_sql("SELECT migration_found."); + out.push_identifier(migration_dsl::id::NAME)?; + out.push_sql(" AS found, migration_updated."); + out.push_identifier(migration_dsl::id::NAME)?; + out.push_sql(" AS updated"); + out.push_sql( + " FROM migration_found LEFT JOIN migration_updated ON migration_found.", + ); + out.push_identifier(migration_dsl::id::NAME)?; + out.push_sql(" = migration_updated."); + out.push_identifier(migration_dsl::id::NAME)?; + out.push_sql(")"); + } + out.push_sql(" "); out.push_sql("SELECT vmm_result.found, vmm_result.updated, "); - out.push_sql("instance_result.found, instance_result.updated "); - out.push_sql("FROM vmm_result, instance_result;"); + out.push_sql("instance_result.found, instance_result.updated, "); + if self.migration.is_some() { + out.push_sql("migration_result.found, migration_result.updated "); + } else { + out.push_sql("NULL, NULL "); + } + out.push_sql("FROM vmm_result, instance_result"); + if self.migration.is_some() { + out.push_sql(", migration_result"); + } Ok(()) } diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index d90c240e8e..c282232ef8 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -110,6 +110,7 @@ phantom_disks.period_secs = 30 physical_disk_adoption.period_secs = 30 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 +blueprints.period_secs_collect_crdb_node_ids = 180 sync_service_zone_nat.period_secs = 30 switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 7272062293..ef4996db54 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -493,6 +493,7 @@ mod test { use omicron_common::address::get_switch_zone_address; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; + use 
omicron_common::address::COCKROACHDB_REDUNDANCY; use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::address::RACK_PREFIX; use omicron_common::address::SLED_PREFIX; @@ -501,6 +502,7 @@ mod test { use omicron_test_utils::dev::test_setup_log; use omicron_uuid_kinds::ExternalIpUuid; use omicron_uuid_kinds::OmicronZoneUuid; + use sled_agent_client::ZoneKind; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashMap; @@ -1235,6 +1237,7 @@ mod test { external_ip_rows: &[], service_nic_rows: &[], target_nexus_zone_count: NEXUS_REDUNDANCY, + target_cockroachdb_zone_count: COCKROACHDB_REDUNDANCY, target_cockroachdb_cluster_version: CockroachDbClusterVersion::POLICY, log, @@ -1260,7 +1263,7 @@ mod test { .unwrap(); let sled_id = blueprint.sleds().next().expect("expected at least one sled"); - let nalready = builder.sled_num_nexus_zones(sled_id); + let nalready = builder.sled_num_zones_of_kind(sled_id, ZoneKind::Nexus); let rv = builder .sled_ensure_zone_multiple_nexus(sled_id, nalready + 1) .unwrap(); diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index c6b912a683..9d7c542eda 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -6,6 +6,7 @@ use crate::ip_allocator::IpAllocator; use crate::planner::zone_needs_expungement; +use crate::planner::DiscretionaryOmicronZone; use crate::planner::ZoneExpungeReason; use anyhow::anyhow; use internal_dns::config::Host; @@ -29,6 +30,7 @@ use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledDetails; use nexus_types::deployment::SledFilter; use nexus_types::deployment::SledResources; +use nexus_types::deployment::ZpoolFilter; use nexus_types::deployment::ZpoolName; use nexus_types::external_api::views::SledState; use omicron_common::address::get_internal_dns_server_addresses; @@ -50,6 +52,7 @@ use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use rand::rngs::StdRng; use rand::SeedableRng; +use sled_agent_client::ZoneKind; use slog::debug; use slog::error; use slog::info; @@ -78,6 +81,10 @@ use super::zones::BuilderZonesConfig; pub enum Error { #[error("sled {sled_id}: ran out of available addresses for sled")] OutOfAddresses { sled_id: SledUuid }, + #[error( + "sled {sled_id}: no available zpools for additional {kind:?} zones" + )] + NoAvailableZpool { sled_id: SledUuid, kind: ZoneKind }, #[error("no Nexus zones exist in parent blueprint")] NoNexusZonesInParentBlueprint, #[error("no external service IP addresses are available")] @@ -722,15 +729,19 @@ impl<'a> BlueprintBuilder<'a> { Ok(Ensure::Added) } - /// Return the number of Nexus zones that would be configured to run on the - /// given sled if this builder generated a blueprint + /// Return the number of zones of a given kind that would be configured to + /// run on the given sled if this builder generated a blueprint. /// /// This value may change before a blueprint is actually generated if /// further changes are made to the builder. 
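    // Call-site shape after this change, for any `ZoneKind` (a sketch; the
    // `sled_id` binding is whatever sled the caller is inspecting):
    //
    //     let nexus_count =
    //         builder.sled_num_zones_of_kind(sled_id, ZoneKind::Nexus);
    //     let crdb_count =
    //         builder.sled_num_zones_of_kind(sled_id, ZoneKind::CockroachDb);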
- pub fn sled_num_nexus_zones(&self, sled_id: SledUuid) -> usize { + pub fn sled_num_zones_of_kind( + &self, + sled_id: SledUuid, + kind: ZoneKind, + ) -> usize { self.zones .current_sled_zones(sled_id) - .filter(|(z, _)| z.zone_type.is_nexus()) + .filter(|(z, _)| z.zone_type.kind() == kind) .count() } @@ -777,7 +788,7 @@ impl<'a> BlueprintBuilder<'a> { external_dns_servers: Vec, ) -> Result { // How many Nexus zones do we need to add? - let nexus_count = self.sled_num_nexus_zones(sled_id); + let nexus_count = self.sled_num_zones_of_kind(sled_id, ZoneKind::Nexus); let num_nexus_to_add = match desired_zone_count.checked_sub(nexus_count) { Some(0) => return Ok(EnsureMultiple::NotNeeded), @@ -850,6 +861,51 @@ impl<'a> BlueprintBuilder<'a> { self.cockroachdb_setting_preserve_downgrade = version; } + pub fn sled_ensure_zone_multiple_cockroachdb( + &mut self, + sled_id: SledUuid, + desired_zone_count: usize, + ) -> Result { + // How many CRDB zones do we need to add? + let crdb_count = + self.sled_num_zones_of_kind(sled_id, ZoneKind::CockroachDb); + let num_crdb_to_add = match desired_zone_count.checked_sub(crdb_count) { + Some(0) => return Ok(EnsureMultiple::NotNeeded), + Some(n) => n, + None => { + return Err(Error::Planner(anyhow!( + "removing a CockroachDb zone not yet supported \ + (sled {sled_id} has {crdb_count}; \ + planner wants {desired_zone_count})" + ))); + } + }; + for _ in 0..num_crdb_to_add { + let zone_id = self.rng.zone_rng.next(); + let underlay_ip = self.sled_alloc_ip(sled_id)?; + let pool_name = self.sled_alloc_zpool( + sled_id, + DiscretionaryOmicronZone::CockroachDb, + )?; + let port = omicron_common::address::COCKROACH_PORT; + let address = SocketAddrV6::new(underlay_ip, port, 0, 0); + let zone = BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: zone_id, + underlay_address: underlay_ip, + zone_type: BlueprintZoneType::CockroachDb( + blueprint_zone_type::CockroachDb { + address, + dataset: OmicronZoneDataset { pool_name }, + }, + ), + }; + self.sled_add_zone(sled_id, zone)?; + } + + Ok(EnsureMultiple::Changed { added: num_crdb_to_add, removed: 0 }) + } + fn sled_add_zone( &mut self, sled_id: SledUuid, @@ -906,6 +962,46 @@ impl<'a> BlueprintBuilder<'a> { allocator.alloc().ok_or(Error::OutOfAddresses { sled_id }) } + fn sled_alloc_zpool( + &mut self, + sled_id: SledUuid, + kind: DiscretionaryOmicronZone, + ) -> Result { + let resources = self.sled_resources(sled_id)?; + + // We refuse to choose a zpool for a zone of a given `kind` if this + // sled already has a zone of that kind on the same zpool. Build up a + // set of invalid zpools for this sled/kind pair. 
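        // For example (hypothetical layout): if this sled has in-service
        // zpools {A, B, C} and already runs CockroachDB zones on A and B, then
        // `skip_zpools` ends up as {A, B} and the loop below returns C; once
        // every in-service zpool is skipped, the request fails with
        // `Error::NoAvailableZpool`.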
+ let mut skip_zpools = BTreeSet::new(); + for zone_config in self.current_sled_zones(sled_id) { + match (kind, &zone_config.zone_type) { + ( + DiscretionaryOmicronZone::Nexus, + BlueprintZoneType::Nexus(_), + ) => { + // TODO handle this case once we track transient datasets + } + ( + DiscretionaryOmicronZone::CockroachDb, + BlueprintZoneType::CockroachDb(crdb), + ) => { + skip_zpools.insert(&crdb.dataset.pool_name); + } + (DiscretionaryOmicronZone::Nexus, _) + | (DiscretionaryOmicronZone::CockroachDb, _) => (), + } + } + + for &zpool_id in resources.all_zpools(ZpoolFilter::InService) { + let zpool_name = ZpoolName::new_external(zpool_id); + if !skip_zpools.contains(&zpool_name) { + return Ok(zpool_name); + } + } + + Err(Error::NoAvailableZpool { sled_id, kind: kind.into() }) + } + fn sled_resources( &self, sled_id: SledUuid, @@ -1179,6 +1275,7 @@ pub mod test { /// Checks various conditions that should be true for all blueprints pub fn verify_blueprint(blueprint: &Blueprint) { + // There should be no duplicate underlay IPs. let mut underlay_ips: BTreeMap = BTreeMap::new(); for (_, zone) in blueprint.all_omicron_zones(BlueprintZoneFilter::All) { @@ -1196,6 +1293,33 @@ pub mod test { ); } } + + // On any given zpool, we should have at most one zone of any given + // kind. + let mut kinds_by_zpool: BTreeMap< + ZpoolUuid, + BTreeMap, + > = BTreeMap::new(); + for (_, zone) in blueprint.all_omicron_zones(BlueprintZoneFilter::All) { + if let Some(dataset) = zone.zone_type.dataset() { + let kind = zone.zone_type.kind(); + if let Some(previous) = kinds_by_zpool + .entry(dataset.pool_name.id()) + .or_default() + .insert(kind, zone.id) + { + panic!( + "zpool {} has two zones of kind {kind:?}: {} and {}\ + \n\n\ + blueprint: {}", + dataset.pool_name, + zone.id, + previous, + blueprint.display(), + ); + } + } + } } #[test] @@ -1881,4 +2005,96 @@ pub mod test { logctx.cleanup_successful(); } + + #[test] + fn test_ensure_cockroachdb() { + static TEST_NAME: &str = "blueprint_builder_test_ensure_cockroachdb"; + let logctx = test_setup_log(TEST_NAME); + + // Discard the example blueprint and start with an empty one. + let (_, input, _) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); + let input = { + // Clear out the external networking records from `input`, since + // we're building an empty blueprint. + let mut builder = input.into_builder(); + *builder.network_resources_mut() = + OmicronZoneNetworkResources::new(); + builder.build() + }; + let parent = BlueprintBuilder::build_empty_with_sleds_seeded( + input.all_sled_ids(SledFilter::Commissioned), + "test", + TEST_NAME, + ); + + // Pick an arbitrary sled. + let (target_sled_id, sled_resources) = input + .all_sled_resources(SledFilter::InService) + .next() + .expect("at least one sled"); + + // It should have multiple zpools. + let num_sled_zpools = sled_resources.zpools.len(); + assert!( + num_sled_zpools > 1, + "expected more than 1 zpool, got {num_sled_zpools}" + ); + + // We should be able to ask for a CRDB zone per zpool. 
+ let mut builder = BlueprintBuilder::new_based_on( + &logctx.log, + &parent, + &input, + "test", + ) + .expect("constructed builder"); + let ensure_result = builder + .sled_ensure_zone_multiple_cockroachdb( + target_sled_id, + num_sled_zpools, + ) + .expect("ensured multiple CRDB zones"); + assert_eq!( + ensure_result, + EnsureMultiple::Changed { added: num_sled_zpools, removed: 0 } + ); + + let blueprint = builder.build(); + verify_blueprint(&blueprint); + assert_eq!( + blueprint + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + .filter(|(sled_id, z)| { + *sled_id == target_sled_id + && z.zone_type.kind() == ZoneKind::CockroachDb + }) + .count(), + num_sled_zpools + ); + + // If we instead ask for one more zone than there are zpools, we should + // get a zpool allocation error. + let mut builder = BlueprintBuilder::new_based_on( + &logctx.log, + &parent, + &input, + "test", + ) + .expect("constructed builder"); + let ensure_error = builder + .sled_ensure_zone_multiple_cockroachdb( + target_sled_id, + num_sled_zpools + 1, + ) + .expect_err("failed to create too many CRDB zones"); + match ensure_error { + Error::NoAvailableZpool { sled_id, kind } => { + assert_eq!(target_sled_id, sled_id); + assert_eq!(kind, ZoneKind::CockroachDb); + } + _ => panic!("unexpected error {ensure_error}"), + } + + logctx.cleanup_successful(); + } } diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 39a57bbbb3..4e288e8d8a 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -34,7 +34,7 @@ use std::collections::BTreeSet; use std::hash::Hash; use std::str::FromStr; -use self::omicron_zone_placement::DiscretionaryOmicronZone; +pub(crate) use self::omicron_zone_placement::DiscretionaryOmicronZone; use self::omicron_zone_placement::OmicronZonePlacement; use self::omicron_zone_placement::OmicronZonePlacementSledState; @@ -305,7 +305,8 @@ impl<'a> Planner<'a> { continue; } - // Every provisionable zpool on the sled should have a Crucible zone on it. + // Every provisionable zpool on the sled should have a Crucible zone + // on it. let mut ncrucibles_added = 0; for zpool_id in sled_resources.all_zpools(ZpoolFilter::InService) { if self @@ -338,70 +339,132 @@ impl<'a> Planner<'a> { } } - self.ensure_correct_number_of_nexus_zones(&sleds_waiting_for_ntp_zone)?; - - Ok(()) + self.do_plan_add_discretionary_zones(&sleds_waiting_for_ntp_zone) } - fn ensure_correct_number_of_nexus_zones( + fn do_plan_add_discretionary_zones( &mut self, sleds_waiting_for_ntp_zone: &BTreeSet, ) -> Result<(), Error> { - // Count the number of Nexus zones on all in-service sleds. This will - // include sleds that are in service but not eligible for new services, - // but will not include sleds that have been expunged or decommissioned. - let mut num_total_nexus = 0; + // We usually don't need to construct an `OmicronZonePlacement` to add + // discretionary zones, so defer its creation until it's needed. + let mut zone_placement = None; + + for zone_kind in [ + DiscretionaryOmicronZone::Nexus, + DiscretionaryOmicronZone::CockroachDb, + ] { + let num_zones_to_add = self.num_additional_zones_needed(zone_kind); + if num_zones_to_add == 0 { + continue; + } + // We need to add at least one zone; construct our `zone_placement` + // (or reuse the existing one if a previous loop iteration already + // created it). 
+ let zone_placement = match zone_placement.as_mut() { + Some(zone_placement) => zone_placement, + None => { + // This constructs a picture of the sleds as we currently + // understand them, as far as which sleds have discretionary + // zones. This will remain valid as we loop through the + // `zone_kind`s in this function, as any zone additions will + // update the `zone_placement` heap in-place. + let current_discretionary_zones = self + .input + .all_sled_resources(SledFilter::Discretionary) + .filter(|(sled_id, _)| { + !sleds_waiting_for_ntp_zone.contains(&sled_id) + }) + .map(|(sled_id, sled_resources)| { + OmicronZonePlacementSledState { + sled_id, + num_zpools: sled_resources + .all_zpools(ZpoolFilter::InService) + .count(), + discretionary_zones: self + .blueprint + .current_sled_zones(sled_id) + .filter_map(|zone| { + DiscretionaryOmicronZone::from_zone_type( + &zone.zone_type, + ) + }) + .collect(), + } + }); + zone_placement.insert(OmicronZonePlacement::new( + current_discretionary_zones, + )) + } + }; + self.add_discretionary_zones( + zone_placement, + zone_kind, + num_zones_to_add, + )?; + } + + Ok(()) + } + + // Given the current blueprint state and policy, returns the number of + // additional zones needed of the given `zone_kind` to satisfy the policy. + fn num_additional_zones_needed( + &mut self, + zone_kind: DiscretionaryOmicronZone, + ) -> usize { + // Count the number of `kind` zones on all in-service sleds. This + // will include sleds that are in service but not eligible for new + // services, but will not include sleds that have been expunged or + // decommissioned. + let mut num_existing_kind_zones = 0; for sled_id in self.input.all_sled_ids(SledFilter::InService) { - let num_nexus = self.blueprint.sled_num_nexus_zones(sled_id); - num_total_nexus += num_nexus; + let num_zones_of_kind = self + .blueprint + .sled_num_zones_of_kind(sled_id, zone_kind.into()); + num_existing_kind_zones += num_zones_of_kind; } - // TODO-correctness What should we do if we have _too many_ Nexus - // instances? For now, just log it the number of zones any time we have - // at least the minimum number. - let mut nexus_to_add = self - .input - .target_nexus_zone_count() - .saturating_sub(num_total_nexus); - if nexus_to_add == 0 { + let target_count = match zone_kind { + DiscretionaryOmicronZone::Nexus => { + self.input.target_nexus_zone_count() + } + DiscretionaryOmicronZone::CockroachDb => { + self.input.target_cockroachdb_zone_count() + } + }; + + // TODO-correctness What should we do if we have _too many_ + // `zone_kind` zones? For now, just log it the number of zones any + // time we have at least the minimum number. 
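        // Worked example of the arithmetic below: with a policy target of 5
        // zones of this kind and 3 already in the blueprint, we ask for 2
        // more; if the blueprint already had 6, `saturating_sub` clamps to 0
        // and we only log that the target is met (or exceeded).
        //
        //     assert_eq!(5usize.saturating_sub(3), 2);
        //     assert_eq!(3usize.saturating_sub(6), 0);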
+ let num_zones_to_add = + target_count.saturating_sub(num_existing_kind_zones); + if num_zones_to_add == 0 { info!( - self.log, "sufficient Nexus zones exist in plan"; - "desired_count" => self.input.target_nexus_zone_count(), - "current_count" => num_total_nexus, + self.log, "sufficient {zone_kind:?} zones exist in plan"; + "desired_count" => target_count, + "current_count" => num_existing_kind_zones, ); - return Ok(()); } + num_zones_to_add + } - let mut zone_placement = OmicronZonePlacement::new( - self.input - .all_sled_resources(SledFilter::Discretionary) - .filter(|(sled_id, _)| { - !sleds_waiting_for_ntp_zone.contains(&sled_id) - }) - .map(|(sled_id, sled_resources)| { - OmicronZonePlacementSledState { - sled_id, - num_zpools: sled_resources - .all_zpools(ZpoolFilter::InService) - .count(), - discretionary_zones: self - .blueprint - .current_sled_zones(sled_id) - .filter_map(|zone| { - DiscretionaryOmicronZone::from_zone_type( - &zone.zone_type, - ) - }) - .collect(), - } - }), - ); - - // Build a map of sled -> new nexus zones to add. + // Attempts to place `num_zones_to_add` new zones of `kind`. + // + // It is not an error if there are too few eligible sleds to start a + // sufficient number of zones; instead, we'll log a warning and start as + // many as we can (up to `num_zones_to_add`). + fn add_discretionary_zones( + &mut self, + zone_placement: &mut OmicronZonePlacement, + kind: DiscretionaryOmicronZone, + mut num_zones_to_add: usize, + ) -> Result<(), Error> { + // Build a map of sled -> new zones to add. let mut sleds_to_change: BTreeMap = BTreeMap::new(); - for i in 0..nexus_to_add { - match zone_placement.place_zone(DiscretionaryOmicronZone::Nexus) { + for i in 0..num_zones_to_add { + match zone_placement.place_zone(kind) { Ok(sled_id) => { *sleds_to_change.entry(sled_id).or_default() += 1; } @@ -412,14 +475,14 @@ impl<'a> Planner<'a> { // able to produce blueprints to achieve that status. warn!( self.log, - "failed to place all new desired Nexus instances"; + "failed to place all new desired {kind:?} zones"; "placed" => i, - "wanted_to_place" => nexus_to_add, + "wanted_to_place" => num_zones_to_add, ); - // Adjust `nexus_to_add` downward so it's consistent with - // the number of Nexuses we're actually adding. - nexus_to_add = i; + // Adjust `num_zones_to_add` downward so it's consistent + // with the number of zones we're actually adding. + num_zones_to_add = i; break; } @@ -427,30 +490,43 @@ impl<'a> Planner<'a> { } // For each sled we need to change, actually do so. - let mut total_added = 0; - for (sled_id, additional_nexus_count) in sleds_to_change { + let mut new_zones_added = 0; + for (sled_id, additional_zone_count) in sleds_to_change { // TODO-cleanup This is awkward: the builder wants to know how many - // total Nexus zones go on a given sled, but we have a count of how - // many we want to add. Construct a new target count. Maybe the - // builder should provide a different interface here? - let new_nexus_count = self.blueprint.sled_num_nexus_zones(sled_id) - + additional_nexus_count; - match self - .blueprint - .sled_ensure_zone_multiple_nexus(sled_id, new_nexus_count)? - { + // total zones go on a given sled, but we have a count of how many + // we want to add. Construct a new target count. Maybe the builder + // should provide a different interface here? 
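            // For example: a sled that currently runs 1 Nexus zone and is
            // assigned `additional_zone_count == 2` yields a new total of 3,
            // i.e. (illustrative call)
            //
            //     self.blueprint.sled_ensure_zone_multiple_nexus(sled_id, 3)?;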
+ let new_total_zone_count = + self.blueprint.sled_num_zones_of_kind(sled_id, kind.into()) + + additional_zone_count; + + let result = match kind { + DiscretionaryOmicronZone::Nexus => { + self.blueprint.sled_ensure_zone_multiple_nexus( + sled_id, + new_total_zone_count, + )? + } + DiscretionaryOmicronZone::CockroachDb => { + self.blueprint.sled_ensure_zone_multiple_cockroachdb( + sled_id, + new_total_zone_count, + )? + } + }; + match result { EnsureMultiple::Changed { added, removed: _ } => { info!( self.log, "will add {added} Nexus zone(s) to sled"; "sled_id" => %sled_id, ); - total_added += added; + new_zones_added += added; } // This is only possible if we asked the sled to ensure the same // number of zones it already has, but that's impossible based // on the way we built up `sleds_to_change`. EnsureMultiple::NotNeeded => unreachable!( - "sled on which we added Nexus zones did not add any" + "sled on which we added {kind:?} zones did not add any" ), } } @@ -459,8 +535,8 @@ impl<'a> Planner<'a> { // arrived here, we think we've added the number of Nexus zones we // needed to. assert_eq!( - total_added, nexus_to_add, - "internal error counting Nexus zones" + new_zones_added, num_zones_to_add, + "internal error counting {kind:?} zones" ); Ok(()) @@ -928,7 +1004,7 @@ mod test { 1 ); - // Now run the planner. It should add additional Nexus instances to the + // Now run the planner. It should add additional Nexus zones to the // one sled we have. let mut builder = input.into_builder(); builder.policy_mut().target_nexus_zone_count = 5; diff --git a/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs b/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs index 26e72db434..08eccb0468 100644 --- a/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs +++ b/nexus/reconfigurator/planning/src/planner/omicron_zone_placement.rs @@ -13,8 +13,9 @@ use std::mem; #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[cfg_attr(test, derive(test_strategy::Arbitrary))] -pub(super) enum DiscretionaryOmicronZone { +pub(crate) enum DiscretionaryOmicronZone { Nexus, + CockroachDb, // TODO expand this enum as we start to place more services } @@ -24,11 +25,11 @@ impl DiscretionaryOmicronZone { ) -> Option { match zone_type { BlueprintZoneType::Nexus(_) => Some(Self::Nexus), + BlueprintZoneType::CockroachDb(_) => Some(Self::CockroachDb), // Zones that we should place but don't yet. 
BlueprintZoneType::BoundaryNtp(_) | BlueprintZoneType::Clickhouse(_) | BlueprintZoneType::ClickhouseKeeper(_) - | BlueprintZoneType::CockroachDb(_) | BlueprintZoneType::CruciblePantry(_) | BlueprintZoneType::ExternalDns(_) | BlueprintZoneType::InternalDns(_) @@ -46,6 +47,7 @@ impl From for ZoneKind { fn from(zone: DiscretionaryOmicronZone) -> Self { match zone { DiscretionaryOmicronZone::Nexus => Self::Nexus, + DiscretionaryOmicronZone::CockroachDb => Self::CockroachDb, } } } diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index f2a979cb4a..0499e0ef5b 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -76,6 +76,7 @@ pub struct SystemDescription { available_non_scrimlet_slots: BTreeSet, available_scrimlet_slots: BTreeSet, target_nexus_zone_count: usize, + target_cockroachdb_zone_count: usize, target_cockroachdb_cluster_version: CockroachDbClusterVersion, service_ip_pool_ranges: Vec, internal_dns_version: Generation, @@ -124,8 +125,15 @@ impl SystemDescription { // Policy defaults let target_nexus_zone_count = NEXUS_REDUNDANCY; + + // TODO-cleanup This is wrong, but we don't currently set up any CRDB + // nodes in our fake system, so this prevents downstream test issues + // with the planner thinking our system is out of date from the gate. + let target_cockroachdb_zone_count = 0; + let target_cockroachdb_cluster_version = CockroachDbClusterVersion::POLICY; + // IPs from TEST-NET-1 (RFC 5737) let service_ip_pool_ranges = vec![IpRange::try_from(( "192.0.2.2".parse::().unwrap(), @@ -140,6 +148,7 @@ impl SystemDescription { available_non_scrimlet_slots, available_scrimlet_slots, target_nexus_zone_count, + target_cockroachdb_zone_count, target_cockroachdb_cluster_version, service_ip_pool_ranges, internal_dns_version: Generation::new(), @@ -307,6 +316,7 @@ impl SystemDescription { let policy = Policy { service_ip_pool_ranges: self.service_ip_pool_ranges.clone(), target_nexus_zone_count: self.target_nexus_zone_count, + target_cockroachdb_zone_count: self.target_cockroachdb_zone_count, target_cockroachdb_cluster_version: self .target_cockroachdb_cluster_version, }; @@ -454,8 +464,10 @@ impl Sled { let model = format!("model{}", unique); let serial = format!("serial{}", unique); let revision = 0; - let mut zpool_rng = - TypedUuidRng::from_seed("SystemSimultatedSled", "ZpoolUuid"); + let mut zpool_rng = TypedUuidRng::from_seed( + "SystemSimultatedSled", + (sled_id, "ZpoolUuid"), + ); let zpools: BTreeMap<_, _> = (0..nzpools) .map(|_| { let zpool = ZpoolUuid::from(zpool_rng.next()); diff --git a/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt b/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt index 03e76422e9..01b7ceb46b 100644 --- a/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt +++ b/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt @@ -8,16 +8,16 @@ to: blueprint e4aeb3b3-272f-4967-be34-2d34daa46aa1 ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model 
serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-3fb05590-2632-413e-989f-aaaabaf01fab + fake-vendor fake-model serial-4711ce46-43f6-4732-9769-a69ea519b62d + fake-vendor fake-model serial-4a2cc08d-8e18-4f0e-8fc6-443cb2016858 + fake-vendor fake-model serial-7032a67e-2ff6-45cc-af34-8b3502965cc9 + fake-vendor fake-model serial-908218e9-26ea-4d75-86f9-4b99ff72dcb5 + fake-vendor fake-model serial-9bc4e63d-b8fe-4ac6-ac3a-cf097d06cc6d + fake-vendor fake-model serial-9f343299-ef7a-46aa-9904-061be15abfeb + fake-vendor fake-model serial-c8523dd7-4e87-4e4b-8e46-04b806f0763c + fake-vendor fake-model serial-e02245bc-ca0d-4f08-ac1e-870c4fa2a17c + fake-vendor fake-model serial-f9bcdb70-6846-4330-9d37-bfdd5583aea6 omicron zones at generation 2: @@ -44,16 +44,16 @@ to: blueprint e4aeb3b3-272f-4967-be34-2d34daa46aa1 ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-092eaf94-328b-45b6-99da-c850a06e8592 + fake-vendor fake-model serial-3195b46a-d32d-458e-ad7f-b0b2f91af483 + fake-vendor fake-model serial-327bdbbb-c40e-4784-888e-18492a753708 + fake-vendor fake-model serial-4f067c20-2860-49b1-8a03-6715a3c12c0e + fake-vendor fake-model serial-564460fe-7357-4883-a3af-1c931f473e83 + fake-vendor fake-model serial-66f85be6-1143-48a1-a898-504c6b540035 + fake-vendor fake-model serial-97b3f199-b488-4ce0-bd34-484d4d3bd194 + fake-vendor fake-model serial-a38f8150-2efd-4b55-9ffb-3c98e2939e13 + fake-vendor fake-model serial-d788bf53-35dd-4fa7-a820-c1233e859d03 + fake-vendor fake-model serial-ff66a45c-38a8-4b62-825e-e7c9470bc8bc omicron zones at generation 2: @@ -80,16 +80,16 @@ to: blueprint e4aeb3b3-272f-4967-be34-2d34daa46aa1 ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor 
fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-29867b4d-f12b-40ea-b59e-5169d0b2a831 + fake-vendor fake-model serial-54b3613b-a80e-4eda-aa1c-0d92050de367 + fake-vendor fake-model serial-75939bd0-28f6-428b-8ce2-e55241d201ce + fake-vendor fake-model serial-7bcb41c3-6fc7-4cab-ac5f-b2e09f62567d + fake-vendor fake-model serial-92aeebc3-4154-4147-b721-0ccf5e337d8d + fake-vendor fake-model serial-960229b6-dbb2-4df0-ad93-83ddb28484bc + fake-vendor fake-model serial-9e1428de-ad48-4655-8ccc-bbf4cb1badda + fake-vendor fake-model serial-e03fee18-20c9-4c61-8927-bf80525f9b78 + fake-vendor fake-model serial-e25482d4-7111-4acf-b621-4aab851ffda5 + fake-vendor fake-model serial-e90056a4-dd19-4ebb-b484-c677aea31d80 omicron zones at generation 2: diff --git a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt index 0253baa9f8..3b14db49c7 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt @@ -9,16 +9,16 @@ to: blueprint 4171ad05-89dd-474b-846b-b007e4346366 ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-014eb1e9-04fe-4f36-8339-0a090b053ada + fake-vendor fake-model serial-31a3bc64-7a3b-496d-b644-785dc44b6e37 + fake-vendor fake-model serial-7bb40bd6-9c43-4b63-8337-18313c72aea2 + fake-vendor fake-model serial-988aa8c2-cb5e-406b-9289-425dc2e5bc3a + fake-vendor fake-model serial-ad574c09-2ae0-4534-a2a4-f923ce20ae87 + fake-vendor fake-model serial-ad91e238-4901-4ff4-a91b-75233c936426 + fake-vendor fake-model serial-ce58d463-d442-4c97-a6b4-f7d98c3fd902 + fake-vendor fake-model serial-f18f7689-0059-4b79-880e-34faf7a0fe0e + fake-vendor fake-model serial-f1d6cea4-640f-415e-89fe-2b1784ce3db8 + fake-vendor fake-model serial-f4a96860-bdeb-4435-bdf5-2a10beb3d44a omicron zones at generation 2: @@ -45,16 +45,16 @@ to: blueprint 4171ad05-89dd-474b-846b-b007e4346366 ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model 
serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-12057b4a-0b06-4f70-ba22-336de2385bfe + fake-vendor fake-model serial-29758363-6c77-40c3-8740-9c0c64f6e14a + fake-vendor fake-model serial-3f331c10-7882-48ab-85d9-05108490b55b + fake-vendor fake-model serial-5152d1aa-9045-4e06-9ef6-6eadac3696e4 + fake-vendor fake-model serial-5c0dd424-d905-4fc5-a73c-36254fdd470c + fake-vendor fake-model serial-794df76f-bca0-4635-9eb6-773ad0108f7e + fake-vendor fake-model serial-9024d350-38a7-459b-8550-3b2c4a88b5c1 + fake-vendor fake-model serial-95e86080-e162-4980-a589-db6bb1a95ca7 + fake-vendor fake-model serial-d55d36d7-df92-4615-944d-440a1f8b5001 + fake-vendor fake-model serial-db6686c8-2dd9-4032-8444-2a06b43baa68 omicron zones at generation 2: @@ -81,16 +81,16 @@ to: blueprint 4171ad05-89dd-474b-846b-b007e4346366 ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-2a94863d-16e2-4535-973b-e98dd47fd18d + fake-vendor fake-model serial-32456d15-f5b6-4efc-90c8-dbba979b69cb + fake-vendor fake-model serial-416fe9f9-5161-4b0f-9e11-c9d81563ded5 + fake-vendor fake-model serial-4c68800e-23f8-485b-b251-628fd151e445 + fake-vendor fake-model serial-9dd87c4d-5fb4-475a-86fa-c0da81a3e00a + fake-vendor fake-model serial-be93a517-445e-46c2-aa21-3dc526d4a413 + fake-vendor fake-model serial-d9344e2b-84d2-4392-84ab-41b86ed02237 + fake-vendor fake-model serial-eab188d0-b34a-4673-b254-12e705597654 + fake-vendor fake-model serial-f1e0386f-11b6-4cdf-8250-826d256db6b5 + fake-vendor fake-model serial-f8c9c9a9-d73e-4cdf-a9af-03cfbbbce12b omicron zones at generation 2: @@ -119,16 +119,16 @@ to: blueprint 4171ad05-89dd-474b-846b-b007e4346366 ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- -+ fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 -+ fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 -+ fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 -+ fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 -+ fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 -+ fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 -+ fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 -+ fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c -+ fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b -+ fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 ++ 
fake-vendor fake-model serial-1bb5ee5d-c2c6-4eaa-86c4-817d89cf10cf ++ fake-vendor fake-model serial-298d1eec-0313-4a42-8af9-0e51299a14ef ++ fake-vendor fake-model serial-2eed666f-a10b-42d0-b626-68335d3270b8 ++ fake-vendor fake-model serial-6cc4d7a7-2a89-4f2f-aa55-5e7a10d0fc08 ++ fake-vendor fake-model serial-7aad6fd9-b698-4c77-af6b-947be10ba953 ++ fake-vendor fake-model serial-a5a15e51-c48a-40e4-a2d8-1c7198c1d46b ++ fake-vendor fake-model serial-b81d4993-ea5b-4720-b8c8-2360c1121d6e ++ fake-vendor fake-model serial-d0064c4d-f5f7-4c89-9f37-0ca475048e79 ++ fake-vendor fake-model serial-dba739c1-76e4-4b6a-a173-89c938fa13ef ++ fake-vendor fake-model serial-e6f289fe-142e-4778-8629-dc87adb53f06 omicron zones at generation 2: diff --git a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt index 5a824edf84..b252a21d7d 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt @@ -9,16 +9,16 @@ to: blueprint f432fcd5-1284-4058-8b4a-9286a3de6163 ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-014eb1e9-04fe-4f36-8339-0a090b053ada + fake-vendor fake-model serial-31a3bc64-7a3b-496d-b644-785dc44b6e37 + fake-vendor fake-model serial-7bb40bd6-9c43-4b63-8337-18313c72aea2 + fake-vendor fake-model serial-988aa8c2-cb5e-406b-9289-425dc2e5bc3a + fake-vendor fake-model serial-ad574c09-2ae0-4534-a2a4-f923ce20ae87 + fake-vendor fake-model serial-ad91e238-4901-4ff4-a91b-75233c936426 + fake-vendor fake-model serial-ce58d463-d442-4c97-a6b4-f7d98c3fd902 + fake-vendor fake-model serial-f18f7689-0059-4b79-880e-34faf7a0fe0e + fake-vendor fake-model serial-f1d6cea4-640f-415e-89fe-2b1784ce3db8 + fake-vendor fake-model serial-f4a96860-bdeb-4435-bdf5-2a10beb3d44a omicron zones at generation 2: @@ -45,16 +45,16 @@ to: blueprint f432fcd5-1284-4058-8b4a-9286a3de6163 ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model 
serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-12057b4a-0b06-4f70-ba22-336de2385bfe + fake-vendor fake-model serial-29758363-6c77-40c3-8740-9c0c64f6e14a + fake-vendor fake-model serial-3f331c10-7882-48ab-85d9-05108490b55b + fake-vendor fake-model serial-5152d1aa-9045-4e06-9ef6-6eadac3696e4 + fake-vendor fake-model serial-5c0dd424-d905-4fc5-a73c-36254fdd470c + fake-vendor fake-model serial-794df76f-bca0-4635-9eb6-773ad0108f7e + fake-vendor fake-model serial-9024d350-38a7-459b-8550-3b2c4a88b5c1 + fake-vendor fake-model serial-95e86080-e162-4980-a589-db6bb1a95ca7 + fake-vendor fake-model serial-d55d36d7-df92-4615-944d-440a1f8b5001 + fake-vendor fake-model serial-db6686c8-2dd9-4032-8444-2a06b43baa68 omicron zones at generation 2: @@ -81,16 +81,16 @@ to: blueprint f432fcd5-1284-4058-8b4a-9286a3de6163 ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-2a94863d-16e2-4535-973b-e98dd47fd18d + fake-vendor fake-model serial-32456d15-f5b6-4efc-90c8-dbba979b69cb + fake-vendor fake-model serial-416fe9f9-5161-4b0f-9e11-c9d81563ded5 + fake-vendor fake-model serial-4c68800e-23f8-485b-b251-628fd151e445 + fake-vendor fake-model serial-9dd87c4d-5fb4-475a-86fa-c0da81a3e00a + fake-vendor fake-model serial-be93a517-445e-46c2-aa21-3dc526d4a413 + fake-vendor fake-model serial-d9344e2b-84d2-4392-84ab-41b86ed02237 + fake-vendor fake-model serial-eab188d0-b34a-4673-b254-12e705597654 + fake-vendor fake-model serial-f1e0386f-11b6-4cdf-8250-826d256db6b5 + fake-vendor fake-model serial-f8c9c9a9-d73e-4cdf-a9af-03cfbbbce12b omicron zones at generation 2: @@ -119,16 +119,16 @@ to: blueprint f432fcd5-1284-4058-8b4a-9286a3de6163 ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-1bb5ee5d-c2c6-4eaa-86c4-817d89cf10cf + fake-vendor fake-model serial-298d1eec-0313-4a42-8af9-0e51299a14ef + fake-vendor 
fake-model serial-2eed666f-a10b-42d0-b626-68335d3270b8 + fake-vendor fake-model serial-6cc4d7a7-2a89-4f2f-aa55-5e7a10d0fc08 + fake-vendor fake-model serial-7aad6fd9-b698-4c77-af6b-947be10ba953 + fake-vendor fake-model serial-a5a15e51-c48a-40e4-a2d8-1c7198c1d46b + fake-vendor fake-model serial-b81d4993-ea5b-4720-b8c8-2360c1121d6e + fake-vendor fake-model serial-d0064c4d-f5f7-4c89-9f37-0ca475048e79 + fake-vendor fake-model serial-dba739c1-76e4-4b6a-a173-89c938fa13ef + fake-vendor fake-model serial-e6f289fe-142e-4778-8629-dc87adb53f06 omicron zones generation 2 -> 3: diff --git a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt index 7219c300b7..556ca094e1 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt @@ -9,16 +9,16 @@ to: blueprint 1ac2d88f-27dd-4506-8585-6b2be832528e ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-1e2ec79e-9c11-4133-ac77-e0b994a507d5 + fake-vendor fake-model serial-440ae69d-5e2e-4539-91d0-e2930bdd7203 + fake-vendor fake-model serial-4e91d4a3-bb6c-44bb-bd4e-bf8913c1ba2b + fake-vendor fake-model serial-67de3a80-29cb-4066-b743-e285a2ca1f4e + fake-vendor fake-model serial-9139b70f-c1d3-475d-8f02-7c9acba52b2b + fake-vendor fake-model serial-95fbb110-5272-4646-ab50-21b31b7cde23 + fake-vendor fake-model serial-9bf35cd7-4938-4c34-8189-288b3195cb64 + fake-vendor fake-model serial-9d833141-18a1-4f24-8a34-6076c026aa87 + fake-vendor fake-model serial-a279461f-a7b9-413f-a79f-cb4dab4c3fce + fake-vendor fake-model serial-ff7e002b-3ad8-4d45-b03a-c46ef0ac8e59 omicron zones at generation 2: @@ -47,16 +47,16 @@ to: blueprint 1ac2d88f-27dd-4506-8585-6b2be832528e ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- -- fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 -- fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 -- fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 -- fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 -- fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 -- fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 -- fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 -- fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c -- fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b -- fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 +- fake-vendor fake-model 
serial-069446b4-7881-49dc-838a-63a782d4896d +- fake-vendor fake-model serial-20eba316-dffe-4516-9703-af561da19b0b +- fake-vendor fake-model serial-426f4b6d-4a82-4106-bf4b-64ee86a2a5a4 +- fake-vendor fake-model serial-82daeef2-8641-4bf5-ac66-f7b5f62c48b6 +- fake-vendor fake-model serial-8e5feeb2-14f1-440f-a909-3c34aa8e129b +- fake-vendor fake-model serial-942e2123-7c4e-4f6b-9317-1341fe212647 +- fake-vendor fake-model serial-97a5ce17-df5b-47e7-baf8-80ae710ce18e +- fake-vendor fake-model serial-debc9fb6-bd58-4e4f-b8b8-6a9a07fcf25d +- fake-vendor fake-model serial-f63a32a9-0659-43cf-8efc-8f34e7af9d45 +- fake-vendor fake-model serial-ffea118f-7715-4e21-8fc5-bb23cd0f59e8 omicron zones generation 2 -> 3: @@ -95,16 +95,16 @@ to: blueprint 1ac2d88f-27dd-4506-8585-6b2be832528e ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-07068f19-1ff2-48da-8e72-874780df2339 + fake-vendor fake-model serial-0f12e6ee-41d2-4eb0-813f-ba5240900ded + fake-vendor fake-model serial-0fdb4a39-3cd5-47a0-9064-e7f3c285af61 + fake-vendor fake-model serial-13572832-83ad-40d6-896a-751f7e53f4f6 + fake-vendor fake-model serial-3602bdd9-f7bb-4490-87a6-8f061f7712f5 + fake-vendor fake-model serial-65707837-95a4-45d7-84e6-8b9a4da215f1 + fake-vendor fake-model serial-7a43b2b0-3846-401c-8317-d555715a00f7 + fake-vendor fake-model serial-855e3ef1-6929-4e21-8451-0e62bd93c7c9 + fake-vendor fake-model serial-8adcf329-4cee-4075-b798-28b5add1edf5 + fake-vendor fake-model serial-99e926d6-bd42-4cde-9f63-5ecc7ea14322 omicron zones generation 2 -> 3: diff --git a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt index 1e1f834d6c..6954d4e12b 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt @@ -7,16 +7,16 @@ parent: 516e80a3-b362-4fac-bd3c-4559717120dd ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model 
serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-1e2ec79e-9c11-4133-ac77-e0b994a507d5 + fake-vendor fake-model serial-440ae69d-5e2e-4539-91d0-e2930bdd7203 + fake-vendor fake-model serial-4e91d4a3-bb6c-44bb-bd4e-bf8913c1ba2b + fake-vendor fake-model serial-67de3a80-29cb-4066-b743-e285a2ca1f4e + fake-vendor fake-model serial-9139b70f-c1d3-475d-8f02-7c9acba52b2b + fake-vendor fake-model serial-95fbb110-5272-4646-ab50-21b31b7cde23 + fake-vendor fake-model serial-9bf35cd7-4938-4c34-8189-288b3195cb64 + fake-vendor fake-model serial-9d833141-18a1-4f24-8a34-6076c026aa87 + fake-vendor fake-model serial-a279461f-a7b9-413f-a79f-cb4dab4c3fce + fake-vendor fake-model serial-ff7e002b-3ad8-4d45-b03a-c46ef0ac8e59 omicron zones at generation 2: @@ -44,16 +44,16 @@ parent: 516e80a3-b362-4fac-bd3c-4559717120dd ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-07068f19-1ff2-48da-8e72-874780df2339 + fake-vendor fake-model serial-0f12e6ee-41d2-4eb0-813f-ba5240900ded + fake-vendor fake-model serial-0fdb4a39-3cd5-47a0-9064-e7f3c285af61 + fake-vendor fake-model serial-13572832-83ad-40d6-896a-751f7e53f4f6 + fake-vendor fake-model serial-3602bdd9-f7bb-4490-87a6-8f061f7712f5 + fake-vendor fake-model serial-65707837-95a4-45d7-84e6-8b9a4da215f1 + fake-vendor fake-model serial-7a43b2b0-3846-401c-8317-d555715a00f7 + fake-vendor fake-model serial-855e3ef1-6929-4e21-8451-0e62bd93c7c9 + fake-vendor fake-model serial-8adcf329-4cee-4075-b798-28b5add1edf5 + fake-vendor fake-model serial-99e926d6-bd42-4cde-9f63-5ecc7ea14322 omicron zones at generation 3: diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt index be2bf3c248..d3f667170c 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt @@ -9,16 +9,16 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - 
fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-13e6503b-5300-4ccd-abc4-c1512b435929 + fake-vendor fake-model serial-44cdb6f2-fa6c-4b69-bab2-3ae4e1ec4b34 + fake-vendor fake-model serial-4de5fc8e-0e41-4ab9-ba12-2dc63882c96a + fake-vendor fake-model serial-51564e7a-d69f-4942-bcfe-330224633ca6 + fake-vendor fake-model serial-5ca23cb3-cc90-41c5-a474-01898cdd0796 + fake-vendor fake-model serial-6a23a532-0712-4a8d-be9b-e8c17e97aa4b + fake-vendor fake-model serial-6f1a330e-e8d4-4c09-97fc-8918b69b2a3c + fake-vendor fake-model serial-7113d104-fb55-4299-bf53-b3c59d258e44 + fake-vendor fake-model serial-8c10be49-3a66-40d4-a082-64d09d916f14 + fake-vendor fake-model serial-d1ebfd7b-3842-4ad7-be31-2b9c031209a9 omicron zones at generation 2: @@ -47,16 +47,16 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- -- fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 -- fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 -- fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 -- fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 -- fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 -- fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 -- fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 -- fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c -- fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b -- fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 +- fake-vendor fake-model serial-22930645-144a-415c-bceb-2dbfafb9c29e +- fake-vendor fake-model serial-24155070-8a43-4244-a3ba-853d8c71972d +- fake-vendor fake-model serial-494782c7-3821-4f49-918b-ce42cc4d18ad +- fake-vendor fake-model serial-6ea8a67f-d27d-472b-844c-6c8245b00e2b +- fake-vendor fake-model serial-77565d57-c235-4905-b3c7-32d1c2ca2c44 +- fake-vendor fake-model serial-8746874c-dc3b-4454-93cd-2a8fc13720fe +- fake-vendor fake-model serial-a42c5a67-6e10-4586-a56e-48bb8260e75f +- fake-vendor fake-model serial-ca89b120-7bcd-4eeb-baa7-71031fbd103b +- fake-vendor fake-model serial-ef61aa97-c862-428c-82f3-0a69a50d6155 +- fake-vendor fake-model serial-ef64ff6d-250d-47ac-8686-e696cfb46966 omicron zones generation 2 -> 3: @@ -95,16 +95,16 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- -- fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 -- fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 -- fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 -- fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 -- fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 -- fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 -- fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 -- fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c -- fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b -- fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 +- fake-vendor fake-model 
serial-09a5de95-c15f-486e-b776-fca62bf5e179 +- fake-vendor fake-model serial-11b8eccf-7c78-4bde-8639-b35a83082a95 +- fake-vendor fake-model serial-1931c422-4c6a-4597-8ae7-ecb44718462c +- fake-vendor fake-model serial-21a8a87e-73a4-42d4-a426-f6eec94004e3 +- fake-vendor fake-model serial-222c0b55-2966-46b6-816c-9063a7587806 +- fake-vendor fake-model serial-3676f688-f41c-4f89-936a-6b04c3011b2a +- fake-vendor fake-model serial-5e9e14c4-d60d-4b5c-a11c-bba54eb24c9f +- fake-vendor fake-model serial-74f7b89e-88f5-4336-ba8b-22283a6966c5 +- fake-vendor fake-model serial-a787cac8-b5e3-49e3-aaab-20d8eadd8a63 +- fake-vendor fake-model serial-d56b0c9f-0e57-43d8-a1ac-8b4d2c303c29 omicron zones at generation 2: @@ -131,16 +131,16 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-4069c804-c51a-4adc-8822-3cbbab56ed3f + fake-vendor fake-model serial-5248a306-4a03-449e-a8a3-6f86d26da755 + fake-vendor fake-model serial-55196665-ed61-4b23-9a74-0711bf2eaf90 + fake-vendor fake-model serial-6b2a719a-35eb-469f-aa54-114a1f21f37d + fake-vendor fake-model serial-7ed4296a-66d1-4fb2-bc56-9b23b8f27d7e + fake-vendor fake-model serial-984e2389-e7fd-4af9-ab02-e3caf77f95b5 + fake-vendor fake-model serial-a5f75431-3795-426c-8f80-176f658281a5 + fake-vendor fake-model serial-cf32a1ce-2c9e-49f5-b1cf-4af7f2a28901 + fake-vendor fake-model serial-e405da11-cb6b-4ebc-bac1-9bc997352e10 + fake-vendor fake-model serial-f4d7f914-ec73-4b65-8696-5068591d9065 omicron zones generation 2 -> 3: @@ -170,16 +170,16 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-33d48d85-751e-4982-b738-eae4d9a05f01 + fake-vendor fake-model serial-39ca2e23-4c38-4743-afe0-26b0380b27db + fake-vendor fake-model serial-4fbd2fe0-2eac-41b8-8e8d-4fa46c3e8b6c + fake-vendor fake-model serial-60131a33-1f12-4dbb-9435-bdd368db1f51 
+ fake-vendor fake-model serial-77e45b5b-869f-4e78-8ce3-28bbe8cf37e9 + fake-vendor fake-model serial-789d607d-d196-428e-a988-f7886a327859 + fake-vendor fake-model serial-b104b94c-2197-4e76-bfbd-6f966bd5af66 + fake-vendor fake-model serial-cd62306a-aedf-47e8-93d5-92a358d64c7b + fake-vendor fake-model serial-f1693454-aac1-4265-b8a0-4e9f3f41c7b3 + fake-vendor fake-model serial-fe4fdfba-3b6d-47d3-8612-1fb2390b650a omicron zones generation 2 -> 3: diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt index 262bd14811..4d366f849c 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt @@ -9,16 +9,16 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-4069c804-c51a-4adc-8822-3cbbab56ed3f + fake-vendor fake-model serial-5248a306-4a03-449e-a8a3-6f86d26da755 + fake-vendor fake-model serial-55196665-ed61-4b23-9a74-0711bf2eaf90 + fake-vendor fake-model serial-6b2a719a-35eb-469f-aa54-114a1f21f37d + fake-vendor fake-model serial-7ed4296a-66d1-4fb2-bc56-9b23b8f27d7e + fake-vendor fake-model serial-984e2389-e7fd-4af9-ab02-e3caf77f95b5 + fake-vendor fake-model serial-a5f75431-3795-426c-8f80-176f658281a5 + fake-vendor fake-model serial-cf32a1ce-2c9e-49f5-b1cf-4af7f2a28901 + fake-vendor fake-model serial-e405da11-cb6b-4ebc-bac1-9bc997352e10 + fake-vendor fake-model serial-f4d7f914-ec73-4b65-8696-5068591d9065 omicron zones at generation 3: @@ -48,16 +48,16 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-33d48d85-751e-4982-b738-eae4d9a05f01 + fake-vendor fake-model serial-39ca2e23-4c38-4743-afe0-26b0380b27db + fake-vendor fake-model 
serial-4fbd2fe0-2eac-41b8-8e8d-4fa46c3e8b6c + fake-vendor fake-model serial-60131a33-1f12-4dbb-9435-bdd368db1f51 + fake-vendor fake-model serial-77e45b5b-869f-4e78-8ce3-28bbe8cf37e9 + fake-vendor fake-model serial-789d607d-d196-428e-a988-f7886a327859 + fake-vendor fake-model serial-b104b94c-2197-4e76-bfbd-6f966bd5af66 + fake-vendor fake-model serial-cd62306a-aedf-47e8-93d5-92a358d64c7b + fake-vendor fake-model serial-f1693454-aac1-4265-b8a0-4e9f3f41c7b3 + fake-vendor fake-model serial-fe4fdfba-3b6d-47d3-8612-1fb2390b650a omicron zones at generation 3: @@ -111,16 +111,16 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-13e6503b-5300-4ccd-abc4-c1512b435929 + fake-vendor fake-model serial-44cdb6f2-fa6c-4b69-bab2-3ae4e1ec4b34 + fake-vendor fake-model serial-4de5fc8e-0e41-4ab9-ba12-2dc63882c96a + fake-vendor fake-model serial-51564e7a-d69f-4942-bcfe-330224633ca6 + fake-vendor fake-model serial-5ca23cb3-cc90-41c5-a474-01898cdd0796 + fake-vendor fake-model serial-6a23a532-0712-4a8d-be9b-e8c17e97aa4b + fake-vendor fake-model serial-6f1a330e-e8d4-4c09-97fc-8918b69b2a3c + fake-vendor fake-model serial-7113d104-fb55-4299-bf53-b3c59d258e44 + fake-vendor fake-model serial-8c10be49-3a66-40d4-a082-64d09d916f14 + fake-vendor fake-model serial-d1ebfd7b-3842-4ad7-be31-2b9c031209a9 omicron zones at generation 2: diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt index 07bf7673c0..5a2ed5a28a 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt @@ -7,16 +7,16 @@ parent: 4d4e6c38-cd95-4c4e-8f45-6af4d686964b ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-13e6503b-5300-4ccd-abc4-c1512b435929 
+ fake-vendor fake-model serial-44cdb6f2-fa6c-4b69-bab2-3ae4e1ec4b34 + fake-vendor fake-model serial-4de5fc8e-0e41-4ab9-ba12-2dc63882c96a + fake-vendor fake-model serial-51564e7a-d69f-4942-bcfe-330224633ca6 + fake-vendor fake-model serial-5ca23cb3-cc90-41c5-a474-01898cdd0796 + fake-vendor fake-model serial-6a23a532-0712-4a8d-be9b-e8c17e97aa4b + fake-vendor fake-model serial-6f1a330e-e8d4-4c09-97fc-8918b69b2a3c + fake-vendor fake-model serial-7113d104-fb55-4299-bf53-b3c59d258e44 + fake-vendor fake-model serial-8c10be49-3a66-40d4-a082-64d09d916f14 + fake-vendor fake-model serial-d1ebfd7b-3842-4ad7-be31-2b9c031209a9 omicron zones at generation 2: @@ -44,16 +44,16 @@ parent: 4d4e6c38-cd95-4c4e-8f45-6af4d686964b ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-4069c804-c51a-4adc-8822-3cbbab56ed3f + fake-vendor fake-model serial-5248a306-4a03-449e-a8a3-6f86d26da755 + fake-vendor fake-model serial-55196665-ed61-4b23-9a74-0711bf2eaf90 + fake-vendor fake-model serial-6b2a719a-35eb-469f-aa54-114a1f21f37d + fake-vendor fake-model serial-7ed4296a-66d1-4fb2-bc56-9b23b8f27d7e + fake-vendor fake-model serial-984e2389-e7fd-4af9-ab02-e3caf77f95b5 + fake-vendor fake-model serial-a5f75431-3795-426c-8f80-176f658281a5 + fake-vendor fake-model serial-cf32a1ce-2c9e-49f5-b1cf-4af7f2a28901 + fake-vendor fake-model serial-e405da11-cb6b-4ebc-bac1-9bc997352e10 + fake-vendor fake-model serial-f4d7f914-ec73-4b65-8696-5068591d9065 omicron zones at generation 3: @@ -84,16 +84,16 @@ parent: 4d4e6c38-cd95-4c4e-8f45-6af4d686964b ---------------------------------------------------------------------- vendor model serial ---------------------------------------------------------------------- - fake-vendor fake-model serial-088f76ef-e985-41fd-8630-c321ed8cca37 - fake-vendor fake-model serial-30d0e693-dec4-402f-baa0-d6d9a93c98a7 - fake-vendor fake-model serial-32e90a17-7080-4c33-a94d-05f4bfb5d368 - fake-vendor fake-model serial-44473266-e28a-43fa-9314-c3416b8b3c14 - fake-vendor fake-model serial-53372ece-d666-4f5b-8f25-286e36242088 - fake-vendor fake-model serial-795061c9-db7b-404a-a2a3-0dad5fdfceb1 - fake-vendor fake-model serial-7b8bc126-4ff8-434f-a949-e98eda2709a5 - fake-vendor fake-model serial-b644318e-da11-46e1-b650-47a067e6024c - fake-vendor fake-model serial-bb2b397b-a3f5-4142-a433-4f2ab5fe284b - fake-vendor fake-model serial-bdbf1352-725d-4b17-98d5-4d7105726721 + fake-vendor fake-model serial-33d48d85-751e-4982-b738-eae4d9a05f01 + fake-vendor fake-model serial-39ca2e23-4c38-4743-afe0-26b0380b27db + fake-vendor fake-model serial-4fbd2fe0-2eac-41b8-8e8d-4fa46c3e8b6c + fake-vendor fake-model serial-60131a33-1f12-4dbb-9435-bdd368db1f51 + fake-vendor fake-model serial-77e45b5b-869f-4e78-8ce3-28bbe8cf37e9 + 
fake-vendor fake-model serial-789d607d-d196-428e-a988-f7886a327859 + fake-vendor fake-model serial-b104b94c-2197-4e76-bfbd-6f966bd5af66 + fake-vendor fake-model serial-cd62306a-aedf-47e8-93d5-92a358d64c7b + fake-vendor fake-model serial-f1693454-aac1-4265-b8a0-4e9f3f41c7b3 + fake-vendor fake-model serial-fe4fdfba-3b6d-47d3-8612-1fb2390b650a omicron zones at generation 3: diff --git a/nexus/reconfigurator/preparation/src/lib.rs b/nexus/reconfigurator/preparation/src/lib.rs index 24e9afddf8..68971ec3e1 100644 --- a/nexus/reconfigurator/preparation/src/lib.rs +++ b/nexus/reconfigurator/preparation/src/lib.rs @@ -33,6 +33,7 @@ use nexus_types::identity::Resource; use nexus_types::inventory::Collection; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; +use omicron_common::address::COCKROACHDB_REDUNDANCY; use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Error; @@ -60,6 +61,7 @@ pub struct PlanningInputFromDb<'a> { pub external_ip_rows: &'a [nexus_db_model::ExternalIp], pub service_nic_rows: &'a [nexus_db_model::ServiceNetworkInterface], pub target_nexus_zone_count: usize, + pub target_cockroachdb_zone_count: usize, pub target_cockroachdb_cluster_version: CockroachDbClusterVersion, pub internal_dns_version: nexus_db_model::Generation, pub external_dns_version: nexus_db_model::Generation, @@ -74,6 +76,7 @@ impl PlanningInputFromDb<'_> { let policy = Policy { service_ip_pool_ranges, target_nexus_zone_count: self.target_nexus_zone_count, + target_cockroachdb_zone_count: self.target_cockroachdb_zone_count, target_cockroachdb_cluster_version: self .target_cockroachdb_cluster_version, }; @@ -234,6 +237,7 @@ pub async fn reconfigurator_state_load( zpool_rows: &zpool_rows, ip_pool_range_rows: &ip_pool_range_rows, target_nexus_zone_count: NEXUS_REDUNDANCY, + target_cockroachdb_zone_count: COCKROACHDB_REDUNDANCY, target_cockroachdb_cluster_version: CockroachDbClusterVersion::POLICY, external_ip_rows: &external_ip_rows, service_nic_rows: &service_nic_rows, diff --git a/nexus/src/app/background/blueprint_execution.rs b/nexus/src/app/background/blueprint_execution.rs index 69725acf1d..b01d1213de 100644 --- a/nexus/src/app/background/blueprint_execution.rs +++ b/nexus/src/app/background/blueprint_execution.rs @@ -54,9 +54,10 @@ impl BlueprintExecutor { let update = self.rx_blueprint.borrow_and_update().clone(); let Some(update) = update else { - warn!(&opctx.log, - "Blueprint execution: skipped"; - "reason" => "no blueprint"); + warn!( + &opctx.log, "Blueprint execution: skipped"; + "reason" => "no blueprint", + ); return json!({"error": "no blueprint" }); }; diff --git a/nexus/src/app/background/crdb_node_id_collector.rs b/nexus/src/app/background/crdb_node_id_collector.rs new file mode 100644 index 0000000000..2736514021 --- /dev/null +++ b/nexus/src/app/background/crdb_node_id_collector.rs @@ -0,0 +1,573 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for collecting the Cockroach Node ID for running CRDB zones +//! +//! Cockroach assigns a node ID when the node is initially started and joins the +//! cluster. The node IDs are 1-up counters that are never reused. Cluster +//! management operations (e.g., decommissioning nodes) are keyed off of the +//! node ID. 
However, because node IDs aren't assigned until the node has +//! started and joins the cluster, it means there is a gap between when Omicron +//! creates a CRDB zone (and picks an Omicron zone ID for it) and when that zone +//! gets a CRDB node ID. This RPW exists to backfill the mapping from Omicron +//! zone ID <-> CRDB node ID for Cockroach zones. +//! +//! This isn't foolproof. If a Cockroach node fails to start, it won't have a +//! node ID and therefore this RPW won't be able to make an assignment. If a +//! Cockroach node succeeds in starting and gets a node ID but then fails in an +//! unrecoverable way before this RPW has collected its node ID, that will also +//! result in a missing assignment. Consumers of the Omicron zone ID <-> CRDB +//! node ID don't have a way of distinguishing these two failure modes from this +//! RPW alone, and will need to gather other information (e.g., asking CRDB for +//! the status of all nodes and looking for orphans, perhaps) to determine +//! whether a zone without a known node ID ever existed. + +use super::common::BackgroundTask; +use anyhow::ensure; +use anyhow::Context; +use futures::future::BoxFuture; +use futures::stream; +use futures::FutureExt; +use futures::StreamExt; +use nexus_auth::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::deployment::blueprint_zone_type; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintTarget; +use nexus_types::deployment::BlueprintZoneFilter; +use nexus_types::deployment::BlueprintZoneType; +use omicron_common::address::COCKROACH_ADMIN_PORT; +use omicron_uuid_kinds::OmicronZoneUuid; +use serde_json::json; +use std::net::SocketAddrV6; +use std::sync::Arc; +use tokio::sync::watch; + +pub struct CockroachNodeIdCollector { + datastore: Arc, + rx_blueprint: watch::Receiver>>, +} + +impl CockroachNodeIdCollector { + pub fn new( + datastore: Arc, + rx_blueprint: watch::Receiver< + Option>, + >, + ) -> Self { + Self { datastore, rx_blueprint } + } + + /// Implementation for `BackgroundTask::activate`, added here to produce + /// better compile errors. + /// + /// The presence of `boxed()` in `BackgroundTask::activate` has caused some + /// confusion with compilation errors in the past. So separate this method + /// out. + async fn activate_impl( + &mut self, + opctx: &OpContext, + addrs_from_blueprint: &T, + ) -> serde_json::Value { + // Get the latest blueprint, cloning to prevent holding a read lock + // on the watch. + let update = self.rx_blueprint.borrow_and_update().clone(); + + let Some((_bp_target, blueprint)) = update.as_deref() else { + warn!( + &opctx.log, "Blueprint execution: skipped"; + "reason" => "no blueprint", + ); + return json!({"error": "no blueprint" }); + }; + + // With a bit of concurrency, confirm we know the node IDs for all the + // CRDB zones in the blueprint. 
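+        // Fan out with bounded concurrency: `buffer_unordered(8)` below runs
+        // at most eight `ensure_node_id_known` calls at once, and a failure
+        // for one zone is recorded in `errors` rather than aborting the
+        // activation, so a single unreachable cockroach-admin server does not
+        // block collection for the remaining zones.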
+ let mut results = + stream::iter(addrs_from_blueprint.cockroach_admin_addrs(blueprint)) + .map(|(zone_id, admin_addr)| { + let datastore = &self.datastore; + async move { + ensure_node_id_known( + opctx, datastore, zone_id, admin_addr, + ) + .await + .map_err(|err| (zone_id, err)) + } + }) + .buffer_unordered(8); + + let mut nsuccess = 0; + let mut errors = vec![]; + while let Some(result) = results.next().await { + match result { + Ok(()) => { + nsuccess += 1; + } + Err((zone_id, err)) => { + errors.push(json!({ + "zone_id": zone_id, + "err": format!("{err:#}"), + })); + } + } + } + + if errors.is_empty() { + json!({ "nsuccess": nsuccess }) + } else { + json!({ + "nsuccess": nsuccess, + "errors": errors, + }) + } + } +} + +// This trait exists so we can inject addresses in our unit tests below that +// aren't required to have admin servers listening on the fixed +// `COCKROACH_ADMIN_PORT`. +trait CockroachAdminFromBlueprint { + fn cockroach_admin_addrs<'a>( + &'a self, + blueprint: &'a Blueprint, + ) -> impl Iterator + 'a; +} + +struct CockroachAdminFromBlueprintViaFixedPort; + +impl CockroachAdminFromBlueprint for CockroachAdminFromBlueprintViaFixedPort { + fn cockroach_admin_addrs<'a>( + &'a self, + blueprint: &'a Blueprint, + ) -> impl Iterator + 'a { + // We can only actively collect from zones that should be running; if + // there are CRDB zones in other states that still need their node ID + // collected, we have to wait until they're running. + let zone_filter = BlueprintZoneFilter::ShouldBeRunning; + + blueprint.all_omicron_zones(zone_filter).filter_map( + |(_sled_id, zone)| match &zone.zone_type { + BlueprintZoneType::CockroachDb( + blueprint_zone_type::CockroachDb { address, .. }, + ) => { + let mut admin_addr = *address; + admin_addr.set_port(COCKROACH_ADMIN_PORT); + Some((zone.id, admin_addr)) + } + _ => None, + }, + ) + } +} + +async fn ensure_node_id_known( + opctx: &OpContext, + datastore: &DataStore, + zone_id: OmicronZoneUuid, + admin_addr: SocketAddrV6, +) -> anyhow::Result<()> { + // Do we already know the node ID for this zone? + if datastore + .cockroachdb_node_id(opctx, zone_id) + .await + .with_context(|| { + format!("fetching existing node ID for zone {zone_id}") + })? + .is_some() + { + return Ok(()); + } + + // We don't know the address; contact the admin server and ask if it knows. + let admin_url = format!("http://{admin_addr}"); + let admin_client = + cockroach_admin_client::Client::new(&admin_url, opctx.log.clone()); + let node = admin_client + .node_id() + .await + .with_context(|| { + format!("failed to fetch node ID for zone {zone_id} at {admin_url}") + })? + .into_inner(); + + // Ensure the address we have for this zone is the zone we think it is. + // Absent bugs, the only way this can fail is if our blueprint is out of + // date, and there's now a new zone running at `admin_addr`; we _should_ + // fail in that case, and we'll catch up to reality when we reload the + // target blueprint. + ensure!( + zone_id == node.zone_id, + "expected cockroach zone {zone_id} at {admin_url}, but found zone {}", + node.zone_id + ); + + // Record this value. We have a harmless TOCTOU here; if multiple Nexus + // instances all checked for a node ID, found none, and get here, this call + // is idempotent (as long as they all are inserting the same node ID, which + // they certainly should be!). 
+ datastore + .set_cockroachdb_node_id(opctx, zone_id, node.node_id.clone()) + .await + .with_context(|| { + format!( + "failed to record node ID {} for zone {zone_id}", + node.node_id + ) + }) +} + +impl BackgroundTask for CockroachNodeIdCollector { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + self.activate_impl(opctx, &CockroachAdminFromBlueprintViaFixedPort) + .boxed() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::Utc; + use httptest::matchers::any; + use httptest::responders::json_encoded; + use httptest::responders::status_code; + use httptest::Expectation; + use nexus_db_queries::db::datastore::pub_test_utils::datastore_test; + use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; + use nexus_test_utils::db::test_setup_database; + use nexus_types::deployment::BlueprintZoneConfig; + use nexus_types::deployment::BlueprintZoneDisposition; + use omicron_test_utils::dev; + use omicron_uuid_kinds::SledUuid; + use std::collections::BTreeMap; + use std::iter; + use std::net::SocketAddr; + use uuid::Uuid; + + // The `CockroachAdminFromBlueprintViaFixedPort` type above is the standard + // way to map from a blueprint to an iterator of cockroach-admin addresses. + // We can't use that in the more thorough test below (and it exists so we + // can _write_ that test), so test it in isolation here. + #[test] + fn test_default_cockroach_admin_addrs_from_blueprint() { + // Construct an empty blueprint with one sled. + let sled_id = SledUuid::new_v4(); + let mut blueprint = BlueprintBuilder::build_empty_with_sleds( + iter::once(sled_id), + "test", + ); + let bp_zones = blueprint + .blueprint_zones + .get_mut(&sled_id) + .expect("found entry for test sled"); + + let make_crdb_zone_config = + |disposition, id, addr: SocketAddrV6| BlueprintZoneConfig { + disposition, + id, + underlay_address: *addr.ip(), + zone_type: BlueprintZoneType::CockroachDb( + blueprint_zone_type::CockroachDb { + address: addr, + dataset: nexus_types::inventory::OmicronZoneDataset { + pool_name: format!("oxp_{}", Uuid::new_v4()) + .parse() + .unwrap(), + }, + }, + ), + }; + + // Add three CRDB zones with known addresses; the first and third are + // in service, and the second is expunged. Only the first and third + // should show up when we ask for addresses below. + let crdb_id1 = OmicronZoneUuid::new_v4(); + let crdb_id2 = OmicronZoneUuid::new_v4(); + let crdb_id3 = OmicronZoneUuid::new_v4(); + let crdb_addr1: SocketAddrV6 = "[2001:db8::1]:1111".parse().unwrap(); + let crdb_addr2: SocketAddrV6 = "[2001:db8::2]:1234".parse().unwrap(); + let crdb_addr3: SocketAddrV6 = "[2001:db8::3]:1234".parse().unwrap(); + bp_zones.zones.push(make_crdb_zone_config( + BlueprintZoneDisposition::InService, + crdb_id1, + crdb_addr1, + )); + bp_zones.zones.push(make_crdb_zone_config( + BlueprintZoneDisposition::Expunged, + crdb_id2, + crdb_addr2, + )); + bp_zones.zones.push(make_crdb_zone_config( + BlueprintZoneDisposition::InService, + crdb_id3, + crdb_addr3, + )); + + // Also add a non-CRDB zone to ensure it's filtered out. + bp_zones.zones.push(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: OmicronZoneUuid::new_v4(), + underlay_address: "::1".parse().unwrap(), + zone_type: BlueprintZoneType::CruciblePantry( + blueprint_zone_type::CruciblePantry { + address: "[::1]:0".parse().unwrap(), + }, + ), + }); + + // We expect to see CRDB zones 1 and 3 with their IPs but the ports + // changed to `COCKROACH_ADMIN_PORT`. 
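+        // (The expunged zone `crdb_id2` and the Crucible Pantry zone are
+        // filtered out entirely, so they do not appear in `expected`.)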
+ let expected = vec![ + ( + crdb_id1, + SocketAddrV6::new(*crdb_addr1.ip(), COCKROACH_ADMIN_PORT, 0, 0), + ), + ( + crdb_id3, + SocketAddrV6::new(*crdb_addr3.ip(), COCKROACH_ADMIN_PORT, 0, 0), + ), + ]; + + let admin_addrs = CockroachAdminFromBlueprintViaFixedPort + .cockroach_admin_addrs(&blueprint) + .collect::>(); + assert_eq!(expected, admin_addrs); + } + + #[tokio::test] + async fn test_activate_fails_if_no_blueprint() { + let logctx = dev::test_setup_log("test_activate_fails_if_no_blueprint"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = + datastore_test(&logctx, &db, Uuid::new_v4()).await; + + let (_tx_blueprint, rx_blueprint) = watch::channel(None); + let mut collector = + CockroachNodeIdCollector::new(datastore.clone(), rx_blueprint); + let result = collector.activate(&opctx).await; + + assert_eq!(result, json!({"error": "no blueprint"})); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + struct FakeCockroachAdminAddrs(Vec<(OmicronZoneUuid, SocketAddrV6)>); + + impl CockroachAdminFromBlueprint for FakeCockroachAdminAddrs { + fn cockroach_admin_addrs<'a>( + &'a self, + _blueprint: &'a Blueprint, + ) -> impl Iterator + 'a + { + self.0.iter().copied() + } + } + + #[tokio::test] + async fn test_activate_with_no_unknown_node_ids() { + let logctx = + dev::test_setup_log("test_activate_with_no_unknown_node_ids"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = + datastore_test(&logctx, &db, Uuid::new_v4()).await; + + let blueprint = BlueprintBuilder::build_empty_with_sleds( + iter::once(SledUuid::new_v4()), + "test", + ); + let blueprint_target = BlueprintTarget { + target_id: blueprint.id, + enabled: true, + time_made_target: Utc::now(), + }; + + let (_tx_blueprint, rx_blueprint) = + watch::channel(Some(Arc::new((blueprint_target, blueprint)))); + let mut collector = + CockroachNodeIdCollector::new(datastore.clone(), rx_blueprint); + + // The blueprint is empty. This should be fine: we should get no + // successes and no errors. + let result = collector.activate(&opctx).await; + assert_eq!(result, json!({"nsuccess": 0})); + + // Create a few fake CRDB zones, and assign them node IDs in the + // datastore. + let crdb_zones = + (0..5).map(|_| OmicronZoneUuid::new_v4()).collect::>(); + for (i, zone_id) in crdb_zones.iter().copied().enumerate() { + datastore + .set_cockroachdb_node_id( + &opctx, + zone_id, + format!("test-node-{i}"), + ) + .await + .expect("assigned fake node ID"); + } + + // Activate again, injecting our fake CRDB zones with arbitrary + // cockroach-admin addresses. Because the node IDs are already in the + // datastore, the collector shouldn't try to contact these addresses and + // should instead report that all nodes are recorded successfully. + let result = collector + .activate_impl( + &opctx, + &FakeCockroachAdminAddrs( + crdb_zones + .iter() + .map(|&zone_id| (zone_id, "[::1]:0".parse().unwrap())) + .collect(), + ), + ) + .await; + assert_eq!(result, json!({"nsuccess": crdb_zones.len()})); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_activate_with_unknown_node_ids() { + // Test setup. 
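+        // (Datastore, an empty one-sled blueprint, a blueprint target, and
+        // the collector wired to a watch channel, mirroring the test above.)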
+ let logctx = dev::test_setup_log("test_activate_with_unknown_node_ids"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = + datastore_test(&logctx, &db, Uuid::new_v4()).await; + + let blueprint = BlueprintBuilder::build_empty_with_sleds( + iter::once(SledUuid::new_v4()), + "test", + ); + let blueprint_target = BlueprintTarget { + target_id: blueprint.id, + enabled: true, + time_made_target: Utc::now(), + }; + + let (_tx_blueprint, rx_blueprint) = + watch::channel(Some(Arc::new((blueprint_target, blueprint)))); + let mut collector = + CockroachNodeIdCollector::new(datastore.clone(), rx_blueprint); + + // We'll send in three Cockroach nodes for the collector to gather: + // + // 1. Node 1 will succeed + // 2. Node 2 will fail + // 3. Node 3 will succeed, but will report an unexpected zone ID + // + // We should see one success and two errors in the activation result. We + // need to start three fake cockroach-admin servers to handle the + // requests. + let make_httptest_server = || { + httptest::ServerBuilder::new() + .bind_addr("[::1]:0".parse().unwrap()) + .run() + .expect("started httptest server") + }; + let crdb_zone_id1 = OmicronZoneUuid::new_v4(); + let crdb_zone_id2 = OmicronZoneUuid::new_v4(); + let crdb_zone_id3 = OmicronZoneUuid::new_v4(); + let crdb_zone_id4 = OmicronZoneUuid::new_v4(); + let crdb_node_id1 = "fake-node-1"; + let crdb_node_id3 = "fake-node-3"; + let mut admin1 = make_httptest_server(); + let mut admin2 = make_httptest_server(); + let mut admin3 = make_httptest_server(); + let crdb_admin_addrs = FakeCockroachAdminAddrs( + vec![ + (crdb_zone_id1, admin1.addr()), + (crdb_zone_id2, admin2.addr()), + (crdb_zone_id3, admin3.addr()), + ] + .into_iter() + .map(|(zone_id, addr)| { + let SocketAddr::V6(addr6) = addr else { + panic!("expected IPv6 addr; got {addr}"); + }; + (zone_id, addr6) + }) + .collect(), + ); + + // Node 1 succeeds. + admin1.expect(Expectation::matching(any()).times(1).respond_with( + json_encoded(cockroach_admin_client::types::NodeId { + zone_id: crdb_zone_id1, + node_id: crdb_node_id1.to_string(), + }), + )); + // Node 2 fails. + admin2.expect( + Expectation::matching(any()) + .times(1) + .respond_with(status_code(503)), + ); + // Node 3 succeeds, but with an unexpected zone_id. + admin3.expect(Expectation::matching(any()).times(1).respond_with( + json_encoded(cockroach_admin_client::types::NodeId { + zone_id: crdb_zone_id4, + node_id: crdb_node_id3.to_string(), + }), + )); + + let result = collector.activate_impl(&opctx, &crdb_admin_addrs).await; + + admin1.verify_and_clear(); + admin2.verify_and_clear(); + admin3.verify_and_clear(); + + let result = result.as_object().expect("JSON object"); + + // We should have one success (node 1). + assert_eq!( + result.get("nsuccess").expect("nsuccess key").as_number(), + Some(&serde_json::Number::from(1)) + ); + let errors = result + .get("errors") + .expect("errors key") + .as_array() + .expect("errors array") + .iter() + .map(|val| { + let error = val.as_object().expect("error object"); + let zone_id = error + .get("zone_id") + .expect("zone_id key") + .as_str() + .expect("zone_id string"); + let err = error + .get("err") + .expect("err key") + .as_str() + .expect("err string"); + (zone_id, err) + }) + .collect::>(); + println!("errors: {errors:?}"); + assert_eq!(errors.len(), 2); + + // We should have an error for node 2. We don't check the specific + // message because it may change if progenitor changes how it reports a + // 503 with no body. 
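For reference, the activation summary inspected below is plain `serde_json`; a minimal sketch of the shape the assertions rely on (the field names "nsuccess", "errors", "zone_id", and "err" come from the test itself, while the values here are purely illustrative):

```rust
use serde_json::json;

fn main() {
    // Illustrative only: one success and one error entry.
    let result = json!({
        "nsuccess": 1,
        "errors": [
            {
                "zone_id": "00000000-0000-0000-0000-000000000000",
                "err": "some error returned by a cockroach-admin server"
            }
        ]
    });
    assert_eq!(result["nsuccess"], 1);
    assert_eq!(result["errors"].as_array().unwrap().len(), 1);
}
```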
+ assert!(errors.contains_key(crdb_zone_id2.to_string().as_str())); + + // The error message for node 3 should contain both the expected and + // unexpected zone IDs. + let crdb_zone_id3 = crdb_zone_id3.to_string(); + let crdb_zone_id4 = crdb_zone_id4.to_string(); + let crdb_err3 = + errors.get(crdb_zone_id3.as_str()).expect("error for zone 3"); + assert!(crdb_err3.contains(&crdb_zone_id3)); + assert!(crdb_err3.contains(&crdb_zone_id4)); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index a87c53860d..f78cb69d76 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -9,6 +9,7 @@ use super::bfd; use super::blueprint_execution; use super::blueprint_load; use super::common; +use super::crdb_node_id_collector; use super::dns_config; use super::dns_propagation; use super::dns_servers; @@ -86,6 +87,9 @@ pub struct BackgroundTasks { /// task handle for blueprint execution background task pub task_blueprint_executor: common::TaskHandle, + /// task handle for collecting CockroachDB node IDs + pub task_crdb_node_id_collector: common::TaskHandle, + /// task handle for the service zone nat tracker pub task_service_zone_nat_tracker: common::TaskHandle, @@ -263,6 +267,21 @@ impl BackgroundTasks { config.blueprints.period_secs_execute, Box::new(blueprint_executor), opctx.child(BTreeMap::new()), + vec![Box::new(rx_blueprint.clone())], + ); + + // Background task: CockroachDB node ID collector + let crdb_node_id_collector = + crdb_node_id_collector::CockroachNodeIdCollector::new( + datastore.clone(), + rx_blueprint.clone(), + ); + let task_crdb_node_id_collector = driver.register( + String::from("crdb_node_id_collector"), + String::from("Collects node IDs of running CockroachDB zones"), + config.blueprints.period_secs_collect_crdb_node_ids, + Box::new(crdb_node_id_collector), + opctx.child(BTreeMap::new()), vec![Box::new(rx_blueprint)], ); @@ -438,6 +457,7 @@ impl BackgroundTasks { task_phantom_disks, task_blueprint_loader, task_blueprint_executor, + task_crdb_node_id_collector, task_service_zone_nat_tracker, task_switch_port_settings_manager, task_v2p_manager, diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index 6de9e6f4d3..7d1fc43d69 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -9,6 +9,7 @@ mod bfd; mod blueprint_execution; mod blueprint_load; mod common; +mod crdb_node_id_collector; mod dns_config; mod dns_propagation; mod dns_servers; diff --git a/nexus/src/app/background/sync_switch_configuration.rs b/nexus/src/app/background/sync_switch_configuration.rs index 54fc5b8be0..8552d62988 100644 --- a/nexus/src/app/background/sync_switch_configuration.rs +++ b/nexus/src/app/background/sync_switch_configuration.rs @@ -51,8 +51,8 @@ use omicron_common::{ use serde_json::json; use sled_agent_client::types::{ BgpConfig as SledBgpConfig, BgpPeerConfig as SledBgpPeerConfig, - EarlyNetworkConfig, EarlyNetworkConfigBody, HostPortConfig, PortConfigV1, - RackNetworkConfigV1, RouteConfig as SledRouteConfig, + EarlyNetworkConfig, EarlyNetworkConfigBody, HostPortConfig, PortConfigV2, + RackNetworkConfigV2, RouteConfig as SledRouteConfig, UplinkAddressConfig, }; use std::{ collections::{hash_map::Entry, HashMap, HashSet}, @@ -901,7 +901,7 @@ impl BackgroundTask for SwitchPortSettingsManager { bgp.dedup(); - let mut ports: Vec = vec![]; + let mut ports: Vec = vec![]; for (location, port, change) in &changes { let 
PortSettingsChange::Apply(info) = change else { @@ -922,8 +922,12 @@ impl BackgroundTask for SwitchPortSettingsManager { }, }; - let mut port_config = PortConfigV1 { - addresses: info.addresses.iter().map(|a| a.address.into()).collect(), + let mut port_config = PortConfigV2 { + addresses: info.addresses.iter().map(|a| + UplinkAddressConfig { + address: a.address.into(), + vlan_id: a.vlan_id.map(|v| v.into()) + }).collect(), autoneg: info .links .get(0) //TODO breakout support @@ -1096,10 +1100,10 @@ impl BackgroundTask for SwitchPortSettingsManager { let mut desired_config = EarlyNetworkConfig { generation: 0, - schema_version: 1, + schema_version: 2, body: EarlyNetworkConfigBody { ntp_servers, - rack_network_config: Some(RackNetworkConfigV1 { + rack_network_config: Some(RackNetworkConfigV2 { rack_subnet: subnet, infra_ip_first, infra_ip_last, @@ -1401,7 +1405,14 @@ fn uplinks( }; let config = HostPortConfig { port: port.port_name.clone(), - addrs: config.addresses.iter().map(|a| a.address.into()).collect(), + addrs: config + .addresses + .iter() + .map(|a| UplinkAddressConfig { + address: a.address.into(), + vlan_id: a.vlan_id.map(|v| v.into()), + }) + .collect(), }; match uplinks.entry(*location) { diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index 280f4306c7..ca4635b13e 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -17,6 +17,7 @@ use nexus_types::deployment::CockroachDbClusterVersion; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledFilter; use nexus_types::inventory::Collection; +use omicron_common::address::COCKROACHDB_REDUNDANCY; use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; @@ -175,6 +176,7 @@ impl super::Nexus { external_ip_rows: &external_ip_rows, service_nic_rows: &service_nic_rows, target_nexus_zone_count: NEXUS_REDUNDANCY, + target_cockroachdb_zone_count: COCKROACHDB_REDUNDANCY, target_cockroachdb_cluster_version: CockroachDbClusterVersion::POLICY, log: &opctx.log, diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 27f62036b1..943665cab3 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -25,6 +25,7 @@ use nexus_db_queries::authn; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; +use nexus_db_queries::db::datastore::instance::InstanceUpdateResult; use nexus_db_queries::db::datastore::InstanceAndActiveVmm; use nexus_db_queries::db::identity::Resource; use nexus_db_queries::db::lookup; @@ -563,25 +564,27 @@ impl super::Nexus { // outright fails, this operation fails. If the operation nominally // succeeds but nothing was updated, this action is outdated and the // caller should not proceed with migration. - let (updated, _) = match instance_put_result { - Ok(state) => { - self.write_returned_instance_state(&instance_id, state).await? - } - Err(e) => { - if e.instance_unhealthy() { - let _ = self - .mark_instance_failed( - &instance_id, - &prev_instance_runtime, - &e, - ) - .await; + let InstanceUpdateResult { instance_updated, .. } = + match instance_put_result { + Ok(state) => { + self.write_returned_instance_state(&instance_id, state) + .await? 
} - return Err(e.into()); - } - }; + Err(e) => { + if e.instance_unhealthy() { + let _ = self + .mark_instance_failed( + &instance_id, + &prev_instance_runtime, + &e, + ) + .await; + } + return Err(e.into()); + } + }; - if updated { + if instance_updated { Ok(self .db_datastore .instance_refetch(opctx, &authz_instance) @@ -1321,7 +1324,7 @@ impl super::Nexus { &self, instance_id: &Uuid, state: Option, - ) -> Result<(bool, bool), Error> { + ) -> Result { slog::debug!(&self.log, "writing instance state returned from sled agent"; "instance_id" => %instance_id, @@ -1335,6 +1338,7 @@ impl super::Nexus { &state.instance_state.into(), &state.propolis_id, &state.vmm_state.into(), + &state.migration_state, ) .await; @@ -1346,7 +1350,13 @@ impl super::Nexus { update_result } else { - Ok((false, false)) + // There was no instance state to write back, so --- perhaps + // obviously --- nothing happened. + Ok(InstanceUpdateResult { + instance_updated: false, + vmm_updated: false, + migration_updated: None, + }) } } @@ -1954,16 +1964,6 @@ impl super::Nexus { } } -/// Records what aspects of an instance's state were actually changed in a -/// [`notify_instance_updated`] call. -/// -/// This is (presently) used for debugging purposes only. -#[derive(Copy, Clone)] -pub(crate) struct InstanceUpdated { - pub instance_updated: bool, - pub vmm_updated: bool, -} - /// Invoked by a sled agent to publish an updated runtime state for an /// Instance. #[allow(clippy::too_many_arguments)] // :( @@ -1976,14 +1976,15 @@ pub(crate) async fn notify_instance_updated( instance_id: &Uuid, new_runtime_state: &nexus::SledInstanceState, v2p_notification_tx: tokio::sync::watch::Sender<()>, -) -> Result, Error> { +) -> Result, Error> { let propolis_id = new_runtime_state.propolis_id; info!(log, "received new runtime state from sled agent"; "instance_id" => %instance_id, "instance_state" => ?new_runtime_state.instance_state, "propolis_id" => %propolis_id, - "vmm_state" => ?new_runtime_state.vmm_state); + "vmm_state" => ?new_runtime_state.vmm_state, + "migration_state" => ?new_runtime_state.migration_state); // Grab the current state of the instance in the DB to reason about // whether this update is stale or not. @@ -2071,9 +2072,44 @@ pub(crate) async fn notify_instance_updated( &db::model::VmmRuntimeState::from( new_runtime_state.vmm_state.clone(), ), + &new_runtime_state.migration_state, ) .await; + // Has a migration terminated? If so,mark the migration record as deleted if + // and only if both sides of the migration are in a terminal state. + if let Some(nexus::MigrationRuntimeState { + migration_id, + state, + role, + .. + }) = new_runtime_state.migration_state + { + if state.is_terminal() { + info!( + log, + "migration has terminated, trying to delete it..."; + "instance_id" => %instance_id, + "propolis_id" => %propolis_id, + "migration_id" => %propolis_id, + "migration_state" => %state, + "migration_role" => %role, + ); + if !datastore.migration_terminate(opctx, migration_id).await? { + info!( + log, + "did not mark migration record as deleted (the other half \ + may not yet have reported termination)"; + "instance_id" => %instance_id, + "propolis_id" => %propolis_id, + "migration_id" => %propolis_id, + "migration_state" => %state, + "migration_role" => %role, + ); + } + } + } + // If the VMM is now in a terminal state, make sure its resources get // cleaned up. 
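The `state.is_terminal()` check above gates the soft-delete of the migration record. A minimal sketch of that check, assuming it behaves as shown here; the variants mirror the `MigrationState` schema added elsewhere in this change (pending, in_progress, failed, completed):

```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MigrationState {
    Pending,
    InProgress,
    Failed,
    Completed,
}

impl MigrationState {
    // A migration record may be marked deleted only once this side can make
    // no further progress (assumed semantics, for illustration).
    fn is_terminal(self) -> bool {
        matches!(self, MigrationState::Failed | MigrationState::Completed)
    }
}

fn main() {
    assert!(!MigrationState::Pending.is_terminal());
    assert!(!MigrationState::InProgress.is_terminal());
    assert!(MigrationState::Failed.is_terminal());
    assert!(MigrationState::Completed.is_terminal());
}
```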
// @@ -2112,13 +2148,14 @@ pub(crate) async fn notify_instance_updated( } match result { - Ok((instance_updated, vmm_updated)) => { + Ok(result) => { info!(log, "instance and vmm updated by sled agent"; "instance_id" => %instance_id, "propolis_id" => %propolis_id, - "instance_updated" => instance_updated, - "vmm_updated" => vmm_updated); - Ok(Some(InstanceUpdated { instance_updated, vmm_updated })) + "instance_updated" => result.instance_updated, + "vmm_updated" => result.vmm_updated, + "migration_updated" => ?result.migration_updated); + Ok(Some(result)) } // The update command should swallow object-not-found errors and diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index da97c77c04..780cb85f3f 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -554,7 +554,8 @@ impl super::Nexus { .iter() .map(|a| Address { address_lot: NameOrId::Name(address_lot_name.clone()), - address: (*a), + address: a.address, + vlan_id: a.vlan_id, }) .collect(); @@ -565,7 +566,11 @@ impl super::Nexus { let routes: Vec = uplink_config .routes .iter() - .map(|r| Route { dst: r.destination, gw: r.nexthop, vid: None }) + .map(|r| Route { + dst: r.destination, + gw: r.nexthop, + vid: r.vlan_id, + }) .collect(); port_settings_params diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 1434064666..cbcad41a4f 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -65,6 +65,12 @@ declare_saga_actions! { - sim_destroy_vmm_record } + CREATE_MIGRATION_RECORD -> "migration_record" { + + sim_create_migration_record + - sim_delete_migration_record + } + + // This step the instance's migration ID and destination Propolis ID // fields. Because the instance is active, its current sled agent maintains // its most recent runtime state, so to update it, the saga calls into the @@ -128,6 +134,7 @@ impl NexusSaga for SagaInstanceMigrate { builder.append(reserve_resources_action()); builder.append(allocate_propolis_ip_action()); builder.append(create_vmm_record_action()); + builder.append(create_migration_record_action()); builder.append(set_migration_ids_action()); builder.append(ensure_destination_propolis_action()); builder.append(instance_migrate_action()); @@ -189,6 +196,57 @@ async fn sim_allocate_propolis_ip( .await } +async fn sim_create_migration_record( + sagactx: NexusActionContext, +) -> Result { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let source_propolis_id = params.src_vmm.id; + let migration_id = sagactx.lookup::("migrate_id")?; + let target_propolis_id = sagactx.lookup::("dst_propolis_id")?; + + info!(osagactx.log(), "creating migration record"; + "migration_id" => %migration_id, + "source_propolis_id" => %source_propolis_id, + "target_propolis_id" => %target_propolis_id); + + osagactx + .datastore() + .migration_insert( + &opctx, + db::model::Migration::new( + migration_id, + source_propolis_id, + target_propolis_id, + ), + ) + .await + .map_err(ActionError::action_failed) +} + +async fn sim_delete_migration_record( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx: &std::sync::Arc = + sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + let migration_id = sagactx.lookup::("migrate_id")?; + + info!(osagactx.log(), "deleting 
migration record"; + "migration_id" => %migration_id); + osagactx.datastore().migration_mark_deleted(&opctx, migration_id).await?; + Ok(()) +} + async fn sim_create_vmm_record( sagactx: NexusActionContext, ) -> Result { diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index deb43c42b6..de00290616 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -65,7 +65,7 @@ use oximeter_producer::LogConfig; use oximeter_producer::Server as ProducerServer; use sled_agent_client::types::EarlyNetworkConfig; use sled_agent_client::types::EarlyNetworkConfigBody; -use sled_agent_client::types::RackNetworkConfigV1; +use sled_agent_client::types::RackNetworkConfigV2; use slog::{debug, error, o, Logger}; use std::collections::BTreeMap; use std::collections::HashMap; @@ -939,7 +939,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { .write_network_bootstore_config(&EarlyNetworkConfig { body: EarlyNetworkConfigBody { ntp_servers: Vec::new(), - rack_network_config: Some(RackNetworkConfigV1 { + rack_network_config: Some(RackNetworkConfigV2 { bfd: Vec::new(), bgp: Vec::new(), infra_ip_first: "192.0.2.10".parse().unwrap(), @@ -951,7 +951,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { }), }, generation: 1, - schema_version: 1, + schema_version: 2, }) .await .expect("Failed to write early networking config to bootstore"); diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 861d78e20c..9aed5bcb69 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -106,6 +106,7 @@ physical_disk_adoption.period_secs = 30 physical_disk_adoption.disable = true blueprints.period_secs_load = 100 blueprints.period_secs_execute = 600 +blueprints.period_secs_collect_crdb_node_ids = 600 sync_service_zone_nat.period_secs = 30 switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 7672bbc034..ca46a8bf06 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -979,6 +979,12 @@ pub enum AllowedMethod { /// always fail in the correct way. #[allow(dead_code)] GetUnimplemented, + /// HTTP "GET" method, but where the response data may change for reasons + /// other than successful user interaction. This should be uncommon; in + /// most cases resources do not change merely due to the passage of time, + /// although one common case is when the response data is updated by a + /// background task. + GetVolatile, /// HTTP "GET" method with websocket handshake headers. 
GetWebsocket, /// HTTP "POST" method, with sample input (which should be valid input for @@ -994,10 +1000,11 @@ impl AllowedMethod { pub fn http_method(&self) -> &'static http::Method { match self { AllowedMethod::Delete => &Method::DELETE, - AllowedMethod::Get => &Method::GET, - AllowedMethod::GetNonexistent => &Method::GET, - AllowedMethod::GetUnimplemented => &Method::GET, - AllowedMethod::GetWebsocket => &Method::GET, + AllowedMethod::Get + | AllowedMethod::GetNonexistent + | AllowedMethod::GetUnimplemented + | AllowedMethod::GetVolatile + | AllowedMethod::GetWebsocket => &Method::GET, AllowedMethod::Post(_) => &Method::POST, AllowedMethod::Put(_) => &Method::PUT, } @@ -1013,6 +1020,7 @@ impl AllowedMethod { | AllowedMethod::Get | AllowedMethod::GetNonexistent | AllowedMethod::GetUnimplemented + | AllowedMethod::GetVolatile | AllowedMethod::GetWebsocket => None, AllowedMethod::Post(body) => Some(&body), AllowedMethod::Put(body) => Some(&body), @@ -2057,7 +2065,7 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { visibility: Visibility::Public, unprivileged_access: UnprivilegedAccess::None, allowed_methods: vec![ - AllowedMethod::Get, + AllowedMethod::GetVolatile, ], }, diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 565e2fbafb..948f8a18f3 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -673,6 +673,36 @@ async fn test_instance_start_creates_networking_state( #[nexus_test] async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { + use nexus_db_model::Migration; + use omicron_common::api::internal::nexus::MigrationState; + async fn migration_fetch( + cptestctx: &ControlPlaneTestContext, + migration_id: Uuid, + ) -> Migration { + use async_bb8_diesel::AsyncRunQueryDsl; + use diesel::prelude::*; + use nexus_db_queries::db::schema::migration::dsl; + + let datastore = + cptestctx.server.server_context().nexus.datastore().clone(); + let db_state = dsl::migration + // N.B. that for the purposes of this test, we explicitly should + // *not* filter out migrations that are marked as deleted, as the + // migration record is marked as deleted once the migration completes. + .filter(dsl::id.eq(migration_id)) + .select(Migration::as_select()) + .get_results_async::( + &*datastore.pool_connection_for_tests().await.unwrap(), + ) + .await + .unwrap(); + + info!(&cptestctx.logctx.log, "refetched migration info from db"; + "migration" => ?db_state); + + db_state.into_iter().next().unwrap() + } + let client = &cptestctx.external_client; let apictx = &cptestctx.server.server_context(); let nexus = &apictx.nexus; @@ -729,7 +759,7 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { let migrate_url = format!("/v1/instances/{}/migrate", &instance_id.to_string()); - let _ = NexusRequest::new( + let instance = NexusRequest::new( RequestBuilder::new(client, Method::POST, &migrate_url) .body(Some(¶ms::InstanceMigrate { dst_sled_id })) .expect_status(Some(StatusCode::OK)), @@ -749,12 +779,40 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { assert_eq!(current_sled, original_sled); + // Ensure that both sled agents report that the migration is in progress. 
+ let migration_id = { + let datastore = apictx.nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + datastore.clone(), + ); + let (.., authz_instance) = LookupPath::new(&opctx, &datastore) + .instance_id(instance.identity.id) + .lookup_for(nexus_db_queries::authz::Action::Read) + .await + .unwrap(); + datastore + .instance_refetch(&opctx, &authz_instance) + .await + .unwrap() + .runtime_state + .migration_id + .expect("since we've started a migration, the instance record must have a migration id!") + }; + let migration = dbg!(migration_fetch(cptestctx, migration_id).await); + assert_eq!(migration.target_state, MigrationState::Pending.into()); + assert_eq!(migration.source_state, MigrationState::Pending.into()); + // Explicitly simulate the migration action on the target. Simulated // migrations always succeed. The state transition on the target is // sufficient to move the instance back into a Running state (strictly // speaking no further updates from the source are required if the target // successfully takes over). instance_simulate_on_sled(cptestctx, nexus, dst_sled_id, instance_id).await; + // Ensure that both sled agents report that the migration has completed. + instance_simulate_on_sled(cptestctx, nexus, original_sled, instance_id) + .await; + let instance = instance_get(&client, &instance_url).await; assert_eq!(instance.runtime.run_state, InstanceState::Running); @@ -765,6 +823,11 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { .expect("migrated instance should still have a sled"); assert_eq!(current_sled, dst_sled_id); + + let migration = dbg!(migration_fetch(cptestctx, migration_id).await); + assert_eq!(migration.target_state, MigrationState::Completed.into()); + assert_eq!(migration.source_state, MigrationState::Completed.into()); + assert!(migration.time_deleted.is_some()); } #[nexus_test] diff --git a/nexus/tests/integration_tests/switch_port.rs b/nexus/tests/integration_tests/switch_port.rs index 41542d8554..0b71ddb2cf 100644 --- a/nexus/tests/integration_tests/switch_port.rs +++ b/nexus/tests/integration_tests/switch_port.rs @@ -149,6 +149,7 @@ async fn test_port_settings_basic_crud(ctx: &ControlPlaneTestContext) { AddressConfig { addresses: vec![Address { address: "203.0.113.10/24".parse().unwrap(), + vlan_id: None, address_lot: NameOrId::Name("parkinglot".parse().unwrap()), }], }, diff --git a/nexus/tests/integration_tests/unauthorized.rs b/nexus/tests/integration_tests/unauthorized.rs index 4f9f75c770..e1a37403c0 100644 --- a/nexus/tests/integration_tests/unauthorized.rs +++ b/nexus/tests/integration_tests/unauthorized.rs @@ -426,6 +426,7 @@ async fn verify_endpoint( allowed, AllowedMethod::Get | AllowedMethod::GetUnimplemented + | AllowedMethod::GetVolatile | AllowedMethod::GetWebsocket ) }); @@ -461,6 +462,22 @@ async fn verify_endpoint( .unwrap(); None } + Some(AllowedMethod::GetVolatile) => { + // Same thing as `Get`, but avoid returning the output to prevent + // the resource change detection ahead. 
+ info!(log, "test: privileged GET (volatile output)"); + record_operation(WhichTest::PrivilegedGet(Some( + &http::StatusCode::OK, + ))); + NexusRequest::object_get(client, uri.as_str()) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap_or_else(|e| panic!("Failed to GET: {uri}: {e}")) + .parsed_body::() + .unwrap(); + None + } Some(AllowedMethod::GetWebsocket) => { info!(log, "test: privileged GET WebSocket"); record_operation(WhichTest::PrivilegedGet(Some( diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 4fcd49a254..d64cde2d06 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -33,7 +33,6 @@ use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; use sled_agent_client::types::OmicronPhysicalDisksConfig; -use sled_agent_client::ZoneKind; use slog_error_chain::SlogInlineError; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -75,6 +74,7 @@ pub use planning_input::SledDisk; pub use planning_input::SledFilter; pub use planning_input::SledResources; pub use planning_input::ZpoolFilter; +pub use sled_agent_client::ZoneKind; pub use zone_type::blueprint_zone_type; pub use zone_type::BlueprintZoneType; diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index bb74c3655e..10d528bbfd 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -91,6 +91,10 @@ impl PlanningInput { self.policy.target_nexus_zone_count } + pub fn target_cockroachdb_zone_count(&self) -> usize { + self.policy.target_cockroachdb_zone_count + } + pub fn target_cockroachdb_cluster_version( &self, ) -> CockroachDbClusterVersion { @@ -632,6 +636,9 @@ pub struct Policy { /// desired total number of deployed Nexus zones pub target_nexus_zone_count: usize, + /// desired total number of deployed CockroachDB zones + pub target_cockroachdb_zone_count: usize, + /// desired CockroachDB `cluster.preserve_downgrade_option` setting. /// at present this is hardcoded based on the version of CockroachDB we /// presently ship and the tick-tock pattern described in RFD 469. @@ -684,6 +691,7 @@ impl PlanningInputBuilder { policy: Policy { service_ip_pool_ranges: Vec::new(), target_nexus_zone_count: 0, + target_cockroachdb_zone_count: 0, target_cockroachdb_cluster_version: CockroachDbClusterVersion::POLICY, }, diff --git a/nexus/types/src/deployment/zone_type.rs b/nexus/types/src/deployment/zone_type.rs index 5b14f1ee3c..dc0fd98129 100644 --- a/nexus/types/src/deployment/zone_type.rs +++ b/nexus/types/src/deployment/zone_type.rs @@ -13,6 +13,7 @@ use omicron_common::api::internal::shared::NetworkInterface; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; +use sled_agent_client::types::OmicronZoneDataset; use sled_agent_client::types::OmicronZoneType; use sled_agent_client::ZoneKind; @@ -139,6 +140,41 @@ impl BlueprintZoneType { | BlueprintZoneType::Oximeter(_) => false, } } + + // Returns the dataset associated with this zone. + // + // TODO-cleanup This currently returns `None` for zones that only have + // transient datasets. This should change to a non-optional value once Nexus + // is aware of them. + pub fn dataset(&self) -> Option<&OmicronZoneDataset> { + match self { + BlueprintZoneType::Clickhouse( + blueprint_zone_type::Clickhouse { dataset, .. }, + ) + | BlueprintZoneType::ClickhouseKeeper( + blueprint_zone_type::ClickhouseKeeper { dataset, .. 
}, + ) + | BlueprintZoneType::CockroachDb( + blueprint_zone_type::CockroachDb { dataset, .. }, + ) + | BlueprintZoneType::Crucible(blueprint_zone_type::Crucible { + dataset, + .. + }) + | BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { dataset, .. }, + ) + | BlueprintZoneType::InternalDns( + blueprint_zone_type::InternalDns { dataset, .. }, + ) => Some(dataset), + // Transient-dataset-only zones + BlueprintZoneType::BoundaryNtp(_) + | BlueprintZoneType::CruciblePantry(_) + | BlueprintZoneType::InternalNtp(_) + | BlueprintZoneType::Nexus(_) + | BlueprintZoneType::Oximeter(_) => None, + } + } } impl From for OmicronZoneType { diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 3f53503cc2..ac169a35ee 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -1753,6 +1753,9 @@ pub struct Address { /// The address and prefix length of this address. pub address: IpNet, + + /// Optional VLAN ID for this address + pub vlan_id: Option, } /// Select a port settings object by an optional name or id. diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index b09f34ea9e..5d175e7b09 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -737,14 +737,14 @@ "description": "Password hashes must be in PHC (Password Hashing Competition) string format. Passwords must be hashed with Argon2id. Password hashes may be rejected if the parameters appear not to be secure enough.", "type": "string" }, - "PortConfigV1": { + "PortConfigV2": { "type": "object", "properties": { "addresses": { - "description": "This port's addresses.", + "description": "This port's addresses and optional vlan IDs", "type": "array", "items": { - "$ref": "#/components/schemas/IpNet" + "$ref": "#/components/schemas/UplinkAddressConfig" } }, "autoneg": { @@ -901,7 +901,7 @@ "description": "Initial rack network configuration", "allOf": [ { - "$ref": "#/components/schemas/RackNetworkConfigV1" + "$ref": "#/components/schemas/RackNetworkConfigV2" } ] }, @@ -934,7 +934,7 @@ "recovery_silo" ] }, - "RackNetworkConfigV1": { + "RackNetworkConfigV2": { "description": "Initial network configuration", "type": "object", "properties": { @@ -967,7 +967,7 @@ "description": "Uplinks for connecting the rack to external networks", "type": "array", "items": { - "$ref": "#/components/schemas/PortConfigV1" + "$ref": "#/components/schemas/PortConfigV2" } }, "rack_subnet": { @@ -1225,6 +1225,24 @@ } ] }, + "UplinkAddressConfig": { + "type": "object", + "properties": { + "address": { + "$ref": "#/components/schemas/IpNet" + }, + "vlan_id": { + "nullable": true, + "description": "The VLAN id (if any) associated with this address.", + "type": "integer", + "format": "uint16", + "minimum": 0 + } + }, + "required": [ + "address" + ] + }, "UserId": { "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.\n\n
JSON schema\n\n```json { \"title\": \"A name unique within the parent collection\", \"description\": \"Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.\", \"type\": \"string\", \"maxLength\": 63, \"minLength\": 1, \"pattern\": \"^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$\" } ```
", "type": "string" diff --git a/openapi/cockroach-admin.json b/openapi/cockroach-admin.json new file mode 100644 index 0000000000..a46b0014a1 --- /dev/null +++ b/openapi/cockroach-admin.json @@ -0,0 +1,181 @@ +{ + "openapi": "3.0.3", + "info": { + "title": "Oxide CockroachDb Cluster Admin API", + "description": "API for interacting with the Oxide control plane's CockroachDb cluster", + "contact": { + "url": "https://oxide.computer", + "email": "api@oxide.computer" + }, + "version": "0.0.1" + }, + "paths": { + "/node/id": { + "get": { + "summary": "Get the CockroachDB node ID of the local cockroach instance.", + "operationId": "node_id", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/NodeId" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/node/status": { + "get": { + "summary": "Get the status of all nodes in the CRDB cluster", + "operationId": "node_status", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ClusterNodeStatus" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + } + }, + "components": { + "schemas": { + "ClusterNodeStatus": { + "type": "object", + "properties": { + "all_nodes": { + "type": "array", + "items": { + "$ref": "#/components/schemas/NodeStatus" + } + } + }, + "required": [ + "all_nodes" + ] + }, + "Error": { + "description": "Error information from a response.", + "type": "object", + "properties": { + "error_code": { + "type": "string" + }, + "message": { + "type": "string" + }, + "request_id": { + "type": "string" + } + }, + "required": [ + "message", + "request_id" + ] + }, + "NodeId": { + "description": "CockroachDB Node ID", + "type": "object", + "properties": { + "node_id": { + "type": "string" + }, + "zone_id": { + "description": "The ID of this Omicron zone.\n\nThis is included to ensure correctness even if a socket address on a sled is reused for a different zone; if our caller is trying to determine the node ID for a particular Omicron CockroachDB zone, they'll contact us by socket address. 
We include our zone ID in the response for their confirmation that we are the zone they intended to contact.", + "allOf": [ + { + "$ref": "#/components/schemas/TypedUuidForOmicronZoneKind" + } + ] + } + }, + "required": [ + "node_id", + "zone_id" + ] + }, + "NodeStatus": { + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "build": { + "type": "string" + }, + "is_available": { + "type": "boolean" + }, + "is_live": { + "type": "boolean" + }, + "locality": { + "type": "string" + }, + "node_id": { + "type": "string" + }, + "sql_address": { + "type": "string" + }, + "started_at": { + "type": "string", + "format": "date-time" + }, + "updated_at": { + "type": "string", + "format": "date-time" + } + }, + "required": [ + "address", + "build", + "is_available", + "is_live", + "locality", + "node_id", + "sql_address", + "started_at", + "updated_at" + ] + }, + "TypedUuidForOmicronZoneKind": { + "type": "string", + "format": "uuid" + } + }, + "responses": { + "Error": { + "description": "Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } + } + } +} diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 637334483d..b27dc6ce00 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -3428,6 +3428,88 @@ "minLength": 5, "maxLength": 17 }, + "MigrationRole": { + "oneOf": [ + { + "description": "This update concerns the source VMM of a migration.", + "type": "string", + "enum": [ + "source" + ] + }, + { + "description": "This update concerns the target VMM of a migration.", + "type": "string", + "enum": [ + "target" + ] + } + ] + }, + "MigrationRuntimeState": { + "description": "An update from a sled regarding the state of a migration, indicating the role of the VMM whose migration state was updated.", + "type": "object", + "properties": { + "gen": { + "$ref": "#/components/schemas/Generation" + }, + "migration_id": { + "type": "string", + "format": "uuid" + }, + "role": { + "$ref": "#/components/schemas/MigrationRole" + }, + "state": { + "$ref": "#/components/schemas/MigrationState" + }, + "time_updated": { + "description": "Timestamp for the migration state update.", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "gen", + "migration_id", + "role", + "state", + "time_updated" + ] + }, + "MigrationState": { + "description": "The state of an instance's live migration.", + "oneOf": [ + { + "description": "The migration has not started for this VMM.", + "type": "string", + "enum": [ + "pending" + ] + }, + { + "description": "The migration is in progress.", + "type": "string", + "enum": [ + "in_progress" + ] + }, + { + "description": "The migration has failed.", + "type": "string", + "enum": [ + "failed" + ] + }, + { + "description": "The migration has completed.", + "type": "string", + "enum": [ + "completed" + ] + } + ] + }, "Name": { "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. 
Names cannot be a UUID though they may contain a UUID.", @@ -3724,14 +3806,14 @@ "vendor" ] }, - "PortConfigV1": { + "PortConfigV2": { "type": "object", "properties": { "addresses": { - "description": "This port's addresses.", + "description": "This port's addresses and optional vlan IDs", "type": "array", "items": { - "$ref": "#/components/schemas/IpNet" + "$ref": "#/components/schemas/UplinkAddressConfig" } }, "autoneg": { @@ -4048,7 +4130,7 @@ "description": "Initial rack network configuration", "allOf": [ { - "$ref": "#/components/schemas/RackNetworkConfigV1" + "$ref": "#/components/schemas/RackNetworkConfigV2" } ] }, @@ -4083,7 +4165,7 @@ "zpools" ] }, - "RackNetworkConfigV1": { + "RackNetworkConfigV2": { "description": "Initial network configuration", "type": "object", "properties": { @@ -4116,7 +4198,7 @@ "description": "Uplinks for connecting the rack to external networks", "type": "array", "items": { - "$ref": "#/components/schemas/PortConfigV1" + "$ref": "#/components/schemas/PortConfigV2" } }, "rack_subnet": { @@ -4572,6 +4654,15 @@ } ] }, + "migration_state": { + "nullable": true, + "description": "The current state of any in-progress migration for this instance, as understood by this sled.", + "allOf": [ + { + "$ref": "#/components/schemas/MigrationRuntimeState" + } + ] + }, "propolis_id": { "description": "The ID of the VMM whose state is being reported.", "type": "string", @@ -4890,6 +4981,24 @@ "items" ] }, + "UplinkAddressConfig": { + "type": "object", + "properties": { + "address": { + "$ref": "#/components/schemas/IpNet" + }, + "vlan_id": { + "nullable": true, + "description": "The VLAN id (if any) associated with this address.", + "type": "integer", + "format": "uint16", + "minimum": 0 + } + }, + "required": [ + "address" + ] + }, "UpstairsRepairType": { "type": "string", "enum": [ diff --git a/openapi/nexus.json b/openapi/nexus.json index a0789aecde..01ec9aeb56 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -8996,6 +8996,13 @@ "$ref": "#/components/schemas/NameOrId" } ] + }, + "vlan_id": { + "nullable": true, + "description": "Optional VLAN ID for this address", + "type": "integer", + "format": "uint16", + "minimum": 0 } }, "required": [ @@ -17282,6 +17289,13 @@ "description": "The port settings object this address configuration belongs to.", "type": "string", "format": "uuid" + }, + "vlan_id": { + "nullable": true, + "description": "An optional VLAN ID", + "type": "integer", + "format": "uint16", + "minimum": 0 } }, "required": [ diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 68513345e2..25e8d1c5da 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -2613,7 +2613,7 @@ "nullable": true, "allOf": [ { - "$ref": "#/components/schemas/RackNetworkConfigV1" + "$ref": "#/components/schemas/RackNetworkConfigV2" } ] } @@ -2707,10 +2707,10 @@ "type": "object", "properties": { "addrs": { - "description": "IP Address and prefix (e.g., `192.168.0.1/16`) to apply to switchport (must be in infra_ip pool)", + "description": "IP Address and prefix (e.g., `192.168.0.1/16`) to apply to switchport (must be in infra_ip pool). 
May also include an optional VLAN ID.", "type": "array", "items": { - "$ref": "#/components/schemas/IpNet" + "$ref": "#/components/schemas/UplinkAddressConfig" } }, "port": { @@ -3406,6 +3406,88 @@ "minLength": 5, "maxLength": 17 }, + "MigrationRole": { + "oneOf": [ + { + "description": "This update concerns the source VMM of a migration.", + "type": "string", + "enum": [ + "source" + ] + }, + { + "description": "This update concerns the target VMM of a migration.", + "type": "string", + "enum": [ + "target" + ] + } + ] + }, + "MigrationRuntimeState": { + "description": "An update from a sled regarding the state of a migration, indicating the role of the VMM whose migration state was updated.", + "type": "object", + "properties": { + "gen": { + "$ref": "#/components/schemas/Generation" + }, + "migration_id": { + "type": "string", + "format": "uuid" + }, + "role": { + "$ref": "#/components/schemas/MigrationRole" + }, + "state": { + "$ref": "#/components/schemas/MigrationState" + }, + "time_updated": { + "description": "Timestamp for the migration state update.", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "gen", + "migration_id", + "role", + "state", + "time_updated" + ] + }, + "MigrationState": { + "description": "The state of an instance's live migration.", + "oneOf": [ + { + "description": "The migration has not started for this VMM.", + "type": "string", + "enum": [ + "pending" + ] + }, + { + "description": "The migration is in progress.", + "type": "string", + "enum": [ + "in_progress" + ] + }, + { + "description": "The migration has failed.", + "type": "string", + "enum": [ + "failed" + ] + }, + { + "description": "The migration has completed.", + "type": "string", + "enum": [ + "completed" + ] + } + ] + }, "Name": { "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. 
Names cannot be a UUID though they may contain a UUID.", @@ -3974,14 +4056,14 @@ "zones" ] }, - "PortConfigV1": { + "PortConfigV2": { "type": "object", "properties": { "addresses": { - "description": "This port's addresses.", + "description": "This port's addresses and optional vlan IDs", "type": "array", "items": { - "$ref": "#/components/schemas/IpNet" + "$ref": "#/components/schemas/UplinkAddressConfig" } }, "autoneg": { @@ -4094,7 +4176,7 @@ "minItems": 2, "maxItems": 2 }, - "RackNetworkConfigV1": { + "RackNetworkConfigV2": { "description": "Initial network configuration", "type": "object", "properties": { @@ -4127,7 +4209,7 @@ "description": "Uplinks for connecting the rack to external networks", "type": "array", "items": { - "$ref": "#/components/schemas/PortConfigV1" + "$ref": "#/components/schemas/PortConfigV2" } }, "rack_subnet": { @@ -4187,6 +4269,15 @@ } ] }, + "migration_state": { + "nullable": true, + "description": "The current state of any in-progress migration for this instance, as understood by this sled.", + "allOf": [ + { + "$ref": "#/components/schemas/MigrationRuntimeState" + } + ] + }, "propolis_id": { "description": "The ID of the VMM whose state is being reported.", "type": "string", @@ -4443,6 +4534,24 @@ "version" ] }, + "UplinkAddressConfig": { + "type": "object", + "properties": { + "address": { + "$ref": "#/components/schemas/IpNet" + }, + "vlan_id": { + "nullable": true, + "description": "The VLAN id (if any) associated with this address.", + "type": "integer", + "format": "uint16", + "minimum": 0 + } + }, + "required": [ + "address" + ] + }, "VirtualNetworkInterfaceHost": { "description": "A mapping from a virtual NIC to a physical host", "type": "object", diff --git a/openapi/wicketd.json b/openapi/wicketd.json index edef5b9813..df13340334 100644 --- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -4999,6 +4999,24 @@ } ] }, + "UplinkAddressConfig": { + "type": "object", + "properties": { + "address": { + "$ref": "#/components/schemas/IpNet" + }, + "vlan_id": { + "nullable": true, + "description": "The VLAN id (if any) associated with this address.", + "type": "integer", + "format": "uint16", + "minimum": 0 + } + }, + "required": [ + "address" + ] + }, "UserSpecifiedBgpPeerConfig": { "description": "User-specified version of [`BgpPeerConfig`].\n\nThis is similar to [`BgpPeerConfig`], except it doesn't have the sensitive `md5_auth_key` parameter, instead requiring that the user provide the key separately.\n\n[`BgpPeerConfig`]: omicron_common::api::internal::shared::BgpPeerConfig", "type": "object", @@ -5144,13 +5162,13 @@ } }, "UserSpecifiedPortConfig": { - "description": "User-specified version of [`PortConfigV1`].\n\nAll of [`PortConfigV1`] is user-specified. But we expect the port name to be a key, rather than a field as in [`PortConfigV1`]. So this has all of the fields other than the port name.\n\n[`PortConfigV1`]: omicron_common::api::internal::shared::PortConfigV1", + "description": "User-specified version of [`PortConfigV2`].\n\nAll of [`PortConfigV2`] is user-specified. But we expect the port name to be a key, rather than a field as in [`PortConfigV2`]. 
So this has all of the fields other than the port name.\n\n[`PortConfigV2`]: omicron_common::api::internal::shared::PortConfigV2", "type": "object", "properties": { "addresses": { "type": "array", "items": { - "$ref": "#/components/schemas/IpNet" + "$ref": "#/components/schemas/UplinkAddressConfig" } }, "autoneg": { diff --git a/package-manifest.toml b/package-manifest.toml index e1dfd1f4d6..8e27588be3 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -548,10 +548,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "5630887d0373857f77cb264f84aa19bdec720ce3" +source.commit = "c67f6ab49e0e8a49bcf84542500fceb6b9417ca4" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm-gz.sha256.txt -source.sha256 = "28965f303a64f49cf5b83322babe1e0ceb4cfe33fb2df8c8d452d8c3ec02d933" +source.sha256 = "33e3b09408551be860debac08de50a840909d4e6c6bed9aecaef63fe8bef2d69" output.type = "tarball" [package.mg-ddm] @@ -564,10 +564,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "5630887d0373857f77cb264f84aa19bdec720ce3" +source.commit = "c67f6ab49e0e8a49bcf84542500fceb6b9417ca4" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "6fa53be6fc5ad6273e0ca5e969c882ea40c473722415b060dfea420e962d4f8e" +source.sha256 = "81674afa17873f84bb49a800c8511938d1c2e871026cbb17e5eed2b645b1eb55" output.type = "zone" output.intermediate_only = true @@ -579,10 +579,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "5630887d0373857f77cb264f84aa19bdec720ce3" +source.commit = "c67f6ab49e0e8a49bcf84542500fceb6b9417ca4" # The SHA256 digest is automatically posted to: -# https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mgd.sha256.txt -source.sha256 = "6ae4bc3b332e91706c1c6633a7fc218aac65b7feff5643ee2dbbe79b841e0df3" +# https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt +source.sha256 = "5e8bdd6774ef6041189621306577d0e0d174d596d216e53740ce6f035316c5af" output.type = "zone" output.intermediate_only = true @@ -628,8 +628,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "6334bf74fa21790c15f1c4e494ea2ec0edd1c83c" -source.sha256 = "5929f9abf0daf4bbf17d835e5d69fc842b9617b312fb5644fa99daf785203700" +source.commit = "861c00bacbdf7a6e22471f0dabd8f926409b5292" +source.sha256 = "1db849892c60b22f600fb081d4b0145d8ecd98acce9fad3094499a5d2159d001" output.type = "zone" output.intermediate_only = true @@ -653,8 +653,8 @@ only_for_targets.image = "standard" # 2. 
Copy the output zone image from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "6334bf74fa21790c15f1c4e494ea2ec0edd1c83c" -source.sha256 = "0294a1911212c4764d1034b5e0ca00cc9dfc51df482a9f6e5547b191b4481ad8" +source.commit = "861c00bacbdf7a6e22471f0dabd8f926409b5292" +source.sha256 = "00b2b9372145bc8974f3c75ba7a59d8f2a8178c67cc1869086d29c7f3a2deb36" output.type = "zone" output.intermediate_only = true @@ -671,8 +671,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out/dendrite-softnpu.tar.gz source.type = "prebuilt" source.repo = "dendrite" -source.commit = "6334bf74fa21790c15f1c4e494ea2ec0edd1c83c" -source.sha256 = "1a188da01dccf565058145b43573a549a2eb4d71fe8800170152b823af27a010" +source.commit = "861c00bacbdf7a6e22471f0dabd8f926409b5292" +source.sha256 = "b0b62b22c0e781edb0790b8730b99bb6e635c95ad3e83c2afbb2b15956153d66" output.type = "zone" output.intermediate_only = true diff --git a/passwords/Cargo.toml b/passwords/Cargo.toml index eda3a020dc..1a44255ee4 100644 --- a/passwords/Cargo.toml +++ b/passwords/Cargo.toml @@ -17,8 +17,13 @@ serde_with.workspace = true omicron-workspace-hack.workspace = true [dev-dependencies] +# For tests argon2alt = { package = "rust-argon2", version = "2.1.0" } +# For benchmark criterion.workspace = true +# For the "argon2" example +anyhow.workspace = true +clap.workspace = true [[bench]] name = "argon2" diff --git a/passwords/examples/argon2.rs b/passwords/examples/argon2.rs new file mode 100644 index 0000000000..3761f33d4f --- /dev/null +++ b/passwords/examples/argon2.rs @@ -0,0 +1,95 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Command-line tool for playing with Argon2 parameters + +use anyhow::Context; +use argon2::Algorithm; +use argon2::Argon2; +use argon2::Params; +use argon2::Version; +use clap::Parser; +use omicron_passwords::Hasher; +use omicron_passwords::Password; +use omicron_passwords::ARGON2_COST_M_KIB; +use omicron_passwords::ARGON2_COST_P; +use omicron_passwords::ARGON2_COST_T; + +/// Quickly check performance of Argon2 hashing with given parameter values +/// +/// For a bit more stats, modify the associated benchmark to use your values and +/// run that. This tool is aimed at more quickly iterating on which values are +/// worth benchmarking. 
+#[derive(Parser)] +struct Cli { + /// iterations + #[arg(long, default_value_t = 5)] + count: u128, + /// input (password) to hash + #[arg(long, default_value_t = String::from("hunter2"))] + input: String, + /// argon2 parameter 'm': memory size in 1 KiB blocks + #[arg(long, default_value_t = ARGON2_COST_M_KIB)] + m_cost: u32, + /// argon2 parameter 'p': degree of parallelism + #[arg(long, default_value_t = ARGON2_COST_P)] + p_cost: u32, + /// argon2 parameter 't': number of iterations + #[arg(long, default_value_t = ARGON2_COST_T)] + t_cost: u32, +} + +fn main() -> anyhow::Result<()> { + let cli = Cli::parse(); + const ALGORITHM: Algorithm = Algorithm::Argon2id; + let version = Version::default(); + const OUTPUT_SIZE_OVERRIDE: Option = None; + let params = + Params::new(cli.m_cost, cli.t_cost, cli.p_cost, OUTPUT_SIZE_OVERRIDE) + .context("unsupported Argon2 parameters")?; + let argon = Argon2::new(ALGORITHM, version, params); + let mut hasher = Hasher::new(argon.clone(), rand::thread_rng()); + let password = Password::new(&cli.input).unwrap(); + let password_hash = hasher.create_password(&password).unwrap(); + + println!("algorithm: {} version {:?}", ALGORITHM, version); + println!(" 'm' cost: {} KiB", cli.m_cost); + println!(" 'p' cost: {} (degree of parallelism)", cli.p_cost); + println!(" 't' cost: {} (number of iterations)", cli.t_cost); + println!( + "output size override: {}", + OUTPUT_SIZE_OVERRIDE + .map(|s| s.to_string()) + .as_deref() + .unwrap_or("none") + ); + println!("trials: {}", cli.count); + + if cfg!(debug_assertions) { + eprintln!( + "WARN: running a debug binary \ + (performance numbers are not meaningful)" + ); + } + + let start = std::time::Instant::now(); + for i in 0..cli.count { + eprint!("iter {} ... ", i + 1); + let iter_start = std::time::Instant::now(); + hasher.verify_password(&password, &password_hash).unwrap(); + let iter_elapsed = iter_start.elapsed(); + eprintln!("{} ms", iter_elapsed.as_millis()); + } + + let total_elapsed = start.elapsed(); + println!( + "completed {} iteration{} in {} ms (average: {} ms per iteration)", + cli.count, + if cli.count == 1 { "" } else { "s" }, + total_elapsed.as_millis(), + total_elapsed.as_millis() / cli.count, + ); + + Ok(()) +} diff --git a/passwords/src/lib.rs b/passwords/src/lib.rs index c7e9f1a118..7ab523847a 100644 --- a/passwords/src/lib.rs +++ b/passwords/src/lib.rs @@ -33,9 +33,9 @@ use thiserror::Error; // values (provided by the `argon2` crate) for the version, salt length, and // output length. const ARGON2_ALGORITHM: argon2::Algorithm = argon2::Algorithm::Argon2id; -const ARGON2_COST_M_KIB: u32 = 96 * 1024; -const ARGON2_COST_T: u32 = 13; -const ARGON2_COST_P: u32 = 1; +pub const ARGON2_COST_M_KIB: u32 = 96 * 1024; +pub const ARGON2_COST_T: u32 = 13; +pub const ARGON2_COST_P: u32 = 1; // Maximum password length, intended to prevent denial of service attacks. See // CVE-2013-1443, CVE-2014-9016, and CVE-2014-9034 for examples. diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 7c513cfbad..98a63622e8 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,7 +1,4 @@ [toolchain] -# NOTE: This toolchain is also specified in various jobs in -# .github/buildomat/jobs/. If you update it here, update those files too. -# # We choose a specific toolchain (rather than "stable") for repeatability. The # intent is to keep this up-to-date with recently-released stable Rust. 
channel = "1.78.0" diff --git a/schema/crdb/add-cockroach-zone-id-to-node-id/up.sql b/schema/crdb/add-cockroach-zone-id-to-node-id/up.sql new file mode 100644 index 0000000000..fdb7d00082 --- /dev/null +++ b/schema/crdb/add-cockroach-zone-id-to-node-id/up.sql @@ -0,0 +1,5 @@ +CREATE TABLE IF NOT EXISTS omicron.public.cockroachdb_zone_id_to_node_id ( + omicron_zone_id UUID NOT NULL UNIQUE, + crdb_node_id TEXT NOT NULL UNIQUE, + PRIMARY KEY (omicron_zone_id, crdb_node_id) +); diff --git a/schema/crdb/add-migration-table/up01.sql b/schema/crdb/add-migration-table/up01.sql new file mode 100644 index 0000000000..7659d7dca3 --- /dev/null +++ b/schema/crdb/add-migration-table/up01.sql @@ -0,0 +1,6 @@ +CREATE TYPE IF NOT EXISTS omicron.public.migration_state AS ENUM ( + 'pending', + 'in_progress', + 'failed', + 'completed' +); diff --git a/schema/crdb/add-migration-table/up02.sql b/schema/crdb/add-migration-table/up02.sql new file mode 100644 index 0000000000..9e0654c1bc --- /dev/null +++ b/schema/crdb/add-migration-table/up02.sql @@ -0,0 +1,13 @@ +CREATE TABLE IF NOT EXISTS omicron.public.migration ( + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ, + source_state omicron.public.migration_state NOT NULL, + source_propolis_id UUID NOT NULL, + source_gen INT8 NOT NULL DEFAULT 1, + time_source_updated TIMESTAMPTZ, + target_state omicron.public.migration_state NOT NULL, + target_propolis_id UUID NOT NULL, + target_gen INT8 NOT NULL DEFAULT 1, + time_target_updated TIMESTAMPTZ +); diff --git a/schema/crdb/add-vlan-to-uplink/up.sql b/schema/crdb/add-vlan-to-uplink/up.sql new file mode 100644 index 0000000000..9ffa043d32 --- /dev/null +++ b/schema/crdb/add-vlan-to-uplink/up.sql @@ -0,0 +1 @@ + ALTER TABLE omicron.public.switch_port_settings_address_config ADD COLUMN IF NOT EXISTS vlan_id INT4 DEFAULT NULL; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index b0774817f0..4fd1930d33 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -2763,6 +2763,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.switch_port_settings_address_config ( rsvd_address_lot_block_id UUID NOT NULL, address INET, interface_name TEXT, + vlan_id INT4, /* TODO https://github.com/oxidecomputer/omicron/issues/3013 */ PRIMARY KEY (port_settings_id, address, interface_name) @@ -3492,6 +3493,20 @@ CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zone_nic ( PRIMARY KEY (blueprint_id, id) ); +-- Mapping of Omicron zone ID to CockroachDB node ID. This isn't directly used +-- by the blueprint tables above, but is used by the more general Reconfigurator +-- system along with them (e.g., to decommission expunged CRDB nodes). +CREATE TABLE IF NOT EXISTS omicron.public.cockroachdb_zone_id_to_node_id ( + omicron_zone_id UUID NOT NULL UNIQUE, + crdb_node_id TEXT NOT NULL UNIQUE, + + -- We require the pair to be unique, and also require each column to be + -- unique: there should only be one entry for a given zone ID, one entry for + -- a given node ID, and we need a unique requirement on the pair (via this + -- primary key) to support `ON CONFLICT DO NOTHING` idempotent inserts. + PRIMARY KEY (omicron_zone_id, crdb_node_id) +); + /*******************************************************************/ /* @@ -4064,6 +4079,55 @@ VALUES ( ON CONFLICT (id) DO NOTHING; +CREATE TYPE IF NOT EXISTS omicron.public.migration_state AS ENUM ( + 'pending', + 'in_progress', + 'failed', + 'completed' +); + +-- A table of the states of current migrations. 
+CREATE TABLE IF NOT EXISTS omicron.public.migration ( + id UUID PRIMARY KEY, + + /* The time this migration record was created. */ + time_created TIMESTAMPTZ NOT NULL, + + /* The time this migration record was deleted. */ + time_deleted TIMESTAMPTZ, + + /* The state of the migration source */ + source_state omicron.public.migration_state NOT NULL, + + /* The ID of the migration source Propolis */ + source_propolis_id UUID NOT NULL, + + /* Generation number owned and incremented by the source sled-agent */ + source_gen INT8 NOT NULL DEFAULT 1, + + /* Timestamp of when the source field was last updated. + * + * This is provided by the sled-agent when publishing a migration state + * update. + */ + time_source_updated TIMESTAMPTZ, + + /* The state of the migration target */ + target_state omicron.public.migration_state NOT NULL, + + /* The ID of the migration target Propolis */ + target_propolis_id UUID NOT NULL, + + /* Generation number owned and incremented by the target sled-agent */ + target_gen INT8 NOT NULL DEFAULT 1, + + /* Timestamp of when the source field was last updated. + * + * This is provided by the sled-agent when publishing a migration state + * update. + */ + time_target_updated TIMESTAMPTZ +); /* * Keep this at the end of file so that the database does not contain a version @@ -4076,7 +4140,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '72.0.0', NULL) + (TRUE, NOW(), NOW(), '75.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json index 5971235634..04ba5d8d31 100644 --- a/schema/rss-sled-plan.json +++ b/schema/rss-sled-plan.json @@ -609,7 +609,7 @@ "description": "Password hashes must be in PHC (Password Hashing Competition) string format. Passwords must be hashed with Argon2id. Password hashes may be rejected if the parameters appear not to be secure enough.", "type": "string" }, - "PortConfigV1": { + "PortConfigV2": { "type": "object", "required": [ "addresses", @@ -622,10 +622,10 @@ ], "properties": { "addresses": { - "description": "This port's addresses.", + "description": "This port's addresses and optional vlan IDs", "type": "array", "items": { - "$ref": "#/definitions/IpNet" + "$ref": "#/definitions/UplinkAddressConfig" } }, "autoneg": { @@ -780,7 +780,7 @@ "description": "Initial rack network configuration", "allOf": [ { - "$ref": "#/definitions/RackNetworkConfigV1" + "$ref": "#/definitions/RackNetworkConfigV2" } ] }, @@ -804,7 +804,7 @@ } } }, - "RackNetworkConfigV1": { + "RackNetworkConfigV2": { "description": "Initial network configuration", "type": "object", "required": [ @@ -844,7 +844,7 @@ "description": "Uplinks for connecting the rack to external networks", "type": "array", "items": { - "$ref": "#/definitions/PortConfigV1" + "$ref": "#/definitions/PortConfigV2" } }, "rack_subnet": { @@ -986,6 +986,26 @@ } ] }, + "UplinkAddressConfig": { + "type": "object", + "required": [ + "address" + ], + "properties": { + "address": { + "$ref": "#/definitions/IpNet" + }, + "vlan_id": { + "description": "The VLAN id (if any) associated with this address.", + "type": [ + "integer", + "null" + ], + "format": "uint16", + "minimum": 0.0 + } + } + }, "UserId": { "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.\n\n
JSON schema\n\n```json { \"title\": \"A name unique within the parent collection\", \"description\": \"Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.\", \"type\": \"string\", \"maxLength\": 63, \"minLength\": 1, \"pattern\": \"^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$\" } ```
", "type": "string" diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index bd12bb745a..61cfd8485d 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -24,8 +24,8 @@ use omicron_common::address::DENDRITE_PORT; use omicron_common::address::{MGD_PORT, MGS_PORT}; use omicron_common::api::external::{BfdMode, ImportExportPolicy}; use omicron_common::api::internal::shared::{ - BgpConfig, PortConfigV1, PortFec, PortSpeed, RackNetworkConfig, - RackNetworkConfigV1, SwitchLocation, UplinkConfig, + BgpConfig, PortConfig, PortConfigV2, PortFec, PortSpeed, RackNetworkConfig, + RackNetworkConfigV2, RouteConfig, SwitchLocation, UplinkAddressConfig, }; use omicron_common::backoff::{ retry_notify, retry_policy_local, BackoffError, ExponentialBackoff, @@ -33,7 +33,7 @@ use omicron_common::backoff::{ }; use omicron_common::OMICRON_DPD_TAG; use omicron_ddm_admin_client::DdmError; -use oxnet::{IpNet, Ipv6Net}; +use oxnet::{IpNet, Ipv4Net, Ipv6Net}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use slog::Logger; @@ -360,7 +360,7 @@ impl<'a> EarlyNetworkSetup<'a> { &mut self, rack_network_config: &RackNetworkConfig, switch_zone_underlay_ip: Ipv6Addr, - ) -> Result, EarlyNetworkSetupError> { + ) -> Result, EarlyNetworkSetupError> { // First, we have to know which switch we are: ask MGS. info!( self.log, @@ -646,7 +646,7 @@ impl<'a> EarlyNetworkSetup<'a> { fn build_port_config( &self, - port_config: &PortConfigV1, + port_config: &PortConfig, ) -> Result<(PortSettings, PortId), EarlyNetworkSetupError> { info!(self.log, "Building Port Configuration"); let mut dpd_port_settings = PortSettings { links: HashMap::new() }; @@ -726,7 +726,7 @@ fn retry_policy_switch_mapping() -> ExponentialBackoff { // The first production version of the `EarlyNetworkConfig`. // // If this version is in the bootstore than we need to convert it to -// `EarlyNetworkConfigV1`. +// `EarlyNetworkConfigV2`. // // Once we do this for all customers that have initialized racks with the // old version we can go ahead and remove this type and its conversion code @@ -748,6 +748,29 @@ struct EarlyNetworkConfigV0 { pub rack_network_config: Option, } +// The second production version of the `EarlyNetworkConfig`. +// +// If this version is in the bootstore than we need to convert it to +// `EarlyNetworkConfigV2`. +// +// Once we do this for all customers that have initialized racks with the +// old version we can go ahead and remove this type and its conversion code +// altogether. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +struct EarlyNetworkConfigV1 { + // The current generation number of data as stored in CRDB. + // The initial generation is set during RSS time and then only mutated + // by Nexus. + pub generation: u64, + + // Which version of the data structure do we have. This is to help with + // deserialization and conversion in future updates. + pub schema_version: u32, + + // The actual configuration details + pub body: EarlyNetworkConfigBodyV1, +} + /// Network configuration required to bring up the control plane /// /// The fields in this structure are those from @@ -776,9 +799,9 @@ impl EarlyNetworkConfig { log: &Logger, config: &bootstore::NetworkConfig, ) -> Result { - // Try to deserialize the latest version of the data structure (v1). If + // Try to deserialize the latest version of the data structure (v2). If // that succeeds we are done. 
- let v1_error = + let v2_error = match serde_json::from_slice::<EarlyNetworkConfig>(&config.blob) { Ok(val) => return Ok(val), Err(error) => { @@ -787,24 +810,49 @@ impl EarlyNetworkConfig { warn!( log, "Failed to deserialize EarlyNetworkConfig \ - as v1, trying next as v0: {}", + as v2, trying next as v1: {}", error, ); error } }; + match serde_json::from_slice::<EarlyNetworkConfigV1>(&config.blob) { + Ok(val) => { + // Convert from v1 to v2 + return Ok(EarlyNetworkConfig { + generation: val.generation, + schema_version: 2, + body: EarlyNetworkConfigBody { + ntp_servers: val.body.ntp_servers, + rack_network_config: val.body.rack_network_config.map( + |v1_config| RackNetworkConfigV1::to_v2(v1_config), + ), + }, + }); + } + Err(error) => { + // Log this error. + warn!( + log, + "Failed to deserialize EarlyNetworkConfig \ + as v1, trying next as v0: {}", + error + ); + } + }; + match serde_json::from_slice::<EarlyNetworkConfigV0>(&config.blob) { Ok(val) => { - // Convert from v0 to v1 + // Convert from v0 to v2 return Ok(EarlyNetworkConfig { generation: val.generation, - schema_version: 1, + schema_version: 2, body: EarlyNetworkConfigBody { ntp_servers: val.ntp_servers, rack_network_config: val.rack_network_config.map( |v0_config| { - RackNetworkConfigV0::to_v1( + RackNetworkConfigV0::to_v2( val.rack_subnet, v0_config, ) @@ -822,9 +870,9 @@ impl EarlyNetworkConfig { } }; - // Return the v1 error preferentially over the v0 error as it's more - // likely to be useful. - Err(v1_error) + // Return the v2 error preferentially over subsequent errors as it's + // more likely to be useful. + Err(v2_error) } } @@ -857,13 +905,22 @@ impl From<EarlyNetworkConfig> for bootstore::NetworkConfig { } } +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +struct EarlyNetworkConfigBodyV1 { + /// The external NTP server addresses. + pub ntp_servers: Vec<String>, + + // Rack network configuration as delivered from RSS or Nexus + pub rack_network_config: Option<RackNetworkConfigV1>, +} + /// Deprecated, use `RackNetworkConfig` instead. Cannot actually deprecate due to /// /// /// Our first version of `RackNetworkConfig`. If this exists in the bootstore, we /// upgrade out of it into `RackNetworkConfigV1` or later versions if possible. #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] -pub struct RackNetworkConfigV0 { +struct RackNetworkConfigV0 { // TODO: #3591 Consider making infra-ip ranges implicit for uplinks /// First ip address to be used for configuring network infrastructure pub infra_ip_first: Ipv4Addr, @@ -876,22 +933,22 @@ pub struct RackNetworkConfigV0 { impl RackNetworkConfigV0 { /// Convert from `RackNetworkConfigV0` to `RackNetworkConfigV1` /// - /// We cannot use `From<RackNetworkConfigV0>` for `RackNetworkConfigV1` + /// We cannot use `From<RackNetworkConfigV0>` for `RackNetworkConfigV2` /// because the `rack_subnet` field does not exist in `RackNetworkConfigV0` /// and must be passed in from the `EarlyNetworkConfigV0` struct which - /// contains the `RackNetworkConfivV0` struct. - pub fn to_v1( + /// contains the `RackNetworkConfigV0` struct. + pub fn to_v2( rack_subnet: Ipv6Addr, v0: RackNetworkConfigV0, - ) -> RackNetworkConfigV1 { - RackNetworkConfigV1 { + ) -> RackNetworkConfigV2 { + RackNetworkConfigV2 { rack_subnet: Ipv6Net::new(rack_subnet, 56).unwrap(), infra_ip_first: v0.infra_ip_first, infra_ip_last: v0.infra_ip_last, ports: v0 .uplinks .into_iter() - .map(|uplink| PortConfigV1::from(uplink)) + .map(|uplink| PortConfigV2::from(uplink)) .collect(), bgp: vec![], bfd: vec![], @@ -899,6 +956,137 @@ impl RackNetworkConfigV0 { } } +/// Deprecated, use PortConfigV2 instead. Cannot actually deprecate due to
+/// +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +struct PortConfigV1 { + /// The set of routes associated with this port. + pub routes: Vec<RouteConfig>, + /// This port's addresses. + pub addresses: Vec<IpNet>, + /// Switch the port belongs to. + pub switch: SwitchLocation, + /// Name of the port this config applies to. + pub port: String, + /// Port speed. + pub uplink_port_speed: PortSpeed, + /// Port forward error correction type. + pub uplink_port_fec: PortFec, + /// BGP peers on this port + pub bgp_peers: Vec<BgpPeerConfig>, + /// Whether or not to set autonegotiation + #[serde(default)] + pub autoneg: bool, +} + +impl From<PortConfigV1> for PortConfigV2 { + fn from(value: PortConfigV1) -> Self { + PortConfigV2 { + routes: value.routes.clone(), + addresses: value + .addresses + .iter() + .map(|a| UplinkAddressConfig { address: *a, vlan_id: None }) + .collect(), + switch: value.switch, + port: value.port, + uplink_port_speed: value.uplink_port_speed, + uplink_port_fec: value.uplink_port_fec, + bgp_peers: vec![], + autoneg: false, + } + } +} + +/// Deprecated, use PortConfigV2 instead. Cannot actually deprecate due to +/// +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] +struct UplinkConfig { + /// Gateway address + pub gateway_ip: Ipv4Addr, + /// Switch to use for uplink + pub switch: SwitchLocation, + /// Switchport to use for external connectivity + pub uplink_port: String, + /// Speed for the Switchport + pub uplink_port_speed: PortSpeed, + /// Forward Error Correction setting for the uplink port + pub uplink_port_fec: PortFec, + /// IP Address and prefix (e.g., `192.168.0.1/16`) to apply to switchport + /// (must be in infra_ip pool) + pub uplink_cidr: Ipv4Net, + /// VLAN id to use for uplink + pub uplink_vid: Option<u16>, +} + +impl From<UplinkConfig> for PortConfigV2 { + fn from(value: UplinkConfig) -> Self { + PortConfigV2 { + routes: vec![RouteConfig { + destination: "0.0.0.0/0".parse().unwrap(), + nexthop: value.gateway_ip.into(), + vlan_id: value.uplink_vid, + }], + addresses: vec![UplinkAddressConfig { + address: value.uplink_cidr.into(), + vlan_id: value.uplink_vid, + }], + switch: value.switch, + port: value.uplink_port, + uplink_port_speed: value.uplink_port_speed, + uplink_port_fec: value.uplink_port_fec, + bgp_peers: vec![], + autoneg: false, + } + } +} + +/// Deprecated, use `RackNetworkConfig` instead. Cannot actually deprecate due to +/// +/// +/// Our second version of `RackNetworkConfig`. If this exists in the bootstore, +/// we upgrade out of it into `RackNetworkConfigV2` or later versions if +/// possible.
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +struct RackNetworkConfigV1 { + pub rack_subnet: Ipv6Net, + // TODO: #3591 Consider making infra-ip ranges implicit for uplinks + /// First ip address to be used for configuring network infrastructure + pub infra_ip_first: Ipv4Addr, + /// Last ip address to be used for configuring network infrastructure + pub infra_ip_last: Ipv4Addr, + /// Uplinks for connecting the rack to external networks + pub ports: Vec<PortConfigV1>, + /// BGP configurations for connecting the rack to external networks + pub bgp: Vec<BgpConfig>, + /// BFD configuration for connecting the rack to external networks + #[serde(default)] + pub bfd: Vec<BfdPeerConfig>, +} + +impl RackNetworkConfigV1 { + /// Convert from `RackNetworkConfigV1` to `RackNetworkConfigV2` + /// + /// Every field carries over directly; each `PortConfigV1` in `ports` is + /// upgraded to a `PortConfigV2`, whose addresses start out with no VLAN + /// IDs attached. + pub fn to_v2(v1: RackNetworkConfigV1) -> RackNetworkConfigV2 { + RackNetworkConfigV2 { + rack_subnet: v1.rack_subnet, + infra_ip_first: v1.infra_ip_first, + infra_ip_last: v1.infra_ip_last, + ports: v1 + .ports + .into_iter() + .map(|ports| PortConfigV2::from(ports)) + .collect(), + bgp: v1.bgp.clone(), + bfd: v1.bfd.clone(), + } + } +} + // The following two conversion functions translate the speed and fec types used // in the internal API to the types used in the dpd-client API. The conversion // is done here, rather than with "impl From" at the definition, to avoid a @@ -929,12 +1117,13 @@ fn convert_fec(fec: &PortFec) -> dpd_client::types::PortFec { mod tests { use super::*; use omicron_common::api::internal::shared::RouteConfig; + use omicron_common::api::internal::shared::UplinkAddressConfig; use omicron_test_utils::dev::test_setup_log; #[test] - fn serialized_early_network_config_v0_to_v1_conversion() { + fn serialized_early_network_config_v0_to_v2_conversion() { let logctx = test_setup_log( - "serialized_early_network_config_v0_to_v1_conversion", + "serialized_early_network_config_v0_to_v2_conversion", ); let v0 = EarlyNetworkConfigV0 { generation: 1, @@ -959,7 +1148,7 @@ mod tests { let bootstore_conf = bootstore::NetworkConfig { generation: 1, blob: v0_serialized }; - let v1 = EarlyNetworkConfig::deserialize_bootstore_config( + let v2 = EarlyNetworkConfig::deserialize_bootstore_config( &logctx.log, &bootstore_conf, ) @@ -968,20 +1157,23 @@ let uplink = v0_rack_network_config.uplinks[0].clone(); let expected = EarlyNetworkConfig { generation: 1, - schema_version: 1, + schema_version: 2, body: EarlyNetworkConfigBody { ntp_servers: v0.ntp_servers.clone(), - rack_network_config: Some(RackNetworkConfigV1 { + rack_network_config: Some(RackNetworkConfigV2 { rack_subnet: Ipv6Net::new(v0.rack_subnet, 56).unwrap(), infra_ip_first: v0_rack_network_config.infra_ip_first, infra_ip_last: v0_rack_network_config.infra_ip_last, - ports: vec![PortConfigV1 { + ports: vec![PortConfigV2 { routes: vec![RouteConfig { destination: "0.0.0.0/0".parse().unwrap(), nexthop: uplink.gateway_ip.into(), vlan_id: None, }], - addresses: vec![uplink.uplink_cidr.into()], + addresses: vec![UplinkAddressConfig { + address: uplink.uplink_cidr.into(), + vlan_id: None, + }], switch: uplink.switch, port: uplink.uplink_port, uplink_port_speed: uplink.uplink_port_speed, @@ -995,7 +1187,87 @@ }, }; - assert_eq!(expected, v1); + assert_eq!(expected, v2); + + logctx.cleanup_successful(); + } + +
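The struct and conversions above, and the V1-to-V2 test that follows, all hinge on one representational change: a port's `addresses` entries grow from bare IP prefixes into small objects that can carry a VLAN ID, which is why the schemas, bootstore blobs, and RSS configs later in this diff move from `"172.20.15.38/29"` to `{"address": "172.20.15.38/29"}`. A rough sketch of the two wire shapes, assuming `UplinkAddressConfig` keeps plain serde derives with public fields:

```rust
use omicron_common::api::internal::shared::UplinkAddressConfig;

fn address_shape_example() {
    // Old shape (PortConfigV1): addresses were bare prefixes.
    let old: Vec<oxnet::IpNet> =
        serde_json::from_str(r#"["172.20.15.38/29"]"#).unwrap();

    // New shape (PortConfigV2): each address is an object with an optional
    // VLAN ID; data upgraded from V1 gets `vlan_id: None`, matching the
    // From impls above.
    let new: Vec<UplinkAddressConfig> = serde_json::from_str(
        r#"[{"address": "172.20.15.38/29", "vlan_id": null}]"#,
    )
    .unwrap();

    assert_eq!(new[0].address, old[0]);
    assert_eq!(new[0].vlan_id, None);
}
```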
#[test] + fn serialized_early_network_config_v1_to_v2_conversion() { + let logctx = test_setup_log( + "serialized_early_network_config_v1_to_v2_conversion", + ); + + let v1 = EarlyNetworkConfigV1 { + generation: 1, + schema_version: 1, + body: EarlyNetworkConfigBodyV1 { + ntp_servers: Vec::new(), + rack_network_config: Some(RackNetworkConfigV1 { + rack_subnet: Ipv6Net::new(Ipv6Addr::UNSPECIFIED, 56) + .unwrap(), + infra_ip_first: Ipv4Addr::UNSPECIFIED, + infra_ip_last: Ipv4Addr::UNSPECIFIED, + ports: vec![PortConfigV1 { + routes: vec![RouteConfig { + destination: "0.0.0.0/0".parse().unwrap(), + nexthop: "192.168.0.2".parse().unwrap(), + vlan_id: None, + }], + addresses: vec!["192.168.0.1/16".parse().unwrap()], + switch: SwitchLocation::Switch0, + port: "Port0".to_string(), + uplink_port_speed: PortSpeed::Speed100G, + uplink_port_fec: PortFec::None, + bgp_peers: Vec::new(), + autoneg: false, + }], + bgp: Vec::new(), + bfd: Vec::new(), + }), + }, + }; + + let v1_serialized = serde_json::to_vec(&v1).unwrap(); + let bootstore_conf = + bootstore::NetworkConfig { generation: 1, blob: v1_serialized }; + + let v2 = EarlyNetworkConfig::deserialize_bootstore_config( + &logctx.log, + &bootstore_conf, + ) + .unwrap(); + let v1_rack_network_config = v1.body.rack_network_config.unwrap(); + let port = v1_rack_network_config.ports[0].clone(); + let expected = EarlyNetworkConfig { + generation: 1, + schema_version: 2, + body: EarlyNetworkConfigBody { + ntp_servers: v1.body.ntp_servers.clone(), + rack_network_config: Some(RackNetworkConfigV2 { + rack_subnet: v1_rack_network_config.rack_subnet, + infra_ip_first: v1_rack_network_config.infra_ip_first, + infra_ip_last: v1_rack_network_config.infra_ip_last, + ports: vec![PortConfigV2 { + routes: port.routes.clone(), + addresses: vec![UplinkAddressConfig { + address: port.addresses[0], + vlan_id: None, + }], + switch: port.switch, + port: port.port, + uplink_port_speed: port.uplink_port_speed, + uplink_port_fec: port.uplink_port_fec, + autoneg: false, + bgp_peers: vec![], + }], + bgp: vec![], + bfd: vec![], + }), + }, + }; + + assert_eq!(expected, v2); logctx.cleanup_successful(); } diff --git a/sled-agent/src/common/instance.rs b/sled-agent/src/common/instance.rs index 62af337c4c..b2c135fcf8 100644 --- a/sled-agent/src/common/instance.rs +++ b/sled-agent/src/common/instance.rs @@ -6,12 +6,14 @@ use crate::params::InstanceMigrationSourceParams; use chrono::{DateTime, Utc}; +use omicron_common::api::external::Generation; use omicron_common::api::internal::nexus::{ - InstanceRuntimeState, SledInstanceState, VmmRuntimeState, VmmState, + InstanceRuntimeState, MigrationRole, MigrationRuntimeState, MigrationState, + SledInstanceState, VmmRuntimeState, VmmState, }; use propolis_client::types::{ InstanceState as PropolisApiState, InstanceStateMonitorResponse, - MigrationState, + MigrationState as PropolisMigrationState, }; use uuid::Uuid; @@ -21,6 +23,7 @@ pub struct InstanceStates { instance: InstanceRuntimeState, vmm: VmmRuntimeState, propolis_id: Uuid, + migration: Option, } /// Newtype to allow conversion from Propolis API states (returned by the @@ -123,10 +126,10 @@ impl ObservedPropolisState { if this_id == propolis_migration.migration_id => { match propolis_migration.state { - MigrationState::Finish => { + PropolisMigrationState::Finish => { ObservedMigrationStatus::Succeeded } - MigrationState::Error => { + PropolisMigrationState::Error => { ObservedMigrationStatus::Failed } _ => ObservedMigrationStatus::InProgress, @@ -208,7 +211,22 @@ impl InstanceStates { 
vmm: VmmRuntimeState, propolis_id: Uuid, ) -> Self { - InstanceStates { instance, vmm, propolis_id } + let migration = instance.migration_id.map(|migration_id| { + let dst_propolis_id = instance.dst_propolis_id.expect("if an instance has a migration ID, it should also have a target VMM ID"); + let role = if dst_propolis_id == propolis_id { + MigrationRole::Target + } else { + MigrationRole::Source + }; + MigrationRuntimeState { + migration_id, + state: MigrationState::InProgress, + role, + gen: Generation::new(), + time_updated: Utc::now(), + } + }); + InstanceStates { instance, vmm, propolis_id, migration } } pub fn instance(&self) -> &InstanceRuntimeState { @@ -223,6 +241,10 @@ impl InstanceStates { self.propolis_id } + pub(crate) fn migration(&self) -> Option<&MigrationRuntimeState> { + self.migration.as_ref() + } + /// Creates a `SledInstanceState` structure containing the entirety of this /// structure's runtime state. This requires cloning; for simple read access /// use the `instance` or `vmm` accessors instead. @@ -231,6 +253,25 @@ impl InstanceStates { instance_state: self.instance.clone(), vmm_state: self.vmm.clone(), propolis_id: self.propolis_id, + migration_state: self.migration.clone(), + } + } + + fn transition_migration( + &mut self, + state: MigrationState, + time_updated: DateTime, + ) { + let migration = self.migration.as_mut().expect( + "an ObservedMigrationState should only be constructed when the \ + VMM has an active migration", + ); + // Don't generate spurious state updates if the migration is already in + // the state we're transitioning to. + if migration.state != state { + migration.state = state; + migration.time_updated = time_updated; + migration.gen = migration.gen.next(); } } @@ -256,58 +297,76 @@ impl InstanceStates { // Update the instance record to reflect the result of any completed // migration. match observed.migration_status { - ObservedMigrationStatus::Succeeded => match self.propolis_role() { - // This is a successful migration out. Point the instance to the - // target VMM, but don't clear migration IDs; let the target do - // that so that the instance will continue to appear to be - // migrating until it is safe to migrate again. - PropolisRole::Active => { - self.switch_propolis_id_to_target(observed.time); - - assert_eq!(self.propolis_role(), PropolisRole::Retired); - } + ObservedMigrationStatus::Succeeded => { + self.transition_migration( + MigrationState::Completed, + observed.time, + ); + match self.propolis_role() { + // This is a successful migration out. Point the instance to the + // target VMM, but don't clear migration IDs; let the target do + // that so that the instance will continue to appear to be + // migrating until it is safe to migrate again. + PropolisRole::Active => { + self.switch_propolis_id_to_target(observed.time); + + assert_eq!(self.propolis_role(), PropolisRole::Retired); + } - // This is a successful migration in. Point the instance to the - // target VMM and clear migration IDs so that another migration - // in can begin. Propolis will continue reporting that this - // migration was successful, but because its ID has been - // discarded the observed migration status will change from - // Succeeded to NoMigration. - // - // Note that these calls increment the instance's generation - // number twice. This is by design and allows the target's - // migration-ID-clearing update to overtake the source's update. 
- PropolisRole::MigrationTarget => { - self.switch_propolis_id_to_target(observed.time); - self.clear_migration_ids(observed.time); - - assert_eq!(self.propolis_role(), PropolisRole::Active); - } + // This is a successful migration in. Point the instance to the + // target VMM and clear migration IDs so that another migration + // in can begin. Propolis will continue reporting that this + // migration was successful, but because its ID has been + // discarded the observed migration status will change from + // Succeeded to NoMigration. + // + // Note that these calls increment the instance's generation + // number twice. This is by design and allows the target's + // migration-ID-clearing update to overtake the source's update. + PropolisRole::MigrationTarget => { + self.switch_propolis_id_to_target(observed.time); + self.clear_migration_ids(observed.time); + + assert_eq!(self.propolis_role(), PropolisRole::Active); + } - // This is a migration source that previously reported success - // and removed itself from the active Propolis position. Don't - // touch the instance. - PropolisRole::Retired => {} - }, - ObservedMigrationStatus::Failed => match self.propolis_role() { - // This is a failed migration out. CLear migration IDs so that - // Nexus can try again. - PropolisRole::Active => { - self.clear_migration_ids(observed.time); + // This is a migration source that previously reported success + // and removed itself from the active Propolis position. Don't + // touch the instance. + PropolisRole::Retired => {} } + } + ObservedMigrationStatus::Failed => { + self.transition_migration( + MigrationState::Failed, + observed.time, + ); - // This is a failed migration in. Leave the migration IDs alone - // so that the migration won't appear to have concluded until - // the source is ready to start a new one. - PropolisRole::MigrationTarget => {} + match self.propolis_role() { + // This is a failed migration out. CLear migration IDs so that + // Nexus can try again. + PropolisRole::Active => { + self.clear_migration_ids(observed.time); + } - // This VMM was part of a failed migration and was subsequently - // removed from the instance record entirely. There's nothing to - // update. - PropolisRole::Retired => {} - }, + // This is a failed migration in. Leave the migration IDs alone + // so that the migration won't appear to have concluded until + // the source is ready to start a new one. + PropolisRole::MigrationTarget => {} + + // This VMM was part of a failed migration and was subsequently + // removed from the instance record entirely. There's nothing to + // update. + PropolisRole::Retired => {} + } + } + ObservedMigrationStatus::InProgress => { + self.transition_migration( + MigrationState::InProgress, + observed.time, + ); + } ObservedMigrationStatus::NoMigration - | ObservedMigrationStatus::InProgress | ObservedMigrationStatus::Pending => {} } @@ -327,6 +386,16 @@ impl InstanceStates { self.clear_migration_ids(observed.time); self.retire_active_propolis(observed.time); } + // If there's an active migration and the VMM is suddenly gone, + // that should constitute a migration failure! 
+ if let Some(MigrationState::Pending | MigrationState::InProgress) = + self.migration.as_ref().map(|m| m.state) + { + self.transition_migration( + MigrationState::Failed, + observed.time, + ); + } Some(Action::Destroy) } else { None @@ -431,12 +500,29 @@ impl InstanceStates { ids: &Option, now: DateTime, ) { - if let Some(ids) = ids { - self.instance.migration_id = Some(ids.migration_id); - self.instance.dst_propolis_id = Some(ids.dst_propolis_id); + if let Some(InstanceMigrationSourceParams { + migration_id, + dst_propolis_id, + }) = *ids + { + self.instance.migration_id = Some(migration_id); + self.instance.dst_propolis_id = Some(dst_propolis_id); + let role = if dst_propolis_id == self.propolis_id { + MigrationRole::Target + } else { + MigrationRole::Source + }; + self.migration = Some(MigrationRuntimeState { + migration_id, + state: MigrationState::Pending, + role, + gen: Generation::new(), + time_updated: now, + }) } else { self.instance.migration_id = None; self.instance.dst_propolis_id = None; + self.migration = None; } self.instance.gen = self.instance.gen.next(); @@ -538,17 +624,37 @@ mod test { fn make_migration_source_instance() -> InstanceStates { let mut state = make_instance(); state.vmm.state = VmmState::Migrating; - state.instance.migration_id = Some(Uuid::new_v4()); + let migration_id = Uuid::new_v4(); + state.instance.migration_id = Some(migration_id); state.instance.dst_propolis_id = Some(Uuid::new_v4()); + state.migration = Some(MigrationRuntimeState { + migration_id, + state: MigrationState::InProgress, + role: MigrationRole::Source, + // advance the generation once, since we are starting out in the + // `InProgress` state. + gen: Generation::new().next(), + time_updated: Utc::now(), + }); state } fn make_migration_target_instance() -> InstanceStates { let mut state = make_instance(); state.vmm.state = VmmState::Migrating; - state.instance.migration_id = Some(Uuid::new_v4()); + let migration_id = Uuid::new_v4(); + state.instance.migration_id = Some(migration_id); state.propolis_id = Uuid::new_v4(); state.instance.dst_propolis_id = Some(state.propolis_id); + state.migration = Some(MigrationRuntimeState { + migration_id, + state: MigrationState::InProgress, + role: MigrationRole::Target, + // advance the generation once, since we are starting out in the + // `InProgress` state. 
+ gen: Generation::new().next(), + time_updated: Utc::now(), + }); state } @@ -623,6 +729,37 @@ mod test { } } + fn test_termination_fails_in_progress_migration( + mk_instance: impl Fn() -> InstanceStates, + ) { + for state in [Observed::Destroyed, Observed::Failed] { + let mut instance_state = mk_instance(); + let original_migration = instance_state.clone().migration.unwrap(); + let requested_action = instance_state + .apply_propolis_observation(&make_observed_state(state.into())); + + let migration = + instance_state.migration.expect("state must have a migration"); + assert_eq!(migration.state, MigrationState::Failed); + assert!(migration.gen > original_migration.gen); + assert!(matches!(requested_action, Some(Action::Destroy))); + } + } + + #[test] + fn source_termination_fails_in_progress_migration() { + test_termination_fails_in_progress_migration( + make_migration_source_instance, + ) + } + + #[test] + fn target_termination_fails_in_progress_migration() { + test_termination_fails_in_progress_migration( + make_migration_target_instance, + ) + } + #[test] fn destruction_after_migration_out_does_not_transition() { let mut state = make_migration_source_instance(); @@ -651,6 +788,17 @@ mod test { assert_eq!(state.instance.propolis_id, state.instance.dst_propolis_id); assert!(state.instance.migration_id.is_some()); + // The migration state should transition to "completed" + let migration = state + .migration + .clone() + .expect("instance must have a migration state"); + let prev_migration = + prev.migration.expect("previous state must have a migration"); + assert_eq!(migration.state, MigrationState::Completed); + assert!(migration.gen > prev_migration.gen); + let prev_migration = migration; + // Once a successful migration is observed, the VMM's state should // continue to update, but the instance's state shouldn't change // anymore. @@ -666,6 +814,15 @@ mod test { assert_eq!(state.vmm.state, VmmState::Stopping); assert!(state.vmm.gen > prev.vmm.gen); + // Now that the migration has completed, it should not transition again. + let migration = state + .migration + .clone() + .expect("instance must have a migration state"); + assert_eq!(migration.state, MigrationState::Completed); + assert_eq!(migration.gen, prev_migration.gen); + let prev_migration = migration; + let prev = state.clone(); observed.vmm_state = PropolisInstanceState(Observed::Destroyed); assert!(matches!( @@ -676,6 +833,13 @@ mod test { assert_eq!(state.instance.gen, prev.instance.gen); assert_eq!(state.vmm.state, VmmState::Destroyed); assert!(state.vmm.gen > prev.vmm.gen); + + let migration = state + .migration + .clone() + .expect("instance must have a migration state"); + assert_eq!(migration.state, MigrationState::Completed); + assert_eq!(migration.gen, prev_migration.gen); } #[test] @@ -699,6 +863,14 @@ mod test { assert_eq!(state.instance.gen, prev.instance.gen); assert_eq!(state.vmm.state, VmmState::Failed); assert!(state.vmm.gen > prev.vmm.gen); + + // The migration state should transition. + let migration = + state.migration.expect("instance must have a migration state"); + let prev_migration = + prev.migration.expect("previous state must have a migration"); + assert_eq!(migration.state, MigrationState::Failed); + assert!(migration.gen > prev_migration.gen); } // Verifies that the rude-termination state change doesn't update the @@ -717,6 +889,14 @@ mod test { assert_state_change_has_gen_change(&prev, &state); assert_eq!(state.instance.gen, prev.instance.gen); + + // The migration state should transition. 
+ let migration = + state.migration.expect("instance must have a migration state"); + let prev_migration = + prev.migration.expect("previous state must have a migration"); + assert_eq!(migration.state, MigrationState::Failed); + assert!(migration.gen > prev_migration.gen); } #[test] @@ -739,11 +919,22 @@ mod test { assert_eq!(state.vmm.state, VmmState::Running); assert!(state.vmm.gen > prev.vmm.gen); + // The migration state should transition to completed. + let migration = state + .migration + .clone() + .expect("instance must have a migration state"); + let prev_migration = + prev.migration.expect("previous state must have a migration"); + assert_eq!(migration.state, MigrationState::Completed); + assert!(migration.gen > prev_migration.gen); + // Pretend Nexus set some new migration IDs. + let migration_id = Uuid::new_v4(); let prev = state.clone(); state.set_migration_ids( &Some(InstanceMigrationSourceParams { - migration_id: Uuid::new_v4(), + migration_id, dst_propolis_id: Uuid::new_v4(), }), Utc::now(), @@ -752,6 +943,15 @@ mod test { assert!(state.instance.gen > prev.instance.gen); assert_eq!(state.vmm.gen, prev.vmm.gen); + // There should be a new, pending migration state. + let migration = state + .migration + .clone() + .expect("instance must have a migration state"); + assert_eq!(migration.state, MigrationState::Pending); + assert_eq!(migration.migration_id, migration_id); + let prev_migration = migration; + // Mark that the new migration out is in progress. This doesn't change // anything in the instance runtime state, but does update the VMM state // generation. @@ -772,6 +972,15 @@ mod test { assert!(state.vmm.gen > prev.vmm.gen); assert_eq!(state.instance.gen, prev.instance.gen); + // The migration state should transition to in progress. + let migration = state + .migration + .clone() + .expect("instance must have a migration state"); + assert_eq!(migration.state, MigrationState::InProgress); + assert!(migration.gen > prev_migration.gen); + let prev_migration = migration; + // Propolis will publish that the migration succeeds before changing any // state. This should transfer control to the target but should not // touch the migration ID (that is the new target's job). @@ -790,6 +999,14 @@ mod test { assert_eq!(state.instance.propolis_id, state.instance.dst_propolis_id); assert!(state.instance.gen > prev.instance.gen); + // The migration state should transition to completed. + let migration = state + .migration + .clone() + .expect("instance must have a migration state"); + assert_eq!(migration.state, MigrationState::Completed); + assert!(migration.gen > prev_migration.gen); + // The rest of the destruction sequence is covered by other tests. 
} diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index b48e4f18b8..8499a0000c 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -17,9 +17,9 @@ use internal_dns::config::{Host, Zone}; use internal_dns::ServiceName; use omicron_common::address::{ get_sled_address, get_switch_zone_address, Ipv6Subnet, ReservedRackSubnet, - DENDRITE_PORT, DNS_HTTP_PORT, DNS_PORT, DNS_REDUNDANCY, MAX_DNS_REDUNDANCY, - MGD_PORT, MGS_PORT, NEXUS_REDUNDANCY, NTP_PORT, NUM_SOURCE_NAT_PORTS, - RSS_RESERVED_ADDRESSES, SLED_PREFIX, + COCKROACHDB_REDUNDANCY, DENDRITE_PORT, DNS_HTTP_PORT, DNS_PORT, + DNS_REDUNDANCY, MAX_DNS_REDUNDANCY, MGD_PORT, MGS_PORT, NEXUS_REDUNDANCY, + NTP_PORT, NUM_SOURCE_NAT_PORTS, RSS_RESERVED_ADDRESSES, SLED_PREFIX, }; use omicron_common::api::external::{Generation, MacAddr, Vni}; use omicron_common::api::internal::shared::{ @@ -48,9 +48,6 @@ use uuid::Uuid; // The number of boundary NTP servers to create from RSS. const BOUNDARY_NTP_COUNT: usize = 2; -// The number of CRDB instances to create from RSS. -const CRDB_COUNT: usize = 5; - // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove // when Nexus provisions Oximeter. const OXIMETER_COUNT: usize = 1; @@ -426,7 +423,7 @@ impl Plan { } // Provision CockroachDB zones, continuing to stripe across Sleds. - for _ in 0..CRDB_COUNT { + for _ in 0..COCKROACHDB_REDUNDANCY { let sled = { let which_sled = sled_allocator.next().ok_or(PlanError::NotEnoughSleds)?; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 1d8b3e7ad3..2d7a355440 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -734,16 +734,16 @@ impl ServiceInner { let rack_network_config = { let config = &config.rack_network_config; - NexusTypes::RackNetworkConfigV1 { + NexusTypes::RackNetworkConfigV2 { rack_subnet: config.rack_subnet, infra_ip_first: config.infra_ip_first, infra_ip_last: config.infra_ip_last, ports: config .ports .iter() - .map(|config| NexusTypes::PortConfigV1 { + .map(|config| NexusTypes::PortConfigV2 { port: config.port.clone(), - routes: config + routes: config .routes .iter() .map(|r| NexusTypes::RouteConfig { @@ -752,7 +752,14 @@ impl ServiceInner { vlan_id: r.vlan_id, }) .collect(), - addresses: config.addresses.iter().cloned().map(Into::into).collect(), + addresses: config + .addresses + .iter() + .map(|a| NexusTypes::UplinkAddressConfig { + address: a.address, + vlan_id: a.vlan_id + }) + .collect(), switch: config.switch.into(), uplink_port_speed: config.uplink_port_speed.into(), uplink_port_fec: config.uplink_port_fec.into(), @@ -1129,7 +1136,7 @@ impl ServiceInner { // from the bootstore". let early_network_config = EarlyNetworkConfig { generation: 1, - schema_version: 1, + schema_version: 2, body: EarlyNetworkConfigBody { ntp_servers: config.ntp_servers.clone(), rack_network_config: Some(config.rack_network_config.clone()), diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 7df9f06d53..70d68f6a8e 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -1693,6 +1693,7 @@ impl ServiceManager { ZoneArgs::Omicron(OmicronZoneConfigLocal { zone: OmicronZoneConfig { + id: zone_id, zone_type: OmicronZoneType::CockroachDb { .. }, underlay_address, .. @@ -1734,6 +1735,7 @@ impl ServiceManager { // Configure the Omicron cockroach-admin service. 
let cockroach_admin_config = PropertyGroupBuilder::new("config") + .add_property("zone_id", "astring", zone_id.to_string()) .add_property( "cockroach_address", "astring", diff --git a/sled-agent/src/sim/collection.rs b/sled-agent/src/sim/collection.rs index f5be31bd37..12e17cc3de 100644 --- a/sled-agent/src/sim/collection.rs +++ b/sled-agent/src/sim/collection.rs @@ -451,6 +451,7 @@ mod test { instance_state: instance_vmm, vmm_state, propolis_id, + migration_state: None, }; SimObject::new_simulated_auto(&state, logctx.log.new(o!())) diff --git a/sled-agent/src/sim/instance.rs b/sled-agent/src/sim/instance.rs index ed88dbcc6f..2ac8618399 100644 --- a/sled-agent/src/sim/instance.rs +++ b/sled-agent/src/sim/instance.rs @@ -16,7 +16,7 @@ use omicron_common::api::external::Error; use omicron_common::api::external::Generation; use omicron_common::api::external::ResourceType; use omicron_common::api::internal::nexus::{ - InstanceRuntimeState, SledInstanceState, VmmState, + InstanceRuntimeState, MigrationRole, SledInstanceState, VmmState, }; use propolis_client::types::{ InstanceMigrateStatusResponse as PropolisMigrateStatus, @@ -78,6 +78,46 @@ impl SimInstanceInner { self.queue.push_back(MonitorChange::MigrateStatus(migrate_status)) } + /// Queue a successful simulated migration. + /// + fn queue_successful_migration(&mut self, role: MigrationRole) { + // Propolis transitions to the Migrating state once before + // actually starting migration. + self.queue_propolis_state(PropolisInstanceState::Migrating); + let migration_id = + self.state.instance().migration_id.unwrap_or_else(|| { + panic!( + "should have migration ID set before getting request to + migrate in (current state: {:?})", + self + ) + }); + self.queue_migration_status(PropolisMigrateStatus { + migration_id, + state: propolis_client::types::MigrationState::Sync, + }); + self.queue_migration_status(PropolisMigrateStatus { + migration_id, + state: propolis_client::types::MigrationState::Finish, + }); + + // The state we transition to after the migration completes will depend + // on whether we are the source or destination. + match role { + MigrationRole::Target => { + self.queue_propolis_state(PropolisInstanceState::Running) + } + MigrationRole::Source => self.queue_graceful_stop(), + } + } + + fn queue_graceful_stop(&mut self) { + self.state.transition_vmm(PublishedVmmState::Stopping, Utc::now()); + self.queue_propolis_state(PropolisInstanceState::Stopping); + self.queue_propolis_state(PropolisInstanceState::Stopped); + self.queue_propolis_state(PropolisInstanceState::Destroyed); + } + /// Searches the queue for its last Propolis state change transition. If /// one exists, returns the associated Propolis state. fn last_queued_instance_state(&self) -> Option { @@ -118,26 +158,7 @@ impl SimInstanceInner { ))); } - // Propolis transitions to the Migrating state once before - // actually starting migration. 
- self.queue_propolis_state(PropolisInstanceState::Migrating); - let migration_id = - self.state.instance().migration_id.unwrap_or_else(|| { - panic!( - "should have migration ID set before getting request to - migrate in (current state: {:?})", - self - ) - }); - self.queue_migration_status(PropolisMigrateStatus { - migration_id, - state: propolis_client::types::MigrationState::Sync, - }); - self.queue_migration_status(PropolisMigrateStatus { - migration_id, - state: propolis_client::types::MigrationState::Finish, - }); - self.queue_propolis_state(PropolisInstanceState::Running); + self.queue_successful_migration(MigrationRole::Target) } InstanceStateRequested::Running => { match self.next_resting_state() { @@ -171,21 +192,7 @@ impl SimInstanceInner { VmmState::Starting => { self.state.terminate_rudely(); } - VmmState::Running => { - self.state.transition_vmm( - PublishedVmmState::Stopping, - Utc::now(), - ); - self.queue_propolis_state( - PropolisInstanceState::Stopping, - ); - self.queue_propolis_state( - PropolisInstanceState::Stopped, - ); - self.queue_propolis_state( - PropolisInstanceState::Destroyed, - ); - } + VmmState::Running => self.queue_graceful_stop(), // Idempotently allow requests to stop an instance that is // already stopping. VmmState::Stopping @@ -360,6 +367,24 @@ impl SimInstanceInner { } self.state.set_migration_ids(ids, Utc::now()); + + // If we set migration IDs and are the migration source, ensure that we + // will perform the correct state transitions to simulate a successful + // migration. + if ids.is_some() { + let role = self + .state + .migration() + .expect( + "we just got a `put_migration_ids` request with `Some` IDs, \ + so we should have a migration" + ) + .role; + if role == MigrationRole::Source { + self.queue_successful_migration(MigrationRole::Source) + } + } + Ok(self.state.sled_instance_state()) } } diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index ae7f40f5f3..7ce34473e7 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -527,7 +527,7 @@ pub async fn run_standalone_server( external_port_count: NexusTypes::ExternalPortDiscovery::Static( HashMap::new(), ), - rack_network_config: NexusTypes::RackNetworkConfigV1 { + rack_network_config: NexusTypes::RackNetworkConfigV2 { rack_subnet: Ipv6Net::host_net(Ipv6Addr::LOCALHOST), infra_ip_first: Ipv4Addr::LOCALHOST, infra_ip_last: Ipv4Addr::LOCALHOST, diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index e2a52bf983..7f07dc199a 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -341,6 +341,7 @@ impl SledAgent { instance_state: instance_runtime, vmm_state: vmm_runtime, propolis_id, + migration_state: None, }, None, ) diff --git a/sled-agent/tests/data/early_network_blobs.txt b/sled-agent/tests/data/early_network_blobs.txt index c968d4010b..e9b9927e86 100644 --- a/sled-agent/tests/data/early_network_blobs.txt +++ b/sled-agent/tests/data/early_network_blobs.txt @@ -1,2 +1,2 @@ -2023-11-30 mupdate failing 
blob,{"generation":15,"schema_version":1,"body":{"ntp_servers":[],"rack_network_config":{"rack_subnet":"fd00:1122:3344:100::/56","infra_ip_first":"0.0.0.0","infra_ip_last":"0.0.0.0","ports":[{"routes":[],"addresses":[],"switch":"switch1","port":"qsfp0","uplink_port_speed":"speed100_g","uplink_port_fec":"none","bgp_peers":[]},{"routes":[],"addresses":["172.20.15.53/29"],"switch":"switch1","port":"qsfp18","uplink_port_speed":"speed100_g","uplink_port_fec":"rs","bgp_peers":[{"asn":65002,"port":"qsfp18","addr":"172.20.15.51","hold_time":6,"idle_hold_time":6,"delay_open":0,"connect_retry":3,"keepalive":2}]},{"routes":[],"addresses":["172.20.15.45/29"],"switch":"switch0","port":"qsfp18","uplink_port_speed":"speed100_g","uplink_port_fec":"rs","bgp_peers":[{"asn":65002,"port":"qsfp18","addr":"172.20.15.43","hold_time":6,"idle_hold_time":6,"delay_open":0,"connect_retry":3,"keepalive":2}]},{"routes":[],"addresses":[],"switch":"switch0","port":"qsfp0","uplink_port_speed":"speed100_g","uplink_port_fec":"none","bgp_peers":[]}],"bgp":[{"asn":65002,"originate":["172.20.26.0/24"]},{"asn":65002,"originate":["172.20.26.0/24"]}]}}} -2023-12-06 config,{"generation":20,"schema_version":1,"body":{"ntp_servers":["ntp.example.com"],"rack_network_config":{"rack_subnet":"ff01::/32","infra_ip_first":"127.0.0.1","infra_ip_last":"127.1.0.1","ports":[{"routes":[{"destination":"10.1.9.32/16","nexthop":"10.1.9.32"}],"addresses":["2001:db8::/96"],"switch":"switch0","port":"foo","uplink_port_speed":"speed200_g","uplink_port_fec":"firecode","bgp_peers":[{"asn":65000,"port":"bar","addr":"1.2.3.4","hold_time":20,"idle_hold_time":50,"delay_open":null,"connect_retry":30,"keepalive":10}],"autoneg":true}],"bgp":[{"asn":20000,"originate":["192.168.0.0/24"]}]}}} +2023-11-30 mupdate failing blob,{"generation":15,"schema_version":1,"body":{"ntp_servers":[],"rack_network_config":{"rack_subnet":"fd00:1122:3344:100::/56","infra_ip_first":"0.0.0.0","infra_ip_last":"0.0.0.0","ports":[{"routes":[],"addresses":[],"switch":"switch1","port":"qsfp0","uplink_port_speed":"speed100_g","uplink_port_fec":"none","bgp_peers":[]},{"routes":[],"addresses":[{"address":"172.20.15.53/29"}],"switch":"switch1","port":"qsfp18","uplink_port_speed":"speed100_g","uplink_port_fec":"rs","bgp_peers":[{"asn":65002,"port":"qsfp18","addr":"172.20.15.51","hold_time":6,"idle_hold_time":6,"delay_open":0,"connect_retry":3,"keepalive":2}]},{"routes":[],"addresses":[{"address":"172.20.15.45/29"}],"switch":"switch0","port":"qsfp18","uplink_port_speed":"speed100_g","uplink_port_fec":"rs","bgp_peers":[{"asn":65002,"port":"qsfp18","addr":"172.20.15.43","hold_time":6,"idle_hold_time":6,"delay_open":0,"connect_retry":3,"keepalive":2}]},{"routes":[],"addresses":[],"switch":"switch0","port":"qsfp0","uplink_port_speed":"speed100_g","uplink_port_fec":"none","bgp_peers":[]}],"bgp":[{"asn":65002,"originate":["172.20.26.0/24"]},{"asn":65002,"originate":["172.20.26.0/24"]}]}}} +2023-12-06 
config,{"generation":20,"schema_version":1,"body":{"ntp_servers":["ntp.example.com"],"rack_network_config":{"rack_subnet":"ff01::/32","infra_ip_first":"127.0.0.1","infra_ip_last":"127.1.0.1","ports":[{"routes":[{"destination":"10.1.9.32/16","nexthop":"10.1.9.32"}],"addresses":[{"address":"2001:db8::/96"}],"switch":"switch0","port":"foo","uplink_port_speed":"speed200_g","uplink_port_fec":"firecode","bgp_peers":[{"asn":65000,"port":"bar","addr":"1.2.3.4","hold_time":20,"idle_hold_time":50,"delay_open":null,"connect_retry":30,"keepalive":10}],"autoneg":true}],"bgp":[{"asn":20000,"originate":["192.168.0.0/24"]}]}}} diff --git a/sled-agent/tests/integration_tests/early_network.rs b/sled-agent/tests/integration_tests/early_network.rs index 4421e76e8b..b7cab53a51 100644 --- a/sled-agent/tests/integration_tests/early_network.rs +++ b/sled-agent/tests/integration_tests/early_network.rs @@ -10,7 +10,7 @@ use bootstore::schemes::v0 as bootstore; use omicron_common::api::{ external::{ImportExportPolicy, SwitchLocation}, internal::shared::{ - BgpConfig, BgpPeerConfig, PortConfigV1, PortFec, PortSpeed, + BgpConfig, BgpPeerConfig, PortConfigV2, PortFec, PortSpeed, RackNetworkConfig, RouteConfig, }, }; @@ -120,7 +120,7 @@ fn current_config_example() -> (&'static str, EarlyNetworkConfig) { rack_subnet: "ff01::0/32".parse().unwrap(), infra_ip_first: Ipv4Addr::new(127, 0, 0, 1), infra_ip_last: Ipv4Addr::new(127, 1, 0, 1), - ports: vec![PortConfigV1 { + ports: vec![PortConfigV2 { routes: vec![RouteConfig { destination: "10.1.9.32/16".parse().unwrap(), nexthop: "10.1.9.32".parse().unwrap(), diff --git a/sled-agent/tests/old-rss-sled-plans/madrid-rss-sled-plan.json b/sled-agent/tests/old-rss-sled-plans/madrid-rss-sled-plan.json index 5512247ee8..683e8fb833 100644 --- a/sled-agent/tests/old-rss-sled-plans/madrid-rss-sled-plan.json +++ b/sled-agent/tests/old-rss-sled-plans/madrid-rss-sled-plan.json @@ -1 +1 @@ 
-{"rack_id":"ed6bcf59-9620-491d-8ebd-4a4eebf2e136","sleds":{"[fdb0:a840:2504:396::1]:12346":{"generation":0,"schema_version":1,"body":{"id":"b3e78a88-0f2e-476e-a8a9-2d8c90a169d6","rack_id":"ed6bcf59-9620-491d-8ebd-4a4eebf2e136","use_trust_quorum":true,"is_lrtq_learner":false,"subnet":{"net":"fd00:1122:3344:103::/64"}}},"[fdb0:a840:2504:157::1]:12346":{"generation":0,"schema_version":1,"body":{"id":"168e1ad6-1e4b-4f7a-b894-157974bd8bb8","rack_id":"ed6bcf59-9620-491d-8ebd-4a4eebf2e136","use_trust_quorum":true,"is_lrtq_learner":false,"subnet":{"net":"fd00:1122:3344:104::/64"}}},"[fdb0:a840:2504:355::1]:12346":{"generation":0,"schema_version":1,"body":{"id":"b9877212-212b-4588-b818-9c7b53c5b143","rack_id":"ed6bcf59-9620-491d-8ebd-4a4eebf2e136","use_trust_quorum":true,"is_lrtq_learner":false,"subnet":{"net":"fd00:1122:3344:102::/64"}}},"[fdb0:a840:2504:3d2::1]:12346":{"generation":0,"schema_version":1,"body":{"id":"c3a0f8be-5b05-4ee8-8c4e-2514de6501b6","rack_id":"ed6bcf59-9620-491d-8ebd-4a4eebf2e136","use_trust_quorum":true,"is_lrtq_learner":false,"subnet":{"net":"fd00:1122:3344:101::/64"}}}},"config":{"rack_subnet":"fd00:1122:3344:100::","trust_quorum_peers":[{"type":"gimlet","identifier":"BRM42220081","model":"913-0000019","revision":6},{"type":"gimlet","identifier":"BRM42220046","model":"913-0000019","revision":6},{"type":"gimlet","identifier":"BRM44220001","model":"913-0000019","revision":6},{"type":"gimlet","identifier":"BRM42220004","model":"913-0000019","revision":6}],"bootstrap_discovery":{"type":"only_these","addrs":["fdb0:a840:2504:3d2::1","fdb0:a840:2504:355::1","fdb0:a840:2504:396::1","fdb0:a840:2504:157::1"]},"ntp_servers":["ntp.eng.oxide.computer"],"dns_servers":["1.1.1.1","9.9.9.9"],"internal_services_ip_pool_ranges":[{"first":"172.20.28.1","last":"172.20.28.10"}],"external_dns_ips":["172.20.28.1"],"external_dns_zone_name":"madrid.eng.oxide.computer","external_certificates":[{"cert":"","key":""}],"recovery_silo":{"silo_name":"recovery","user_name":"recovery","user_password_hash":"$argon2id$v=19$m=98304,t=13,p=1$RUlWc0ZxaHo0WFdrN0N6ZQ$S8p52j85GPvMhR/ek3GL0el/oProgTwWpHJZ8lsQQoY"},"rack_network_config":{"rack_subnet":"fd00:1122:3344:1::/56","infra_ip_first":"172.20.15.37","infra_ip_last":"172.20.15.38","ports":[{"routes":[{"destination":"0.0.0.0/0","nexthop":"172.20.15.33"}],"addresses":["172.20.15.38/29"],"switch":"switch0","port":"qsfp0","uplink_port_speed":"speed40_g","uplink_port_fec":"none","bgp_peers":[],"autoneg":false},{"routes":[{"destination":"0.0.0.0/0","nexthop":"172.20.15.33"}],"addresses":["172.20.15.37/29"],"switch":"switch1","port":"qsfp0","uplink_port_speed":"speed40_g","uplink_port_fec":"none","bgp_peers":[],"autoneg":false}],"bgp":[]}}} 
+{"rack_id":"ed6bcf59-9620-491d-8ebd-4a4eebf2e136","sleds":{"[fdb0:a840:2504:396::1]:12346":{"generation":0,"schema_version":1,"body":{"id":"b3e78a88-0f2e-476e-a8a9-2d8c90a169d6","rack_id":"ed6bcf59-9620-491d-8ebd-4a4eebf2e136","use_trust_quorum":true,"is_lrtq_learner":false,"subnet":{"net":"fd00:1122:3344:103::/64"}}},"[fdb0:a840:2504:157::1]:12346":{"generation":0,"schema_version":1,"body":{"id":"168e1ad6-1e4b-4f7a-b894-157974bd8bb8","rack_id":"ed6bcf59-9620-491d-8ebd-4a4eebf2e136","use_trust_quorum":true,"is_lrtq_learner":false,"subnet":{"net":"fd00:1122:3344:104::/64"}}},"[fdb0:a840:2504:355::1]:12346":{"generation":0,"schema_version":1,"body":{"id":"b9877212-212b-4588-b818-9c7b53c5b143","rack_id":"ed6bcf59-9620-491d-8ebd-4a4eebf2e136","use_trust_quorum":true,"is_lrtq_learner":false,"subnet":{"net":"fd00:1122:3344:102::/64"}}},"[fdb0:a840:2504:3d2::1]:12346":{"generation":0,"schema_version":1,"body":{"id":"c3a0f8be-5b05-4ee8-8c4e-2514de6501b6","rack_id":"ed6bcf59-9620-491d-8ebd-4a4eebf2e136","use_trust_quorum":true,"is_lrtq_learner":false,"subnet":{"net":"fd00:1122:3344:101::/64"}}}},"config":{"rack_subnet":"fd00:1122:3344:100::","trust_quorum_peers":[{"type":"gimlet","identifier":"BRM42220081","model":"913-0000019","revision":6},{"type":"gimlet","identifier":"BRM42220046","model":"913-0000019","revision":6},{"type":"gimlet","identifier":"BRM44220001","model":"913-0000019","revision":6},{"type":"gimlet","identifier":"BRM42220004","model":"913-0000019","revision":6}],"bootstrap_discovery":{"type":"only_these","addrs":["fdb0:a840:2504:3d2::1","fdb0:a840:2504:355::1","fdb0:a840:2504:396::1","fdb0:a840:2504:157::1"]},"ntp_servers":["ntp.eng.oxide.computer"],"dns_servers":["1.1.1.1","9.9.9.9"],"internal_services_ip_pool_ranges":[{"first":"172.20.28.1","last":"172.20.28.10"}],"external_dns_ips":["172.20.28.1"],"external_dns_zone_name":"madrid.eng.oxide.computer","external_certificates":[{"cert":"","key":""}],"recovery_silo":{"silo_name":"recovery","user_name":"recovery","user_password_hash":"$argon2id$v=19$m=98304,t=13,p=1$RUlWc0ZxaHo0WFdrN0N6ZQ$S8p52j85GPvMhR/ek3GL0el/oProgTwWpHJZ8lsQQoY"},"rack_network_config":{"rack_subnet":"fd00:1122:3344:1::/56","infra_ip_first":"172.20.15.37","infra_ip_last":"172.20.15.38","ports":[{"routes":[{"destination":"0.0.0.0/0","nexthop":"172.20.15.33"}],"addresses":[{"address":"172.20.15.38/29"}],"switch":"switch0","port":"qsfp0","uplink_port_speed":"speed40_g","uplink_port_fec":"none","bgp_peers":[],"autoneg":false},{"routes":[{"destination":"0.0.0.0/0","nexthop":"172.20.15.33"}],"addresses":[{"address":"172.20.15.37/29"}],"switch":"switch1","port":"qsfp0","uplink_port_speed":"speed40_g","uplink_port_fec":"none","bgp_peers":[],"autoneg":false}],"bgp":[]}}} diff --git a/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json b/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json index 108914a26f..efd1a3c167 100644 --- a/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json +++ b/sled-agent/tests/output/new-rss-sled-plans/madrid-rss-sled-plan.json @@ -132,7 +132,10 @@ } ], "addresses": [ - "172.20.15.38/29" + { + "address": "172.20.15.38/29", + "vlan_id": null + } ], "switch": "switch0", "port": "qsfp0", @@ -150,7 +153,10 @@ } ], "addresses": [ - "172.20.15.37/29" + { + "address": "172.20.15.37/29", + "vlan_id": null + } ], "switch": "switch1", "port": "qsfp0", diff --git a/smf/cockroach-admin/manifest.xml b/smf/cockroach-admin/manifest.xml index 1d6f7c4861..3f95e1462a 100644 --- a/smf/cockroach-admin/manifest.xml +++ 
b/smf/cockroach-admin/manifest.xml @@ -22,6 +22,7 @@ + diff --git a/smf/cockroach-admin/method_script.sh b/smf/cockroach-admin/method_script.sh index c5f924223d..4f1e3035b0 100755 --- a/smf/cockroach-admin/method_script.sh +++ b/smf/cockroach-admin/method_script.sh @@ -6,6 +6,7 @@ set -o pipefail . /lib/svc/share/smf_include.sh +ZONE_ID="$(svcprop -c -p config/zone_id "${SMF_FMRI}")" COCKROACH_ADDR="$(svcprop -c -p config/cockroach_address "${SMF_FMRI}")" HTTP_ADDR="$(svcprop -c -p config/http_address "${SMF_FMRI}")" @@ -15,6 +16,7 @@ args=( '--path-to-cockroach-binary' "/opt/oxide/cockroachdb/bin/cockroach" '--cockroach-address' "$COCKROACH_ADDR" '--http-address' "$HTTP_ADDR" + '--zone-id' "$ZONE_ID" ) exec /opt/oxide/cockroach-admin/bin/cockroach-admin "${args[@]}" & diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index 3827cbb38c..d4612ba15e 100644 --- a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -52,6 +52,7 @@ phantom_disks.period_secs = 30 physical_disk_adoption.period_secs = 30 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 +blueprints.period_secs_collect_crdb_node_ids = 180 sync_service_zone_nat.period_secs = 30 switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index ee04f88e59..3b158d0387 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -52,6 +52,7 @@ phantom_disks.period_secs = 30 physical_disk_adoption.period_secs = 30 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 +blueprints.period_secs_collect_crdb_node_ids = 180 sync_service_zone_nat.period_secs = 30 switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 diff --git a/smf/sled-agent/gimlet-standalone/config-rss.toml b/smf/sled-agent/gimlet-standalone/config-rss.toml index 616d8d496b..f1b5da6f24 100644 --- a/smf/sled-agent/gimlet-standalone/config-rss.toml +++ b/smf/sled-agent/gimlet-standalone/config-rss.toml @@ -102,7 +102,7 @@ bgp = [] # Routes associated with this port. routes = [{nexthop = "192.168.1.199", destination = "0.0.0.0/0"}] # Addresses associated with this port. -addresses = ["192.168.1.30/32"] +addresses = [{address = "192.168.1.30/32"}] # Name of the uplink port. This should always be "qsfp0" when using softnpu. port = "qsfp0" # The speed of this port. diff --git a/smf/sled-agent/non-gimlet/config-rss.toml b/smf/sled-agent/non-gimlet/config-rss.toml index d897f7ba4b..90f5339e84 100644 --- a/smf/sled-agent/non-gimlet/config-rss.toml +++ b/smf/sled-agent/non-gimlet/config-rss.toml @@ -102,7 +102,7 @@ bgp = [] # Routes associated with this port. routes = [{nexthop = "192.168.1.199", destination = "0.0.0.0/0"}] # Addresses associated with this port. -addresses = ["192.168.1.30/24"] +addresses = [{address = "192.168.1.30/24"}] # Name of the uplink port. This should always be "qsfp0" when using softnpu. port = "qsfp0" # The speed of this port. 
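The RSS config changes above switch `addresses` from plain strings to inline tables, so an uplink address can optionally carry the VLAN ID that the new `vlan_id` column on `switch_port_settings_address_config` (added earlier in this diff) stores. A hedged sketch of how such entries would parse, assuming `UplinkAddressConfig` tolerates an omitted `vlan_id` (as the updated config files require); the second, VLAN-tagged address and its VLAN 300 are invented for illustration:

```rust
use omicron_common::api::internal::shared::UplinkAddressConfig;
use serde::Deserialize;

#[derive(Deserialize)]
struct UplinkSnippet {
    addresses: Vec<UplinkAddressConfig>,
}

fn parse_rss_addresses() {
    // The first entry mirrors the updated config-rss.toml files; the second
    // shows a hypothetical VLAN-tagged address.
    let snippet: UplinkSnippet = toml::from_str(
        r#"
        addresses = [
            { address = "192.168.1.30/24" },
            { address = "192.168.1.31/24", vlan_id = 300 },
        ]
        "#,
    )
    .unwrap();

    assert_eq!(snippet.addresses[0].vlan_id, None);
    assert_eq!(snippet.addresses[1].vlan_id, Some(300));
}
```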
diff --git a/tools/console_version b/tools/console_version
index 07cf4cc912..4c720590d5 100644
--- a/tools/console_version
+++ b/tools/console_version
@@ -1,2 +1,2 @@
-COMMIT="a228b75ba35952b68c0b8b0892c452d4fc29467a"
-SHA2="8d5b06680e5986b633b3f97e46d7823ea2dddf2b98930d8c6a4f7dc1eb382048"
+COMMIT="a9b325e94a6bbb309d68cf586298b4f77aa452ab"
+SHA2="d41f22b4d575fc622b2749ea9e81eec11d78a4aae46f61b0472a7958b78be7f7"
diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version
index 6d71042250..0d6d6f810e 100755
--- a/tools/dendrite_openapi_version
+++ b/tools/dendrite_openapi_version
@@ -1,2 +1,2 @@
-COMMIT="6334bf74fa21790c15f1c4e494ea2ec0edd1c83c"
-SHA2="213031aa058f0aa355964e4a5ca350db30110454bad5c77cbc94ab77fdcbe013"
+COMMIT="861c00bacbdf7a6e22471f0dabd8f926409b5292"
+SHA2="12dc61e7c62b2e1ee1cf3c2bf7cdda6bee6ec96925d2fc1c021c6c1a8fdd56cd"
diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums
index 3f4d9854c4..75c76f3585 100644
--- a/tools/dendrite_stub_checksums
+++ b/tools/dendrite_stub_checksums
@@ -1,3 +1,3 @@
-CIDL_SHA256_ILLUMOS="5929f9abf0daf4bbf17d835e5d69fc842b9617b312fb5644fa99daf785203700"
-CIDL_SHA256_LINUX_DPD="fa38138db9ce1c2cababd11dd9ef1289295e4a8185c78372f6ff1a090c75a05b"
-CIDL_SHA256_LINUX_SWADM="ebda6c0a8e29f40c389337fe2e37c1191eeeb34d729de7724b6d707bb6c9a882"
+CIDL_SHA256_ILLUMOS="1db849892c60b22f600fb081d4b0145d8ecd98acce9fad3094499a5d2159d001"
+CIDL_SHA256_LINUX_DPD="4022e8c0de268c4bc38046b29a48d021b3204e6c2dc8371f2de67f42019720c0"
+CIDL_SHA256_LINUX_SWADM="a1308303fd0d8f8ac272288e801beb913f695dcf820dd53f5c03871e6b8674f7"
diff --git a/tools/permslip_staging b/tools/permslip_staging
index edb9dda971..3764a4569e 100644
--- a/tools/permslip_staging
+++ b/tools/permslip_staging
@@ -1,4 +1,4 @@
-466a81d267c45355a3018dd57d61b5ae3ba281a54c804a1c0a6d6e4b4367ac3f manifest-gimlet-v1.0.20.toml
+844c56d542700c4b613d9cd7aee5ab306c8d0b969e5dfe194b1b7468a6a9752b manifest-gimlet-v1.0.21.toml
 b973cc9feb20f7bba447e7f5291c4070387fa9992deab81301f67f0a3844cd0c manifest-oxide-rot-1-v1.0.11.toml
-cc126931c09bb3c697b35255b438aac7dd93c98add8c7fcaffd1ee68e6317d24 manifest-psc-v1.0.19.toml
-0a11a2e15f0b45b11ce8b0a539c1db848a5054a46d3efc6bd95178a896afabc4 manifest-sidecar-v1.0.20.toml
+ca14a77639db3b71c60234e4edebd01ff31ba5a93a842100a991dbf3ad6e94fb manifest-psc-v1.0.20.toml
+af0f6c7d0723db33a2972343cc42e4c2ee2ab8884c49808c9c3d8289c193f97b manifest-sidecar-v1.0.21.toml
diff --git a/tufaceous-lib/src/assemble/manifest.rs b/tufaceous-lib/src/assemble/manifest.rs
index 2236580b75..1c4a676f4c 100644
--- a/tufaceous-lib/src/assemble/manifest.rs
+++ b/tufaceous-lib/src/assemble/manifest.rs
@@ -294,14 +294,11 @@ impl<'a> FakeDataAttributes<'a> {
             KnownArtifactKind::SwitchRot => "fake-sidecar-rot",
         };

-        // For our purposes sign = board represents what we want for the RoT
-        // and we don't care about the SP at this point
         let caboose = CabooseBuilder::default()
             .git_commit("this-is-fake-data")
             .board(board)
             .version(self.version.to_string())
             .name(self.name)
-            .sign(board)
             .build();

         let mut builder = HubrisArchiveBuilder::with_fake_image();
diff --git a/update-common/src/artifacts/update_plan.rs b/update-common/src/artifacts/update_plan.rs
index 53286ee09a..c5b171d648 100644
--- a/update-common/src/artifacts/update_plan.rs
+++ b/update-common/src/artifacts/update_plan.rs
@@ -33,7 +33,6 @@ use std::collections::btree_map;
 use std::collections::BTreeMap;
 use std::collections::HashMap;
 use std::io;
-use tokio::io::AsyncReadExt;
 use tufaceous_lib::HostPhaseImages;
 use tufaceous_lib::RotArchives;
@@ -74,15 +73,6 @@ pub struct UpdatePlan {
     pub control_plane_hash: ArtifactHash,
 }

-// Used to represent the information extracted from signed RoT images. This
-// is used when going from `UpdatePlanBuilder` -> `UpdatePlan` to check
-// the versions on the RoT images
-#[derive(Debug, Eq, Hash, PartialEq)]
-struct RotSignData {
-    kind: KnownArtifactKind,
-    sign: Vec<u8>,
-}
-
 /// `UpdatePlanBuilder` mirrors all the fields of `UpdatePlan`, but they're all
 /// optional: it can be filled in as we read a TUF repository.
 /// [`UpdatePlanBuilder::build()`] will (fallibly) convert from the builder to
@@ -124,9 +114,6 @@ pub struct UpdatePlanBuilder<'a> {
     by_hash: HashMap,
     artifacts_meta: Vec,

-    // map for RoT signing information, used in `ArtifactsWithPlan`
-    rot_by_sign: HashMap<RotSignData, Vec<ArtifactId>>,
-
     // extra fields we use to build the plan
     extracted_artifacts: ExtractedArtifacts,
     log: &'a Logger,
@@ -157,7 +144,6 @@ impl<'a> UpdatePlanBuilder<'a> {

             by_id: BTreeMap::new(),
             by_hash: HashMap::new(),
-            rot_by_sign: HashMap::new(),
             artifacts_meta: Vec::new(),

             extracted_artifacts,
@@ -331,56 +317,6 @@ impl<'a> UpdatePlanBuilder<'a> {
             },
         )?;

-        // We need to get all the signing information now to properly check
-        // version at builder time (builder time is not async)
-        let image_a_stream = rot_a_data
-            .reader_stream()
-            .await
-            .map_err(RepositoryError::CreateReaderStream)?;
-        let mut image_a = Vec::with_capacity(rot_a_data.file_size());
-        tokio_util::io::StreamReader::new(image_a_stream)
-            .read_to_end(&mut image_a)
-            .await
-            .map_err(|error| RepositoryError::ReadExtractedArchive {
-                artifact: ArtifactHashId {
-                    kind: artifact_id.kind.clone(),
-                    hash: rot_a_data.hash(),
-                },
-                error,
-            })?;
-
-        let (artifact_id, image_a_sign) =
-            read_hubris_sign_from_archive(artifact_id, image_a)?;
-
-        self.rot_by_sign
-            .entry(RotSignData { kind: artifact_kind, sign: image_a_sign })
-            .or_default()
-            .push(artifact_id.clone());
-
-        let image_b_stream = rot_b_data
-            .reader_stream()
-            .await
-            .map_err(RepositoryError::CreateReaderStream)?;
-        let mut image_b = Vec::with_capacity(rot_b_data.file_size());
-        tokio_util::io::StreamReader::new(image_b_stream)
-            .read_to_end(&mut image_b)
-            .await
-            .map_err(|error| RepositoryError::ReadExtractedArchive {
-                artifact: ArtifactHashId {
-                    kind: artifact_id.kind.clone(),
-                    hash: rot_b_data.hash(),
-                },
-                error,
-            })?;
-
-        let (artifact_id, image_b_sign) =
-            read_hubris_sign_from_archive(artifact_id, image_b)?;
-
-        self.rot_by_sign
-            .entry(RotSignData { kind: artifact_kind, sign: image_b_sign })
-            .or_default()
-            .push(artifact_id.clone());
-
         // Technically we've done all we _need_ to do with the RoT images. We
         // send them directly to MGS ourself, so don't expect anyone to ask for
         // them via `by_id` or `by_hash`. However, it's more convenient to
@@ -764,26 +700,38 @@ impl<'a> UpdatePlanBuilder<'a> {
             }
         }

-        // Ensure that all A/B RoT images for each board kind and same
-        // signing key have the same version. (i.e. allow gimlet_rot signed
-        // with a staging key to be a different version from gimlet_rot signed
-        // with a production key)
-        for (entry, versions) in self.rot_by_sign {
-            let kind = entry.kind;
-            // This unwrap is safe because we check above that each of the types
-            // has at least one entry
-            let version = &versions.first().unwrap().version;
-            match versions.iter().find(|x| x.version != *version) {
-                None => continue,
-                Some(v) => {
+        // Ensure that all A/B RoT images for each board kind have the same
+        // version number.
+        for (kind, mut single_board_rot_artifacts) in [
+            (
+                KnownArtifactKind::GimletRot,
+                self.gimlet_rot_a.iter().chain(&self.gimlet_rot_b),
+            ),
+            (
+                KnownArtifactKind::PscRot,
+                self.psc_rot_a.iter().chain(&self.psc_rot_b),
+            ),
+            (
+                KnownArtifactKind::SwitchRot,
+                self.sidecar_rot_a.iter().chain(&self.sidecar_rot_b),
+            ),
+        ] {
+            // We know each of these iterators has at least 2 elements (one from
+            // the A artifacts and one from the B artifacts, checked above) so
+            // we can safely unwrap the first.
+            let version =
+                &single_board_rot_artifacts.next().unwrap().id.version;
+            for artifact in single_board_rot_artifacts {
+                if artifact.id.version != *version {
                     return Err(RepositoryError::MultipleVersionsPresent {
                         kind,
                         v1: version.clone(),
-                        v2: v.version.clone(),
-                    })
+                        v2: artifact.id.version.clone(),
+                    });
                 }
             }
         }
+
         // Repeat the same version check for all SP images. (This is a separate
         // loop because the types of the iterators don't match.)
         for (kind, mut single_board_sp_artifacts) in [
@@ -855,32 +803,6 @@ pub struct UpdatePlanBuildOutput {
     pub artifacts_meta: Vec,
 }

-// We take id solely to be able to output error messages
-fn read_hubris_sign_from_archive(
-    id: ArtifactId,
-    data: Vec<u8>,
-) -> Result<(ArtifactId, Vec<u8>), RepositoryError> {
-    let archive = match RawHubrisArchive::from_vec(data).map_err(Box::new) {
-        Ok(archive) => archive,
-        Err(error) => {
-            return Err(RepositoryError::ParsingHubrisArchive { id, error });
-        }
-    };
-    let caboose = match archive.read_caboose().map_err(Box::new) {
-        Ok(caboose) => caboose,
-        Err(error) => {
-            return Err(RepositoryError::ReadHubrisCaboose { id, error });
-        }
-    };
-    let sign = match caboose.sign() {
-        Ok(sign) => sign,
-        Err(error) => {
-            return Err(RepositoryError::ReadHubrisCabooseBoard { id, error });
-        }
-    };
-    Ok((id, sign.to_vec()))
-}
-
 // This function takes and returns `id` to avoid an unnecessary clone; `id` will
 // be present in either the Ok tuple or the error.
 fn read_hubris_board_from_archive(
@@ -973,11 +895,11 @@ mod tests {
         tarball: Bytes,
     }

-    fn make_random_rot_image(sign: &str, board: &str) -> RandomRotImage {
+    fn make_random_rot_image() -> RandomRotImage {
         use tufaceous_lib::CompositeRotArchiveBuilder;

-        let archive_a = make_fake_rot_image(sign, board);
-        let archive_b = make_fake_rot_image(sign, board);
+        let archive_a = make_random_bytes();
+        let archive_b = make_random_bytes();

         let mut builder =
             CompositeRotArchiveBuilder::new(Vec::new(), MtimeSource::Zero)
@@ -1004,22 +926,6 @@
         }
     }

-    fn make_fake_rot_image(sign: &str, board: &str) -> Vec<u8> {
-        use hubtools::{CabooseBuilder, HubrisArchiveBuilder};
-
-        let caboose = CabooseBuilder::default()
-            .git_commit("this-is-fake-data")
-            .board(board)
-            .version("0.0.0")
-            .name("rot-bord")
-            .sign(sign)
-            .build();
-
-        let mut builder = HubrisArchiveBuilder::with_fake_image();
-        builder.write_caboose(caboose.as_slice()).unwrap();
-        builder.build_to_vec().unwrap()
-    }
-
     fn make_fake_sp_image(board: &str) -> Vec<u8> {
         use hubtools::{CabooseBuilder, HubrisArchiveBuilder};

@@ -1035,288 +941,6 @@
         builder.build_to_vec().unwrap()
     }

-    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-    async fn test_bad_rot_versions() {
-        const VERSION_0: SemverVersion = SemverVersion::new(0, 0, 0);
-        const VERSION_1: SemverVersion = SemverVersion::new(0, 0, 1);
-
-        let logctx = test_setup_log("test_multi_rot_version");
-
-        let mut plan_builder =
-            UpdatePlanBuilder::new(VERSION_0, &logctx.log).unwrap();
-
-        // The control plane artifact can be arbitrary bytes; just populate it
-        // with random data.
-        {
-            let kind = KnownArtifactKind::ControlPlane;
-            let data = make_random_bytes();
-            let hash = ArtifactHash(Sha256::digest(&data).into());
-            let id = ArtifactId {
-                name: format!("{kind:?}"),
-                version: VERSION_0,
-                kind: kind.into(),
-            };
-            plan_builder
-                .add_artifact(
-                    id,
-                    hash,
-                    futures::stream::iter([Ok(Bytes::from(data))]),
-                )
-                .await
-                .unwrap();
-        }
-
-        // For each SP image, we'll insert two artifacts: these should end up in
-        // the update plan's SP image maps keyed by their "board". Normally the
-        // board is read from the archive itself via hubtools; we'll inject a
-        // test function that returns the artifact ID name as the board instead.
-        for (kind, boards) in [
-            (KnownArtifactKind::GimletSp, ["test-gimlet-a", "test-gimlet-b"]),
-            (KnownArtifactKind::PscSp, ["test-psc-a", "test-psc-b"]),
-            (KnownArtifactKind::SwitchSp, ["test-switch-a", "test-switch-b"]),
-        ] {
-            for board in boards {
-                let data = make_fake_sp_image(board);
-                let hash = ArtifactHash(Sha256::digest(&data).into());
-                let id = ArtifactId {
-                    name: board.to_string(),
-                    version: VERSION_0,
-                    kind: kind.into(),
-                };
-                plan_builder
-                    .add_artifact(
-                        id,
-                        hash,
-                        futures::stream::iter([Ok(Bytes::from(data))]),
-                    )
-                    .await
-                    .unwrap();
-            }
-        }
-
-        // The Host, Trampoline, and RoT artifacts must be structed the way we
-        // expect (i.e., .tar.gz's containing multiple inner artifacts).
-        let host = make_random_host_os_image();
-        let trampoline = make_random_host_os_image();
-
-        for (kind, image) in [
-            (KnownArtifactKind::Host, &host),
-            (KnownArtifactKind::Trampoline, &trampoline),
-        ] {
-            let data = &image.tarball;
-            let hash = ArtifactHash(Sha256::digest(data).into());
-            let id = ArtifactId {
-                name: format!("{kind:?}"),
-                version: VERSION_0,
-                kind: kind.into(),
-            };
-            plan_builder
-                .add_artifact(
-                    id,
-                    hash,
-                    futures::stream::iter([Ok(data.clone())]),
-                )
-                .await
-                .unwrap();
-        }
-
-        let gimlet_rot = make_random_rot_image("gimlet", "gimlet");
-        let psc_rot = make_random_rot_image("psc", "psc");
-        let sidecar_rot = make_random_rot_image("sidecar", "sidecar");
-
-        let gimlet_rot_2 = make_random_rot_image("gimlet", "gimlet-the second");
-
-        for (kind, artifact) in [
-            (KnownArtifactKind::GimletRot, &gimlet_rot),
-            (KnownArtifactKind::PscRot, &psc_rot),
-            (KnownArtifactKind::SwitchRot, &sidecar_rot),
-        ] {
-            let data = &artifact.tarball;
-            let hash = ArtifactHash(Sha256::digest(data).into());
-            let id = ArtifactId {
-                name: format!("{kind:?}"),
-                version: VERSION_0,
-                kind: kind.into(),
-            };
-            plan_builder
-                .add_artifact(
-                    id,
-                    hash,
-                    futures::stream::iter([Ok(data.clone())]),
-                )
-                .await
-                .unwrap();
-        }
-
-        let bad_kind = KnownArtifactKind::GimletRot;
-        let data = &gimlet_rot_2.tarball;
-        let hash = ArtifactHash(Sha256::digest(data).into());
-        let id = ArtifactId {
-            name: format!("{bad_kind:?}"),
-            version: VERSION_1,
-            kind: bad_kind.into(),
-        };
-        plan_builder
-            .add_artifact(id, hash, futures::stream::iter([Ok(data.clone())]))
-            .await
-            .unwrap();
-
-        match plan_builder.build() {
-            Err(_) => (),
-            Ok(_) => panic!("Added two artifacts with the same version"),
-        }
-        logctx.cleanup_successful();
-    }
-
-    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-    async fn test_multi_rot_version() {
-        const VERSION_0: SemverVersion = SemverVersion::new(0, 0, 0);
-        const VERSION_1: SemverVersion = SemverVersion::new(0, 0, 1);
-
-        let logctx = test_setup_log("test_multi_rot_version");
-
-        let mut plan_builder =
-            UpdatePlanBuilder::new("0.0.0".parse().unwrap(), &logctx.log)
-                .unwrap();
-
-        // The control plane artifact can be arbitrary bytes; just populate it
-        // with random data.
-        {
-            let kind = KnownArtifactKind::ControlPlane;
-            let data = make_random_bytes();
-            let hash = ArtifactHash(Sha256::digest(&data).into());
-            let id = ArtifactId {
-                name: format!("{kind:?}"),
-                version: VERSION_0,
-                kind: kind.into(),
-            };
-            plan_builder
-                .add_artifact(
-                    id,
-                    hash,
-                    futures::stream::iter([Ok(Bytes::from(data))]),
-                )
-                .await
-                .unwrap();
-        }
-
-        // For each SP image, we'll insert two artifacts: these should end up in
-        // the update plan's SP image maps keyed by their "board". Normally the
-        // board is read from the archive itself via hubtools; we'll inject a
-        // test function that returns the artifact ID name as the board instead.
-        for (kind, boards) in [
-            (KnownArtifactKind::GimletSp, ["test-gimlet-a", "test-gimlet-b"]),
-            (KnownArtifactKind::PscSp, ["test-psc-a", "test-psc-b"]),
-            (KnownArtifactKind::SwitchSp, ["test-switch-a", "test-switch-b"]),
-        ] {
-            for board in boards {
-                let data = make_fake_sp_image(board);
-                let hash = ArtifactHash(Sha256::digest(&data).into());
-                let id = ArtifactId {
-                    name: board.to_string(),
-                    version: VERSION_0,
-                    kind: kind.into(),
-                };
-                plan_builder
-                    .add_artifact(
-                        id,
-                        hash,
-                        futures::stream::iter([Ok(Bytes::from(data))]),
-                    )
-                    .await
-                    .unwrap();
-            }
-        }
-
-        // The Host, Trampoline, and RoT artifacts must be structed the way we
-        // expect (i.e., .tar.gz's containing multiple inner artifacts).
-        let host = make_random_host_os_image();
-        let trampoline = make_random_host_os_image();
-
-        for (kind, image) in [
-            (KnownArtifactKind::Host, &host),
-            (KnownArtifactKind::Trampoline, &trampoline),
-        ] {
-            let data = &image.tarball;
-            let hash = ArtifactHash(Sha256::digest(data).into());
-            let id = ArtifactId {
-                name: format!("{kind:?}"),
-                version: VERSION_0,
-                kind: kind.into(),
-            };
-            plan_builder
-                .add_artifact(
-                    id,
-                    hash,
-                    futures::stream::iter([Ok(data.clone())]),
-                )
-                .await
-                .unwrap();
-        }
-
-        let gimlet_rot = make_random_rot_image("gimlet", "gimlet");
-        let psc_rot = make_random_rot_image("psc", "psc");
-        let sidecar_rot = make_random_rot_image("sidecar", "sidecar");
-
-        let gimlet_rot_2 = make_random_rot_image("gimlet2", "gimlet");
-        let psc_rot_2 = make_random_rot_image("psc2", "psc");
-        let sidecar_rot_2 = make_random_rot_image("sidecar2", "sidecar");
-
-        for (kind, artifact) in [
-            (KnownArtifactKind::GimletRot, &gimlet_rot),
-            (KnownArtifactKind::PscRot, &psc_rot),
-            (KnownArtifactKind::SwitchRot, &sidecar_rot),
-        ] {
-            let data = &artifact.tarball;
-            let hash = ArtifactHash(Sha256::digest(data).into());
-            let id = ArtifactId {
-                name: format!("{kind:?}"),
-                version: VERSION_0,
-                kind: kind.into(),
-            };
-            plan_builder
-                .add_artifact(
-                    id,
-                    hash,
-                    futures::stream::iter([Ok(data.clone())]),
-                )
-                .await
-                .unwrap();
-        }
-
-        for (kind, artifact) in [
-            (KnownArtifactKind::GimletRot, &gimlet_rot_2),
-            (KnownArtifactKind::PscRot, &psc_rot_2),
-            (KnownArtifactKind::SwitchRot, &sidecar_rot_2),
-        ] {
-            let data = &artifact.tarball;
-            let hash = ArtifactHash(Sha256::digest(data).into());
-            let id = ArtifactId {
-                name: format!("{kind:?}"),
-                version: VERSION_1,
-                kind: kind.into(),
-            };
-            plan_builder
-                .add_artifact(
-                    id,
-                    hash,
-                    futures::stream::iter([Ok(data.clone())]),
-                )
-                .await
-                .unwrap();
-        }
-
-        let UpdatePlanBuildOutput { plan, .. } = plan_builder.build().unwrap();
-
-        assert_eq!(plan.gimlet_rot_a.len(), 2);
-        assert_eq!(plan.gimlet_rot_b.len(), 2);
-        assert_eq!(plan.psc_rot_a.len(), 2);
-        assert_eq!(plan.psc_rot_b.len(), 2);
-        assert_eq!(plan.sidecar_rot_a.len(), 2);
-        assert_eq!(plan.sidecar_rot_b.len(), 2);
-        logctx.cleanup_successful();
-    }
-
     // See documentation for extract_nested_artifact_pair for why multi_thread
     // is required.
     #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -1427,9 +1051,9 @@ mod tests {
                 .unwrap();
         }

-        let gimlet_rot = make_random_rot_image("gimlet", "gimlet");
-        let psc_rot = make_random_rot_image("psc", "psc");
-        let sidecar_rot = make_random_rot_image("sidecar", "sidecar");
+        let gimlet_rot = make_random_rot_image();
+        let psc_rot = make_random_rot_image();
+        let sidecar_rot = make_random_rot_image();

         for (kind, artifact) in [
             (KnownArtifactKind::GimletRot, &gimlet_rot),
diff --git a/update-common/src/errors.rs b/update-common/src/errors.rs
index 3d57fb6ab5..0d65312c56 100644
--- a/update-common/src/errors.rs
+++ b/update-common/src/errors.rs
@@ -140,14 +140,6 @@ pub enum RepositoryError {
         "duplicate hash entries found in artifacts.json for kind `{}`, hash `{}`",
         .0.kind, .0.hash
     )]
     DuplicateHashEntry(ArtifactHashId),
-    #[error("error creating reader stream")]
-    CreateReaderStream(#[source] anyhow::Error),
-    #[error("error reading extracted archive kind {}, hash {}", .artifact.kind, .artifact.hash)]
-    ReadExtractedArchive {
-        artifact: ArtifactHashId,
-        #[source]
-        error: std::io::Error,
-    },
 }

 impl RepositoryError {
@@ -161,9 +153,7 @@ impl RepositoryError {
             | RepositoryError::TempFileCreate(_)
             | RepositoryError::TempFileWrite(_)
             | RepositoryError::TempFileFlush(_)
-            | RepositoryError::NamedTempFileCreate { .. }
-            | RepositoryError::ReadExtractedArchive { .. }
-            | RepositoryError::CreateReaderStream { .. } => {
+            | RepositoryError::NamedTempFileCreate { .. } => {
                 HttpError::for_unavail(None, message)
             }
diff --git a/wicket-common/src/rack_setup.rs b/wicket-common/src/rack_setup.rs
index ba88c258a5..33fbcb65b3 100644
--- a/wicket-common/src/rack_setup.rs
+++ b/wicket-common/src/rack_setup.rs
@@ -16,6 +16,7 @@ use omicron_common::api::internal::shared::BgpPeerConfig;
 use omicron_common::api::internal::shared::PortFec;
 use omicron_common::api::internal::shared::PortSpeed;
 use omicron_common::api::internal::shared::RouteConfig;
+use omicron_common::api::internal::shared::UplinkAddressConfig;
 use omicron_common::update::ArtifactHash;
 use owo_colors::OwoColorize;
 use owo_colors::Style;
@@ -170,18 +171,18 @@ impl UserSpecifiedRackNetworkConfig {
     }
 }

-/// User-specified version of [`PortConfigV1`].
+/// User-specified version of [`PortConfigV2`].
 ///
-/// All of [`PortConfigV1`] is user-specified. But we expect the port name to
-/// be a key, rather than a field as in [`PortConfigV1`]. So this has all of
+/// All of [`PortConfigV2`] is user-specified. But we expect the port name to
+/// be a key, rather than a field as in [`PortConfigV2`]. So this has all of
 /// the fields other than the port name.
 ///
-/// [`PortConfigV1`]: omicron_common::api::internal::shared::PortConfigV1
+/// [`PortConfigV2`]: omicron_common::api::internal::shared::PortConfigV2
 #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, JsonSchema)]
 #[serde(deny_unknown_fields)]
 pub struct UserSpecifiedPortConfig {
     pub routes: Vec,
-    pub addresses: Vec,
+    pub addresses: Vec<UplinkAddressConfig>,
     pub uplink_port_speed: PortSpeed,
     pub uplink_port_fec: PortFec,
     pub autoneg: bool,
diff --git a/wicket/src/cli/rack_setup/config_toml.rs b/wicket/src/cli/rack_setup/config_toml.rs
index 164ecf16b7..cef3746ff9 100644
--- a/wicket/src/cli/rack_setup/config_toml.rs
+++ b/wicket/src/cli/rack_setup/config_toml.rs
@@ -9,6 +9,7 @@ use omicron_common::address::IpRange;
 use omicron_common::api::external::AllowedSourceIps;
 use omicron_common::api::internal::shared::BgpConfig;
 use omicron_common::api::internal::shared::RouteConfig;
+use omicron_common::api::internal::shared::UplinkAddressConfig;
 use serde::Serialize;
 use sled_hardware_types::Baseboard;
 use std::borrow::Cow;
@@ -340,7 +341,13 @@ fn populate_uplink_table(cfg: &UserSpecifiedPortConfig) -> Table {
     // addresses = []
     let mut addresses_out = Array::new();
     for a in addresses {
-        addresses_out.push(string_value(a));
+        let UplinkAddressConfig { address, vlan_id } = a;
+        let mut x = InlineTable::new();
+        x.insert("address", string_value(address));
+        if let Some(vlan_id) = vlan_id {
+            x.insert("vlan_id", i64_value(i64::from(*vlan_id)));
+        }
+        addresses_out.push(Value::InlineTable(x));
     }
     uplink.insert("addresses", Item::Value(Value::Array(addresses_out)));
diff --git a/wicket/src/state/rack.rs b/wicket/src/state/rack.rs
index 67e7d9478c..3fbe4f762b 100644
--- a/wicket/src/state/rack.rs
+++ b/wicket/src/state/rack.rs
@@ -102,13 +102,8 @@ impl RackState {
     pub fn left_or_right(&mut self) {
         match self.selected {
             ComponentId::Sled(i) => {
-                if self.left_column {
-                    self.left_column = false;
-                    self.selected = ComponentId::Sled(i + 1);
-                } else {
-                    self.left_column = true;
-                    self.selected = ComponentId::Sled(i - 1);
-                }
+                self.selected = ComponentId::Sled(i ^ 1);
+                self.set_column();
             }
             _ => (),
         }
     }
diff --git a/wicket/src/ui/panes/rack_setup.rs b/wicket/src/ui/panes/rack_setup.rs
index 941f5f7dc1..b4fa9de6f0 100644
--- a/wicket/src/ui/panes/rack_setup.rs
+++ b/wicket/src/ui/panes/rack_setup.rs
@@ -792,10 +792,19 @@ fn rss_config_text<'a>(
         });

     let addresses = addresses.iter().map(|a| {
-        vec![
+        let mut items = vec![
             Span::styled(" • Address : ", label_style),
-            Span::styled(a.to_string(), ok_style),
-        ]
+            Span::styled(a.address.to_string(), ok_style),
+        ];
+        if let Some(vlan_id) = a.vlan_id {
+            items.extend([
+                Span::styled(" (vlan_id=", label_style),
+                Span::styled(vlan_id.to_string(), ok_style),
+                Span::styled(")", label_style),
+            ]);
+        }
+
+        items
     });

     let peers = bgp_peers.iter().flat_map(|p| {
diff --git a/wicket/tests/output/example_non_empty.toml b/wicket/tests/output/example_non_empty.toml
index 8785d158de..717e940ca5 100644
--- a/wicket/tests/output/example_non_empty.toml
+++ b/wicket/tests/output/example_non_empty.toml
@@ -74,7 +74,7 @@ infra_ip_last = "172.30.0.10"
 [rack_network_config.switch0.port0]
 routes = [{ nexthop = "172.30.0.10", destination = "0.0.0.0/0", vlan_id = 1 }]
-addresses = ["172.30.0.1/24"]
+addresses = [{ address = "172.30.0.1/24" }]
 uplink_port_speed = "speed400_g"
 uplink_port_fec = "firecode"
 autoneg = true
@@ -113,7 +113,7 @@ enforce_first_as = true
 [rack_network_config.switch1.port0]
 routes = [{ nexthop = "172.33.0.10", destination = "0.0.0.0/0", vlan_id = 1 }]
-addresses = ["172.32.0.1/24"]
["172.32.0.1/24"] +addresses = [{ address = "172.32.0.1/24" }] uplink_port_speed = "speed400_g" uplink_port_fec = "firecode" autoneg = true diff --git a/wicketd/src/preflight_check/uplink.rs b/wicketd/src/preflight_check/uplink.rs index 3a70823b5b..395fb8c795 100644 --- a/wicketd/src/preflight_check/uplink.rs +++ b/wicketd/src/preflight_check/uplink.rs @@ -301,7 +301,10 @@ fn add_steps_for_single_local_uplink_preflight_check<'a>( UplinkProperty(format!("uplinks/{}_0", port)); for addr in &uplink.addresses { - let uplink_cidr = addr.to_string(); + // This includes the CIDR only + let uplink_cidr = addr.address.to_string(); + // This includes the VLAN ID, if any + let uplink_cfg = addr.to_string(); if let Err(err) = execute_command(&[ SVCCFG, "-s", @@ -309,7 +312,7 @@ fn add_steps_for_single_local_uplink_preflight_check<'a>( "addpropvalue", &uplink_property.0, "astring:", - &uplink_cidr, + &uplink_cfg, ]) .await { diff --git a/wicketd/src/rss_config.rs b/wicketd/src/rss_config.rs index 77e107a129..dde6d35da5 100644 --- a/wicketd/src/rss_config.rs +++ b/wicketd/src/rss_config.rs @@ -17,7 +17,7 @@ use anyhow::Result; use bootstrap_agent_client::types::BootstrapAddressDiscovery; use bootstrap_agent_client::types::Certificate; use bootstrap_agent_client::types::Name; -use bootstrap_agent_client::types::PortConfigV1 as BaPortConfigV1; +use bootstrap_agent_client::types::PortConfigV2 as BaPortConfigV2; use bootstrap_agent_client::types::RackInitializeRequest; use bootstrap_agent_client::types::RecoverySiloConfig; use bootstrap_agent_client::types::UserId; @@ -609,7 +609,7 @@ pub(crate) enum BgpAuthKeyError { fn validate_rack_network_config( config: &UserSpecifiedRackNetworkConfig, bgp_auth_keys: &BTreeMap>, -) -> Result { +) -> Result { use bootstrap_agent_client::types::BgpConfig as BaBgpConfig; // Ensure that there is at least one uplink @@ -651,7 +651,7 @@ fn validate_rack_network_config( // TODO Add more client side checks on `rack_network_config` contents? - Ok(bootstrap_agent_client::types::RackNetworkConfigV1 { + Ok(bootstrap_agent_client::types::RackNetworkConfigV2 { rack_subnet: RACK_SUBNET.net(), infra_ip_first: config.infra_ip_first, infra_ip_last: config.infra_ip_last, @@ -676,7 +676,7 @@ fn validate_rack_network_config( }) } -/// Builds a `BaPortConfigV1` from a `UserSpecifiedPortConfig`. +/// Builds a `BaPortConfigV2` from a `UserSpecifiedPortConfig`. /// /// Assumes that all auth keys are present in `bgp_auth_keys`. 
 fn build_port_config(
@@ -684,16 +684,17 @@
     port: &str,
     config: &UserSpecifiedPortConfig,
     bgp_auth_keys: &BTreeMap>,
-) -> BaPortConfigV1 {
+) -> BaPortConfigV2 {
     use bootstrap_agent_client::types::BgpPeerConfig as BaBgpPeerConfig;
     use bootstrap_agent_client::types::PortFec as BaPortFec;
     use bootstrap_agent_client::types::PortSpeed as BaPortSpeed;
     use bootstrap_agent_client::types::RouteConfig as BaRouteConfig;
     use bootstrap_agent_client::types::SwitchLocation as BaSwitchLocation;
+    use bootstrap_agent_client::types::UplinkAddressConfig as BaUplinkAddressConfig;
     use omicron_common::api::internal::shared::PortFec;
     use omicron_common::api::internal::shared::PortSpeed;

-    BaPortConfigV1 {
+    BaPortConfigV2 {
         port: port.to_owned(),
         routes: config
             .routes
@@ -704,7 +705,14 @@ fn build_port_config(
                 vlan_id: r.vlan_id,
             })
             .collect(),
-        addresses: config.addresses.iter().cloned().map(Into::into).collect(),
+        addresses: config
+            .addresses
+            .iter()
+            .map(|a| BaUplinkAddressConfig {
+                address: a.address,
+                vlan_id: a.vlan_id,
+            })
+            .collect(),
         bgp_peers: config
             .bgp_peers
             .iter()