diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index 525ea490be..53a37faa50 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -67,6 +67,9 @@ progenitor::generate_api!( TypedUuidForUpstairsSessionKind = omicron_uuid_kinds::TypedUuid, TypedUuidForVolumeKind = omicron_uuid_kinds::TypedUuid, TypedUuidForZpoolKind = omicron_uuid_kinds::TypedUuid, + UpdateStatus = nexus_types::internal_api::views::UpdateStatus, + ZoneStatus = nexus_types::internal_api::views::ZoneStatus, + ZoneStatusVersion = nexus_types::internal_api::views::ZoneStatusVersion }, patch = { SledAgentInfo = { derives = [PartialEq, Eq] }, diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 50b42b6392..d7e9066748 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -134,6 +134,8 @@ enum NexusCommands { /// interact with support bundles #[command(visible_alias = "sb")] SupportBundles(SupportBundleArgs), + /// show running artifact versions + UpdateStatus, } #[derive(Debug, Args)] @@ -778,6 +780,9 @@ impl NexusArgs { NexusCommands::SupportBundles(SupportBundleArgs { command: SupportBundleCommands::Inspect(args), }) => cmd_nexus_support_bundles_inspect(&client, args).await, + NexusCommands::UpdateStatus => { + cmd_nexus_update_status(&client).await + } } } } @@ -4044,3 +4049,47 @@ async fn cmd_nexus_support_bundles_inspect( support_bundle_viewer::run_dashboard(accessor).await } + +/// Runs `omdb nexus update-status`: prints each sled's zones and their versions as a table +async fn cmd_nexus_update_status( + client: &nexus_client::Client, +) -> Result<(), anyhow::Error> { + let status = client + .update_status() + .await + .context("retrieving update status")? 
+ .into_inner(); + + #[derive(Tabled)] + #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] + struct ZoneRow { + sled_id: String, + zone_type: String, + zone_id: String, + version: String, + } + + let mut rows = Vec::new(); + for (sled_id, mut statuses) in status.zones.into_iter() { + statuses.sort_unstable_by_key(|s| { + (s.zone_type.kind(), s.zone_id, s.version.clone()) + }); + for status in statuses { + rows.push(ZoneRow { + sled_id: sled_id.to_string(), + zone_type: status.zone_type.kind().name_prefix().into(), + zone_id: status.zone_id.to_string(), + version: status.version.to_string(), + }); + } + } + + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("Running Zones"); + println!("{}", table); + Ok(()) +} diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index 472e2b3cc9..70b7c683ab 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -709,6 +709,7 @@ Commands: sagas view sagas, create and complete demo sagas sleds interact with sleds support-bundles interact with support bundles [aliases: sb] + update-status show running artifact versions help Print this message or the help of the given subcommand(s) Options: diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index 70a87cba41..4dc265f180 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -147,6 +147,22 @@ impl ConfigReconcilerInventory { }) } + /// Iterate over all zones contained in the most-recently-reconciled sled + /// config and report their status as of that reconciliation. 
+ pub fn reconciled_omicron_zones( + &self, + ) -> impl Iterator + { + // `self.zones` may contain zone IDs that aren't present in + // `last_reconciled_config` at all, if we failed to _shut down_ zones + // that are no longer present in the config. We use `filter_map` to + // strip those out, and only report on the configured zones. + self.zones.iter().filter_map(|(zone_id, result)| { + let config = self.last_reconciled_config.zones.get(zone_id)?; + Some((config, result)) + }) + } + /// Given a sled config, produce a reconciler result that sled-agent could /// have emitted if reconciliation succeeded. /// diff --git a/nexus/internal-api/src/lib.rs b/nexus/internal-api/src/lib.rs index a6e3fe49fd..0038d90e95 100644 --- a/nexus/internal-api/src/lib.rs +++ b/nexus/internal-api/src/lib.rs @@ -19,9 +19,7 @@ use nexus_types::{ headers::RangeRequest, params::{self, PhysicalDiskPath, SledSelector, UninitializedSledId}, shared::{self, ProbeInfo, UninitializedSled}, - views::Ping, - views::PingStatus, - views::SledPolicy, + views::{Ping, PingStatus, SledPolicy}, }, internal_api::{ params::{ @@ -30,7 +28,7 @@ use nexus_types::{ }, views::{ BackgroundTask, DemoSaga, Ipv4NatEntryView, MgsUpdateDriverStatus, - Saga, + Saga, UpdateStatus, }, }, }; @@ -487,6 +485,15 @@ pub trait NexusInternalApi { blueprint: TypedBody, ) -> Result; + /// Show deployed versions of artifacts + #[endpoint { + method = GET, + path = "/deployment/update-status" + }] + async fn update_status( + rqctx: RequestContext, + ) -> Result, HttpError>; + /// List uninitialized sleds #[endpoint { method = GET, diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index 79e7a93e6d..f0a5b391f5 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -13,6 +13,7 @@ use nexus_types::deployment::BlueprintMetadata; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintTargetSet; use nexus_types::deployment::PlanningInput; +use 
nexus_types::internal_api::views::UpdateStatus; use nexus_types::inventory::Collection; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; @@ -200,4 +201,26 @@ impl super::Nexus { let _ = self.blueprint_add(&opctx, &blueprint).await?; Ok(()) } + + pub async fn update_status( + &self, + opctx: &OpContext, + ) -> Result { + let planning_context = self.blueprint_planning_context(opctx).await?; + let inventory = planning_context.inventory.ok_or_else(|| { + Error::internal_error("no recent inventory collection found") + })?; + let new = planning_context.planning_input.tuf_repo(); + let old = planning_context.planning_input.old_repo(); + let status = UpdateStatus::new( + old, + new, + inventory + .sled_agents + .iter() + .map(|(sled_id, agent)| (sled_id, &agent.last_reconciliation)), + ); + + Ok(status) + } } diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 7a6dd79673..b3e84519c1 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -47,6 +47,7 @@ use nexus_types::internal_api::views::DemoSaga; use nexus_types::internal_api::views::Ipv4NatEntryView; use nexus_types::internal_api::views::MgsUpdateDriverStatus; use nexus_types::internal_api::views::Saga; +use nexus_types::internal_api::views::UpdateStatus; use nexus_types::internal_api::views::to_list; use omicron_common::api::external::Instance; use omicron_common::api::external::http_pagination::PaginatedById; @@ -857,6 +858,23 @@ impl NexusInternalApi for NexusInternalApiImpl { .await } + async fn update_status( + rqctx: RequestContext, + ) -> Result, HttpError> { + let apictx = &rqctx.context().context; + let handler = async { + let opctx = + crate::context::op_context_for_internal_api(&rqctx).await; + let nexus = &apictx.nexus; + let result = nexus.update_status(&opctx).await?; + Ok(HttpResponseOk(result)) + }; + apictx + .internal_latencies + 
.instrument_dropshot_handler(&rqctx, handler) + .await + } + async fn sled_list_uninitialized( rqctx: RequestContext, ) -> Result>, HttpError> { diff --git a/nexus/types/src/internal_api/views.rs b/nexus/types/src/internal_api/views.rs index a08b136ed6..4780c8a272 100644 --- a/nexus/types/src/internal_api/views.rs +++ b/nexus/types/src/internal_api/views.rs @@ -9,13 +9,20 @@ use chrono::SecondsFormat; use chrono::Utc; use futures::future::ready; use futures::stream::StreamExt; +use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory; +use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult; +use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; +use nexus_sled_agent_shared::inventory::OmicronZoneType; use omicron_common::api::external::MacAddr; use omicron_common::api::external::ObjectStream; +use omicron_common::api::external::TufRepoDescription; use omicron_common::api::external::Vni; use omicron_common::snake_case_result; use omicron_common::snake_case_result::SnakeCaseResult; use omicron_uuid_kinds::DemoSagaUuid; +use omicron_uuid_kinds::{OmicronZoneUuid, SledUuid}; use schemars::JsonSchema; +use semver::Version; use serde::Deserialize; use serde::Serialize; use std::collections::BTreeMap; @@ -469,3 +476,121 @@ pub struct WaitingStatus { pub next_attempt_time: DateTime, pub nattempts_done: u32, } + +#[derive( + Debug, + Clone, + PartialEq, + Eq, + PartialOrd, + Ord, + Serialize, + Deserialize, + JsonSchema, +)] +#[serde( + rename_all = "snake_case", + tag = "zone_status_version", + content = "details" +)] +pub enum ZoneStatusVersion { + Unknown, + InstallDataset, + Version(Version), + Error(String), +} + +impl Display for ZoneStatusVersion { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ZoneStatusVersion::Unknown => write!(f, "unknown"), + ZoneStatusVersion::InstallDataset => write!(f, "install dataset"), + ZoneStatusVersion::Version(version) => { + write!(f, "{}", version) + } + 
ZoneStatusVersion::Error(s) => { + write!(f, "{}", s) + } + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] +pub struct ZoneStatus { + pub zone_id: OmicronZoneUuid, + pub zone_type: OmicronZoneType, + pub version: ZoneStatusVersion, +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +pub struct UpdateStatus { + pub zones: BTreeMap>, +} + +impl UpdateStatus { + pub fn new<'a>( + old: Option<&TufRepoDescription>, + new: Option<&TufRepoDescription>, + sleds: impl Iterator< + Item = (&'a SledUuid, &'a Option), + >, + ) -> UpdateStatus { + let zones = sleds + .map(|(sled_id, inv)| { + ( + *sled_id, + inv.as_ref().map_or(vec![], |inv| { + inv.reconciled_omicron_zones() + .map(|(conf, res)| ZoneStatus { + zone_id: conf.id, + zone_type: conf.zone_type.clone(), + version: Self::zone_image_source_to_version( + old, + new, + &conf.image_source, + res, + ), + }) + .collect() + }), + ) + }) + .collect(); + UpdateStatus { zones } + } + + pub fn zone_image_source_to_version( + old: Option<&TufRepoDescription>, + new: Option<&TufRepoDescription>, + source: &OmicronZoneImageSource, + res: &ConfigReconcilerInventoryResult, + ) -> ZoneStatusVersion { + if let ConfigReconcilerInventoryResult::Err { message } = res { + return ZoneStatusVersion::Error(message.clone()); + } + + let &OmicronZoneImageSource::Artifact { hash } = source else { + return ZoneStatusVersion::InstallDataset; + }; + + if let Some(old) = old { + if let Some(_) = old.artifacts.iter().find(|meta| meta.hash == hash) + { + return ZoneStatusVersion::Version( + old.repo.system_version.clone(), + ); + } + } + + if let Some(new) = new { + if let Some(_) = new.artifacts.iter().find(|meta| meta.hash == hash) + { + return ZoneStatusVersion::Version( + new.repo.system_version.clone(), + ); + } + } + + ZoneStatusVersion::Unknown + } +} diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 6e34d682b9..83fc60421a 100644 --- a/openapi/nexus-internal.json 
+++ b/openapi/nexus-internal.json @@ -677,6 +677,30 @@ } } }, + "/deployment/update-status": { + "get": { + "summary": "Show deployed versions of artifacts", + "operationId": "update_status", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/UpdateStatus" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/disk/{disk_id}/remove-read-only-parent": { "post": { "summary": "Request removal of a read_only_parent from a disk.", @@ -5336,6 +5360,357 @@ "snat_cfg" ] }, + "OmicronZoneType": { + "description": "Describes what kind of zone this is (i.e., what component is running in it) as well as any type-specific configuration", + "oneOf": [ + { + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "dns_servers": { + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "domain": { + "nullable": true, + "type": "string" + }, + "nic": { + "description": "The service vNIC providing outbound connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/components/schemas/NetworkInterface" + } + ] + }, + "ntp_servers": { + "type": "array", + "items": { + "type": "string" + } + }, + "snat_cfg": { + "description": "The SNAT configuration for outbound connections.", + "allOf": [ + { + "$ref": "#/components/schemas/SourceNatConfig" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "boundary_ntp" + ] + } + }, + "required": [ + "address", + "dns_servers", + "nic", + "ntp_servers", + "snat_cfg", + "type" + ] + }, + { + "description": "Type of clickhouse zone used for a single node clickhouse deployment", + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse" + ] + } + }, + "required": [ + 
"address", + "dataset", + "type" + ] + }, + { + "description": "A zone used to run a Clickhouse Keeper node\n\nKeepers are only used in replicated clickhouse setups", + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse_keeper" + ] + } + }, + "required": [ + "address", + "dataset", + "type" + ] + }, + { + "description": "A zone used to run a Clickhouse Server in a replicated deployment", + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse_server" + ] + } + }, + "required": [ + "address", + "dataset", + "type" + ] + }, + { + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "cockroach_db" + ] + } + }, + "required": [ + "address", + "dataset", + "type" + ] + }, + { + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "crucible" + ] + } + }, + "required": [ + "address", + "dataset", + "type" + ] + }, + { + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "crucible_pantry" + ] + } + }, + "required": [ + "address", + "type" + ] + }, + { + "type": "object", + "properties": { + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "dns_address": { + "description": "The address at which the external DNS server is reachable.", + "type": "string" + }, + "http_address": { + "description": "The address at which the external DNS server API is reachable.", + "type": "string" + }, + "nic": { + "description": 
"The service vNIC providing external connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/components/schemas/NetworkInterface" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "external_dns" + ] + } + }, + "required": [ + "dataset", + "dns_address", + "http_address", + "nic", + "type" + ] + }, + { + "type": "object", + "properties": { + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "dns_address": { + "type": "string" + }, + "gz_address": { + "description": "The addresses in the global zone which should be created\n\nFor the DNS service, which exists outside the sleds's typical subnet - adding an address in the GZ is necessary to allow inter-zone traffic routing.", + "type": "string", + "format": "ipv6" + }, + "gz_address_index": { + "description": "The address is also identified with an auxiliary bit of information to ensure that the created global zone address can have a unique name.", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "http_address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "internal_dns" + ] + } + }, + "required": [ + "dataset", + "dns_address", + "gz_address", + "gz_address_index", + "http_address", + "type" + ] + }, + { + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "internal_ntp" + ] + } + }, + "required": [ + "address", + "type" + ] + }, + { + "type": "object", + "properties": { + "external_dns_servers": { + "description": "External DNS servers Nexus can use to resolve external hosts.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "external_ip": { + "description": "The address at which the external nexus server is reachable.", + "type": "string", + "format": "ip" + }, + "external_tls": { + "description": "Whether Nexus's external endpoint should use TLS", + "type": "boolean" + }, + "internal_address": { + "description": "The address at which the 
internal nexus server is reachable.", + "type": "string" + }, + "nic": { + "description": "The service vNIC providing external connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/components/schemas/NetworkInterface" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "nexus" + ] + } + }, + "required": [ + "external_dns_servers", + "external_ip", + "external_tls", + "internal_address", + "nic", + "type" + ] + }, + { + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "oximeter" + ] + } + }, + "required": [ + "address", + "type" + ] + } + ] + }, "OximeterInfo": { "description": "Message used to notify Nexus that this oximeter instance is up and running.", "type": "object", @@ -7095,6 +7470,23 @@ "took_over_concurrent_update" ] }, + "UpdateStatus": { + "type": "object", + "properties": { + "zones": { + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ZoneStatus" + } + } + } + }, + "required": [ + "zones" + ] + }, "UplinkAddressConfig": { "type": "object", "properties": { @@ -7247,6 +7639,94 @@ "next_attempt_time" ] }, + "ZoneStatus": { + "type": "object", + "properties": { + "version": { + "$ref": "#/components/schemas/ZoneStatusVersion" + }, + "zone_id": { + "$ref": "#/components/schemas/TypedUuidForOmicronZoneKind" + }, + "zone_type": { + "$ref": "#/components/schemas/OmicronZoneType" + } + }, + "required": [ + "version", + "zone_id", + "zone_type" + ] + }, + "ZoneStatusVersion": { + "oneOf": [ + { + "type": "object", + "properties": { + "zone_status_version": { + "type": "string", + "enum": [ + "unknown" + ] + } + }, + "required": [ + "zone_status_version" + ] + }, + { + "type": "object", + "properties": { + "zone_status_version": { + "type": "string", + "enum": [ + "install_dataset" + ] + } + }, + "required": [ + "zone_status_version" + ] + }, + { + "type": "object", + "properties": { + "details": { + "type": "string", + 
"pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" + }, + "zone_status_version": { + "type": "string", + "enum": [ + "version" + ] + } + }, + "required": [ + "details", + "zone_status_version" + ] + }, + { + "type": "object", + "properties": { + "details": { + "type": "string" + }, + "zone_status_version": { + "type": "string", + "enum": [ + "error" + ] + } + }, + "required": [ + "details", + "zone_status_version" + ] + } + ] + }, "ZpoolName": { "title": "The name of a Zpool", "description": "Zpool names are of the format ox{i,p}_. They are either Internal or External, and should be unique",