diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 12eec3f783..e86f243b98 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -58,6 +58,7 @@ use nexus_db_model::HwBaseboardId; use nexus_db_model::Image; use nexus_db_model::Instance; use nexus_db_model::InvCollection; +use nexus_db_model::InvNvmeDiskFirmware; use nexus_db_model::InvPhysicalDisk; use nexus_db_model::IpAttachState; use nexus_db_model::IpKind; @@ -4666,36 +4667,62 @@ async fn cmd_db_inventory_physical_disks( model: String, serial: String, variant: String, + firmware: String, + next_firmware: String, } - use db::schema::inv_physical_disk::dsl; - let mut query = dsl::inv_physical_disk.into_boxed(); + use db::schema::inv_nvme_disk_firmware::dsl as firmware_dsl; + use db::schema::inv_physical_disk::dsl as disk_dsl; + + let mut query = disk_dsl::inv_physical_disk.into_boxed(); query = query.limit(i64::from(u32::from(limit))); if let Some(collection_id) = args.collection_id { query = query.filter( - dsl::inv_collection_id.eq(collection_id.into_untyped_uuid()), + disk_dsl::inv_collection_id.eq(collection_id.into_untyped_uuid()), ); } if let Some(sled_id) = args.sled_id { - query = query.filter(dsl::sled_id.eq(sled_id.into_untyped_uuid())); + query = query.filter(disk_dsl::sled_id.eq(sled_id.into_untyped_uuid())); } let disks = query - .select(InvPhysicalDisk::as_select()) + .left_join( + firmware_dsl::inv_nvme_disk_firmware.on( + firmware_dsl::inv_collection_id + .eq(disk_dsl::inv_collection_id) + .and(firmware_dsl::sled_id.eq(disk_dsl::sled_id)) + .and(firmware_dsl::slot.eq(disk_dsl::slot)), + ), + ) + .select(( + InvPhysicalDisk::as_select(), + Option::::as_select(), + )) .load_async(&**conn) .await .context("loading physical disks")?; - let rows = disks.into_iter().map(|disk| DiskRow { - inv_collection_id: disk.inv_collection_id.into_untyped_uuid(), - sled_id: disk.sled_id.into_untyped_uuid(), - slot: disk.slot, - vendor: 
disk.vendor, - model: disk.model.clone(), - serial: disk.serial.clone(), - variant: format!("{:?}", disk.variant), + let rows = disks.into_iter().map(|(disk, firmware)| { + let (active_firmware, next_firmware) = + if let Some(firmware) = firmware.as_ref() { + (firmware.current_version(), firmware.next_version()) + } else { + (None, None) + }; + + DiskRow { + inv_collection_id: disk.inv_collection_id.into_untyped_uuid(), + sled_id: disk.sled_id.into_untyped_uuid(), + slot: disk.slot, + vendor: disk.vendor, + model: disk.model.clone(), + serial: disk.serial.clone(), + variant: format!("{:?}", disk.variant), + firmware: active_firmware.unwrap_or("UNKNOWN").to_string(), + next_firmware: next_firmware.unwrap_or("").to_string(), + } }); let table = tabled::Table::new(rows) @@ -5101,6 +5128,7 @@ fn inv_collection_print_sleds(collection: &Collection) { identity, variant, slot, + .. } = disk; println!(" {variant:?}: {identity:?} in {slot}"); } diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index eda237a741..9c645cf04d 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -30,6 +30,14 @@ pub struct InventoryDisk { pub identity: omicron_common::disk::DiskIdentity, pub variant: DiskVariant, pub slot: i64, + // Today we only have NVMe disks so we embedded the firmware metadata here. + // In the future we can track firmware metadata in a unique type if we + // support more than one disk format. 
+ pub active_firmware_slot: u8, + pub next_active_firmware_slot: Option, + pub number_of_firmware_slots: u8, + pub slot1_is_read_only: bool, + pub slot_firmware_versions: Vec>, } /// Identifies information about zpools managed by the control plane diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index a57ca64b7c..4e22195039 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -7,10 +7,10 @@ use crate::omicron_zone_config::{self, OmicronZoneNic}; use crate::schema::{ hw_baseboard_id, inv_caboose, inv_collection, inv_collection_error, - inv_dataset, inv_omicron_zone, inv_omicron_zone_nic, inv_physical_disk, - inv_root_of_trust, inv_root_of_trust_page, inv_service_processor, - inv_sled_agent, inv_sled_omicron_zones, inv_zpool, sw_caboose, - sw_root_of_trust_page, + inv_dataset, inv_nvme_disk_firmware, inv_omicron_zone, + inv_omicron_zone_nic, inv_physical_disk, inv_root_of_trust, + inv_root_of_trust_page, inv_service_processor, inv_sled_agent, + inv_sled_omicron_zones, inv_zpool, sw_caboose, sw_root_of_trust_page, }; use crate::typed_uuid::DbTypedUuid; use crate::PhysicalDiskKind; @@ -33,7 +33,8 @@ use nexus_sled_agent_shared::inventory::{ OmicronZoneConfig, OmicronZoneType, OmicronZonesConfig, }; use nexus_types::inventory::{ - BaseboardId, Caboose, Collection, PowerState, RotPage, RotSlot, + BaseboardId, Caboose, Collection, NvmeFirmware, PowerState, RotPage, + RotSlot, }; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::zpool_name::ZpoolName; @@ -46,6 +47,7 @@ use omicron_uuid_kinds::ZpoolUuid; use omicron_uuid_kinds::{CollectionKind, OmicronZoneKind}; use omicron_uuid_kinds::{CollectionUuid, OmicronZoneUuid}; use std::net::{IpAddr, SocketAddrV6}; +use thiserror::Error; use uuid::Uuid; // See [`nexus_types::inventory::PowerState`]. 
@@ -885,18 +887,205 @@ impl InvPhysicalDisk { } } -impl From for nexus_types::inventory::PhysicalDisk { - fn from(disk: InvPhysicalDisk) -> Self { - Self { - identity: omicron_common::disk::DiskIdentity { - vendor: disk.vendor, - serial: disk.serial, - model: disk.model, - }, - variant: disk.variant.into(), - slot: disk.slot, +#[derive(Clone, Debug, Error)] +pub enum InvNvmeDiskFirmwareError { + #[error("active slot must be between 1 and 7, found `{0}`")] + InvalidActiveSlot(u8), + #[error("next active slot must be between 1 and 7, found `{0}`")] + InvalidNextActiveSlot(u8), + #[error("number of slots must be between 1 and 7, found `{0}`")] + InvalidNumberOfSlots(u8), + #[error("`{slots}` slots should match `{len}` firmware version entries")] + SlotFirmwareVersionsLengthMismatch { slots: u8, len: usize }, + #[error("firmware version string `{0}` contains non ascii characters")] + FirmwareVersionNotAscii(String), + #[error("firmware version string `{0}` must be 8 bytes or less")] + FirmwareVersionTooLong(String), + #[error("active firmware at slot `{0}` maps to empty slot")] + InvalidActiveSlotFirmware(u8), + #[error("next active firmware at slot `{0}` maps to empty slot")] + InvalidNextActiveSlotFirmware(u8), +} + +/// See [`nexus_types::inventory::PhysicalDiskFirmware::Nvme`]. +#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = inv_nvme_disk_firmware)] +pub struct InvNvmeDiskFirmware { + inv_collection_id: DbTypedUuid, + sled_id: DbTypedUuid, + slot: i64, + active_slot: SqlU8, + next_active_slot: Option, + number_of_slots: SqlU8, + slot1_is_read_only: bool, + slot_firmware_versions: Vec>, +} + +impl InvNvmeDiskFirmware { + pub fn new( + inv_collection_id: CollectionUuid, + sled_id: SledUuid, + sled_slot: i64, + firmware: &NvmeFirmware, + ) -> Result { + // NB: We first validate that the data given to us from an NVMe disk + // actually makes sense. 
For the purposes of testing we ensure that + // number of slots is validated first before any other field. + + // Valid NVMe slots are between 1 and 7. + let valid_slot = 1..=7; + if !valid_slot.contains(&firmware.number_of_slots) { + return Err(InvNvmeDiskFirmwareError::InvalidNumberOfSlots( + firmware.number_of_slots, + )); + } + if usize::from(firmware.number_of_slots) + != firmware.slot_firmware_versions.len() + { + return Err( + InvNvmeDiskFirmwareError::SlotFirmwareVersionsLengthMismatch { + slots: firmware.number_of_slots, + len: firmware.slot_firmware_versions.len(), + }, + ); + } + if !valid_slot.contains(&firmware.active_slot) { + return Err(InvNvmeDiskFirmwareError::InvalidActiveSlot( + firmware.active_slot, + )); + } + // active_slot maps to a populated version + let Some(Some(_fw_string)) = firmware + .slot_firmware_versions + .get(usize::from(firmware.active_slot) - 1) + else { + return Err(InvNvmeDiskFirmwareError::InvalidActiveSlotFirmware( + firmware.active_slot, + )); + }; + if let Some(next_active_slot) = firmware.next_active_slot { + if !valid_slot.contains(&next_active_slot) { + return Err(InvNvmeDiskFirmwareError::InvalidNextActiveSlot( + next_active_slot, + )); + } + + // next_active_slot maps to a populated version + let Some(Some(_fw_string)) = firmware + .slot_firmware_versions + .get(usize::from(next_active_slot) - 1) + else { + return Err( + InvNvmeDiskFirmwareError::InvalidNextActiveSlotFirmware( + next_active_slot, + ), + ); + }; + } + // slot fw strings must be a max of 8 bytes and must be ascii characters + for fw_string in firmware.slot_firmware_versions.iter().flatten() { + if !fw_string.is_ascii() { + return Err(InvNvmeDiskFirmwareError::FirmwareVersionNotAscii( + fw_string.clone(), + )); + } + if fw_string.len() > 8 { + return Err(InvNvmeDiskFirmwareError::FirmwareVersionTooLong( + fw_string.clone(), + )); + } + } + + Ok(Self { + inv_collection_id: inv_collection_id.into(), + sled_id: sled_id.into(), + slot: sled_slot, + 
active_slot: firmware.active_slot.into(), + next_active_slot: firmware.next_active_slot.map(|nas| nas.into()), + number_of_slots: firmware.number_of_slots.into(), + slot1_is_read_only: firmware.slot1_is_read_only, + slot_firmware_versions: firmware.slot_firmware_versions.clone(), + }) + } + + /// Attempt to read the current firmware version. + pub fn current_version(&self) -> Option<&str> { + match self.active_slot.0 { + // be paranoid that we have a value within the NVMe spec + slot @ 1..=7 => self + .slot_firmware_versions + .get(usize::from(slot) - 1) + .and_then(|v| v.as_deref()), + _ => None, + } + } + + /// Attempt to read the staged firmware version that will be active upon + /// next device reset. + pub fn next_version(&self) -> Option<&str> { + match self.next_active_slot { + // be paranoid that we have a value within the NVMe spec + Some(slot) if slot.0 <= 7 && slot.0 >= 1 => self + .slot_firmware_versions + .get(usize::from(slot.0) - 1) + .and_then(|v| v.as_deref()), + _ => None, } } + + pub fn inv_collection_id(&self) -> DbTypedUuid { + self.inv_collection_id + } + + pub fn sled_id(&self) -> DbTypedUuid { + self.sled_id + } + + pub fn slot(&self) -> i64 { + self.slot + } + + pub fn number_of_slots(&self) -> SqlU8 { + self.number_of_slots + } + + pub fn active_slot(&self) -> SqlU8 { + self.active_slot + } + + pub fn next_active_slot(&self) -> Option { + self.next_active_slot + } + + pub fn slot1_is_read_only(&self) -> bool { + self.slot1_is_read_only + } + + pub fn slot_firmware_versions(&self) -> &[Option] { + &self.slot_firmware_versions + } +} + +impl From + for nexus_types::inventory::PhysicalDiskFirmware +{ + fn from( + nvme_firmware: InvNvmeDiskFirmware, + ) -> nexus_types::inventory::PhysicalDiskFirmware { + use nexus_types::inventory as nexus_inventory; + + nexus_inventory::PhysicalDiskFirmware::Nvme( + nexus_inventory::NvmeFirmware { + active_slot: nvme_firmware.active_slot.0, + next_active_slot: nvme_firmware + .next_active_slot + .map(|nas| 
nas.0), + number_of_slots: nvme_firmware.number_of_slots.0, + slot1_is_read_only: nvme_firmware.slot1_is_read_only, + slot_firmware_versions: nvme_firmware.slot_firmware_versions, + }, + ) + } } /// See [`nexus_types::inventory::Zpool`]. @@ -1551,3 +1740,218 @@ impl InvOmicronZoneNic { zone_nic.into_network_interface_for_zone(zone_id) } } + +#[cfg(test)] +mod test { + use nexus_types::inventory::NvmeFirmware; + use omicron_uuid_kinds::{CollectionKind, SledUuid, TypedUuid}; + + use crate::{typed_uuid, InvNvmeDiskFirmware, InvNvmeDiskFirmwareError}; + + #[test] + fn test_inv_nvme_disk_firmware() { + let inv_collection_id: TypedUuid = + typed_uuid::DbTypedUuid(TypedUuid::new_v4()).into(); + let sled_id: SledUuid = TypedUuid::new_v4(); + let slot = 1; + + // NB: We are testing these error cases with only one value that + // is out of spec so that we don't have to worry about what order + // `InvNvmeDiskFirmware::new` is validating fields in. The only test + // dependent on position is the number of slots test, but that is always + // processed first in the implementation + + // Invalid active slot + for i in [0u8, 8] { + let firmware = &NvmeFirmware { + active_slot: i, + next_active_slot: None, + number_of_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("firmware".to_string())], + }; + let err = InvNvmeDiskFirmware::new( + inv_collection_id, + sled_id, + slot, + firmware, + ) + .unwrap_err(); + assert!(matches!( + err, + InvNvmeDiskFirmwareError::InvalidActiveSlot(_) + )); + } + + // Invalid next active slot + for i in [0u8, 8] { + let firmware = &NvmeFirmware { + active_slot: 1, + next_active_slot: Some(i), + number_of_slots: 2, + slot1_is_read_only: true, + slot_firmware_versions: vec![ + Some("firmware".to_string()), + Some("firmware".to_string()), + ], + }; + let err = InvNvmeDiskFirmware::new( + inv_collection_id, + sled_id, + slot, + firmware, + ) + .unwrap_err(); + assert!(matches!( + err, + 
InvNvmeDiskFirmwareError::InvalidNextActiveSlot(_) + )); + } + + // Invalid number of slots + for i in [0u8, 8] { + let firmware = &NvmeFirmware { + active_slot: i, + next_active_slot: None, + number_of_slots: i, + slot1_is_read_only: true, + slot_firmware_versions: vec![ + Some("firmware".to_string()); + i as usize + ], + }; + let err = InvNvmeDiskFirmware::new( + inv_collection_id, + sled_id, + slot, + firmware, + ) + .unwrap_err(); + assert!(matches!( + err, + InvNvmeDiskFirmwareError::InvalidNumberOfSlots(_) + )); + } + + // Mismatch between number of slots and firmware versions + let firmware = &NvmeFirmware { + active_slot: 1, + next_active_slot: None, + number_of_slots: 2, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("firmware".to_string())], + }; + let err = InvNvmeDiskFirmware::new( + inv_collection_id, + sled_id, + slot, + firmware, + ) + .unwrap_err(); + assert!(matches!( + err, + InvNvmeDiskFirmwareError::SlotFirmwareVersionsLengthMismatch { .. } + )); + + // Firmware version string contains non ascii characters + let firmware = &NvmeFirmware { + active_slot: 1, + next_active_slot: None, + number_of_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("💾".to_string())], + }; + let err = InvNvmeDiskFirmware::new( + inv_collection_id, + sled_id, + slot, + firmware, + ) + .unwrap_err(); + assert!(matches!( + err, + InvNvmeDiskFirmwareError::FirmwareVersionNotAscii(_) + )); + + // Firmware version string is too long + let firmware = &NvmeFirmware { + active_slot: 1, + next_active_slot: None, + number_of_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some( + "somereallylongstring".to_string(), + )], + }; + let err = InvNvmeDiskFirmware::new( + inv_collection_id, + sled_id, + slot, + firmware, + ) + .unwrap_err(); + assert!(matches!( + err, + InvNvmeDiskFirmwareError::FirmwareVersionTooLong(_) + )); + + // Active firmware version slot is in the vec but is empty + let firmware = &NvmeFirmware { + 
active_slot: 1, + next_active_slot: None, + number_of_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![None], + }; + let err = InvNvmeDiskFirmware::new( + inv_collection_id, + sled_id, + slot, + firmware, + ) + .unwrap_err(); + assert!(matches!( + err, + InvNvmeDiskFirmwareError::InvalidActiveSlotFirmware(_) + )); + + // Next active firmware version slot is in the vec but is empty + let firmware = &NvmeFirmware { + active_slot: 1, + next_active_slot: Some(2), + number_of_slots: 2, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("1234".to_string()), None], + }; + let err = InvNvmeDiskFirmware::new( + inv_collection_id, + sled_id, + slot, + firmware, + ) + .unwrap_err(); + assert!(matches!( + err, + InvNvmeDiskFirmwareError::InvalidNextActiveSlotFirmware(_) + )); + + // Actually construct a valid value + let firmware = &NvmeFirmware { + active_slot: 1, + next_active_slot: Some(2), + number_of_slots: 2, + slot1_is_read_only: true, + slot_firmware_versions: vec![ + Some("1234".to_string()), + Some("4567".to_string()), + ], + }; + assert!(InvNvmeDiskFirmware::new( + inv_collection_id, + sled_id, + slot, + firmware, + ) + .is_ok()) + } +} diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 695a3b21ba..9e0ec1055f 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1434,6 +1434,20 @@ table! { } } +table! { + inv_nvme_disk_firmware (inv_collection_id, sled_id, slot) { + inv_collection_id -> Uuid, + sled_id -> Uuid, + slot -> Int8, + + number_of_slots -> Int2, + active_slot -> Int2, + next_active_slot -> Nullable, + slot1_is_read_only -> Bool, + slot_firmware_versions -> Array>, + } +} + table! 
{ inv_zpool (inv_collection_id, sled_id, id) { inv_collection_id -> Uuid, @@ -1895,6 +1909,7 @@ allow_tables_to_appear_in_same_query!( network_interface, instance_network_interface, inv_physical_disk, + inv_nvme_disk_firmware, service_network_interface, oximeter, physical_disk, diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 52b1b24554..018524a2e6 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(104, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(105, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(105, "inventory-nvme-firmware"), KnownVersion::new(104, "lookup-bgp-config-indexes"), KnownVersion::new(103, "lookup-instances-by-state-index"), KnownVersion::new(102, "add-instance-auto-restart-cooldown"), diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index c618661a6e..5b72068b76 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -39,6 +39,7 @@ use nexus_db_model::InvCaboose; use nexus_db_model::InvCollection; use nexus_db_model::InvCollectionError; use nexus_db_model::InvDataset; +use nexus_db_model::InvNvmeDiskFirmware; use nexus_db_model::InvOmicronZone; use nexus_db_model::InvOmicronZoneNic; use nexus_db_model::InvPhysicalDisk; @@ -61,6 +62,7 @@ use nexus_db_model::SwCaboose; use nexus_db_model::SwRotPage; use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Collection; +use nexus_types::inventory::PhysicalDiskFirmware; use omicron_common::api::external::Error; use omicron_common::api::external::InternalContext; use omicron_common::api::external::LookupType; @@ -135,6 +137,29 @@ impl DataStore { )) }) .collect::, Error>>()?; + + // Pull disk firmware out of sled agents + let mut nvme_disk_firmware = Vec::new(); + for (sled_id, sled_agent) in &collection.sled_agents { + for disk in &sled_agent.disks { + match &disk.firmware { + PhysicalDiskFirmware::Unknown => (), + PhysicalDiskFirmware::Nvme(firmware) => nvme_disk_firmware + .push( + InvNvmeDiskFirmware::new( + collection_id, + *sled_id, + disk.slot, + firmware, + ) + .map_err(|e| { + Error::internal_error(&e.to_string()) + })?, + ), + } + } + } + // Pull disks out of all sled agents let disks: Vec<_> = collection .sled_agents @@ -738,6 +763,27 @@ impl DataStore { } } + // Insert rows for all the physical disk firmware we found. 
+ { + use db::schema::inv_nvme_disk_firmware::dsl; + + let batch_size = SQL_BATCH_SIZE.get().try_into().unwrap(); + let mut nvme_disk_firmware = nvme_disk_firmware.into_iter(); + loop { + let some_disk_firmware = nvme_disk_firmware + .by_ref() + .take(batch_size) + .collect::>(); + if some_disk_firmware.is_empty() { + break; + } + let _ = diesel::insert_into(dsl::inv_nvme_disk_firmware) + .values(some_disk_firmware) + .execute_async(&conn) + .await?; + } + } + // Insert rows for all the zpools we found. { use db::schema::inv_zpool::dsl; @@ -1169,6 +1215,7 @@ impl DataStore { nsled_agents, ndatasets, nphysical_disks, + nnvme_disk_disk_firmware, nsled_agent_zones, nzones, nnics, @@ -1264,6 +1311,17 @@ impl DataStore { .await? }; + // Remove rows for NVMe physical disk firmware found. + let nnvme_disk_firwmare = + { + use db::schema::inv_nvme_disk_firmware::dsl; + diesel::delete(dsl::inv_nvme_disk_firmware.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? 
+ }; + // Remove rows associated with Omicron zones let nsled_agent_zones = { @@ -1325,6 +1383,7 @@ impl DataStore { nsled_agents, ndatasets, nphysical_disks, + nnvme_disk_firwmare, nsled_agent_zones, nzones, nnics, @@ -1350,6 +1409,7 @@ impl DataStore { "nsled_agents" => nsled_agents, "ndatasets" => ndatasets, "nphysical_disks" => nphysical_disks, + "nnvme_disk_firmware" => nnvme_disk_disk_firmware, "nsled_agent_zones" => nsled_agent_zones, "nzones" => nzones, "nnics" => nnics, @@ -1578,15 +1638,57 @@ impl DataStore { rows }; + // Mapping of "Sled ID" -> "Mapping of physical slot -> disk firmware" + let disk_firmware: BTreeMap< + SledUuid, + BTreeMap, + > = { + use db::schema::inv_nvme_disk_firmware::dsl; + + let mut disk_firmware = BTreeMap::< + SledUuid, + BTreeMap, + >::new(); + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = paginated_multicolumn( + dsl::inv_nvme_disk_firmware, + (dsl::sled_id, dsl::slot), + &p.current_pagparams(), + ) + .filter(dsl::inv_collection_id.eq(db_id)) + .select(InvNvmeDiskFirmware::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = + p.found_batch(&batch, &|row| (row.sled_id(), row.slot())); + for firmware in batch { + disk_firmware + .entry(firmware.sled_id().into()) + .or_default() + .insert( + firmware.slot(), + nexus_types::inventory::PhysicalDiskFirmware::from( + firmware, + ), + ); + } + } + disk_firmware + }; + // Mapping of "Sled ID" -> "All disks reported by that sled" let physical_disks: BTreeMap< - Uuid, + SledUuid, Vec, > = { use db::schema::inv_physical_disk::dsl; let mut disks = BTreeMap::< - Uuid, + SledUuid, Vec, >::new(); let mut paginator = Paginator::new(batch_size); @@ -1606,10 +1708,24 @@ impl DataStore { paginator = p.found_batch(&batch, &|row| (row.sled_id, row.slot)); for disk in batch { - disks - .entry(disk.sled_id.into_untyped_uuid()) - .or_default() - 
.push(disk.into()); + let sled_id = disk.sled_id.into(); + let firmware = disk_firmware + .get(&sled_id) + .and_then(|lookup| lookup.get(&disk.slot)) + .unwrap_or(&nexus_types::inventory::PhysicalDiskFirmware::Unknown); + + disks.entry(sled_id).or_default().push( + nexus_types::inventory::PhysicalDisk { + identity: omicron_common::disk::DiskIdentity { + vendor: disk.vendor, + model: disk.model, + serial: disk.serial, + }, + variant: disk.variant.into(), + slot: disk.slot, + firmware: firmware.clone(), + }, + ); } } disks @@ -1778,7 +1894,7 @@ impl DataStore { usable_physical_ram: s.usable_physical_ram.into(), reservoir_size: s.reservoir_size.into(), disks: physical_disks - .get(sled_id.as_untyped_uuid()) + .get(&sled_id) .map(|disks| disks.to_vec()) .unwrap_or_default(), zpools: zpools diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index 8162b11f2d..ae31ade02d 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -712,6 +712,41 @@ mod test { }, variant: DiskVariant::U2, slot, + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("TEST1".to_string())], + } + } + + // helper function to eaisly convert from [`InventoryDisk`] to + // [`nexus_types::inventory::PhysicalDisk`] + fn inv_phys_disk_to_nexus_phys_disk( + inv_phys_disk: &InvPhysicalDisk, + ) -> nexus_types::inventory::PhysicalDisk { + use nexus_types::inventory::NvmeFirmware; + use nexus_types::inventory::PhysicalDiskFirmware; + + // We reuse `create_inv_disk` so that we get the same NVMe firmware + // values throughout the tests. 
+ let inv_disk = create_inv_disk("unused".to_string(), 0); + + nexus_types::inventory::PhysicalDisk { + identity: DiskIdentity { + vendor: inv_phys_disk.vendor.clone(), + model: inv_phys_disk.model.clone(), + serial: inv_phys_disk.serial.clone(), + }, + variant: inv_phys_disk.variant.into(), + slot: inv_phys_disk.slot, + firmware: PhysicalDiskFirmware::Nvme(NvmeFirmware { + active_slot: inv_disk.active_firmware_slot, + next_active_slot: inv_disk.next_active_firmware_slot, + number_of_slots: inv_disk.number_of_firmware_slots, + slot1_is_read_only: inv_disk.slot1_is_read_only, + slot_firmware_versions: inv_disk.slot_firmware_versions, + }), } } @@ -829,7 +864,10 @@ mod test { // Normalize the data a bit -- convert to nexus types, and sort vecs for // stability in the comparison. let mut uninitialized_disks: Vec = - uninitialized_disks.into_iter().map(|d| d.into()).collect(); + uninitialized_disks + .into_iter() + .map(|d| inv_phys_disk_to_nexus_phys_disk(&d)) + .collect(); uninitialized_disks .sort_by(|a, b| a.identity.partial_cmp(&b.identity).unwrap()); let mut expected_disks: Vec = @@ -879,7 +917,10 @@ mod test { // uninitailized) and the last two disks of "disks_b" (of which both are // still uninitialized). let mut uninitialized_disks: Vec = - uninitialized_disks.into_iter().map(|d| d.into()).collect(); + uninitialized_disks + .into_iter() + .map(|d| inv_phys_disk_to_nexus_phys_disk(&d)) + .collect(); uninitialized_disks .sort_by(|a, b| a.identity.partial_cmp(&b.identity).unwrap()); let mut expected_disks: Vec = diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index 9ec2978b39..149ddc859e 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -280,6 +280,7 @@ pub fn representative() -> Representative { // This first one will match "sled1_bb"'s baseboard information. let sled_agent_id_basic = "c5aec1df-b897-49e4-8085-ccd975f9b529".parse().unwrap(); + // Add some disks to this first sled. 
let disks = vec![ // Let's say we have one manufacturer for our M.2... @@ -291,6 +292,11 @@ pub fn representative() -> Representative { }, variant: DiskVariant::M2, slot: 0, + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("EXAMP1".to_string())], }, // ... and a couple different vendors for our U.2s InventoryDisk { @@ -301,6 +307,11 @@ pub fn representative() -> Representative { }, variant: DiskVariant::U2, slot: 1, + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("EXAMP1".to_string())], }, InventoryDisk { identity: omicron_common::disk::DiskIdentity { @@ -310,6 +321,11 @@ pub fn representative() -> Representative { }, variant: DiskVariant::U2, slot: 2, + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("EXAMP1".to_string())], }, InventoryDisk { identity: omicron_common::disk::DiskIdentity { @@ -319,6 +335,11 @@ pub fn representative() -> Representative { }, variant: DiskVariant::U2, slot: 3, + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("EXAMP1".to_string())], }, ]; let zpools = vec![InventoryZpool { diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index 538c7bec1b..6b282499a9 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -562,6 +562,13 @@ impl Sled { identity: d.disk_identity.clone(), variant: DiskVariant::U2, slot: i64::try_from(i).unwrap(), + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some( + "SIMUL1".to_string(), + 
)], }) .collect(), // Zpools & Datasets won't necessarily show up until our first diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index f9ad9a5cf0..563fd8a4d3 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -377,6 +377,23 @@ impl IntoRotPage for gateway_client::types::RotCfpa { } } +/// Firmware reported for a physical NVMe disk. +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] +pub struct NvmeFirmware { + pub active_slot: u8, + pub next_active_slot: Option, + pub number_of_slots: u8, + pub slot1_is_read_only: bool, + pub slot_firmware_versions: Vec>, +} + +/// Firmware reported by sled agent for a particular disk format. +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] +pub enum PhysicalDiskFirmware { + Unknown, + Nvme(NvmeFirmware), +} + /// A physical disk reported by a sled agent. /// /// This identifies that a physical disk appears in a Sled. @@ -391,6 +408,7 @@ pub struct PhysicalDisk { pub identity: omicron_common::disk::DiskIdentity, pub variant: PhysicalDiskKind, pub slot: i64, + pub firmware: PhysicalDiskFirmware, } impl From for PhysicalDisk { @@ -399,6 +417,13 @@ impl From for PhysicalDisk { identity: disk.identity, variant: disk.variant.into(), slot: disk.slot, + firmware: PhysicalDiskFirmware::Nvme(NvmeFirmware { + active_slot: disk.active_firmware_slot, + next_active_slot: disk.next_active_firmware_slot, + number_of_slots: disk.number_of_firmware_slots, + slot1_is_read_only: disk.slot1_is_read_only, + slot_firmware_versions: disk.slot_firmware_versions, + }), } } } diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 415f6a5d23..f7fbcd8870 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -3463,20 +3463,50 @@ "description": "Identifies information about disks which may be attached to Sleds.", "type": "object", "properties": { + "active_firmware_slot": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, "identity": { 
"$ref": "#/components/schemas/DiskIdentity" }, + "next_active_firmware_slot": { + "nullable": true, + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "number_of_firmware_slots": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, "slot": { "type": "integer", "format": "int64" }, + "slot1_is_read_only": { + "type": "boolean" + }, + "slot_firmware_versions": { + "type": "array", + "items": { + "nullable": true, + "type": "string" + } + }, "variant": { "$ref": "#/components/schemas/DiskVariant" } }, "required": [ + "active_firmware_slot", "identity", + "number_of_firmware_slots", "slot", + "slot1_is_read_only", + "slot_firmware_versions", "variant" ] }, diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 450ea7b1bd..8afb828812 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3237,13 +3237,42 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_physical_disk ( variant omicron.public.physical_disk_kind NOT NULL, - -- FK consisting of: + -- PK consisting of: -- - Which collection this was -- - The sled reporting the disk -- - The slot in which this disk was found PRIMARY KEY (inv_collection_id, sled_id, slot) ); +CREATE TABLE IF NOT EXISTS omicron.public.inv_nvme_disk_firmware ( + -- where this observation came from + -- (foreign key into `inv_collection` table) + inv_collection_id UUID NOT NULL, + + -- unique id for this sled (should be foreign keys into `sled` table, though + -- it's conceivable a sled will report an id that we don't know about) + sled_id UUID NOT NULL, + -- The slot where this disk was last observed + slot INT8 CHECK (slot >= 0) NOT NULL, + + -- total number of firmware slots the device has + number_of_slots INT2 CHECK (number_of_slots BETWEEN 1 AND 7) NOT NULL, + active_slot INT2 CHECK (active_slot BETWEEN 1 AND 7) NOT NULL, + -- staged firmware slot to be active on reset + next_active_slot INT2 CHECK (next_active_slot BETWEEN 1 AND 7), + -- slot1 is distinct in the NVMe spec in the sense that 
it can be read only + slot1_is_read_only BOOLEAN, + -- the firmware version string for each NVMe slot (0 indexed), a NULL means the + -- slot exists but is empty + slot_firmware_versions STRING(8)[] CHECK (array_length(slot_firmware_versions, 1) BETWEEN 1 AND 7), + + -- PK consisting of: + -- - Which collection this was + -- - The sled reporting the disk + -- - The slot in which the disk was found + PRIMARY KEY (inv_collection_id, sled_id, slot) +); + CREATE TABLE IF NOT EXISTS omicron.public.inv_zpool ( -- where this observation came from -- (foreign key into `inv_collection` table) @@ -4382,7 +4411,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '104.0.0', NULL) + (TRUE, NOW(), NOW(), '105.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/inventory-nvme-firmware/up01.sql b/schema/crdb/inventory-nvme-firmware/up01.sql new file mode 100644 index 0000000000..0191fd31f1 --- /dev/null +++ b/schema/crdb/inventory-nvme-firmware/up01.sql @@ -0,0 +1,14 @@ +CREATE TABLE IF NOT EXISTS omicron.public.inv_nvme_disk_firmware ( + inv_collection_id UUID NOT NULL, + + sled_id UUID NOT NULL, + slot INT8 CHECK (slot >= 0) NOT NULL, + + number_of_slots INT2 CHECK (number_of_slots BETWEEN 1 AND 7) NOT NULL, + active_slot INT2 CHECK (active_slot BETWEEN 1 AND 7) NOT NULL, + next_active_slot INT2 CHECK (next_active_slot BETWEEN 1 AND 7), + slot1_is_read_only BOOLEAN, + slot_firmware_versions STRING(8)[] CHECK (array_length(slot_firmware_versions, 1) BETWEEN 1 AND 7), + + PRIMARY KEY (inv_collection_id, sled_id, slot) +); diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 1c1f3cb0b3..279253d323 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -1626,6 +1626,11 @@ mod test { }, variant: DiskVariant::U2, slot: i.try_into().unwrap(), + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + 
slot1_is_read_only: true, + slot_firmware_versions: vec![Some("TEST1".to_string())], }) .collect(), zpools: vec![], diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 3a1ece4439..957965c6a0 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -845,6 +845,11 @@ impl SledAgent { identity: info.identity.clone(), variant: info.variant, slot: info.slot, + active_firmware_slot: 1, + next_active_firmware_slot: None, + number_of_firmware_slots: 1, + slot1_is_read_only: true, + slot_firmware_versions: vec![Some("SIMUL1".to_string())], }) .collect(), zpools: storage diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 59c316634f..9af78521d1 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -1228,11 +1228,16 @@ impl SledAgent { let mut zpools = vec![]; let mut datasets = vec![]; let all_disks = self.storage().get_latest_disks().await; - for (identity, variant, slot, _firmware) in all_disks.iter_all() { + for (identity, variant, slot, firmware) in all_disks.iter_all() { disks.push(InventoryDisk { identity: identity.clone(), variant, slot, + active_firmware_slot: firmware.active_slot(), + next_active_firmware_slot: firmware.next_active_slot(), + number_of_firmware_slots: firmware.number_of_slots(), + slot1_is_read_only: firmware.slot1_read_only(), + slot_firmware_versions: firmware.slots().to_vec(), }); } for zpool in all_disks.all_u2_zpools() { diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index f95efb985e..a07ddea8cf 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -138,6 +138,7 @@ pub struct DiskFirmware { active_slot: u8, next_active_slot: Option<u8>, slot1_read_only: bool, + number_of_slots: u8, // NB: This vec is 0 indexed while active_slot and next_active_slot are // referring to "slots" in terms of the NVMe spec which defines slots 1-7. 
// If the active_slot is 1, then it will be slot_firmware_versions[0] in the @@ -158,6 +159,10 @@ impl DiskFirmware { self.slot1_read_only } + pub fn number_of_slots(&self) -> u8 { + self.number_of_slots + } + pub fn slots(&self) -> &[Option<String>] { self.slot_firmware_versions.as_slice() } @@ -168,12 +173,14 @@ impl DiskFirmware { active_slot: u8, next_active_slot: Option<u8>, slot1_read_only: bool, + number_of_slots: u8, slots: Vec<Option<String>>, ) -> Self { Self { active_slot, next_active_slot, slot1_read_only, + number_of_slots, slot_firmware_versions: slots, } } diff --git a/sled-hardware/src/illumos/mod.rs b/sled-hardware/src/illumos/mod.rs index 83ef685b67..3664a7cf06 100644 --- a/sled-hardware/src/illumos/mod.rs +++ b/sled-hardware/src/illumos/mod.rs @@ -576,6 +576,7 @@ fn poll_blkdev_node( firmware_log_page.active_slot, firmware_log_page.next_active_slot, firmware_log_page.slot1_is_read_only, + firmware_log_page.number_of_slots, firmware_log_page.slot_iter().map(|s| s.map(str::to_string)).collect(), ); diff --git a/sled-storage/src/disk.rs b/sled-storage/src/disk.rs index 51980bea3a..ae0f5d25dc 100644 --- a/sled-storage/src/disk.rs +++ b/sled-storage/src/disk.rs @@ -44,8 +44,8 @@ const SYNTHETIC_SLOT_OFFSET: i64 = 1024; // A generic name for the firmware in slot1 of an NVMe device. // -// bhyve for example uses "1.0" and marks slot1 as read-only. -const SYNTHETIC_FIRMWARE_SLOT1: &str = "synthetic 1.0"; +// bhyve for example uses "1.0" and marks slot1 as read-only (must be within 8 chars). 
+const SYNTHETIC_FIRMWARE_SLOT1: &str = "SYNTH1"; impl SyntheticDisk { // "Manages" a SyntheticDisk by ensuring that it has a Zpool and importing @@ -144,6 +144,7 @@ impl RawSyntheticDisk { 1, None, true, + 1, vec![Some(SYNTHETIC_FIRMWARE_SLOT1.to_string())], ); diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 21d54b4176..57ceb7d63d 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -1283,6 +1283,7 @@ mod tests { disk.firmware.active_slot(), disk.firmware.next_active_slot(), disk.firmware.slot1_read_only(), + disk.firmware.number_of_slots(), slots, ); }