From 38575abf11e463993bdb953fc3ad3653cbf8a3b6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 22 Jul 2024 16:33:30 -0700 Subject: [PATCH 1/3] [nexus] Decommission disks in reconfigurator, clean their DB state (#6059) This PR adds the following step to the reconfigurator's execution step: For all disks that are marked **expunged**, mark them **decommissioned**. This notably happens after the `deploy_disks` step of execution. This PR also adds a background task that looks for all disks that are **decommissioned**, but have **zpools**. It deletes these zpools (and their **datasets**) as long as no regions nor region snapshots are referencing the contained datasets. Fixes https://github.com/oxidecomputer/omicron/issues/6051 --- dev-tools/omdb/tests/env.out | 15 + dev-tools/omdb/tests/successes.out | 12 + nexus-config/src/nexus_config.rs | 23 + .../src/db/datastore/physical_disk.rs | 74 ++- nexus/db-queries/src/db/datastore/zpool.rs | 135 ++++++ nexus/examples/config-second.toml | 1 + nexus/examples/config.toml | 1 + .../execution/src/cockroachdb.rs | 5 +- .../reconfigurator/execution/src/datasets.rs | 5 +- nexus/reconfigurator/execution/src/dns.rs | 5 +- nexus/reconfigurator/execution/src/lib.rs | 27 +- .../execution/src/omicron_physical_disks.rs | 258 +++++++++- nexus/src/app/background/init.rs | 21 + .../tasks/decommissioned_disk_cleaner.rs | 458 ++++++++++++++++++ nexus/src/app/background/tasks/mod.rs | 1 + nexus/tests/config.test.toml | 3 + nexus/tests/integration_tests/sleds.rs | 9 +- nexus/types/src/deployment/planning_input.rs | 7 + smf/nexus/multi-sled/config-partial.toml | 1 + smf/nexus/single-sled/config-partial.toml | 1 + uuid-kinds/src/lib.rs | 2 + 21 files changed, 1013 insertions(+), 51 deletions(-) create mode 100644 nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 75acc5c584..a6bf4d4667 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -47,6 +47,11 @@ task: "crdb_node_id_collector" Collects node IDs of running CockroachDB zones +task: "decommissioned_disk_cleaner" + deletes DB records for decommissioned disks, after regions and region + snapshots have been replaced + + task: "dns_config_external" watches external DNS data stored in CockroachDB @@ -187,6 +192,11 @@ task: "crdb_node_id_collector" Collects node IDs of running CockroachDB zones +task: "decommissioned_disk_cleaner" + deletes DB records for decommissioned disks, after regions and region + snapshots have been replaced + + task: "dns_config_external" watches external DNS data stored in CockroachDB @@ -314,6 +324,11 @@ task: "crdb_node_id_collector" Collects node IDs of running CockroachDB zones +task: "decommissioned_disk_cleaner" + deletes DB records for decommissioned disks, after regions and region + snapshots have been replaced + + task: "dns_config_external" watches external DNS data stored in CockroachDB diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 982a9d8403..cec3fa3052 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -248,6 +248,11 @@ task: "crdb_node_id_collector" Collects node IDs of running CockroachDB zones +task: "decommissioned_disk_cleaner" + deletes DB records for decommissioned disks, after regions and region + snapshots have been replaced + + task: "dns_config_external" watches external DNS data stored in CockroachDB @@ -453,6 +458,13 @@ task: "crdb_node_id_collector" started at (s ago) 
and ran for ms last completion reported error: no blueprint +task: "decommissioned_disk_cleaner" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms +warning: unknown background task: "decommissioned_disk_cleaner" (don't know how to interpret details: Object {"deleted": Number(0), "error": Null, "error_count": Number(0), "found": Number(0), "not_ready_to_be_deleted": Number(0)}) + task: "external_endpoints" configured period: every 1m currently executing: no diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs index 3bc3a36126..6e9d6b0cf0 100644 --- a/nexus-config/src/nexus_config.rs +++ b/nexus-config/src/nexus_config.rs @@ -361,6 +361,8 @@ pub struct BackgroundTaskConfig { pub inventory: InventoryConfig, /// configuration for physical disk adoption tasks pub physical_disk_adoption: PhysicalDiskAdoptionConfig, + /// configuration for decommissioned disk cleaner task + pub decommissioned_disk_cleaner: DecommissionedDiskCleanerConfig, /// configuration for phantom disks task pub phantom_disks: PhantomDiskConfig, /// configuration for blueprint related tasks @@ -444,6 +446,20 @@ pub struct PhysicalDiskAdoptionConfig { pub disable: bool, } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct DecommissionedDiskCleanerConfig { + /// period (in seconds) for periodic activations of this background task + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, + + /// A toggle to disable automated disk cleanup + /// + /// Default: Off + #[serde(default)] + pub disable: bool, +} + #[serde_as] #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct NatCleanupConfig { @@ -822,6 +838,7 @@ mod test { inventory.nkeep = 11 inventory.disable = false physical_disk_adoption.period_secs = 30 + decommissioned_disk_cleaner.period_secs = 30 phantom_disks.period_secs = 30 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 @@ -947,6 +964,11 @@ mod test { period_secs: Duration::from_secs(30), disable: false, }, + decommissioned_disk_cleaner: + DecommissionedDiskCleanerConfig { + period_secs: Duration::from_secs(30), + disable: false, + }, phantom_disks: PhantomDiskConfig { period_secs: Duration::from_secs(30), }, @@ -1049,6 +1071,7 @@ mod test { inventory.nkeep = 3 inventory.disable = false physical_disk_adoption.period_secs = 30 + decommissioned_disk_cleaner.period_secs = 30 phantom_disks.period_secs = 30 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index 11e056d19b..5e3b51f228 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -37,6 +37,7 @@ use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::PhysicalDiskUuid; use uuid::Uuid; impl DataStore { @@ -278,23 +279,36 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + /// Decommissions all expunged disks. 
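+    ///
+    /// Every physical disk that has not been deleted, whose policy is
+    /// `Expunged` and whose state is still `Active`, has its state set to
+    /// `Decommissioned`. Zpool and dataset records for such disks are left in
+    /// place; the `decommissioned_disk_cleaner` background task removes them
+    /// later, once no regions or region snapshots reference them.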
+ pub async fn physical_disk_decommission_all_expunged( + &self, + opctx: &OpContext, + ) -> Result<(), Error> { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + use db::schema::physical_disk::dsl; + + let conn = &*self.pool_connection_authorized(&opctx).await?; + diesel::update(dsl::physical_disk) + .filter(dsl::time_deleted.is_null()) + .physical_disk_filter(DiskFilter::ExpungedButActive) + .set(dsl::disk_state.eq(PhysicalDiskState::Decommissioned)) + .execute_async(conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + Ok(()) + } + /// Deletes a disk from the database. pub async fn physical_disk_delete( &self, opctx: &OpContext, - vendor: String, - serial: String, - model: String, - sled_id: Uuid, + id: PhysicalDiskUuid, ) -> DeleteResult { opctx.authorize(authz::Action::Read, &authz::FLEET).await?; let now = Utc::now(); use db::schema::physical_disk::dsl; diesel::update(dsl::physical_disk) - .filter(dsl::vendor.eq(vendor)) - .filter(dsl::serial.eq(serial)) - .filter(dsl::model.eq(model)) - .filter(dsl::sled_id.eq(sled_id)) + .filter(dsl::id.eq(id.into_untyped_uuid())) .filter(dsl::time_deleted.is_null()) .set(dsl::time_deleted.eq(now)) .execute_async(&*self.pool_connection_authorized(opctx).await?) @@ -451,8 +465,9 @@ mod test { let sled = create_test_sled(&datastore).await; // Insert a disk + let disk_id = PhysicalDiskUuid::new_v4(); let disk = PhysicalDisk::new( - Uuid::new_v4(), + disk_id.into_untyped_uuid(), String::from("Oxide"), String::from("123"), String::from("FakeDisk"), @@ -472,13 +487,7 @@ mod test { // Delete the inserted disk datastore - .physical_disk_delete( - &opctx, - disk.vendor.clone(), - disk.serial.clone(), - disk.model.clone(), - disk.sled_id, - ) + .physical_disk_delete(&opctx, disk_id) .await .expect("Failed to delete disk"); let disks = datastore @@ -489,13 +498,7 @@ mod test { // Deleting again should not throw an error datastore - .physical_disk_delete( - &opctx, - disk.vendor, - disk.serial, - disk.model, - disk.sled_id, - ) + .physical_disk_delete(&opctx, disk_id) .await .expect("Failed to delete disk"); @@ -520,8 +523,9 @@ mod test { let sled_b = create_test_sled(&datastore).await; // Insert a disk + let disk_id = PhysicalDiskUuid::new_v4(); let disk = PhysicalDisk::new( - Uuid::new_v4(), + disk_id.into_untyped_uuid(), String::from("Oxide"), String::from("123"), String::from("FakeDisk"), @@ -546,13 +550,7 @@ mod test { // Delete the inserted disk datastore - .physical_disk_delete( - &opctx, - disk.vendor, - disk.serial, - disk.model, - disk.sled_id, - ) + .physical_disk_delete(&opctx, disk_id) .await .expect("Failed to delete disk"); let disks = datastore @@ -567,8 +565,9 @@ mod test { assert!(disks.is_empty()); // Attach the disk to the second sled + let disk_id = PhysicalDiskUuid::new_v4(); let disk = PhysicalDisk::new( - Uuid::new_v4(), + disk_id.into_untyped_uuid(), String::from("Oxide"), String::from("123"), String::from("FakeDisk"), @@ -613,8 +612,9 @@ mod test { let sled_b = create_test_sled(&datastore).await; // Insert a disk + let disk_id = PhysicalDiskUuid::new_v4(); let disk = PhysicalDisk::new( - Uuid::new_v4(), + disk_id.into_untyped_uuid(), String::from("Oxide"), String::from("123"), String::from("FakeDisk"), @@ -639,13 +639,7 @@ mod test { // Remove the disk from the first sled datastore - .physical_disk_delete( - &opctx, - disk.vendor.clone(), - disk.serial.clone(), - disk.model.clone(), - disk.sled_id, - ) + .physical_disk_delete(&opctx, disk_id) .await .expect("Failed to delete disk"); diff 
--git a/nexus/db-queries/src/db/datastore/zpool.rs b/nexus/db-queries/src/db/datastore/zpool.rs index a771202387..a314b9d371 100644 --- a/nexus/db-queries/src/db/datastore/zpool.rs +++ b/nexus/db-queries/src/db/datastore/zpool.rs @@ -15,6 +15,7 @@ use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::identity::Asset; use crate::db::model::PhysicalDisk; +use crate::db::model::PhysicalDiskState; use crate::db::model::Sled; use crate::db::model::Zpool; use crate::db::pagination::paginated; @@ -27,10 +28,13 @@ use diesel::upsert::excluded; use nexus_db_model::PhysicalDiskKind; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; +use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::ZpoolUuid; use uuid::Uuid; impl DataStore { @@ -135,4 +139,135 @@ impl DataStore { Ok(zpools) } + + /// Returns all (non-deleted) zpools on decommissioned (or deleted) disks + pub async fn zpool_on_decommissioned_disk_list( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::physical_disk::dsl as physical_disk_dsl; + use db::schema::zpool::dsl as zpool_dsl; + + paginated(zpool_dsl::zpool, zpool_dsl::id, pagparams) + .filter(zpool_dsl::time_deleted.is_null()) + // Note the LEFT JOIN here -- we want to see zpools where the + // physical disk has been deleted too. + .left_join( + physical_disk_dsl::physical_disk + .on(physical_disk_dsl::id.eq(zpool_dsl::physical_disk_id)), + ) + .filter( + // The physical disk has been either explicitly decommissioned, + // or has been deleted altogether. + physical_disk_dsl::disk_state + .eq(PhysicalDiskState::Decommissioned) + .or(physical_disk_dsl::id.is_null()) + .or( + // NOTE: We should probably get rid of this altogether + // (it's kinda implied by "Decommissioned", being a terminal + // state) but this is an extra cautious statement. + physical_disk_dsl::time_deleted.is_not_null(), + ), + ) + .select(Zpool::as_select()) + .load_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// Soft-deletes the Zpools and cleans up all associated DB resources. + /// + /// This should only be called for zpools on physical disks which + /// have been decommissioned. 
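+    ///
+    /// If any region or region snapshot still references a dataset within the
+    /// zpool, this returns a `ServiceUnavailable` error and deletes nothing.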
+ /// + /// In order: + /// - Finds all datasets within the zpool + /// - Ensures that no regions nor region snapshots are using these datasets + /// - Soft-deletes the datasets within the zpool + /// - Soft-deletes the zpool itself + pub async fn zpool_delete_self_and_all_datasets( + &self, + opctx: &OpContext, + zpool_id: ZpoolUuid, + ) -> DeleteResult { + let conn = &*self.pool_connection_authorized(&opctx).await?; + Self::zpool_delete_self_and_all_datasets_on_connection( + &conn, opctx, zpool_id, + ) + .await + } + + /// See: [Self::zpool_delete_self_and_all_datasets] + pub(crate) async fn zpool_delete_self_and_all_datasets_on_connection( + conn: &async_bb8_diesel::Connection, + opctx: &OpContext, + zpool_id: ZpoolUuid, + ) -> DeleteResult { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + let now = Utc::now(); + use db::schema::dataset::dsl as dataset_dsl; + use db::schema::zpool::dsl as zpool_dsl; + + let zpool_id = *zpool_id.as_untyped_uuid(); + + // Get the IDs of all datasets to-be-deleted + let dataset_ids: Vec = dataset_dsl::dataset + .filter(dataset_dsl::time_deleted.is_null()) + .filter(dataset_dsl::pool_id.eq(zpool_id)) + .select(dataset_dsl::id) + .load_async(conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + // Verify that there are no regions nor region snapshots using this dataset + use db::schema::region::dsl as region_dsl; + let region_count = region_dsl::region + .filter(region_dsl::dataset_id.eq_any(dataset_ids.clone())) + .count() + .first_async::(conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + if region_count > 0 { + return Err(Error::unavail(&format!( + "Cannot delete this zpool; it has {region_count} regions" + ))); + } + + use db::schema::region_snapshot::dsl as region_snapshot_dsl; + let region_snapshot_count = region_snapshot_dsl::region_snapshot + .filter(region_snapshot_dsl::dataset_id.eq_any(dataset_ids)) + .count() + .first_async::(conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + if region_snapshot_count > 0 { + return Err( + Error::unavail(&format!("Cannot delete this zpool; it has {region_snapshot_count} region snapshots")) + ); + } + + // Ensure the datasets are deleted + diesel::update(dataset_dsl::dataset) + .filter(dataset_dsl::time_deleted.is_null()) + .filter(dataset_dsl::pool_id.eq(zpool_id)) + .set(dataset_dsl::time_deleted.eq(now)) + .execute_async(conn) + .await + .map(|_rows_modified| ()) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + // Ensure the zpool is deleted + diesel::update(zpool_dsl::zpool) + .filter(zpool_dsl::id.eq(zpool_id)) + .filter(zpool_dsl::time_deleted.is_null()) + .set(zpool_dsl::time_deleted.eq(now)) + .execute_async(conn) + .await + .map(|_rows_modified| ()) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(()) + } } diff --git a/nexus/examples/config-second.toml b/nexus/examples/config-second.toml index ef67749a4b..40f5d95a5f 100644 --- a/nexus/examples/config-second.toml +++ b/nexus/examples/config-second.toml @@ -122,6 +122,7 @@ inventory.nkeep = 5 inventory.disable = false phantom_disks.period_secs = 30 physical_disk_adoption.period_secs = 30 +decommissioned_disk_cleaner.period_secs = 60 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 blueprints.period_secs_collect_crdb_node_ids = 180 diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 6ec80359ab..b194ecf1b6 100644 --- a/nexus/examples/config.toml +++ 
b/nexus/examples/config.toml @@ -108,6 +108,7 @@ inventory.nkeep = 5 inventory.disable = false phantom_disks.period_secs = 30 physical_disk_adoption.period_secs = 30 +decommissioned_disk_cleaner.period_secs = 60 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 blueprints.period_secs_collect_crdb_node_ids = 180 diff --git a/nexus/reconfigurator/execution/src/cockroachdb.rs b/nexus/reconfigurator/execution/src/cockroachdb.rs index 5a8710a1c5..498944598d 100644 --- a/nexus/reconfigurator/execution/src/cockroachdb.rs +++ b/nexus/reconfigurator/execution/src/cockroachdb.rs @@ -90,7 +90,10 @@ mod test { ); // Record the zpools so we don't fail to ensure datasets (unrelated to // crdb settings) during blueprint execution. - crate::tests::insert_zpool_records(datastore, &opctx, &blueprint).await; + crate::tests::create_disks_for_zones_using_datasets( + datastore, &opctx, &blueprint, + ) + .await; // Execute the initial blueprint. let overrides = Overridables::for_test(cptestctx); crate::realize_blueprint_with_overrides( diff --git a/nexus/reconfigurator/execution/src/datasets.rs b/nexus/reconfigurator/execution/src/datasets.rs index 139c94c53f..6b7e30a738 100644 --- a/nexus/reconfigurator/execution/src/datasets.rs +++ b/nexus/reconfigurator/execution/src/datasets.rs @@ -153,7 +153,10 @@ mod tests { // Record the sleds and zpools. crate::tests::insert_sled_records(datastore, &blueprint).await; - crate::tests::insert_zpool_records(datastore, opctx, &blueprint).await; + crate::tests::create_disks_for_zones_using_datasets( + datastore, opctx, &blueprint, + ) + .await; // Prior to ensuring datasets exist, there should be none. assert_eq!( diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index f3210a12aa..bdcfc66bad 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -1182,7 +1182,10 @@ mod test { // Record the zpools so we don't fail to ensure datasets (unrelated to // DNS) during blueprint execution. - crate::tests::insert_zpool_records(datastore, &opctx, &blueprint).await; + crate::tests::create_disks_for_zones_using_datasets( + datastore, &opctx, &blueprint, + ) + .await; // Now, execute the initial blueprint. let overrides = Overridables::for_test(cptestctx); diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs index 8cdbd46265..e3d2019230 100644 --- a/nexus/reconfigurator/execution/src/lib.rs +++ b/nexus/reconfigurator/execution/src/lib.rs @@ -227,6 +227,12 @@ where ) .await?; + // This depends on the "deploy_disks" call earlier -- disk expungement is a + // statement of policy, but we need to be assured that the Sled Agent has + // stopped using that disk before we can mark its state as decommissioned. + omicron_physical_disks::decommission_expunged_disks(&opctx, datastore) + .await?; + // This is likely to error if any cluster upgrades are in progress (which // can take some time), so it should remain at the end so that other parts // of the blueprint can progress normally. 
@@ -241,6 +247,8 @@ where mod tests { use super::*; use nexus_db_model::Generation; + use nexus_db_model::PhysicalDisk; + use nexus_db_model::PhysicalDiskKind; use nexus_db_model::SledBaseboard; use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; @@ -290,7 +298,7 @@ mod tests { // tests expect to be able to realize the the blueprint created from an // initial collection, and ensuring the zones' datasets exist requires first // inserting the sled and zpool records. - pub(crate) async fn insert_zpool_records( + pub(crate) async fn create_disks_for_zones_using_datasets( datastore: &DataStore, opctx: &OpContext, blueprint: &Blueprint, @@ -304,12 +312,27 @@ mod tests { continue; }; + let physical_disk_id = Uuid::new_v4(); let pool_id = dataset.dataset.pool_name.id(); + + let disk = PhysicalDisk::new( + physical_disk_id, + String::from("Oxide"), + format!("PhysDisk of {}", pool_id), + String::from("FakeDisk"), + PhysicalDiskKind::U2, + sled_id.into_untyped_uuid(), + ); + datastore + .physical_disk_insert(&opctx, disk.clone()) + .await + .expect("failed to upsert physical disk"); + if pool_inserted.insert(pool_id) { let zpool = Zpool::new( pool_id.into_untyped_uuid(), sled_id.into_untyped_uuid(), - Uuid::new_v4(), // physical_disk_id + physical_disk_id, ); datastore .zpool_insert(opctx, zpool) diff --git a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs index d7d8604e7e..9ae72d2049 100644 --- a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs +++ b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs @@ -10,6 +10,7 @@ use anyhow::Context; use futures::stream; use futures::StreamExt; use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; use nexus_types::deployment::BlueprintPhysicalDisksConfig; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SledUuid; @@ -97,26 +98,58 @@ pub(crate) async fn deploy_disks( } } +/// Decommissions all disks which are currently expunged +pub(crate) async fn decommission_expunged_disks( + opctx: &OpContext, + datastore: &DataStore, +) -> Result<(), Vec> { + datastore + .physical_disk_decommission_all_expunged(&opctx) + .await + .map_err(|e| vec![anyhow!(e)])?; + Ok(()) +} + #[cfg(test)] mod test { use super::deploy_disks; + + use crate::DataStore; use crate::Sled; + use async_bb8_diesel::AsyncRunQueryDsl; + use diesel::ExpressionMethods; + use diesel::QueryDsl; use httptest::matchers::{all_of, json_decoded, request}; use httptest::responders::json_encoded; use httptest::responders::status_code; use httptest::Expectation; + use nexus_db_model::Dataset; + use nexus_db_model::DatasetKind; + use nexus_db_model::PhysicalDisk; + use nexus_db_model::PhysicalDiskKind; + use nexus_db_model::PhysicalDiskPolicy; + use nexus_db_model::PhysicalDiskState; + use nexus_db_model::Region; + use nexus_db_model::Zpool; use nexus_db_queries::context::OpContext; + use nexus_db_queries::db; + use nexus_test_utils::SLED_AGENT_UUID; use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::{ Blueprint, BlueprintPhysicalDiskConfig, BlueprintPhysicalDisksConfig, - BlueprintTarget, CockroachDbPreserveDowngrade, + BlueprintTarget, CockroachDbPreserveDowngrade, DiskFilter, }; + use nexus_types::identity::Asset; + use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Generation; use omicron_common::disk::DiskIdentity; + use omicron_uuid_kinds::GenericUuid; + use omicron_uuid_kinds::PhysicalDiskUuid; use 
omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use std::collections::BTreeMap; use std::net::SocketAddr; + use std::str::FromStr; use uuid::Uuid; type ControlPlaneTestContext = @@ -350,4 +383,227 @@ mod test { s1.verify_and_clear(); s2.verify_and_clear(); } + + async fn make_disk_in_db( + datastore: &DataStore, + opctx: &OpContext, + i: usize, + sled_id: SledUuid, + ) -> PhysicalDiskUuid { + let id = PhysicalDiskUuid::from_untyped_uuid(Uuid::new_v4()); + let physical_disk = PhysicalDisk::new( + id.into_untyped_uuid(), + "v".into(), + format!("s-{i})"), + "m".into(), + PhysicalDiskKind::U2, + sled_id.into_untyped_uuid(), + ); + datastore + .physical_disk_insert(&opctx, physical_disk.clone()) + .await + .unwrap(); + id + } + + async fn add_zpool_dataset_and_region( + datastore: &DataStore, + opctx: &OpContext, + id: PhysicalDiskUuid, + sled_id: SledUuid, + ) { + let zpool = datastore + .zpool_insert( + opctx, + Zpool::new( + Uuid::new_v4(), + sled_id.into_untyped_uuid(), + id.into_untyped_uuid(), + ), + ) + .await + .unwrap(); + + let dataset = datastore + .dataset_upsert(Dataset::new( + Uuid::new_v4(), + zpool.id(), + Some(std::net::SocketAddrV6::new( + std::net::Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + )), + DatasetKind::Crucible, + )) + .await + .unwrap(); + + // There isn't a great API to insert regions (we normally allocate!) + // so insert the record manually here. + let region = { + let volume_id = Uuid::new_v4(); + Region::new( + dataset.id(), + volume_id, + 512_i64.try_into().unwrap(), + 10, + 10, + 1, + ) + }; + let conn = datastore.pool_connection_for_tests().await.unwrap(); + use nexus_db_model::schema::region::dsl; + diesel::insert_into(dsl::region) + .values(region) + .execute_async(&*conn) + .await + .unwrap(); + } + + async fn get_pools( + datastore: &DataStore, + id: PhysicalDiskUuid, + ) -> Vec { + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + use db::schema::zpool::dsl; + dsl::zpool + .filter(dsl::time_deleted.is_null()) + .filter(dsl::physical_disk_id.eq(id.into_untyped_uuid())) + .select(dsl::id) + .load_async::(&*conn) + .await + .map(|ids| { + ids.into_iter() + .map(|id| ZpoolUuid::from_untyped_uuid(id)) + .collect() + }) + .unwrap() + } + + async fn get_datasets(datastore: &DataStore, id: ZpoolUuid) -> Vec { + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + use db::schema::dataset::dsl; + dsl::dataset + .filter(dsl::time_deleted.is_null()) + .filter(dsl::pool_id.eq(id.into_untyped_uuid())) + .select(dsl::id) + .load_async(&*conn) + .await + .unwrap() + } + + async fn get_regions(datastore: &DataStore, id: Uuid) -> Vec { + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + use db::schema::region::dsl; + dsl::region + .filter(dsl::dataset_id.eq(id.into_untyped_uuid())) + .select(dsl::id) + .load_async(&*conn) + .await + .unwrap() + } + + #[nexus_test] + async fn test_decommission_expunged_disks( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let sled_id = SledUuid::from_untyped_uuid( + Uuid::from_str(&SLED_AGENT_UUID).unwrap(), + ); + + // Create a couple disks in the database + let disks = [ + make_disk_in_db(&datastore, &opctx, 0, sled_id).await, + make_disk_in_db(&datastore, &opctx, 1, sled_id).await, + ]; + + // Add a zpool, dataset, and region to each disk. 
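+        // The region is what would normally block cleanup of these zpools;
+        // decommissioning by itself should not remove any of these records.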
+ for disk_id in disks { + add_zpool_dataset_and_region(&datastore, &opctx, disk_id, sled_id) + .await; + } + + let disk_to_decommission = disks[0]; + let other_disk = disks[1]; + + // Expunge one of the disks + datastore + .physical_disk_update_policy( + &opctx, + disk_to_decommission.into_untyped_uuid(), + PhysicalDiskPolicy::Expunged, + ) + .await + .unwrap(); + + // Verify that the state of both disks is "active" + let all_disks = datastore + .physical_disk_list( + &opctx, + &DataPageParams::max_page(), + DiskFilter::All, + ) + .await + .unwrap() + .into_iter() + .map(|disk| (disk.id(), disk)) + .collect::>(); + let d = &all_disks[&disk_to_decommission.into_untyped_uuid()]; + assert_eq!(d.disk_state, PhysicalDiskState::Active); + assert_eq!(d.disk_policy, PhysicalDiskPolicy::Expunged); + let d = &all_disks[&other_disk.into_untyped_uuid()]; + assert_eq!(d.disk_state, PhysicalDiskState::Active); + assert_eq!(d.disk_policy, PhysicalDiskPolicy::InService); + + super::decommission_expunged_disks(&opctx, &datastore).await.unwrap(); + + // After decommissioning, we see the expunged disk become + // decommissioned. The other disk remains in-service. + let all_disks = datastore + .physical_disk_list( + &opctx, + &DataPageParams::max_page(), + DiskFilter::All, + ) + .await + .unwrap() + .into_iter() + .map(|disk| (disk.id(), disk)) + .collect::>(); + let d = &all_disks[&disk_to_decommission.into_untyped_uuid()]; + assert_eq!(d.disk_state, PhysicalDiskState::Decommissioned); + assert_eq!(d.disk_policy, PhysicalDiskPolicy::Expunged); + let d = &all_disks[&other_disk.into_untyped_uuid()]; + assert_eq!(d.disk_state, PhysicalDiskState::Active); + assert_eq!(d.disk_policy, PhysicalDiskPolicy::InService); + + // Even though we've decommissioned this disk, the pools, datasets, and + // regions should remain. Refer to the "decommissioned_disk_cleaner" + // for how these get eventually cleared up. + let pools = get_pools(&datastore, disk_to_decommission).await; + assert_eq!(pools.len(), 1); + let datasets = get_datasets(&datastore, pools[0]).await; + assert_eq!(datasets.len(), 1); + let regions = get_regions(&datastore, datasets[0]).await; + assert_eq!(regions.len(), 1); + + // Similarly, the "other disk" should still exist. 
+ let pools = get_pools(&datastore, other_disk).await; + assert_eq!(pools.len(), 1); + let datasets = get_datasets(&datastore, pools[0]).await; + assert_eq!(datasets.len(), 1); + let regions = get_regions(&datastore, datasets[0]).await; + assert_eq!(regions.len(), 1); + } } diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 4a5d792c80..2f1c4cd738 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -93,6 +93,7 @@ use super::tasks::bfd; use super::tasks::blueprint_execution; use super::tasks::blueprint_load; use super::tasks::crdb_node_id_collector; +use super::tasks::decommissioned_disk_cleaner; use super::tasks::dns_config; use super::tasks::dns_propagation; use super::tasks::dns_servers; @@ -142,6 +143,7 @@ pub struct BackgroundTasks { pub task_bfd_manager: Activator, pub task_inventory_collection: Activator, pub task_physical_disk_adoption: Activator, + pub task_decommissioned_disk_cleaner: Activator, pub task_phantom_disks: Activator, pub task_blueprint_loader: Activator, pub task_blueprint_executor: Activator, @@ -221,6 +223,7 @@ impl BackgroundTasksInitializer { task_bfd_manager: Activator::new(), task_inventory_collection: Activator::new(), task_physical_disk_adoption: Activator::new(), + task_decommissioned_disk_cleaner: Activator::new(), task_phantom_disks: Activator::new(), task_blueprint_loader: Activator::new(), task_blueprint_executor: Activator::new(), @@ -280,6 +283,7 @@ impl BackgroundTasksInitializer { task_bfd_manager, task_inventory_collection, task_physical_disk_adoption, + task_decommissioned_disk_cleaner, task_phantom_disks, task_blueprint_loader, task_blueprint_executor, @@ -511,6 +515,23 @@ impl BackgroundTasksInitializer { activator: task_physical_disk_adoption, }); + driver.register(TaskDefinition { + name: "decommissioned_disk_cleaner", + description: + "deletes DB records for decommissioned disks, after regions \ + and region snapshots have been replaced", + period: config.decommissioned_disk_cleaner.period_secs, + task_impl: Box::new( + decommissioned_disk_cleaner::DecommissionedDiskCleaner::new( + datastore.clone(), + config.decommissioned_disk_cleaner.disable, + ), + ), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_decommissioned_disk_cleaner, + }); + driver.register(TaskDefinition { name: "service_zone_nat_tracker", description: diff --git a/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs b/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs new file mode 100644 index 0000000000..cb3ef9a569 --- /dev/null +++ b/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs @@ -0,0 +1,458 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Cleans up old database state from decommissioned disks. +//! +//! This cannot happen at decommissioning time, because it depends on region +//! (and snapshot) replacement, which happens in the background. +//! +//! Cleanup involves deleting database records for disks (datasets, zpools) +//! that are no longer viable after the physical disk has been decommissioned. 
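+//!
+//! Each activation pages through zpools whose physical disk has been
+//! decommissioned (or deleted outright) and soft-deletes their dataset
+//! records and the zpool record itself. Zpools that still have regions or
+//! region snapshots are skipped and retried on a later activation.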
+ +use crate::app::background::BackgroundTask; +use anyhow::Context; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_model::Zpool; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::pagination::Paginator; +use nexus_db_queries::db::DataStore; +use nexus_types::identity::Asset; +use omicron_common::api::external::Error; +use omicron_uuid_kinds::{GenericUuid, ZpoolUuid}; +use std::num::NonZeroU32; +use std::sync::Arc; + +/// Background task that cleans decommissioned disk DB records. +pub struct DecommissionedDiskCleaner { + datastore: Arc, + disable: bool, +} + +#[derive(Debug, Default)] +struct ActivationResults { + found: usize, + not_ready_to_be_deleted: usize, + deleted: usize, + error_count: usize, +} + +const MAX_BATCH: NonZeroU32 = unsafe { + // Safety: last time I checked, 100 was greater than zero. + NonZeroU32::new_unchecked(100) +}; + +impl DecommissionedDiskCleaner { + pub fn new(datastore: Arc, disable: bool) -> Self { + Self { datastore, disable } + } + + async fn clean_all( + &mut self, + results: &mut ActivationResults, + opctx: &OpContext, + ) -> Result<(), anyhow::Error> { + slog::info!(opctx.log, "Decommissioned disk cleaner running"); + + let mut paginator = Paginator::new(MAX_BATCH); + let mut last_err = Ok(()); + while let Some(p) = paginator.next() { + let zpools = self + .datastore + .zpool_on_decommissioned_disk_list( + opctx, + &p.current_pagparams(), + ) + .await + .context("failed to list zpools on decommissioned disks")?; + paginator = p.found_batch(&zpools, &|zpool| zpool.id()); + self.clean_batch(results, &mut last_err, opctx, &zpools).await; + } + + last_err + } + + async fn clean_batch( + &mut self, + results: &mut ActivationResults, + last_err: &mut Result<(), anyhow::Error>, + opctx: &OpContext, + zpools: &[Zpool], + ) { + results.found += zpools.len(); + slog::debug!(opctx.log, "Found zpools on decommissioned disks"; "count" => zpools.len()); + + for zpool in zpools { + let zpool_id = ZpoolUuid::from_untyped_uuid(zpool.id()); + slog::trace!(opctx.log, "Deleting Zpool"; "zpool" => %zpool_id); + + match self + .datastore + .zpool_delete_self_and_all_datasets(opctx, zpool_id) + .await + { + Ok(_) => { + slog::info!( + opctx.log, + "Deleted zpool and datasets within"; + "zpool" => %zpool_id, + ); + results.deleted += 1; + } + Err(Error::ServiceUnavailable { internal_message }) => { + slog::trace!( + opctx.log, + "Zpool on decommissioned disk not ready for deletion"; + "zpool" => %zpool_id, + "error" => internal_message, + ); + results.not_ready_to_be_deleted += 1; + } + Err(e) => { + slog::warn!( + opctx.log, + "Failed to zpool on decommissioned disk"; + "zpool" => %zpool_id, + "error" => %e, + ); + results.error_count += 1; + *last_err = Err(e).with_context(|| { + format!("failed to delete zpool record {zpool_id}") + }); + } + } + } + } +} + +impl BackgroundTask for DecommissionedDiskCleaner { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async move { + let mut results = ActivationResults::default(); + + let error = if !self.disable { + match self.clean_all(&mut results, opctx).await { + Ok(_) => { + slog::info!(opctx.log, "Cleaned decommissioned zpools"; + "found" => results.found, + "not_ready_to_be_deleted" => results.not_ready_to_be_deleted, + "deleted" => results.deleted, + "error_count" => results.error_count, + ); + None + } + Err(err) => { + slog::error!(opctx.log, "Failed to clean decommissioned zpools"; + "error" => %err, + "found" => results.found, 
+ "not_ready_to_be_deleted" => results.not_ready_to_be_deleted, + "deleted" => results.deleted, + "error_count" => results.error_count, + ); + Some(err.to_string()) + } + } + } else { + slog::info!(opctx.log, "Decommissioned Disk Cleaner disabled"); + None + }; + serde_json::json!({ + "found": results.found, + "not_ready_to_be_deleted": results.not_ready_to_be_deleted, + "deleted": results.deleted, + "error_count": results.error_count, + "error": error, + }) + } + .boxed() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use async_bb8_diesel::AsyncRunQueryDsl; + use diesel::ExpressionMethods; + use diesel::QueryDsl; + use nexus_db_model::Dataset; + use nexus_db_model::DatasetKind; + use nexus_db_model::PhysicalDisk; + use nexus_db_model::PhysicalDiskKind; + use nexus_db_model::PhysicalDiskPolicy; + use nexus_db_model::Region; + use nexus_test_utils::SLED_AGENT_UUID; + use nexus_test_utils_macros::nexus_test; + use omicron_uuid_kinds::{ + DatasetUuid, PhysicalDiskUuid, RegionUuid, SledUuid, + }; + use std::str::FromStr; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + async fn make_disk_in_db( + datastore: &DataStore, + opctx: &OpContext, + i: usize, + sled_id: SledUuid, + ) -> PhysicalDiskUuid { + let id = PhysicalDiskUuid::from_untyped_uuid(Uuid::new_v4()); + let physical_disk = PhysicalDisk::new( + id.into_untyped_uuid(), + "v".into(), + format!("s-{i})"), + "m".into(), + PhysicalDiskKind::U2, + sled_id.into_untyped_uuid(), + ); + datastore + .physical_disk_insert(&opctx, physical_disk.clone()) + .await + .unwrap(); + id + } + + async fn add_zpool_dataset_and_region( + datastore: &DataStore, + opctx: &OpContext, + id: PhysicalDiskUuid, + sled_id: SledUuid, + ) -> (ZpoolUuid, DatasetUuid, RegionUuid) { + let zpool = datastore + .zpool_insert( + opctx, + Zpool::new( + Uuid::new_v4(), + sled_id.into_untyped_uuid(), + id.into_untyped_uuid(), + ), + ) + .await + .unwrap(); + + let dataset = datastore + .dataset_upsert(Dataset::new( + Uuid::new_v4(), + zpool.id(), + Some(std::net::SocketAddrV6::new( + std::net::Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + )), + DatasetKind::Crucible, + )) + .await + .unwrap(); + + // There isn't a great API to insert regions (we normally allocate!) + // so insert the record manually here. 
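+        // The region references the dataset above, so the cleaner should
+        // refuse to delete this zpool until the region row is removed.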
+ let region = { + let volume_id = Uuid::new_v4(); + Region::new( + dataset.id(), + volume_id, + 512_i64.try_into().unwrap(), + 10, + 10, + 1, + ) + }; + let region_id = region.id(); + let conn = datastore.pool_connection_for_tests().await.unwrap(); + use nexus_db_model::schema::region::dsl; + diesel::insert_into(dsl::region) + .values(region) + .execute_async(&*conn) + .await + .unwrap(); + + ( + ZpoolUuid::from_untyped_uuid(zpool.id()), + DatasetUuid::from_untyped_uuid(dataset.id()), + RegionUuid::from_untyped_uuid(region_id), + ) + } + + struct TestFixture { + zpool_id: ZpoolUuid, + dataset_id: DatasetUuid, + region_id: RegionUuid, + } + + impl TestFixture { + async fn setup(datastore: &Arc, opctx: &OpContext) -> Self { + let sled_id = SledUuid::from_untyped_uuid( + Uuid::from_str(&SLED_AGENT_UUID).unwrap(), + ); + + let disk_id = make_disk_in_db(datastore, opctx, 0, sled_id).await; + let (zpool_id, dataset_id, region_id) = + add_zpool_dataset_and_region( + &datastore, &opctx, disk_id, sled_id, + ) + .await; + datastore + .physical_disk_update_policy( + &opctx, + disk_id.into_untyped_uuid(), + PhysicalDiskPolicy::Expunged, + ) + .await + .unwrap(); + + Self { zpool_id, dataset_id, region_id } + } + + async fn delete_region(&self, datastore: &DataStore) { + let conn = datastore.pool_connection_for_tests().await.unwrap(); + use nexus_db_model::schema::region::dsl; + + diesel::delete( + dsl::region + .filter(dsl::id.eq(self.region_id.into_untyped_uuid())), + ) + .execute_async(&*conn) + .await + .unwrap(); + } + + async fn has_been_cleaned(&self, datastore: &DataStore) -> bool { + use async_bb8_diesel::AsyncRunQueryDsl; + use diesel::{ + ExpressionMethods, OptionalExtension, QueryDsl, + SelectableHelper, + }; + use nexus_db_queries::db::schema::zpool::dsl as zpool_dsl; + + let conn = datastore.pool_connection_for_tests().await.unwrap(); + let fetched_zpool = zpool_dsl::zpool + .filter(zpool_dsl::id.eq(self.zpool_id.into_untyped_uuid())) + .filter(zpool_dsl::time_deleted.is_null()) + .select(Zpool::as_select()) + .first_async(&*conn) + .await + .optional() + .expect("Zpool query should succeed"); + + use nexus_db_queries::db::schema::dataset::dsl as dataset_dsl; + let fetched_dataset = dataset_dsl::dataset + .filter(dataset_dsl::id.eq(self.dataset_id.into_untyped_uuid())) + .filter(dataset_dsl::time_deleted.is_null()) + .select(Dataset::as_select()) + .first_async(&*conn) + .await + .optional() + .expect("Dataset query should succeed"); + + match (fetched_zpool, fetched_dataset) { + (Some(_), Some(_)) => false, + (None, None) => true, + _ => panic!("If zpool and dataset were cleaned, they should be cleaned together"), + } + } + } + + #[nexus_test(server = crate::Server)] + async fn test_disk_cleanup_ignores_active_disks( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + let fixture = TestFixture::setup(datastore, &opctx).await; + + let mut task = DecommissionedDiskCleaner::new(datastore.clone(), false); + + // Setup: Disk is expunged, not decommissioned. + // Expectation: We ignore it. 
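+        //
+        // The cleaner only lists zpools whose physical disk is decommissioned
+        // (or deleted), so a disk that is merely expunged is never visited.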
+ + let mut results = ActivationResults::default(); + dbg!(task.clean_all(&mut results, &opctx).await) + .expect("activation completes successfully"); + dbg!(&results); + assert_eq!(results.found, 0); + assert_eq!(results.not_ready_to_be_deleted, 0); + assert_eq!(results.deleted, 0); + assert_eq!(results.error_count, 0); + + assert!(!fixture.has_been_cleaned(&datastore).await); + } + + #[nexus_test(server = crate::Server)] + async fn test_disk_cleanup_does_not_clean_disks_with_regions( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + let fixture = TestFixture::setup(datastore, &opctx).await; + + let mut task = DecommissionedDiskCleaner::new(datastore.clone(), false); + + datastore + .physical_disk_decommission_all_expunged(&opctx) + .await + .unwrap(); + + // Setup: Disk is decommissioned, but has a region. + // Expectation: We ignore it. + + let mut results = ActivationResults::default(); + dbg!(task.clean_all(&mut results, &opctx).await) + .expect("activation completes successfully"); + dbg!(&results); + assert_eq!(results.found, 1); + assert_eq!(results.not_ready_to_be_deleted, 1); + assert_eq!(results.deleted, 0); + assert_eq!(results.error_count, 0); + + assert!(!fixture.has_been_cleaned(&datastore).await); + } + + #[nexus_test(server = crate::Server)] + async fn test_disk_cleanup_cleans_disks_with_no_regions( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + let fixture = TestFixture::setup(datastore, &opctx).await; + + let mut task = DecommissionedDiskCleaner::new(datastore.clone(), false); + + datastore + .physical_disk_decommission_all_expunged(&opctx) + .await + .unwrap(); + fixture.delete_region(&datastore).await; + + // Setup: Disk is decommissioned and has no regions. + // Expectation: We clean it. + + let mut results = ActivationResults::default(); + dbg!(task.clean_all(&mut results, &opctx).await) + .expect("activation completes successfully"); + dbg!(&results); + assert_eq!(results.found, 1); + assert_eq!(results.not_ready_to_be_deleted, 0); + assert_eq!(results.deleted, 1); + assert_eq!(results.error_count, 0); + + assert!(fixture.has_been_cleaned(&datastore).await); + } +} diff --git a/nexus/src/app/background/tasks/mod.rs b/nexus/src/app/background/tasks/mod.rs index a5204588d8..5062799bdb 100644 --- a/nexus/src/app/background/tasks/mod.rs +++ b/nexus/src/app/background/tasks/mod.rs @@ -9,6 +9,7 @@ pub mod bfd; pub mod blueprint_execution; pub mod blueprint_load; pub mod crdb_node_id_collector; +pub mod decommissioned_disk_cleaner; pub mod dns_config; pub mod dns_propagation; pub mod dns_servers; diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 415727693b..e231f665fa 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -104,6 +104,9 @@ phantom_disks.period_secs = 30 physical_disk_adoption.period_secs = 30 # Disable automatic disk adoption to avoid interfering with tests. physical_disk_adoption.disable = true +decommissioned_disk_cleaner.period_secs = 60 +# Disable disk decommissioning cleanup to avoid interfering with tests. 
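+# Tests that exercise the cleaner construct it directly instead.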
+decommissioned_disk_cleaner.disable = true blueprints.period_secs_load = 100 blueprints.period_secs_execute = 600 blueprints.period_secs_collect_crdb_node_ids = 600 diff --git a/nexus/tests/integration_tests/sleds.rs b/nexus/tests/integration_tests/sleds.rs index bd8a8877fc..aa1ff3a667 100644 --- a/nexus/tests/integration_tests/sleds.rs +++ b/nexus/tests/integration_tests/sleds.rs @@ -20,6 +20,8 @@ use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::views::SledInstance; use nexus_types::external_api::views::{PhysicalDisk, Sled}; use omicron_sled_agent::sim; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; use std::str::FromStr; use uuid::Uuid; @@ -128,7 +130,7 @@ async fn test_physical_disk_create_list_delete( let disks = physical_disks_list(&external_client, &disks_url).await; assert_eq!(disks.len(), disks_initial.len() + 1); - let _new_disk = disks + let new_disk = disks .iter() .find(|found_disk| { found_disk.vendor == "v" @@ -141,10 +143,7 @@ async fn test_physical_disk_create_list_delete( datastore .physical_disk_delete( &opctx, - "v".into(), - "s".into(), - "m".into(), - sled_id, + PhysicalDiskUuid::from_untyped_uuid(new_disk.identity.id), ) .await .expect("Failed to upsert physical disk"); diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index a8f3989da4..a5feff067a 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -347,6 +347,9 @@ pub enum DiskFilter { /// All disks which are in-service. InService, + + /// All disks which are expunged but still active. + ExpungedButActive, } impl DiskFilter { @@ -366,10 +369,12 @@ impl PhysicalDiskPolicy { PhysicalDiskPolicy::InService => match filter { DiskFilter::All => true, DiskFilter::InService => true, + DiskFilter::ExpungedButActive => false, }, PhysicalDiskPolicy::Expunged => match filter { DiskFilter::All => true, DiskFilter::InService => false, + DiskFilter::ExpungedButActive => true, }, } } @@ -391,10 +396,12 @@ impl PhysicalDiskState { PhysicalDiskState::Active => match filter { DiskFilter::All => true, DiskFilter::InService => true, + DiskFilter::ExpungedButActive => true, }, PhysicalDiskState::Decommissioned => match filter { DiskFilter::All => true, DiskFilter::InService => false, + DiskFilter::ExpungedButActive => false, }, } } diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index 50f9bf646e..396e3615b2 100644 --- a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -50,6 +50,7 @@ inventory.nkeep = 3 inventory.disable = false phantom_disks.period_secs = 30 physical_disk_adoption.period_secs = 30 +decommissioned_disk_cleaner.period_secs = 60 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 blueprints.period_secs_collect_crdb_node_ids = 180 diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index 31db278616..df49476eed 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -50,6 +50,7 @@ inventory.nkeep = 3 inventory.disable = false phantom_disks.period_secs = 30 physical_disk_adoption.period_secs = 30 +decommissioned_disk_cleaner.period_secs = 60 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 blueprints.period_secs_collect_crdb_node_ids = 180 diff --git a/uuid-kinds/src/lib.rs b/uuid-kinds/src/lib.rs index 
fb8a6f6fa9..8f695d2399 100644 --- a/uuid-kinds/src/lib.rs +++ b/uuid-kinds/src/lib.rs @@ -50,6 +50,7 @@ macro_rules! impl_typed_uuid_kind { impl_typed_uuid_kind! { Collection => "collection", + Dataset => "dataset", Downstairs => "downstairs", DownstairsRegion => "downstairs_region", ExternalIp => "external_ip", @@ -60,6 +61,7 @@ impl_typed_uuid_kind! { Propolis => "propolis", RackInit => "rack_init", RackReset => "rack_reset", + Region => "region", Sled => "sled", TufRepo => "tuf_repo", Upstairs => "upstairs", From 12df35d64d0d2be87d258662f6e03c263b955d6b Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 24 Jul 2024 15:39:03 -0400 Subject: [PATCH 2/3] Correctly mark RoT Bootloader as skipped (#6117) When the RoT bootloader is skipped we need to update it similarly to the RoT and SP. --- wicket/src/state/update.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/wicket/src/state/update.rs b/wicket/src/state/update.rs index 3e0c89e83e..97c0b49d2d 100644 --- a/wicket/src/state/update.rs +++ b/wicket/src/state/update.rs @@ -258,12 +258,15 @@ impl UpdateItem { } | StepEventKind::StepCompleted { step, outcome, .. } => { if step.info.is_last_step_in_component() { - // The RoT and SP components each have two steps in - // them. If the second step ("Updating RoT/SP") is + // The RoT (and bootloader) and SP components each + // have two steps in them. If the second step + // ("Updating RoT Bootloader/RoT/SP") is // skipped, then treat the component as skipped. if matches!( step.info.component, - UpdateComponent::Sp | UpdateComponent::Rot + UpdateComponent::Sp + | UpdateComponent::Rot + | UpdateComponent::RotBootloader ) { assert_eq!( step.info.id, From 18f2520b3a70c51d28b4230a9c595960bd515095 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Wed, 24 Jul 2024 14:13:43 -0700 Subject: [PATCH 3/3] Define switch data link timeseries in TOML (#6120) Note that this renames and reorganizes the timeseries a good deal. We want to include more of the switch / sled identifiers, and make the name of the timeseries more consistent with the existing sled data link and planned instance data link timeseries. --- openapi/nexus.json | 23 +- oximeter/impl/src/schema/codegen.rs | 1 + oximeter/impl/src/schema/mod.rs | 2 + .../oximeter/schema/switch-data-link.toml | 213 ++++++++++++++++++ 4 files changed, 233 insertions(+), 6 deletions(-) create mode 100644 oximeter/oximeter/schema/switch-data-link.toml diff --git a/openapi/nexus.json b/openapi/nexus.json index 9e46a92039..a4baa8124f 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -19797,12 +19797,23 @@ }, "Units": { "description": "Measurement units for timeseries samples.", - "type": "string", - "enum": [ - "count", - "bytes", - "seconds", - "nanoseconds" + "oneOf": [ + { + "type": "string", + "enum": [ + "count", + "bytes", + "seconds", + "nanoseconds" + ] + }, + { + "description": "No meaningful units, e.g. a dimensionless quanity.", + "type": "string", + "enum": [ + "none" + ] + } ] }, "User": { diff --git a/oximeter/impl/src/schema/codegen.rs b/oximeter/impl/src/schema/codegen.rs index cde67439de..ef686c3cdd 100644 --- a/oximeter/impl/src/schema/codegen.rs +++ b/oximeter/impl/src/schema/codegen.rs @@ -515,6 +515,7 @@ fn quote_creation_time(created: DateTime) -> TokenStream { impl quote::ToTokens for Units { fn to_tokens(&self, tokens: &mut TokenStream) { let toks = match self { + Units::None => quote! { ::oximeter::schema::Units::None }, Units::Count => quote! 
{ ::oximeter::schema::Units::Count }, Units::Bytes => quote! { ::oximeter::schema::Units::Bytes }, Units::Seconds => quote! { ::oximeter::schema::Units::Seconds }, diff --git a/oximeter/impl/src/schema/mod.rs b/oximeter/impl/src/schema/mod.rs index 83a83e95b2..7743034e31 100644 --- a/oximeter/impl/src/schema/mod.rs +++ b/oximeter/impl/src/schema/mod.rs @@ -183,6 +183,8 @@ pub struct TimeseriesDescription { // TODO-completeness: Decide whether and how to handle dimensional analysis // during queries, if needed. pub enum Units { + /// No meaningful units, e.g. a dimensionless quanity. + None, Count, Bytes, Seconds, diff --git a/oximeter/oximeter/schema/switch-data-link.toml b/oximeter/oximeter/schema/switch-data-link.toml new file mode 100644 index 0000000000..fa10759ca9 --- /dev/null +++ b/oximeter/oximeter/schema/switch-data-link.toml @@ -0,0 +1,213 @@ +format_version = 1 + +[target] +name = "switch_data_link" +description = "A network data link on an Oxide switch" +authz_scope = "fleet" +versions = [ + { version = 1, fields = [ "rack_id", "sled_id", "sled_model", "sled_revision", "sled_serial", "switch_id", "switch_model", "switch_revision", "switch_serial" ] }, +] + +[[metrics]] +name = "bytes_sent" +description = "Total number of bytes sent on the data link" +units = "bytes" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "bytes_received" +description = "Total number of bytes received on the data link" +units = "bytes" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "errors_sent" +description = "Total number of errors when sending on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "errors_received" +description = "Total number of packets for the data link dropped due to any error" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "receive_crc_error_drops" +description = "Total number of packets for the data link dropped due to CRC errors" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "receive_buffer_full_drops" +description = "Total number of packets for the data link dropped due to ASIC buffer congestion" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "packets_sent" +description = "Total number of packets sent on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "packets_received" +description = "Total number of packets received on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "link_up" +description = "Reports whether the link is currently up" +units = "none" +datum_type = "bool" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "link_fsm" +description = """\ +Total entries into each state of the autonegotation / \ +link-training finite state machine\ +""" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id", "state" ] } +] + 
+[[metrics]] +name = "pcs_bad_sync_headers" +description = "Total number of bad PCS sync headers on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "pcs_errored_blocks" +description = "Total number of PCS error blocks on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "pcs_block_lock_loss" +description = "Total number of detected losses of block-lock on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "pcs_high_ber" +description = "Total number of high bit-error-rate events on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "pcs_valid_errors" +description = "Total number of valid error events on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "pcs_invalid_errors" +description = "Total number of invalid error events on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[[metrics]] +name = "pcs_unknown_errors" +description = "Total number of unknown error events on the data link" +units = "count" +datum_type = "cumulative_u64" +versions = [ + { added_in = 1, fields = [ "port_id", "link_id" ] } +] + +[fields.rack_id] +type = "uuid" +description = "ID of the rack the link's switch is in" + +[fields.sled_id] +type = "uuid" +description = "ID of the sled managing the link's switch" + +[fields.sled_model] +type = "string" +description = "Model number of the sled managing the link's switch" + +[fields.sled_revision] +type = "u32" +description = "Revision number of the sled managing the link's switch" + +[fields.sled_serial] +type = "string" +description = "Serial number of the sled managing the link's switch" + +[fields.switch_id] +type = "uuid" +description = "ID of the switch the link is on" + +[fields.switch_model] +type = "string" +description = "The model number switch the link is on" + +[fields.switch_revision] +type = "u32" +description = "Revision number of the switch the link is on" + +[fields.switch_serial] +type = "string" +description = "Serial number of the switch the link is on" + +[fields.port_id] +type = "string" +description = "Physical switch port the link is on" + +[fields.link_id] +type = "u8" +description = "ID of the link within its switch port" + +[fields.state] +type = "string" +description = "Name of the data link FSM state"