diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs
index 0239837b0d..47912f89cc 100644
--- a/nexus/db-model/src/sled.rs
+++ b/nexus/db-model/src/sled.rs
@@ -212,10 +212,7 @@ impl SledUpdate {
             revision: self.revision,
             // By default, sleds start in-service.
             policy: DbSledPolicy::InService,
-            // By default, sleds start in the "active" state.
-            //
-            // XXX In the future there probably needs to be an "uninitialized"
-            // state here.
+            // Currently, new sleds start in the "active" state.
             state: SledState::Active,
             usable_hardware_threads: self.usable_hardware_threads,
             usable_physical_ram: self.usable_physical_ram,
diff --git a/nexus/db-model/src/sled_policy.rs b/nexus/db-model/src/sled_policy.rs
index 313c7d365e..9f84a65f23 100644
--- a/nexus/db-model/src/sled_policy.rs
+++ b/nexus/db-model/src/sled_policy.rs
@@ -2,6 +2,17 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
+//! Database representation of a sled's operator-defined policy.
+//!
+//! This is related to, but different from `SledState`: a sled's **policy** is
+//! what the operator has specified, while its **state** refers to what's
+//! currently on it, as determined by Nexus.
+//!
+//! For example, a sled might be in the `Active` state, but have a policy of
+//! `Expunged` -- this would mean that Nexus knows about resources currently
+//! provisioned on the sled, but the operator has said that it should be marked
+//! as gone.
+
 use super::impl_enum_type;
 use nexus_types::external_api::views::{SledPolicy, SledProvisionPolicy};
 use serde::{Deserialize, Serialize};
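The policy/state distinction spelled out in these module docs drives the rest of the patch, so a compact illustration may help. This is a self-contained sketch with simplified stand-ins for the real `SledPolicy` and `SledState` types, not code from the patch:

```rust
// Sketch only: simplified stand-ins for SledPolicy (operator intent) and
// SledState (what Nexus believes is on the sled).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Policy {
    InService { provisionable: bool },
    Expunged,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum State {
    Active,
    Decommissioned,
}

// A sled accepts new resources only if the operator allows provisioning
// *and* Nexus still considers the sled active.
fn may_provision(policy: Policy, state: State) -> bool {
    matches!(policy, Policy::InService { provisionable: true })
        && state == State::Active
}

fn main() {
    // The example from the docs: policy `Expunged` with state `Active` --
    // existing resources are still tracked, but nothing new may land here.
    assert!(!may_provision(Policy::Expunged, State::Active));
    assert!(may_provision(
        Policy::InService { provisionable: true },
        State::Active
    ));
    assert!(!may_provision(
        Policy::InService { provisionable: true },
        State::Decommissioned
    ));
}
```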
diff --git a/nexus/db-model/src/sled_state.rs b/nexus/db-model/src/sled_state.rs
index 8c0368fdcc..e090d6d41b 100644
--- a/nexus/db-model/src/sled_state.rs
+++ b/nexus/db-model/src/sled_state.rs
@@ -2,9 +2,21 @@
 // License, v. 2.0. If a copy of the MPL was not distributed with this
 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
 
+//! Database representation of a sled's state as understood by Nexus.
+//!
+//! This is related to, but different from `SledPolicy`: a sled's **policy** is
+//! what the operator has specified, while its **state** refers to what's
+//! currently on it, as determined by Nexus.
+//!
+//! For example, a sled might be in the `Active` state, but have a policy of
+//! `Expunged` -- this would mean that Nexus knows about resources currently
+//! provisioned on the sled, but the operator has said that it should be marked
+//! as gone.
+
 use super::impl_enum_type;
 use nexus_types::external_api::views;
 use serde::{Deserialize, Serialize};
+use std::fmt;
 use strum::EnumIter;
 
 impl_enum_type!(
@@ -21,6 +33,13 @@ impl_enum_type!(
     Decommissioned => b"decommissioned"
 );
 
+impl fmt::Display for SledState {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Forward to the canonical implementation in nexus-types.
+        views::SledState::from(*self).fmt(f)
+    }
+}
+
 impl From<SledState> for views::SledState {
     fn from(state: SledState) -> Self {
         match state {
diff --git a/nexus/db-queries/src/authz/context.rs b/nexus/db-queries/src/authz/context.rs
index 3510da2735..0d6f2a73ac 100644
--- a/nexus/db-queries/src/authz/context.rs
+++ b/nexus/db-queries/src/authz/context.rs
@@ -217,7 +217,8 @@ mod test {
         let logctx = dev::test_setup_log("test_unregistered_resource");
         let mut db = test_setup_database(&logctx.log).await;
         let (opctx, datastore) =
-            crate::db::datastore::datastore_test(&logctx, &db).await;
+            crate::db::datastore::test_utils::datastore_test(&logctx, &db)
+                .await;
 
         // Define a resource that we "forget" to register with Oso.
         use super::AuthorizedResource;
diff --git a/nexus/db-queries/src/authz/policy_test/mod.rs b/nexus/db-queries/src/authz/policy_test/mod.rs
index 00a9904499..b6961bcc30 100644
--- a/nexus/db-queries/src/authz/policy_test/mod.rs
+++ b/nexus/db-queries/src/authz/policy_test/mod.rs
@@ -62,7 +62,8 @@ use uuid::Uuid;
 async fn test_iam_roles_behavior() {
     let logctx = dev::test_setup_log("test_iam_roles");
     let mut db = test_setup_database(&logctx.log).await;
-    let (opctx, datastore) = db::datastore::datastore_test(&logctx, &db).await;
+    let (opctx, datastore) =
+        db::datastore::test_utils::datastore_test(&logctx, &db).await;
 
     // Before we can create the resources, users, and role assignments that we
     // need, we must grant the "test-privileged" user privileges to fetch and
@@ -328,7 +329,8 @@ async fn test_conferred_roles() {
     // To start, this test looks a lot like the test above.
     let logctx = dev::test_setup_log("test_conferred_roles");
     let mut db = test_setup_database(&logctx.log).await;
-    let (opctx, datastore) = db::datastore::datastore_test(&logctx, &db).await;
+    let (opctx, datastore) =
+        db::datastore::test_utils::datastore_test(&logctx, &db).await;
 
     // Before we can create the resources, users, and role assignments that we
     // need, we must grant the "test-privileged" user privileges to fetch and
diff --git a/nexus/db-queries/src/context.rs b/nexus/db-queries/src/context.rs
index 72da01a5f1..d93b269255 100644
--- a/nexus/db-queries/src/context.rs
+++ b/nexus/db-queries/src/context.rs
@@ -350,7 +350,8 @@ mod test {
         let logctx = dev::test_setup_log("test_background_context");
         let mut db = test_setup_database(&logctx.log).await;
         let (_, datastore) =
-            crate::db::datastore::datastore_test(&logctx, &db).await;
+            crate::db::datastore::test_utils::datastore_test(&logctx, &db)
+                .await;
         let opctx = OpContext::for_background(
             logctx.log.new(o!()),
             Arc::new(authz::Authz::new(&logctx.log)),
@@ -382,7 +383,8 @@ mod test {
         let logctx = dev::test_setup_log("test_background_context");
         let mut db = test_setup_database(&logctx.log).await;
         let (_, datastore) =
-            crate::db::datastore::datastore_test(&logctx, &db).await;
+            crate::db::datastore::test_utils::datastore_test(&logctx, &db)
+                .await;
         let opctx = OpContext::for_tests(logctx.log.new(o!()), datastore);
 
         // Like in test_background_context(), this is essentially a test of the
@@ -403,7 +405,8 @@ mod test {
         let logctx = dev::test_setup_log("test_child_context");
         let mut db = test_setup_database(&logctx.log).await;
         let (_, datastore) =
-            crate::db::datastore::datastore_test(&logctx, &db).await;
+            crate::db::datastore::test_utils::datastore_test(&logctx, &db)
+                .await;
         let opctx = OpContext::for_background(
             logctx.log.new(o!()),
             Arc::new(authz::Authz::new(&logctx.log)),
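Much of this patch is the same mechanical change repeated across test modules: `datastore_test` now lives in `datastore::test_utils`. Every call site follows the shape below — a sketch assembled from the surrounding hunks, not a new API:

```rust
// Common setup/teardown pattern used by the tests touched in this patch.
let logctx = dev::test_setup_log("example_test");
let mut db = test_setup_database(&logctx.log).await;
let (opctx, datastore) =
    crate::db::datastore::test_utils::datastore_test(&logctx, &db).await;

// ... exercise the datastore via `opctx` / `datastore` ...

db.cleanup().await.unwrap();
logctx.cleanup_successful();
```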
diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs
index 94d3e21d77..00a75a21da 100644
--- a/nexus/db-queries/src/db/datastore/deployment.rs
+++ b/nexus/db-queries/src/db/datastore/deployment.rs
@@ -1054,7 +1054,7 @@ impl RunQueryDsl<DbConnection> for InsertTargetQuery {}
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::db::datastore::datastore_test;
+    use crate::db::datastore::test_utils::datastore_test;
     use nexus_deployment::blueprint_builder::BlueprintBuilder;
     use nexus_deployment::blueprint_builder::Ensure;
     use nexus_inventory::now_db_precision;
diff --git a/nexus/db-queries/src/db/datastore/disk.rs b/nexus/db-queries/src/db/datastore/disk.rs
index 390376e627..2916573322 100644
--- a/nexus/db-queries/src/db/datastore/disk.rs
+++ b/nexus/db-queries/src/db/datastore/disk.rs
@@ -817,7 +817,7 @@ impl DataStore {
 mod tests {
     use super::*;
 
-    use crate::db::datastore::datastore_test;
+    use crate::db::datastore::test_utils::datastore_test;
     use nexus_test_utils::db::test_setup_database;
     use nexus_types::external_api::params;
     use omicron_common::api::external;
diff --git a/nexus/db-queries/src/db/datastore/dns.rs b/nexus/db-queries/src/db/datastore/dns.rs
index 180764d38c..b12df1875f 100644
--- a/nexus/db-queries/src/db/datastore/dns.rs
+++ b/nexus/db-queries/src/db/datastore/dns.rs
@@ -688,7 +688,7 @@ impl DnsVersionUpdateBuilder {
 #[cfg(test)]
 mod test {
-    use crate::db::datastore::datastore_test;
+    use crate::db::datastore::test_utils::datastore_test;
     use crate::db::datastore::DnsVersionUpdateBuilder;
     use crate::db::DataStore;
     use crate::db::TransactionError;
diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs
index 6b737f21ac..3f2f4bd127 100644
--- a/nexus/db-queries/src/db/datastore/inventory.rs
+++ b/nexus/db-queries/src/db/datastore/inventory.rs
@@ -1914,8 +1914,8 @@ impl DataStoreInventoryTest for DataStore {
 #[cfg(test)]
 mod test {
-    use crate::db::datastore::datastore_test;
     use crate::db::datastore::inventory::DataStoreInventoryTest;
+    use crate::db::datastore::test_utils::datastore_test;
     use crate::db::datastore::DataStoreConnection;
     use crate::db::schema;
     use anyhow::Context;
diff --git a/nexus/db-queries/src/db/datastore/ip_pool.rs b/nexus/db-queries/src/db/datastore/ip_pool.rs
index 4634fda9ee..4a37efd612 100644
--- a/nexus/db-queries/src/db/datastore/ip_pool.rs
+++ b/nexus/db-queries/src/db/datastore/ip_pool.rs
@@ -816,7 +816,7 @@ mod test {
     use std::num::NonZeroU32;
 
     use crate::authz;
-    use crate::db::datastore::datastore_test;
+    use crate::db::datastore::test_utils::datastore_test;
     use crate::db::model::{IpPool, IpPoolResource, IpPoolResourceType};
     use assert_matches::assert_matches;
     use nexus_test_utils::db::test_setup_database;
diff --git a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs
index 27a6bad32f..670ca08960 100644
--- a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs
+++ b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs
@@ -379,7 +379,7 @@ fn ipv4_nat_next_version() -> diesel::expression::SqlLiteral<BigInt> {
 mod test {
     use std::{net::Ipv4Addr, str::FromStr};
 
-    use crate::db::datastore::datastore_test;
+    use crate::db::datastore::test_utils::datastore_test;
     use chrono::Utc;
     use nexus_db_model::{Ipv4NatEntry, Ipv4NatValues, MacAddr, Vni};
     use nexus_test_utils::db::test_setup_database;
diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs
index 71ebad407b..b5eff6cb85 100644
--- a/nexus/db-queries/src/db/datastore/mod.rs
+++ b/nexus/db-queries/src/db/datastore/mod.rs
@@ -88,6 +88,8 @@ mod ssh_key;
 mod switch;
 mod switch_interface;
 mod switch_port;
+#[cfg(test)]
+pub(crate) mod test_utils;
 mod update;
 mod utilization;
 mod virtual_provisioning_collection;
@@ -353,52 +355,16 @@ pub enum UpdatePrecondition<T> {
     Value(T),
 }
 
-/// Constructs a DataStore for use in test suites that has preloaded the
-/// built-in users, roles, and role assignments that are needed for basic
-/// operation
-#[cfg(test)]
-pub async fn datastore_test(
-    logctx: &dropshot::test_util::LogContext,
-    db: &omicron_test_utils::dev::db::CockroachInstance,
-) -> (OpContext, Arc<DataStore>) {
-    use crate::authn;
-
-    let cfg = db::Config { url: db.pg_config().clone() };
-    let pool = Arc::new(db::Pool::new(&logctx.log, &cfg));
-    let datastore =
-        Arc::new(DataStore::new(&logctx.log, pool, None).await.unwrap());
-
-    // Create an OpContext with the credentials of "db-init" just for the
-    // purpose of loading the built-in users, roles, and assignments.
-    let opctx = OpContext::for_background(
-        logctx.log.new(o!()),
-        Arc::new(authz::Authz::new(&logctx.log)),
-        authn::Context::internal_db_init(),
-        Arc::clone(&datastore),
-    );
-
-    // TODO: Can we just call "Populate" instead of doing this?
-    let rack_id = Uuid::parse_str(nexus_test_utils::RACK_UUID).unwrap();
-    datastore.load_builtin_users(&opctx).await.unwrap();
-    datastore.load_builtin_roles(&opctx).await.unwrap();
-    datastore.load_builtin_role_asgns(&opctx).await.unwrap();
-    datastore.load_builtin_silos(&opctx).await.unwrap();
-    datastore.load_builtin_projects(&opctx).await.unwrap();
-    datastore.load_builtin_vpcs(&opctx).await.unwrap();
-    datastore.load_silo_users(&opctx).await.unwrap();
-    datastore.load_silo_user_role_assignments(&opctx).await.unwrap();
-    datastore
-        .load_builtin_fleet_virtual_provisioning_collection(&opctx)
-        .await
-        .unwrap();
-    datastore.load_builtin_rack_data(&opctx, rack_id).await.unwrap();
-
-    // Create an OpContext with the credentials of "test-privileged" for general
-    // testing.
-    let opctx =
-        OpContext::for_tests(logctx.log.new(o!()), Arc::clone(&datastore));
-
-    (opctx, datastore)
+/// Whether state transitions should be validated. "No" is only accessible in
+/// test-only code.
+///
+/// Intended only for testing around illegal states.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+#[must_use]
+enum ValidateTransition {
+    Yes,
+    #[cfg(test)]
+    No,
 }
 
 #[cfg(test)]
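`ValidateTransition` threads through the `sled_set_*_impl` functions later in this patch; because the `No` variant only exists under `cfg(test)`, production callers can only ever request validation. A test that deliberately forces an illegal state (excerpted from the new `test_utils.rs` below, not new API) looks like:

```rust
// From the ineligible-sled setup: force `Decommissioned` without the
// normally-required `Expunged` policy by skipping validation.
sled_set_state(
    &opctx,
    &datastore,
    sled_id,
    SledState::Decommissioned,
    ValidateTransition::No,
    Expected::Ok(SledState::Active),
)
.await?;
```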
@@ -407,6 +373,10 @@ mod test {
     use crate::authn;
     use crate::authn::SiloAuthnPolicy;
     use crate::authz;
+    use crate::db::datastore::test_utils::datastore_test;
+    use crate::db::datastore::test_utils::{
+        IneligibleSledKind, IneligibleSleds,
+    };
     use crate::db::explain::ExplainableAsync;
     use crate::db::fixed_data::silo::DEFAULT_SILO;
     use crate::db::fixed_data::silo::SILO_ID;
@@ -425,7 +395,6 @@ mod test {
     use nexus_db_model::IpAttachState;
     use nexus_test_utils::db::test_setup_database;
     use nexus_types::external_api::params;
-    use nexus_types::external_api::views::SledProvisionPolicy;
     use omicron_common::api::external::DataPageParams;
     use omicron_common::api::external::{
         ByteCount, Error, IdentityMetadataCreateParams, LookupType, Name,
@@ -437,6 +406,7 @@ mod test {
     use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddrV6};
     use std::num::NonZeroU32;
     use std::sync::Arc;
+    use strum::EnumCount;
     use uuid::Uuid;
 
     // Creates a "fake" Sled Baseboard.
@@ -654,35 +624,6 @@ mod test {
         sled_id
     }
 
-    // Marks a sled as non-provisionable.
-    async fn mark_sled_non_provisionable(
-        datastore: &DataStore,
-        opctx: &OpContext,
-        sled_id: Uuid,
-    ) {
-        let (authz_sled, sled) = LookupPath::new(opctx, datastore)
-            .sled_id(sled_id)
-            .fetch_for(authz::Action::Modify)
-            .await
-            .unwrap();
-        println!("sled: {:?}", sled);
-        let old_state = datastore
-            .sled_set_provision_policy(
-                &opctx,
-                &authz_sled,
-                SledProvisionPolicy::NonProvisionable,
-            )
-            .await
-            .unwrap_or_else(|error| {
-                panic!(
-                    "error marking sled {sled_id} as non-provisionable: {error}"
-                )
-            });
-        // The old state should always be provisionable since that's where we
-        // start.
-        assert_eq!(old_state, SledProvisionPolicy::Provisionable);
-    }
-
     fn test_zpool_size() -> ByteCount {
         ByteCount::from_gibibytes_u32(100)
     }
@@ -744,95 +685,184 @@ mod test {
         }
     }
 
-    struct TestDataset {
-        sled_id: Uuid,
-        dataset_id: Uuid,
+    #[derive(Debug)]
+    struct TestDatasets {
+        // eligible and ineligible aren't currently used, but are probably handy
+        // for the future.
+        #[allow(dead_code)]
+        eligible: SledToDatasetMap,
+        #[allow(dead_code)]
+        ineligible: SledToDatasetMap,
+
+        // A map from eligible dataset IDs to their corresponding sled IDs.
+        eligible_dataset_ids: HashMap<Uuid, Uuid>,
+        ineligible_dataset_ids: HashMap<Uuid, IneligibleSledKind>,
     }
 
-    async fn create_test_datasets_for_region_allocation(
-        opctx: &OpContext,
-        datastore: Arc<DataStore>,
-        number_of_sleds: usize,
-    ) -> Vec<TestDataset> {
-        // Create sleds...
-        let sled_ids: Vec<Uuid> = stream::iter(0..number_of_sleds)
-            .then(|_| create_test_sled(&datastore))
-            .collect()
+    // Map of sled IDs to dataset IDs.
+    type SledToDatasetMap = HashMap<Uuid, Vec<Uuid>>;
+
+    impl TestDatasets {
+        async fn create(
+            opctx: &OpContext,
+            datastore: Arc<DataStore>,
+            num_eligible_sleds: usize,
+        ) -> Self {
+            let eligible =
+                Self::create_impl(opctx, datastore.clone(), num_eligible_sleds)
+                    .await;
+
+            let eligible_dataset_ids = eligible
+                .iter()
+                .flat_map(|(sled_id, dataset_ids)| {
+                    dataset_ids
+                        .iter()
+                        .map(move |dataset_id| (*dataset_id, *sled_id))
+                })
+                .collect();
+
+            let ineligible = Self::create_impl(
+                opctx,
+                datastore.clone(),
+                IneligibleSledKind::COUNT,
+            )
             .await;
 
-        struct PhysicalDisk {
-            sled_id: Uuid,
-            disk_id: Uuid,
-        }
+            let mut ineligible_sled_ids = ineligible.keys();
 
-        // create 9 disks on each sled
-        let physical_disks: Vec<PhysicalDisk> = stream::iter(sled_ids)
-            .map(|sled_id| {
-                let sled_id_iter: Vec<Uuid> = (0..9).map(|_| sled_id).collect();
-                stream::iter(sled_id_iter).then(|sled_id| {
-                    let disk_id_future = create_test_physical_disk(
-                        &datastore,
-                        opctx,
-                        sled_id,
-                        PhysicalDiskKind::U2,
-                    );
-                    async move {
-                        let disk_id = disk_id_future.await;
-                        PhysicalDisk { sled_id, disk_id }
-                    }
-                })
-            })
-            .flatten()
-            .collect()
-            .await;
+            // Set up the ineligible sleds. (We're guaranteed that
+            // IneligibleSledKind::COUNT is the same as the number of next()
+            // calls below.)
+            let ineligible_sleds = IneligibleSleds {
+                non_provisionable: *ineligible_sled_ids.next().unwrap(),
+                expunged: *ineligible_sled_ids.next().unwrap(),
+                decommissioned: *ineligible_sled_ids.next().unwrap(),
+                illegal_decommissioned: *ineligible_sled_ids.next().unwrap(),
+            };
 
-        #[derive(Copy, Clone)]
-        struct Zpool {
-            sled_id: Uuid,
-            pool_id: Uuid,
-        }
+            eprintln!("Setting up ineligible sleds: {:?}", ineligible_sleds);
 
-        // 1 pool per disk
-        let zpools: Vec<Zpool> = stream::iter(physical_disks)
-            .then(|disk| {
-                let pool_id_future =
-                    create_test_zpool(&datastore, disk.sled_id, disk.disk_id);
-                async move {
-                    let pool_id = pool_id_future.await;
-                    Zpool { sled_id: disk.sled_id, pool_id }
+            ineligible_sleds
+                .setup(opctx, &datastore)
+                .await
+                .expect("error setting up ineligible sleds");
+
+            // Build a map of dataset IDs to their ineligible kind.
+            let mut ineligible_dataset_ids = HashMap::new();
+            for (kind, sled_id) in ineligible_sleds.iter() {
+                for dataset_id in ineligible.get(&sled_id).unwrap() {
+                    ineligible_dataset_ids.insert(*dataset_id, kind);
                 }
-            })
-            .collect()
-            .await;
+            }
 
-        let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0);
+            Self {
+                eligible,
+                eligible_dataset_ids,
+                ineligible,
+                ineligible_dataset_ids,
+            }
+        }
 
-        let datasets: Vec<TestDataset> = stream::iter(zpools)
-            .map(|zpool| {
-                // 3 datasets per zpool, to test that pools are distinct
-                let zpool_iter: Vec<Zpool> = (0..3).map(|_| zpool).collect();
-                stream::iter(zpool_iter).then(|zpool| {
-                    let id = Uuid::new_v4();
-                    let dataset = Dataset::new(
-                        id,
-                        zpool.pool_id,
-                        bogus_addr,
-                        DatasetKind::Crucible,
-                    );
+        // Returns a map of sled ID to dataset IDs.
+        async fn create_impl(
+            opctx: &OpContext,
+            datastore: Arc<DataStore>,
+            number_of_sleds: usize,
+        ) -> SledToDatasetMap {
+            // Create sleds...
+            let sled_ids: Vec<Uuid> = stream::iter(0..number_of_sleds)
+                .then(|_| create_test_sled(&datastore))
+                .collect()
+                .await;
 
-                    let datastore = datastore.clone();
-                    async move {
-                        datastore.dataset_upsert(dataset).await.unwrap();
+            struct PhysicalDisk {
+                sled_id: Uuid,
+                disk_id: Uuid,
+            }
+
+            // create 9 disks on each sled
+            let physical_disks: Vec<PhysicalDisk> = stream::iter(sled_ids)
+                .map(|sled_id| {
+                    let sled_id_iter: Vec<Uuid> =
+                        (0..9).map(|_| sled_id).collect();
+                    stream::iter(sled_id_iter).then(|sled_id| {
+                        let disk_id_future = create_test_physical_disk(
+                            &datastore,
+                            opctx,
+                            sled_id,
+                            PhysicalDiskKind::U2,
+                        );
+                        async move {
+                            let disk_id = disk_id_future.await;
+                            PhysicalDisk { sled_id, disk_id }
+                        }
+                    })
+                })
+                .flatten()
+                .collect()
+                .await;
 
-                        TestDataset { sled_id: zpool.sled_id, dataset_id: id }
+            #[derive(Copy, Clone)]
+            struct Zpool {
+                sled_id: Uuid,
+                pool_id: Uuid,
+            }
+
+            // 1 pool per disk
+            let zpools: Vec<Zpool> = stream::iter(physical_disks)
+                .then(|disk| {
+                    let pool_id_future = create_test_zpool(
+                        &datastore,
+                        disk.sled_id,
+                        disk.disk_id,
+                    );
+                    async move {
+                        let pool_id = pool_id_future.await;
+                        Zpool { sled_id: disk.sled_id, pool_id }
                     }
                 })
-            })
-            .flatten()
-            .collect()
-            .await;
+                .collect()
+                .await;
+
+            let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0);
+
+            let datasets = stream::iter(zpools)
+                .map(|zpool| {
+                    // 3 datasets per zpool, to test that pools are distinct
+                    let zpool_iter: Vec<Zpool> =
+                        (0..3).map(|_| zpool).collect();
+                    stream::iter(zpool_iter).then(|zpool| {
+                        let dataset_id = Uuid::new_v4();
+                        let dataset = Dataset::new(
+                            dataset_id,
+                            zpool.pool_id,
+                            bogus_addr,
+                            DatasetKind::Crucible,
+                        );
+
+                        let datastore = datastore.clone();
+                        async move {
+                            datastore.dataset_upsert(dataset).await.unwrap();
+
+                            (zpool.sled_id, dataset_id)
+                        }
+                    })
+                })
+                .flatten()
+                .fold(
+                    SledToDatasetMap::new(),
+                    |mut map, (sled_id, dataset_id)| {
+                        // Build a map of sled ID to dataset IDs.
+                        map.entry(sled_id)
+                            .or_insert_with(Vec::new)
+                            .push(dataset_id);
+                        async move { map }
+                    },
+                )
+                .await;
 
-        datasets
+            datasets
+        }
     }
 
     #[tokio::test]
@@ -843,21 +873,12 @@ mod test {
         let logctx = dev::test_setup_log("test_region_allocation_strat_random");
         let mut db = test_setup_database(&logctx.log).await;
         let (opctx, datastore) = datastore_test(&logctx, &db).await;
-        let test_datasets = create_test_datasets_for_region_allocation(
+        let test_datasets = TestDatasets::create(
            &opctx,
            datastore.clone(),
-            // Even though we're going to mark one sled as non-provisionable to
-            // test that logic, we aren't forcing the datasets to be on
-            // distinct sleds, so REGION_REDUNDANCY_THRESHOLD is enough.
-            REGION_REDUNDANCY_THRESHOLD,
-        )
-        .await;
-
-        let non_provisionable_dataset_id = test_datasets[0].dataset_id;
-        mark_sled_non_provisionable(
-            &datastore,
-            &opctx,
-            test_datasets[0].sled_id,
+            // We aren't forcing the datasets to be on distinct sleds, so we
+            // just need one eligible sled.
+            1,
         )
         .await;
@@ -893,8 +914,16 @@ mod test {
                 // Must be 3 unique datasets
                 assert!(disk_datasets.insert(dataset.id()));
 
-                // Dataset must not be non-provisionable.
-                assert_ne!(dataset.id(), non_provisionable_dataset_id);
+                // Dataset must not be ineligible for provisioning.
+                if let Some(kind) =
+                    test_datasets.ineligible_dataset_ids.get(&dataset.id())
+                {
+                    panic!(
+                        "Dataset {} was ineligible for provisioning: {:?}",
+                        dataset.id(),
+                        kind
+                    );
+                }
 
                 // Must be 3 unique zpools
                 assert!(disk_zpools.insert(dataset.pool_id));
@@ -925,31 +954,16 @@ mod test {
         let (opctx, datastore) = datastore_test(&logctx, &db).await;
 
         // Create a rack with enough sleds for a successful allocation when we
-        // require 3 distinct provisionable sleds.
-        let test_datasets = create_test_datasets_for_region_allocation(
+        // require 3 distinct eligible sleds.
+        let test_datasets = TestDatasets::create(
             &opctx,
             datastore.clone(),
-            // We're going to mark one sled as non-provisionable to test that
-            // logic, and we *are* forcing the datasets to be on distinct
-            // sleds: hence threshold + 1.
-            REGION_REDUNDANCY_THRESHOLD + 1,
-        )
-        .await;
-
-        let non_provisionable_dataset_id = test_datasets[0].dataset_id;
-        mark_sled_non_provisionable(
-            &datastore,
-            &opctx,
-            test_datasets[0].sled_id,
+            // We're forcing the datasets to be on distinct sleds, hence the
+            // full REGION_REDUNDANCY_THRESHOLD.
+            REGION_REDUNDANCY_THRESHOLD,
         )
         .await;
 
-        // We need to check that our datasets end up on 3 distinct sleds, but the query doesn't return the sled ID, so we need to reverse map from dataset ID to sled ID
-        let sled_id_map: HashMap<Uuid, Uuid> = test_datasets
-            .into_iter()
-            .map(|test_dataset| (test_dataset.dataset_id, test_dataset.sled_id))
-            .collect();
-
         // Allocate regions from the datasets for this disk. Do it a few times
         // for good measure.
         for alloc_seed in 0..10 {
@@ -982,14 +996,25 @@ mod test {
                 // Must be 3 unique datasets
                 assert!(disk_datasets.insert(dataset.id()));
 
-                // Dataset must not be non-provisionable.
-                assert_ne!(dataset.id(), non_provisionable_dataset_id);
+                // Dataset must not be ineligible for provisioning.
+                if let Some(kind) =
+                    test_datasets.ineligible_dataset_ids.get(&dataset.id())
+                {
+                    panic!(
+                        "Dataset {} was ineligible for provisioning: {:?}",
+                        dataset.id(),
+                        kind
+                    );
+                }
 
                 // Must be 3 unique zpools
                 assert!(disk_zpools.insert(dataset.pool_id));
 
                 // Must be 3 unique sleds
-                let sled_id = sled_id_map.get(&dataset.id()).unwrap();
+                let sled_id = test_datasets
+                    .eligible_dataset_ids
+                    .get(&dataset.id())
+                    .unwrap();
                 assert!(disk_sleds.insert(*sled_id));
 
                 assert_eq!(volume_id, region.volume_id());
@@ -1018,21 +1043,12 @@ mod test {
 
         // Create a rack without enough sleds for a successful allocation when
         // we require 3 distinct provisionable sleds.
-        let test_datasets = create_test_datasets_for_region_allocation(
+        TestDatasets::create(
             &opctx,
             datastore.clone(),
-            // Here, we need to have REGION_REDUNDANCY_THRESHOLD - 1
-            // provisionable sleds to test this failure condition. We're going
-            // to mark one sled as non-provisionable to test that logic, so we
-            // need to add 1 to that number.
-            REGION_REDUNDANCY_THRESHOLD,
-        )
-        .await;
-
-        mark_sled_non_provisionable(
-            &datastore,
-            &opctx,
-            test_datasets[0].sled_id,
+            // Here, we need to have REGION_REDUNDANCY_THRESHOLD - 1 eligible
+            // sleds to test this failure condition.
+            REGION_REDUNDANCY_THRESHOLD - 1,
         )
         .await;
@@ -1077,7 +1093,7 @@ mod test {
             dev::test_setup_log("test_region_allocation_is_idempotent");
         let mut db = test_setup_database(&logctx.log).await;
         let (opctx, datastore) = datastore_test(&logctx, &db).await;
-        create_test_datasets_for_region_allocation(
+        TestDatasets::create(
             &opctx,
             datastore.clone(),
             REGION_REDUNDANCY_THRESHOLD,
@@ -1222,7 +1238,7 @@ mod test {
         let mut db = test_setup_database(&logctx.log).await;
         let (opctx, datastore) = datastore_test(&logctx, &db).await;
 
-        create_test_datasets_for_region_allocation(
+        TestDatasets::create(
             &opctx,
             datastore.clone(),
             REGION_REDUNDANCY_THRESHOLD,
diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs
index 07d681f4ac..d4e94745aa 100644
--- a/nexus/db-queries/src/db/datastore/physical_disk.rs
+++ b/nexus/db-queries/src/db/datastore/physical_disk.rs
@@ -137,10 +137,10 @@ impl DataStore {
 #[cfg(test)]
 mod test {
     use super::*;
-    use crate::db::datastore::datastore_test;
     use crate::db::datastore::test::{
         sled_baseboard_for_test, sled_system_hardware_for_test,
     };
+    use crate::db::datastore::test_utils::datastore_test;
     use crate::db::model::{PhysicalDiskKind, Sled, SledUpdate};
     use dropshot::PaginationOrder;
     use nexus_test_utils::db::test_setup_database;
diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs
index bdcf93e124..46a3e0af2d 100644
--- a/nexus/db-queries/src/db/datastore/rack.rs
+++ b/nexus/db-queries/src/db/datastore/rack.rs
@@ -853,10 +853,10 @@ impl DataStore {
 #[cfg(test)]
 mod test {
     use super::*;
-    use crate::db::datastore::datastore_test;
     use crate::db::datastore::test::{
         sled_baseboard_for_test, sled_system_hardware_for_test,
     };
+    use crate::db::datastore::test_utils::datastore_test;
     use crate::db::datastore::Discoverability;
     use crate::db::lookup::LookupPath;
     use crate::db::model::ExternalIp;
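The next file carries the substantive datastore changes. One recurring edit: each call site used to match on `TransitionError` and hand-build a conflict message; that now collapses into a single `into_external_error` conversion on the error type itself. A self-contained sketch of the shape, with simplified error payloads rather than the real `Sled`/`SledTransition` types:

```rust
// Sketch of the consolidation: conversion logic lives on the error type,
// so call sites shrink to a single map_err.
#[derive(Debug)]
enum TransitionError {
    InvalidTransition(String), // the real variant carries Sled + transition
    External(String),
}

impl TransitionError {
    fn into_external_error(self) -> String {
        match self {
            TransitionError::InvalidTransition(msg) => {
                format!("conflict: {msg}")
            }
            TransitionError::External(e) => e,
        }
    }
}

fn main() {
    let invalid = TransitionError::InvalidTransition(
        "sled is decommissioned".to_string(),
    );
    let external =
        TransitionError::External("service unavailable".to_string());
    // Call sites become `.map_err(|error| error.into_external_error())`.
    println!("{}", invalid.into_external_error());
    println!("{}", external.into_external_error());
}
```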
diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs
index f5b217a13a..dac9bf8a1c 100644
--- a/nexus/db-queries/src/db/datastore/sled.rs
+++ b/nexus/db-queries/src/db/datastore/sled.rs
@@ -9,6 +9,7 @@ use super::SQL_BATCH_SIZE;
 use crate::authz;
 use crate::context::OpContext;
 use crate::db;
+use crate::db::datastore::ValidateTransition;
 use crate::db::error::public_error_from_diesel;
 use crate::db::error::ErrorHandler;
 use crate::db::model::to_db_sled_policy;
@@ -32,6 +33,7 @@ use omicron_common::api::external::DataPageParams;
 use omicron_common::api::external::DeleteResult;
 use omicron_common::api::external::ListResultVec;
 use omicron_common::api::external::ResourceType;
+use std::fmt;
 use strum::IntoEnumIterator;
 use thiserror::Error;
 use uuid::Uuid;
@@ -39,7 +41,9 @@ use uuid::Uuid;
 impl DataStore {
     /// Stores a new sled in the database.
     ///
-    /// Produces `None` if the sled is decommissioned.
+    /// Produces `SledUpsertOutput::Decommissioned` if the sled is
+    /// decommissioned. This is not an error: the only caller (sled-agent)
+    /// is expected to handle this outcome gracefully rather than fail.
     pub async fn sled_upsert(
         &self,
         sled_update: SledUpdate,
@@ -338,16 +342,8 @@ impl DataStore {
         {
             Ok(old_policy) => Ok(old_policy
                 .provision_policy()
-                .expect("only valid policy was in-servie")),
-            Err(TransitionError::InvalidTransition(sled)) => {
-                Err(external::Error::conflict(format!(
-                    "the sled has policy \"{}\" and state \"{:?}\",
-                    and its provision policy cannot be changed",
-                    sled.policy(),
-                    sled.state(),
-                )))
-            }
-            Err(TransitionError::External(e)) => Err(e),
+                .expect("only valid policy was in-service")),
+            Err(error) => Err(error.into_external_error()),
         }
     }
 
@@ -372,20 +368,10 @@ impl DataStore {
             ValidateTransition::Yes,
         )
         .await
-        .map_err(|error| match error {
-            TransitionError::InvalidTransition(sled) => {
-                external::Error::conflict(format!(
-                    "the sled has policy \"{}\" and state \"{:?}\",
-                    and it cannot be set to expunged",
-                    sled.policy(),
-                    sled.state(),
-                ))
-            }
-            TransitionError::External(e) => e,
-        })
+        .map_err(|error| error.into_external_error())
     }
 
-    async fn sled_set_policy_impl(
+    pub(super) async fn sled_set_policy_impl(
         &self,
         opctx: &OpContext,
         authz_sled: &authz::Sled,
@@ -401,7 +387,7 @@ impl DataStore {
             .filter(dsl::time_deleted.is_null())
             .filter(dsl::id.eq(sled_id));
 
-        let t = Transition::Policy(new_policy);
+        let t = SledTransition::Policy(new_policy);
         let valid_old_policies = t.valid_old_policies();
         let valid_old_states = t.valid_old_states();
 
@@ -447,7 +433,10 @@ impl DataStore {
         {
             Ok(result.found.policy())
         } else {
-            Err(TransitionError::InvalidTransition(result.found))
+            Err(TransitionError::InvalidTransition {
+                current: result.found,
+                transition: SledTransition::Policy(new_policy),
+            })
         }
     }
 
     #[cfg(test)]
@@ -479,20 +468,10 @@ impl DataStore {
             ValidateTransition::Yes,
         )
         .await
-        .map_err(|error| match error {
-            TransitionError::InvalidTransition(sled) => {
-                external::Error::conflict(format!(
-                    "the sled has policy \"{}\" and state \"{:?}\",
-                    and it cannot be set to decommissioned",
-                    sled.policy(),
-                    sled.state(),
-                ))
-            }
-            TransitionError::External(e) => e,
-        })
+        .map_err(|error| error.into_external_error())
     }
 
-    async fn sled_set_state_impl(
+    pub(super) async fn sled_set_state_impl(
         &self,
         opctx: &OpContext,
         authz_sled: &authz::Sled,
@@ -508,7 +487,7 @@ impl DataStore {
             .filter(dsl::time_deleted.is_null())
             .filter(dsl::id.eq(sled_id));
 
-        let t = Transition::State(new_state);
+        let t = SledTransition::State(new_state);
         let valid_old_policies = t.valid_old_policies();
         let valid_old_states = t.valid_old_states();
 
@@ -551,7 +530,10 @@ impl DataStore {
         {
             Ok(result.found.state())
         } else {
-            Err(TransitionError::InvalidTransition(result.found))
+            Err(TransitionError::InvalidTransition {
+                current: result.found,
+                transition: SledTransition::State(new_state),
+            })
         }
     }
 
     #[cfg(test)]
@@ -582,32 +564,6 @@ impl SledUpsertOutput {
     }
 }
 
-/// An error that occurred while setting a policy or state.
-#[derive(Debug, Error)]
-#[must_use]
-enum TransitionError {
-    /// The state transition check failed.
-    ///
-    /// The sled is returned.
-    #[error("invalid transition: old sled: {0:?}")]
-    InvalidTransition(Sled),
-
-    /// Some other kind of error occurred.
-    #[error("database error")]
-    External(#[from] external::Error),
-}
-
-impl TransitionError {
-    #[cfg(test)]
-    fn ensure_invalid_transition(self) -> anyhow::Result<()> {
-        match self {
-            TransitionError::InvalidTransition(_) => Ok(()),
-            TransitionError::External(e) => Err(anyhow::anyhow!(e)
-                .context("expected invalid transition, got other error")),
-        }
-    }
-}
-
 // ---
 // State transition validators
 // ---
 
@@ -616,12 +572,12 @@ impl TransitionError {
 // valid for a new policy or state, except idempotent transitions.
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
-enum Transition {
+pub(super) enum SledTransition {
     Policy(SledPolicy),
     State(SledState),
 }
 
-impl Transition {
+impl SledTransition {
     /// Returns the list of valid old policies, other than the provided one
     /// (which is always considered valid).
     ///
@@ -633,7 +589,7 @@ impl Transition {
         use SledState::*;
 
         match self {
-            Transition::Policy(new_policy) => match new_policy {
+            SledTransition::Policy(new_policy) => match new_policy {
                 InService { provision_policy: Provisionable } => {
                     vec![InService { provision_policy: NonProvisionable }]
                 }
@@ -644,7 +600,7 @@ impl Transition {
                 Expunged => SledProvisionPolicy::iter()
                     .map(|provision_policy| InService { provision_policy })
                     .collect(),
             },
-            Transition::State(state) => {
+            SledTransition::State(state) => {
                 match state {
                     Active => {
                         // Any policy is valid for the active state.
@@ -667,13 +623,13 @@ impl Transition {
         use SledState::*;
 
         match self {
-            Transition::Policy(_) => {
+            SledTransition::Policy(_) => {
                 // Policies can only be transitioned in the active state. (In
                 // the future, this will include other non-decommissioned
                 // states.)
                 vec![Active]
             }
-            Transition::State(state) => match state {
+            SledTransition::State(state) => match state {
                 Active => vec![],
                 Decommissioned => vec![Active],
             },
@@ -681,40 +637,90 @@ impl Transition {
     }
 }
 
-impl IntoEnumIterator for Transition {
+impl fmt::Display for SledTransition {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            SledTransition::Policy(policy) => {
+                write!(f, "policy \"{}\"", policy)
+            }
+            SledTransition::State(state) => write!(f, "state \"{}\"", state),
+        }
+    }
+}
+
+impl IntoEnumIterator for SledTransition {
     type Iterator = std::vec::IntoIter<Self>;
 
     fn iter() -> Self::Iterator {
         let v: Vec<_> = SledPolicy::iter()
-            .map(Transition::Policy)
-            .chain(SledState::iter().map(Transition::State))
+            .map(SledTransition::Policy)
+            .chain(SledState::iter().map(SledTransition::State))
             .collect();
         v.into_iter()
     }
 }
 
-/// A private enum to see whether state transitions should be checked while
-/// setting a new policy and/or state. Intended only for testing around illegal
-/// states.
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+/// An error that occurred while setting a policy or state.
+#[derive(Debug, Error)]
 #[must_use]
-enum ValidateTransition {
-    Yes,
+pub(super) enum TransitionError {
+    /// The state transition check failed.
+    ///
+    /// The sled is returned.
+    #[error(
+        "sled id {} has current policy \"{}\" and state \"{}\" \
+         and the transition to {} is not permitted",
+        .current.id(),
+        .current.policy(),
+        .current.state(),
+        .transition,
+    )]
+    InvalidTransition {
+        /// The current sled as fetched from the database.
+        current: Sled,
+
+        /// The new policy or state that was attempted.
+        transition: SledTransition,
+    },
+
+    /// Some other kind of error occurred.
+    #[error("database error")]
+    External(#[from] external::Error),
+}
+
+impl TransitionError {
+    fn into_external_error(self) -> external::Error {
+        match self {
+            TransitionError::InvalidTransition { .. } => {
+                external::Error::conflict(self.to_string())
+            }
+            TransitionError::External(e) => e.clone(),
+        }
+    }
+
     #[cfg(test)]
-    No,
+    pub(super) fn ensure_invalid_transition(self) -> anyhow::Result<()> {
+        match self {
+            TransitionError::InvalidTransition { .. } => Ok(()),
+            TransitionError::External(e) => Err(anyhow::anyhow!(e)
+                .context("expected invalid transition, got other error")),
+        }
+    }
 }
 
 #[cfg(test)]
 mod test {
     use super::*;
-    use crate::db::datastore::datastore_test;
     use crate::db::datastore::test::{
         sled_baseboard_for_test, sled_system_hardware_for_test,
     };
-    use crate::db::lookup::LookupPath;
+    use crate::db::datastore::test_utils::{
+        datastore_test, sled_set_policy, sled_set_state, Expected,
+        IneligibleSleds,
+    };
     use crate::db::model::ByteCount;
     use crate::db::model::SqlU32;
-    use anyhow::{bail, ensure, Context, Result};
+    use anyhow::{Context, Result};
     use itertools::Itertools;
     use nexus_test_utils::db::test_setup_database;
     use nexus_types::identity::Asset;
@@ -805,7 +811,7 @@ mod test {
 
         // Set the sled to decommissioned (this is not a legal transition, but
         // we don't care about sled policy in sled_upsert, just the state.)
-        set_state(
+        sled_set_state(
             &opctx,
             &datastore,
             observed_sled.id(),
@@ -881,82 +887,29 @@ mod test {
             .await
             .unwrap()
             .unwrap();
-        set_policy(
-            &opctx,
-            &datastore,
-            non_provisionable_sled.id(),
-            SledPolicy::InService {
-                provision_policy: SledProvisionPolicy::NonProvisionable,
-            },
-            ValidateTransition::Yes,
-            Expected::Ok(SledPolicy::provisionable()),
-        )
-        .await
-        .unwrap();
-
         let expunged_sled = datastore
             .sled_upsert(test_new_sled_update())
             .await
             .unwrap()
             .unwrap();
-        set_policy(
-            &opctx,
-            &datastore,
-            expunged_sled.id(),
-            SledPolicy::Expunged,
-            ValidateTransition::Yes,
-            Expected::Ok(SledPolicy::provisionable()),
-        )
-        .await
-        .unwrap();
-
         let decommissioned_sled = datastore
             .sled_upsert(test_new_sled_update())
             .await
             .unwrap()
             .unwrap();
-        // Legally, we must set the policy to expunged before setting the state
-        // to decommissioned. (In the future, we'll want to test graceful
-        // removal as well.)
-        set_policy(
-            &opctx,
-            &datastore,
-            decommissioned_sled.id(),
-            SledPolicy::Expunged,
-            ValidateTransition::Yes,
-            Expected::Ok(SledPolicy::provisionable()),
-        )
-        .await
-        .unwrap();
-        set_state(
-            &opctx,
-            &datastore,
-            decommissioned_sled.id(),
-            SledState::Decommissioned,
-            ValidateTransition::Yes,
-            Expected::Ok(SledState::Active),
-        )
-        .await
-        .unwrap();
-
-        // This is _not_ a legal state, BUT we test it out to ensure that if
-        // the system somehow enters this state anyway, we don't try and
-        // provision resources on it.
         let illegal_decommissioned_sled = datastore
             .sled_upsert(test_new_sled_update())
             .await
             .unwrap()
             .unwrap();
-        set_state(
-            &opctx,
-            &datastore,
-            illegal_decommissioned_sled.id(),
-            SledState::Decommissioned,
-            ValidateTransition::No,
-            Expected::Ok(SledState::Active),
-        )
-        .await
-        .unwrap();
+
+        let ineligible_sleds = IneligibleSleds {
+            non_provisionable: non_provisionable_sled.id(),
+            expunged: expunged_sled.id(),
+            decommissioned: decommissioned_sled.id(),
+            illegal_decommissioned: illegal_decommissioned_sled.id(),
+        };
+        ineligible_sleds.setup(&opctx, &datastore).await.unwrap();
 
         // This should be an error since there are no provisionable sleds.
         let resources = db::model::Resources::new(
@@ -1035,7 +988,7 @@ mod test {
                     predicate::in_iter(SledPolicy::all_in_service()),
                     predicate::eq(SledState::Active),
                 ),
-                Transition::Policy(SledPolicy::Expunged),
+                SledTransition::Policy(SledPolicy::Expunged),
             ),
             (
                 // The provision policy of in-service sleds can be changed, or
@@ -1044,7 +997,7 @@ mod test {
                     predicate::in_iter(SledPolicy::all_in_service()),
                     predicate::eq(SledState::Active),
                 ),
-                Transition::Policy(SledPolicy::InService {
+                SledTransition::Policy(SledPolicy::InService {
                     provision_policy: SledProvisionPolicy::Provisionable,
                 }),
             ),
@@ -1054,7 +1007,7 @@ mod test {
                     predicate::in_iter(SledPolicy::all_in_service()),
                     predicate::eq(SledState::Active),
                 ),
-                Transition::Policy(SledPolicy::InService {
+                SledTransition::Policy(SledPolicy::InService {
                     provision_policy: SledProvisionPolicy::NonProvisionable,
                 }),
             ),
@@ -1065,7 +1018,7 @@ mod test {
                     predicate::always(),
                     predicate::eq(SledState::Active),
                 ),
-                Transition::State(SledState::Active),
+                SledTransition::State(SledState::Active),
             ),
             (
                 // Expunged sleds can be marked as decommissioned.
@@ -1073,7 +1026,7 @@ mod test {
                     predicate::eq(SledPolicy::Expunged),
                     predicate::eq(SledState::Active),
                 ),
-                Transition::State(SledState::Decommissioned),
+                SledTransition::State(SledState::Decommissioned),
             ),
             (
                 // Expunged sleds can always be marked as expunged again, as
@@ -1083,7 +1036,7 @@ mod test {
                     predicate::eq(SledPolicy::Expunged),
                     predicate::ne(SledState::Decommissioned),
                 ),
-                Transition::Policy(SledPolicy::Expunged),
+                SledTransition::Policy(SledPolicy::Expunged),
             ),
             (
                 // Decommissioned sleds can always be marked as decommissioned
@@ -1092,14 +1045,14 @@ mod test {
                     predicate::in_iter(SledPolicy::all_decommissionable()),
                     predicate::eq(SledState::Decommissioned),
                 ),
-                Transition::State(SledState::Decommissioned),
+                SledTransition::State(SledState::Decommissioned),
             ),
         ];
 
         // Generate all possible transitions.
         let all_transitions = SledPolicy::iter()
             .cartesian_product(SledState::iter())
-            .cartesian_product(Transition::iter())
+            .cartesian_product(SledTransition::iter())
             .enumerate();
 
         // Set up a sled to test against.
@@ -1140,8 +1093,8 @@ mod test {
         sled_id: Uuid,
         before_policy: SledPolicy,
         before_state: SledState,
-        after: Transition,
-        valid_transitions: &[(Before, Transition)],
+        after: SledTransition,
+        valid_transitions: &[(Before, SledTransition)],
     ) -> Result<()> {
         // Is this a valid transition?
         let is_valid = valid_transitions.iter().any(
@@ -1154,7 +1107,7 @@ mod test {
 
         // Set the sled to the initial policy and state, ignoring state
         // transition errors (this is just to set up the initial state).
-        set_policy(
+        sled_set_policy(
             opctx,
             datastore,
             sled_id,
@@ -1164,7 +1117,7 @@ mod test {
         )
         .await?;
 
-        set_state(
+        sled_set_state(
             opctx,
             datastore,
             sled_id,
@@ -1176,14 +1129,14 @@ mod test {
 
         // Now perform the transition to the new policy or state.
         match after {
-            Transition::Policy(new_policy) => {
+            SledTransition::Policy(new_policy) => {
                 let expected = if is_valid {
                     Expected::Ok(before_policy)
                 } else {
                     Expected::Invalid
                 };
-                set_policy(
+                sled_set_policy(
                     opctx,
                     datastore,
                     sled_id,
@@ -1193,14 +1146,14 @@ mod test {
                 )
                 .await?;
             }
-            Transition::State(new_state) => {
+            SledTransition::State(new_state) => {
                 let expected = if is_valid {
                     Expected::Ok(before_state)
                 } else {
                     Expected::Invalid
                 };
-                set_state(
+                sled_set_state(
                     opctx,
                     datastore,
                     sled_id,
@@ -1236,13 +1189,11 @@ mod test {
     struct Before(BoxPredicate, BoxPredicate);
 
     impl Before {
-        fn new<
+        fn new<P, S>(policy: P, state: S) -> Self
+        where
             P: Predicate<SledPolicy> + Send + Sync + 'static,
-            Q: Predicate<SledState> + Send + Sync + 'static,
-        >(
-            policy: P,
-            state: Q,
-        ) -> Self {
+            S: Predicate<SledState> + Send + Sync + 'static,
+        {
             Before(policy.boxed(), state.boxed())
         }
     }
@@ -1296,114 +1247,4 @@ mod test {
         db.cleanup().await.unwrap();
         logctx.cleanup_successful();
     }
-
-    async fn set_policy(
-        opctx: &OpContext,
-        datastore: &DataStore,
-        sled_id: Uuid,
-        new_policy: SledPolicy,
-        check: ValidateTransition,
-        expected_old_policy: Expected<SledPolicy>,
-    ) -> Result<()> {
-        let (authz_sled, _) = LookupPath::new(&opctx, &datastore)
-            .sled_id(sled_id)
-            .fetch_for(authz::Action::Modify)
-            .await
-            .unwrap();
-
-        let res = datastore
-            .sled_set_policy_impl(opctx, &authz_sled, new_policy, check)
-            .await;
-        match expected_old_policy {
-            Expected::Ok(expected) => {
-                let actual = res.context(
-                    "failed transition that was expected to be successful",
-                )?;
-                ensure!(
-                    actual == expected,
-                    "actual old policy ({actual}) is not \
-                    the same as expected ({expected})"
-                );
-            }
-            Expected::Invalid => match res {
-                Ok(old_policy) => {
-                    bail!(
-                        "expected an invalid state transition error, \
-                        but transition was accepted with old policy: \
-                        {old_policy}"
-                    )
-                }
-                Err(error) => {
-                    error.ensure_invalid_transition()?;
-                }
-            },
-            Expected::Ignore => {
-                // The return value is ignored.
-            }
-        }
-
-        Ok(())
-    }
-
-    async fn set_state(
-        opctx: &OpContext,
-        datastore: &DataStore,
-        sled_id: Uuid,
-        new_state: SledState,
-        check: ValidateTransition,
-        expected_old_state: Expected<SledState>,
-    ) -> Result<()> {
-        let (authz_sled, _) = LookupPath::new(&opctx, &datastore)
-            .sled_id(sled_id)
-            .fetch_for(authz::Action::Modify)
-            .await
-            .unwrap();
-
-        let res = datastore
-            .sled_set_state_impl(&opctx, &authz_sled, new_state, check)
-            .await;
-        match expected_old_state {
-            Expected::Ok(expected) => {
-                let actual = res.context(
-                    "failed transition that was expected to be successful",
-                )?;
-                ensure!(
-                    actual == expected,
-                    "actual old state ({actual:?}) \
-                    is not the same as expected ({expected:?})"
-                );
-            }
-            Expected::Invalid => match res {
-                Ok(old_state) => {
-                    bail!(
-                        "expected an invalid state transition error, \
-                        but transition was accepted with old state: \
-                        {old_state:?}"
-                    )
-                }
-                Err(error) => {
-                    error.ensure_invalid_transition()?;
-                }
-            },
-            Expected::Ignore => {
-                // The return value is ignored.
-            }
-        }
-
-        Ok(())
-    }
-
-    /// For a policy/state transition, describes the expected value of the old
-    /// state.
-    enum Expected<T> {
-        /// The transition is expected to successful, with the provided old
-        /// value.
-        Ok(T),
-
-        /// The transition is expected to be invalid.
-        Invalid,
-
-        /// The return value is ignored.
-        Ignore,
-    }
 }
diff --git a/nexus/db-queries/src/db/datastore/switch_port.rs b/nexus/db-queries/src/db/datastore/switch_port.rs
index 4771768e43..842cd4bf11 100644
--- a/nexus/db-queries/src/db/datastore/switch_port.rs
+++ b/nexus/db-queries/src/db/datastore/switch_port.rs
@@ -1192,7 +1192,8 @@ impl DataStore {
 #[cfg(test)]
 mod test {
-    use crate::db::datastore::{datastore_test, UpdatePrecondition};
+    use crate::db::datastore::test_utils::datastore_test;
+    use crate::db::datastore::UpdatePrecondition;
     use nexus_test_utils::db::test_setup_database;
     use nexus_types::external_api::params::{
         BgpAnnounceSetCreate, BgpConfigCreate, BgpPeer, BgpPeerConfig,
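The new `test_utils.rs` below centralizes the sled-ineligibility setup that the earlier test edits rely on. From the caller's side (as in the `mod.rs` test above — this is an excerpt from this patch, not new API), the whole dance reduces to:

```rust
// Usage shape from the region-allocation test: four freshly-upserted
// sleds, each made ineligible in a different way.
let ineligible_sleds = IneligibleSleds {
    non_provisionable: non_provisionable_sled.id(),
    expunged: expunged_sled.id(),
    decommissioned: decommissioned_sled.id(),
    illegal_decommissioned: illegal_decommissioned_sled.id(),
};
ineligible_sleds.setup(&opctx, &datastore).await.unwrap();
```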
diff --git a/nexus/db-queries/src/db/datastore/test_utils.rs b/nexus/db-queries/src/db/datastore/test_utils.rs
new file mode 100644
index 0000000000..6d26ad044b
--- /dev/null
+++ b/nexus/db-queries/src/db/datastore/test_utils.rs
@@ -0,0 +1,343 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Shared test-only code for the `datastore` module.
+
+use crate::authz;
+use crate::context::OpContext;
+use crate::db;
+use crate::db::datastore::ValidateTransition;
+use crate::db::lookup::LookupPath;
+use crate::db::DataStore;
+use anyhow::bail;
+use anyhow::ensure;
+use anyhow::Context;
+use anyhow::Result;
+use dropshot::test_util::LogContext;
+use nexus_db_model::SledState;
+use nexus_types::external_api::views::SledPolicy;
+use nexus_types::external_api::views::SledProvisionPolicy;
+use omicron_test_utils::dev::db::CockroachInstance;
+use std::sync::Arc;
+use strum::EnumCount;
+use uuid::Uuid;
+
+/// Constructs a DataStore for use in test suites that has preloaded the
+/// built-in users, roles, and role assignments that are needed for basic
+/// operation
+#[cfg(test)]
+pub async fn datastore_test(
+    logctx: &LogContext,
+    db: &CockroachInstance,
+) -> (OpContext, Arc<DataStore>) {
+    use crate::authn;
+
+    let cfg = db::Config { url: db.pg_config().clone() };
+    let pool = Arc::new(db::Pool::new(&logctx.log, &cfg));
+    let datastore =
+        Arc::new(DataStore::new(&logctx.log, pool, None).await.unwrap());
+
+    // Create an OpContext with the credentials of "db-init" just for the
+    // purpose of loading the built-in users, roles, and assignments.
+    let opctx = OpContext::for_background(
+        logctx.log.new(o!()),
+        Arc::new(authz::Authz::new(&logctx.log)),
+        authn::Context::internal_db_init(),
+        Arc::clone(&datastore),
+    );
+
+    // TODO: Can we just call "Populate" instead of doing this?
+    let rack_id = Uuid::parse_str(nexus_test_utils::RACK_UUID).unwrap();
+    datastore.load_builtin_users(&opctx).await.unwrap();
+    datastore.load_builtin_roles(&opctx).await.unwrap();
+    datastore.load_builtin_role_asgns(&opctx).await.unwrap();
+    datastore.load_builtin_silos(&opctx).await.unwrap();
+    datastore.load_builtin_projects(&opctx).await.unwrap();
+    datastore.load_builtin_vpcs(&opctx).await.unwrap();
+    datastore.load_silo_users(&opctx).await.unwrap();
+    datastore.load_silo_user_role_assignments(&opctx).await.unwrap();
+    datastore
+        .load_builtin_fleet_virtual_provisioning_collection(&opctx)
+        .await
+        .unwrap();
+    datastore.load_builtin_rack_data(&opctx, rack_id).await.unwrap();
+
+    // Create an OpContext with the credentials of "test-privileged" for general
+    // testing.
+    let opctx =
+        OpContext::for_tests(logctx.log.new(o!()), Arc::clone(&datastore));
+
+    (opctx, datastore)
+}
+
+/// Denotes a specific way in which a sled is ineligible.
+///
+/// This should match the list of sleds in `IneligibleSleds`.
+#[derive(
+    Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, EnumCount,
+)]
+pub(super) enum IneligibleSledKind {
+    NonProvisionable,
+    Expunged,
+    Decommissioned,
+    IllegalDecommissioned,
+}
+
+/// Specifies sleds to be marked as ineligible for provisioning.
+///
+/// This is less error-prone than several places duplicating this logic.
+#[derive(Debug)]
+pub(super) struct IneligibleSleds {
+    pub(super) non_provisionable: Uuid,
+    pub(super) expunged: Uuid,
+    pub(super) decommissioned: Uuid,
+    pub(super) illegal_decommissioned: Uuid,
+}
+
+impl IneligibleSleds {
+    pub(super) fn iter(
+        &self,
+    ) -> impl Iterator<Item = (IneligibleSledKind, Uuid)> {
+        [
+            (IneligibleSledKind::NonProvisionable, self.non_provisionable),
+            (IneligibleSledKind::Expunged, self.expunged),
+            (IneligibleSledKind::Decommissioned, self.decommissioned),
+            (
+                IneligibleSledKind::IllegalDecommissioned,
+                self.illegal_decommissioned,
+            ),
+        ]
+        .into_iter()
+    }
+
+    /// Marks the provided sleds as ineligible for provisioning.
+    ///
+    /// Assumes that:
+    ///
+    /// * the sleds have just been set up and are in the default state.
+    /// * the UUIDs are all distinct.
+    pub(super) async fn setup(
+        &self,
+        opctx: &OpContext,
+        datastore: &DataStore,
+    ) -> Result<()> {
+        let non_provisionable_fut = async {
+            sled_set_policy(
+                &opctx,
+                &datastore,
+                self.non_provisionable,
+                SledPolicy::InService {
+                    provision_policy: SledProvisionPolicy::NonProvisionable,
+                },
+                ValidateTransition::Yes,
+                Expected::Ok(SledPolicy::provisionable()),
+            )
+            .await
+            .with_context(|| {
+                format!(
+                    "failed to set non-provisionable policy for sled {}",
+                    self.non_provisionable
+                )
+            })
+        };
+
+        let expunged_fut = async {
+            sled_set_policy(
+                &opctx,
+                &datastore,
+                self.expunged,
+                SledPolicy::Expunged,
+                ValidateTransition::Yes,
+                Expected::Ok(SledPolicy::provisionable()),
+            )
+            .await
+            .with_context(|| {
+                format!(
+                    "failed to set expunged policy for sled {}",
+                    self.expunged
+                )
+            })
+        };
+
+        // Legally, we must set the policy to expunged before setting the state
+        // to decommissioned. (In the future, we'll want to test graceful
+        // removal as well.)
+        let decommissioned_fut = async {
+            sled_set_policy(
+                &opctx,
+                &datastore,
+                self.decommissioned,
+                SledPolicy::Expunged,
+                ValidateTransition::Yes,
+                Expected::Ok(SledPolicy::provisionable()),
+            )
+            .await
+            .with_context(|| {
+                format!(
+                    "failed to set expunged policy for sled {}, \
+                     as prerequisite for decommissioning",
+                    self.decommissioned
+                )
+            })?;
+
+            sled_set_state(
+                &opctx,
+                &datastore,
+                self.decommissioned,
+                SledState::Decommissioned,
+                ValidateTransition::Yes,
+                Expected::Ok(SledState::Active),
+            )
+            .await
+            .with_context(|| {
+                format!(
+                    "failed to set decommissioned state for sled {}",
+                    self.decommissioned
+                )
+            })
+        };
+
+        // This is _not_ a legal state, BUT we test it out to ensure that if
+        // the system somehow enters this state anyway, we don't try and
+        // provision resources on it.
+        let illegal_decommissioned_fut = async {
+            sled_set_state(
+                &opctx,
+                &datastore,
+                self.illegal_decommissioned,
+                SledState::Decommissioned,
+                ValidateTransition::No,
+                Expected::Ok(SledState::Active),
+            )
+            .await
+            .with_context(|| {
+                format!(
+                    "failed to illegally set decommissioned state for sled {}",
+                    self.illegal_decommissioned
+                )
+            })
+        };
+
+        // We're okay cancelling the rest of the futures if one of them fails,
+        // since the overall test is going to fail anyway. Hence try_join
+        // rather than joining first and checking for errors afterwards.
+        futures::try_join!(
+            non_provisionable_fut,
+            expunged_fut,
+            decommissioned_fut,
+            illegal_decommissioned_fut
+        )?;
+
+        Ok(())
+    }
+}
+
+pub(super) async fn sled_set_policy(
+    opctx: &OpContext,
+    datastore: &DataStore,
+    sled_id: Uuid,
+    new_policy: SledPolicy,
+    check: ValidateTransition,
+    expected_old_policy: Expected<SledPolicy>,
+) -> Result<()> {
+    let (authz_sled, _) = LookupPath::new(&opctx, &datastore)
+        .sled_id(sled_id)
+        .fetch_for(authz::Action::Modify)
+        .await
+        .unwrap();
+
+    let res = datastore
+        .sled_set_policy_impl(opctx, &authz_sled, new_policy, check)
+        .await;
+    match expected_old_policy {
+        Expected::Ok(expected) => {
+            let actual = res.context(
+                "failed transition that was expected to be successful",
+            )?;
+            ensure!(
+                actual == expected,
+                "actual old policy ({actual}) is not \
+                 the same as expected ({expected})"
+            );
+        }
+        Expected::Invalid => match res {
+            Ok(old_policy) => {
+                bail!(
+                    "expected an invalid state transition error, \
+                     but transition was accepted with old policy: \
+                     {old_policy}"
+                )
+            }
+            Err(error) => {
+                error.ensure_invalid_transition()?;
+            }
+        },
+        Expected::Ignore => {
+            // The return value is ignored.
+        }
+    }
+
+    Ok(())
+}
+
+pub(super) async fn sled_set_state(
+    opctx: &OpContext,
+    datastore: &DataStore,
+    sled_id: Uuid,
+    new_state: SledState,
+    check: ValidateTransition,
+    expected_old_state: Expected<SledState>,
+) -> Result<()> {
+    let (authz_sled, _) = LookupPath::new(&opctx, &datastore)
+        .sled_id(sled_id)
+        .fetch_for(authz::Action::Modify)
+        .await
+        .unwrap();
+
+    let res = datastore
+        .sled_set_state_impl(&opctx, &authz_sled, new_state, check)
+        .await;
+    match expected_old_state {
+        Expected::Ok(expected) => {
+            let actual = res.context(
+                "failed transition that was expected to be successful",
+            )?;
+            ensure!(
+                actual == expected,
+                "actual old state ({actual:?}) \
+                 is not the same as expected ({expected:?})"
+            );
+        }
+        Expected::Invalid => match res {
+            Ok(old_state) => {
+                bail!(
+                    "expected an invalid state transition error, \
+                     but transition was accepted with old state: \
+                     {old_state:?}"
+                )
+            }
+            Err(error) => {
+                error.ensure_invalid_transition()?;
+            }
+        },
+        Expected::Ignore => {
+            // The return value is ignored.
+        }
+    }
+
+    Ok(())
+}
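`sled_set_policy` and `sled_set_state` take an `Expected` value describing what the helper should assert about the returned old policy or state; all three modes are exercised elsewhere in this patch. For instance (excerpted from the ineligible-sled setup above), expunging a fresh sled asserts that the prior policy was the provisionable default:

```rust
// `Expected::Ok(..)` asserts the old value; `Expected::Invalid` demands a
// rejected transition; `Expected::Ignore` skips the check entirely.
sled_set_policy(
    &opctx,
    &datastore,
    sled_id,
    SledPolicy::Expunged,
    ValidateTransition::Yes,
    Expected::Ok(SledPolicy::provisionable()),
)
.await?;
```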
+
+/// For a transition, describes the expected value of the old state.
+pub(super) enum Expected<T> {
+    /// The transition is expected to be successful, with the provided old
+    /// value.
+    Ok(T),
+
+    /// The transition is expected to be invalid.
+    Invalid,
+
+    /// The return value is ignored.
+    Ignore,
+}
diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs
index d0b093ff45..374ef2cf73 100644
--- a/nexus/db-queries/src/db/datastore/volume.rs
+++ b/nexus/db-queries/src/db/datastore/volume.rs
@@ -1064,7 +1064,7 @@ pub fn read_only_resources_associated_with_volume(
 mod tests {
     use super::*;
 
-    use crate::db::datastore::datastore_test;
+    use crate::db::datastore::test_utils::datastore_test;
     use nexus_test_utils::db::test_setup_database;
     use omicron_test_utils::dev;
diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs
index 4f0245e283..4626301f76 100644
--- a/nexus/db-queries/src/db/datastore/vpc.rs
+++ b/nexus/db-queries/src/db/datastore/vpc.rs
@@ -1171,7 +1171,7 @@ impl DataStore {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::db::datastore::datastore_test;
+    use crate::db::datastore::test_utils::datastore_test;
     use crate::db::model::Project;
     use crate::db::queries::vpc::MAX_VNI_SEARCH_RANGE_SIZE;
     use nexus_test_utils::db::test_setup_database;
diff --git a/nexus/db-queries/src/db/lookup.rs b/nexus/db-queries/src/db/lookup.rs
index 18ea369685..050c1dcfe9 100644
--- a/nexus/db-queries/src/db/lookup.rs
+++ b/nexus/db-queries/src/db/lookup.rs
@@ -950,7 +950,8 @@ mod test {
         let logctx = dev::test_setup_log("test_lookup");
         let mut db = test_setup_database(&logctx.log).await;
         let (_, datastore) =
-            crate::db::datastore::datastore_test(&logctx, &db).await;
+            crate::db::datastore::test_utils::datastore_test(&logctx, &db)
+                .await;
         let opctx =
             OpContext::for_tests(logctx.log.new(o!()), Arc::clone(&datastore));
         let project_name: Name = Name("my-project".parse().unwrap());
diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs
index 54595a8444..6676e5d531 100644
--- a/nexus/db-queries/src/db/queries/external_ip.rs
+++ b/nexus/db-queries/src/db/queries/external_ip.rs
@@ -906,7 +906,8 @@ mod tests {
         let logctx = dev::test_setup_log(test_name);
         let log = logctx.log.new(o!());
         let db = test_setup_database(&log).await;
-        crate::db::datastore::datastore_test(&logctx, &db).await;
+        crate::db::datastore::test_utils::datastore_test(&logctx, &db)
+            .await;
         let cfg = crate::db::Config { url: db.pg_config().clone() };
         let pool = Arc::new(crate::db::Pool::new(&logctx.log, &cfg));
         let db_datastore = Arc::new(
diff --git a/nexus/db-queries/src/db/queries/network_interface.rs b/nexus/db-queries/src/db/queries/network_interface.rs
index a22c80b232..d96b26c9b3 100644
--- a/nexus/db-queries/src/db/queries/network_interface.rs
+++ b/nexus/db-queries/src/db/queries/network_interface.rs
@@ -1953,7 +1953,8 @@ mod tests {
         let log = logctx.log.new(o!());
         let db = test_setup_database(&log).await;
         let (opctx, db_datastore) =
-            crate::db::datastore::datastore_test(&logctx, &db).await;
+            crate::db::datastore::test_utils::datastore_test(&logctx, &db)
+                .await;
 
         let authz_silo = opctx.authn.silo_required().unwrap();
diff --git a/nexus/db-queries/src/transaction_retry.rs b/nexus/db-queries/src/transaction_retry.rs
index 6b5098158b..74a94a0b8f 100644
--- a/nexus/db-queries/src/transaction_retry.rs
+++ b/nexus/db-queries/src/transaction_retry.rs
@@ -270,7 +270,7 @@ impl OptionalError {
 mod test {
     use super::*;
 
-    use crate::db::datastore::datastore_test;
+    use crate::db::datastore::test_utils::datastore_test;
     use nexus_test_utils::db::test_setup_database;
     use omicron_test_utils::dev;
     use oximeter::types::FieldValue;
diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs
index 2b585bb4f1..bff0f5cfe7 100644
--- a/nexus/types/src/external_api/views.rs
+++ b/nexus/types/src/external_api/views.rs
@@ -619,6 +619,15 @@ impl SledState {
     }
 }
 
+impl fmt::Display for SledState {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            SledState::Active => write!(f, "active"),
+            SledState::Decommissioned => write!(f, "decommissioned"),
+        }
+    }
+}
+
 /// An operator's view of an instance running on a given sled
 #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
 pub struct SledInstance {
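With the `Display` implementations added above, an invalid transition now surfaces as a single readable conflict message built from the format string in `sled.rs`. A self-contained approximation of the rendered text — the sled ID is hypothetical and the exact `SledPolicy` rendering is an assumption, though the `SledState` and `SledTransition` renderings come from this patch:

```rust
// Reproduces the message shape from TransitionError::InvalidTransition.
fn main() {
    let sled_id = "a302a355-6bd5-44f3-a05f-0cf6a9e999bb"; // hypothetical
    let policy = "expunged"; // assumed Display output for SledPolicy
    let state = "active"; // SledState Display, added in this patch
    let transition = "state \"decommissioned\""; // SledTransition Display
    println!(
        "sled id {} has current policy \"{}\" and state \"{}\" \
         and the transition to {} is not permitted",
        sled_id, policy, state, transition
    );
}
```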