From e346fd10066c076eb75f5e11818dcb3e73fe1208 Mon Sep 17 00:00:00 2001 From: Rain Date: Mon, 22 Jul 2024 12:07:44 -0700 Subject: [PATCH 1/2] [oximeter] turn API into a trait (#6130) Use the OpenAPI manager to manage this trait as well. An interesting question is the standalone Nexus impl also defined by Oximeter -- should that switch to using the internal API crate? As things stand we'd have to write tons of methods that just error out, but I have some thoughts on how to make that easier on the Dropshot side. For now we've decided to leave it as-is, but it's a good data point to consider for future Dropshot improvements. --- Cargo.lock | 15 ++ Cargo.toml | 3 + dev-tools/openapi-manager/Cargo.toml | 1 + dev-tools/openapi-manager/src/spec.rs | 10 ++ openapi/oximeter.json | 4 + oximeter/api/Cargo.toml | 16 ++ oximeter/api/src/lib.rs | 79 +++++++++ oximeter/collector/Cargo.toml | 1 + oximeter/collector/src/bin/oximeter.rs | 17 -- oximeter/collector/src/http_entrypoints.rs | 163 +++++++----------- .../tests/output/cmd-oximeter-noargs-stderr | 1 - .../tests/output/cmd-oximeter-openapi-stderr | 0 oximeter/collector/tests/test_commands.rs | 45 +---- 13 files changed, 189 insertions(+), 166 deletions(-) create mode 100644 oximeter/api/Cargo.toml create mode 100644 oximeter/api/src/lib.rs delete mode 100644 oximeter/collector/tests/output/cmd-oximeter-openapi-stderr diff --git a/Cargo.lock b/Cargo.lock index c140347448..7831b5701c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6166,6 +6166,7 @@ dependencies = [ "openapi-lint", "openapiv3", "owo-colors", + "oximeter-api", "serde_json", "similar", "supports-color", @@ -6362,6 +6363,19 @@ dependencies = [ "uuid", ] +[[package]] +name = "oximeter-api" +version = "0.1.0" +dependencies = [ + "chrono", + "dropshot", + "omicron-common", + "omicron-workspace-hack", + "schemars", + "serde", + "uuid", +] + [[package]] name = "oximeter-client" version = "0.1.0" @@ -6398,6 +6412,7 @@ dependencies = [ "openapi-lint", "openapiv3", "oximeter", + "oximeter-api", "oximeter-client", "oximeter-db", "rand 0.8.5", diff --git a/Cargo.toml b/Cargo.toml index 1e9cc6ba0e..8cc8260f66 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -63,6 +63,7 @@ members = [ "nexus/test-utils-macros", "nexus/test-utils", "nexus/types", + "oximeter/api", "oximeter/collector", "oximeter/db", "oximeter/impl", @@ -164,6 +165,7 @@ default-members = [ "nexus/test-utils-macros", "nexus/test-utils", "nexus/types", + "oximeter/api", "oximeter/collector", "oximeter/db", "oximeter/impl", @@ -408,6 +410,7 @@ opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "3dc9a3dd8d3 oso = "0.27" owo-colors = "4.0.0" oximeter = { path = "oximeter/oximeter" } +oximeter-api = { path = "oximeter/api" } oximeter-client = { path = "clients/oximeter-client" } oximeter-db = { path = "oximeter/db/", default-features = false } oximeter-collector = { path = "oximeter/collector" } diff --git a/dev-tools/openapi-manager/Cargo.toml b/dev-tools/openapi-manager/Cargo.toml index dc07dae0e2..94be112895 100644 --- a/dev-tools/openapi-manager/Cargo.toml +++ b/dev-tools/openapi-manager/Cargo.toml @@ -24,6 +24,7 @@ omicron-workspace-hack.workspace = true openapiv3.workspace = true openapi-lint.workspace = true owo-colors.workspace = true +oximeter-api.workspace = true serde_json.workspace = true similar.workspace = true supports-color.workspace = true diff --git a/dev-tools/openapi-manager/src/spec.rs b/dev-tools/openapi-manager/src/spec.rs index e8ebc23550..cb3b0e3e5c 100644 --- a/dev-tools/openapi-manager/src/spec.rs +++ 
b/dev-tools/openapi-manager/src/spec.rs @@ -66,6 +66,16 @@ pub fn all_apis() -> Vec { filename: "nexus-internal.json", extra_validation: None, }, + ApiSpec { + title: "Oxide Oximeter API", + version: "0.0.1", + description: "API for interacting with oximeter", + boundary: ApiBoundary::Internal, + api_description: + oximeter_api::oximeter_api_mod::stub_api_description, + filename: "oximeter.json", + extra_validation: None, + }, ApiSpec { title: "Oxide Technician Port Control Service", version: "0.0.1", diff --git a/openapi/oximeter.json b/openapi/oximeter.json index d4a37957ab..f596ac6ee6 100644 --- a/openapi/oximeter.json +++ b/openapi/oximeter.json @@ -12,6 +12,7 @@ "paths": { "/info": { "get": { + "summary": "Return identifying information about this collector.", "operationId": "collector_info", "responses": { "200": { @@ -35,6 +36,7 @@ }, "/producers": { "get": { + "summary": "List all producers.", "operationId": "producers_list", "parameters": [ { @@ -81,6 +83,7 @@ } }, "post": { + "summary": "Handle a request from Nexus to register a new producer with this collector.", "operationId": "producers_post", "requestBody": { "content": { @@ -107,6 +110,7 @@ }, "/producers/{producer_id}": { "delete": { + "summary": "Delete a producer by ID.", "operationId": "producer_delete", "parameters": [ { diff --git a/oximeter/api/Cargo.toml b/oximeter/api/Cargo.toml new file mode 100644 index 0000000000..72189aa645 --- /dev/null +++ b/oximeter/api/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "oximeter-api" +version = "0.1.0" +edition = "2021" + +[lints] +workspace = true + +[dependencies] +chrono.workspace = true +dropshot.workspace = true +omicron-common.workspace = true +omicron-workspace-hack.workspace = true +schemars.workspace = true +serde.workspace = true +uuid.workspace = true diff --git a/oximeter/api/src/lib.rs b/oximeter/api/src/lib.rs new file mode 100644 index 0000000000..3cda501fa7 --- /dev/null +++ b/oximeter/api/src/lib.rs @@ -0,0 +1,79 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use chrono::{DateTime, Utc}; +use dropshot::{ + EmptyScanParams, HttpError, HttpResponseDeleted, HttpResponseOk, + HttpResponseUpdatedNoContent, PaginationParams, Query, RequestContext, + ResultsPage, TypedBody, +}; +use omicron_common::api::internal::nexus::ProducerEndpoint; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +#[dropshot::api_description { + module = "oximeter_api_mod", +}] +pub trait OximeterApi { + type Context; + + /// Handle a request from Nexus to register a new producer with this collector. + #[endpoint { + method = POST, + path = "/producers", + }] + async fn producers_post( + request_context: RequestContext, + body: TypedBody, + ) -> Result; + + /// List all producers. + #[endpoint { + method = GET, + path = "/producers", + }] + async fn producers_list( + request_context: RequestContext, + query: Query>, + ) -> Result>, HttpError>; + + /// Delete a producer by ID. + #[endpoint { + method = DELETE, + path = "/producers/{producer_id}", + }] + async fn producer_delete( + request_context: RequestContext, + path: dropshot::Path, + ) -> Result; + + /// Return identifying information about this collector. 
+ #[endpoint { + method = GET, + path = "/info", + }] + async fn collector_info( + request_context: RequestContext, + ) -> Result, HttpError>; +} + +/// Parameters for paginating the list of producers. +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +pub struct ProducerPage { + pub id: Uuid, +} + +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +pub struct ProducerIdPathParams { + pub producer_id: Uuid, +} + +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +pub struct CollectorInfo { + /// The collector's UUID. + pub id: Uuid, + /// Last time we refreshed our producer list with Nexus. + pub last_refresh: Option>, +} diff --git a/oximeter/collector/Cargo.toml b/oximeter/collector/Cargo.toml index 01f484f5f4..cd1e012e5c 100644 --- a/oximeter/collector/Cargo.toml +++ b/oximeter/collector/Cargo.toml @@ -19,6 +19,7 @@ internal-dns.workspace = true nexus-types.workspace = true omicron-common.workspace = true oximeter.workspace = true +oximeter-api.workspace = true oximeter-client.workspace = true oximeter-db.workspace = true rand.workspace = true diff --git a/oximeter/collector/src/bin/oximeter.rs b/oximeter/collector/src/bin/oximeter.rs index d97ae5e72e..efbc53cace 100644 --- a/oximeter/collector/src/bin/oximeter.rs +++ b/oximeter/collector/src/bin/oximeter.rs @@ -10,7 +10,6 @@ use anyhow::{anyhow, Context}; use clap::Parser; use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; -use oximeter_collector::oximeter_api; use oximeter_collector::standalone_nexus_api; use oximeter_collector::Config; use oximeter_collector::Oximeter; @@ -23,16 +22,6 @@ use std::net::SocketAddrV6; use std::path::PathBuf; use uuid::Uuid; -pub fn run_openapi() -> Result<(), String> { - oximeter_api() - .openapi("Oxide Oximeter API", "0.0.1") - .description("API for interacting with oximeter") - .contact_url("https://oxide.computer") - .contact_email("api@oxide.computer") - .write(&mut std::io::stdout()) - .map_err(|e| e.to_string()) -} - pub fn run_standalone_openapi() -> Result<(), String> { standalone_nexus_api() .openapi("Oxide Nexus API", "0.0.1") @@ -47,9 +36,6 @@ pub fn run_standalone_openapi() -> Result<(), String> { #[derive(Parser)] #[clap(name = "oximeter", about = "See README.adoc for more information")] enum Args { - /// Print the external OpenAPI Spec document and exit - Openapi, - /// Start an Oximeter server Run { /// Path to TOML file with configuration for the server @@ -133,9 +119,6 @@ async fn main() { async fn do_run() -> Result<(), CmdError> { let args = Args::parse(); match args { - Args::Openapi => { - run_openapi().map_err(|err| CmdError::Failure(anyhow!(err))) - } Args::Run { config_file, id, address } => { let config = Config::from_file(config_file).unwrap(); let args = OximeterArguments { id, address }; diff --git a/oximeter/collector/src/http_entrypoints.rs b/oximeter/collector/src/http_entrypoints.rs index e876ed047d..daf75bbbd1 100644 --- a/oximeter/collector/src/http_entrypoints.rs +++ b/oximeter/collector/src/http_entrypoints.rs @@ -7,9 +7,6 @@ // Copyright 2023 Oxide Computer Company use crate::OximeterAgent; -use chrono::DateTime; -use chrono::Utc; -use dropshot::endpoint; use dropshot::ApiDescription; use dropshot::EmptyScanParams; use dropshot::HttpError; @@ -23,117 +20,73 @@ use dropshot::ResultsPage; use dropshot::TypedBody; use dropshot::WhichPage; use omicron_common::api::internal::nexus::ProducerEndpoint; -use schemars::JsonSchema; -use serde::Deserialize; -use serde::Serialize; +use oximeter_api::*; use 
std::sync::Arc; -use uuid::Uuid; // Build the HTTP API internal to the control plane pub fn oximeter_api() -> ApiDescription> { - let mut api = ApiDescription::new(); - api.register(producers_post) - .expect("Could not register producers_post API handler"); - api.register(producers_list) - .expect("Could not register producers_list API handler"); - api.register(producer_delete) - .expect("Could not register producers_delete API handler"); - api.register(collector_info) - .expect("Could not register collector_info API handler"); - api + oximeter_api_mod::api_description::() + .expect("registered entrypoints") } -// Handle a request from Nexus to register a new producer with this collector. -#[endpoint { - method = POST, - path = "/producers", -}] -async fn producers_post( - request_context: RequestContext>, - body: TypedBody, -) -> Result { - let agent = request_context.context(); - let producer_info = body.into_inner(); - agent - .register_producer(producer_info) - .await - .map_err(HttpError::from) - .map(|_| HttpResponseUpdatedNoContent()) -} +enum OximeterApiImpl {} -// Parameters for paginating the list of producers. -#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] -struct ProducerPage { - id: Uuid, -} +impl OximeterApi for OximeterApiImpl { + type Context = Arc; -// List all producers -#[endpoint { - method = GET, - path = "/producers", -}] -async fn producers_list( - request_context: RequestContext>, - query: Query>, -) -> Result>, HttpError> { - let agent = request_context.context(); - let pagination = query.into_inner(); - let limit = request_context.page_limit(&pagination)?.get() as usize; - let start = match &pagination.page { - WhichPage::First(..) => None, - WhichPage::Next(ProducerPage { id }) => Some(*id), - }; - let producers = agent.list_producers(start, limit).await; - ResultsPage::new( - producers, - &EmptyScanParams {}, - |info: &ProducerEndpoint, _| ProducerPage { id: info.id }, - ) - .map(HttpResponseOk) -} + async fn producers_post( + request_context: RequestContext, + body: TypedBody, + ) -> Result { + let agent = request_context.context(); + let producer_info = body.into_inner(); + agent + .register_producer(producer_info) + .await + .map_err(HttpError::from) + .map(|_| HttpResponseUpdatedNoContent()) + } -#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] -struct ProducerIdPathParams { - producer_id: Uuid, -} + async fn producers_list( + request_context: RequestContext>, + query: Query>, + ) -> Result>, HttpError> { + let agent = request_context.context(); + let pagination = query.into_inner(); + let limit = request_context.page_limit(&pagination)?.get() as usize; + let start = match &pagination.page { + WhichPage::First(..) => None, + WhichPage::Next(ProducerPage { id }) => Some(*id), + }; + let producers = agent.list_producers(start, limit).await; + ResultsPage::new( + producers, + &EmptyScanParams {}, + |info: &ProducerEndpoint, _| ProducerPage { id: info.id }, + ) + .map(HttpResponseOk) + } -// Delete a producer by ID. -#[endpoint { - method = DELETE, - path = "/producers/{producer_id}", -}] -async fn producer_delete( - request_context: RequestContext>, - path: dropshot::Path, -) -> Result { - let agent = request_context.context(); - let producer_id = path.into_inner().producer_id; - agent - .delete_producer(producer_id) - .await - .map_err(HttpError::from) - .map(|_| HttpResponseDeleted()) -} - -#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] -pub struct CollectorInfo { - /// The collector's UUID. 
- pub id: Uuid, - /// Last time we refreshed our producer list with Nexus. - pub last_refresh: Option>, -} + async fn producer_delete( + request_context: RequestContext, + path: dropshot::Path, + ) -> Result { + let agent = request_context.context(); + let producer_id = path.into_inner().producer_id; + agent + .delete_producer(producer_id) + .await + .map_err(HttpError::from) + .map(|_| HttpResponseDeleted()) + } -// Return identifying information about this collector -#[endpoint { - method = GET, - path = "/info", -}] -async fn collector_info( - request_context: RequestContext>, -) -> Result, HttpError> { - let agent = request_context.context(); - let id = agent.id; - let last_refresh = *agent.last_refresh_time.lock().unwrap(); - let info = CollectorInfo { id, last_refresh }; - Ok(HttpResponseOk(info)) + async fn collector_info( + request_context: RequestContext, + ) -> Result, HttpError> { + let agent = request_context.context(); + let id = agent.id; + let last_refresh = *agent.last_refresh_time.lock().unwrap(); + let info = CollectorInfo { id, last_refresh }; + Ok(HttpResponseOk(info)) + } } diff --git a/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr b/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr index 3f0fd4726d..e1330452b2 100644 --- a/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr +++ b/oximeter/collector/tests/output/cmd-oximeter-noargs-stderr @@ -3,7 +3,6 @@ See README.adoc for more information Usage: oximeter Commands: - openapi Print the external OpenAPI Spec document and exit run Start an Oximeter server standalone Run `oximeter` in standalone mode for development standalone-openapi Print the fake Nexus's standalone API diff --git a/oximeter/collector/tests/output/cmd-oximeter-openapi-stderr b/oximeter/collector/tests/output/cmd-oximeter-openapi-stderr deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/oximeter/collector/tests/test_commands.rs b/oximeter/collector/tests/test_commands.rs index d3d66be058..3a772d736c 100644 --- a/oximeter/collector/tests/test_commands.rs +++ b/oximeter/collector/tests/test_commands.rs @@ -4,14 +4,12 @@ // Copyright 2021 Oxide Computer Company -use std::{fs, path::PathBuf}; +use std::path::PathBuf; use expectorate::assert_contents; use omicron_test_utils::dev::test_cmds::{ - assert_exit_code, path_to_executable, run_command, temp_file_path, - EXIT_SUCCESS, EXIT_USAGE, + assert_exit_code, path_to_executable, run_command, EXIT_USAGE, }; -use openapiv3::OpenAPI; use subprocess::Exec; /// name of the "oximeter" executable @@ -21,15 +19,6 @@ fn path_to_oximeter() -> PathBuf { path_to_executable(CMD_OXIMETER) } -/// Write the requested string to a temporary file and return the path to that -/// file. -fn write_config(config: &str) -> PathBuf { - let file_path = temp_file_path("test_commands_config"); - eprintln!("writing temp config: {}", file_path.display()); - fs::write(&file_path, config).expect("failed to write config file"); - file_path -} - #[test] fn test_oximeter_no_args() { let exec = Exec::cmd(path_to_oximeter()); @@ -38,33 +27,3 @@ fn test_oximeter_no_args() { assert_contents("tests/output/cmd-oximeter-noargs-stdout", &stdout_text); assert_contents("tests/output/cmd-oximeter-noargs-stderr", &stderr_text); } - -#[test] -fn test_oximeter_openapi() { - // This is a little goofy: we need a config file for the program. - // (Arguably, --openapi shouldn't require a config file, but it's - // conceivable that the API metadata or the exposed endpoints would depend - // on the configuration.) 
We ship a config file in "examples", and we may - // as well use it here -- it would be a bug if that one didn't work for this - // purpose. However, it's not clear how to reliably locate it at runtime. - // But we do know where it is at compile time, so we load it then. - let config = include_str!("../../collector/config.toml"); - let config_path = write_config(config); - let exec = Exec::cmd(path_to_oximeter()).arg("openapi"); - let (exit_status, stdout_text, stderr_text) = run_command(exec); - fs::remove_file(&config_path).expect("failed to remove temporary file"); - assert_exit_code(exit_status, EXIT_SUCCESS, &stderr_text); - assert_contents("tests/output/cmd-oximeter-openapi-stderr", &stderr_text); - - let spec: OpenAPI = serde_json::from_str(&stdout_text) - .expect("stdout was not valid OpenAPI"); - - // Check for lint errors. - let errors = openapi_lint::validate(&spec); - assert!(errors.is_empty(), "{}", errors.join("\n\n")); - - // Confirm that the output hasn't changed. It's expected that we'll change - // this file as the API evolves, but pay attention to the diffs to ensure - // that the changes match your expectations. - assert_contents("../../openapi/oximeter.json", &stdout_text); -} From 38575abf11e463993bdb953fc3ad3653cbf8a3b6 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 22 Jul 2024 16:33:30 -0700 Subject: [PATCH 2/2] [nexus] Decommission disks in reconfigurator, clean their DB state (#6059) This PR adds the following step to the reconfigurator's execution step: For all disks that are marked **expunged**, mark them **decommissioned**. This notably happens after the `deploy_disks` step of execution. This PR also adds a background task that looks for all disks that are **decommissioned**, but have **zpools**. It deletes these zpools (and their **datasets**) as long as no regions nor region snapshots are referencing the contained datasets. 
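For operators, the new task is driven by two keys in the Nexus background-task configuration. A minimal sketch (the period below mirrors the value added to nexus/examples/config.toml; `disable` is the new toggle from DecommissionedDiskCleanerConfig and defaults to off):

    decommissioned_disk_cleaner.period_secs = 60
    # Optional escape hatch: pause automated cleanup while keeping the task registered.
    decommissioned_disk_cleaner.disable = false
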
Fixes https://github.com/oxidecomputer/omicron/issues/6051 --- dev-tools/omdb/tests/env.out | 15 + dev-tools/omdb/tests/successes.out | 12 + nexus-config/src/nexus_config.rs | 23 + .../src/db/datastore/physical_disk.rs | 74 ++- nexus/db-queries/src/db/datastore/zpool.rs | 135 ++++++ nexus/examples/config-second.toml | 1 + nexus/examples/config.toml | 1 + .../execution/src/cockroachdb.rs | 5 +- .../reconfigurator/execution/src/datasets.rs | 5 +- nexus/reconfigurator/execution/src/dns.rs | 5 +- nexus/reconfigurator/execution/src/lib.rs | 27 +- .../execution/src/omicron_physical_disks.rs | 258 +++++++++- nexus/src/app/background/init.rs | 21 + .../tasks/decommissioned_disk_cleaner.rs | 458 ++++++++++++++++++ nexus/src/app/background/tasks/mod.rs | 1 + nexus/tests/config.test.toml | 3 + nexus/tests/integration_tests/sleds.rs | 9 +- nexus/types/src/deployment/planning_input.rs | 7 + smf/nexus/multi-sled/config-partial.toml | 1 + smf/nexus/single-sled/config-partial.toml | 1 + uuid-kinds/src/lib.rs | 2 + 21 files changed, 1013 insertions(+), 51 deletions(-) create mode 100644 nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 75acc5c584..a6bf4d4667 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -47,6 +47,11 @@ task: "crdb_node_id_collector" Collects node IDs of running CockroachDB zones +task: "decommissioned_disk_cleaner" + deletes DB records for decommissioned disks, after regions and region + snapshots have been replaced + + task: "dns_config_external" watches external DNS data stored in CockroachDB @@ -187,6 +192,11 @@ task: "crdb_node_id_collector" Collects node IDs of running CockroachDB zones +task: "decommissioned_disk_cleaner" + deletes DB records for decommissioned disks, after regions and region + snapshots have been replaced + + task: "dns_config_external" watches external DNS data stored in CockroachDB @@ -314,6 +324,11 @@ task: "crdb_node_id_collector" Collects node IDs of running CockroachDB zones +task: "decommissioned_disk_cleaner" + deletes DB records for decommissioned disks, after regions and region + snapshots have been replaced + + task: "dns_config_external" watches external DNS data stored in CockroachDB diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 982a9d8403..cec3fa3052 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -248,6 +248,11 @@ task: "crdb_node_id_collector" Collects node IDs of running CockroachDB zones +task: "decommissioned_disk_cleaner" + deletes DB records for decommissioned disks, after regions and region + snapshots have been replaced + + task: "dns_config_external" watches external DNS data stored in CockroachDB @@ -453,6 +458,13 @@ task: "crdb_node_id_collector" started at (s ago) and ran for ms last completion reported error: no blueprint +task: "decommissioned_disk_cleaner" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms +warning: unknown background task: "decommissioned_disk_cleaner" (don't know how to interpret details: Object {"deleted": Number(0), "error": Null, "error_count": Number(0), "found": Number(0), "not_ready_to_be_deleted": Number(0)}) + task: "external_endpoints" configured period: every 1m currently executing: no diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs index 
3bc3a36126..6e9d6b0cf0 100644 --- a/nexus-config/src/nexus_config.rs +++ b/nexus-config/src/nexus_config.rs @@ -361,6 +361,8 @@ pub struct BackgroundTaskConfig { pub inventory: InventoryConfig, /// configuration for physical disk adoption tasks pub physical_disk_adoption: PhysicalDiskAdoptionConfig, + /// configuration for decommissioned disk cleaner task + pub decommissioned_disk_cleaner: DecommissionedDiskCleanerConfig, /// configuration for phantom disks task pub phantom_disks: PhantomDiskConfig, /// configuration for blueprint related tasks @@ -444,6 +446,20 @@ pub struct PhysicalDiskAdoptionConfig { pub disable: bool, } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct DecommissionedDiskCleanerConfig { + /// period (in seconds) for periodic activations of this background task + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, + + /// A toggle to disable automated disk cleanup + /// + /// Default: Off + #[serde(default)] + pub disable: bool, +} + #[serde_as] #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct NatCleanupConfig { @@ -822,6 +838,7 @@ mod test { inventory.nkeep = 11 inventory.disable = false physical_disk_adoption.period_secs = 30 + decommissioned_disk_cleaner.period_secs = 30 phantom_disks.period_secs = 30 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 @@ -947,6 +964,11 @@ mod test { period_secs: Duration::from_secs(30), disable: false, }, + decommissioned_disk_cleaner: + DecommissionedDiskCleanerConfig { + period_secs: Duration::from_secs(30), + disable: false, + }, phantom_disks: PhantomDiskConfig { period_secs: Duration::from_secs(30), }, @@ -1049,6 +1071,7 @@ mod test { inventory.nkeep = 3 inventory.disable = false physical_disk_adoption.period_secs = 30 + decommissioned_disk_cleaner.period_secs = 30 phantom_disks.period_secs = 30 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index 11e056d19b..5e3b51f228 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -37,6 +37,7 @@ use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::PhysicalDiskUuid; use uuid::Uuid; impl DataStore { @@ -278,23 +279,36 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + /// Decommissions all expunged disks. + pub async fn physical_disk_decommission_all_expunged( + &self, + opctx: &OpContext, + ) -> Result<(), Error> { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + use db::schema::physical_disk::dsl; + + let conn = &*self.pool_connection_authorized(&opctx).await?; + diesel::update(dsl::physical_disk) + .filter(dsl::time_deleted.is_null()) + .physical_disk_filter(DiskFilter::ExpungedButActive) + .set(dsl::disk_state.eq(PhysicalDiskState::Decommissioned)) + .execute_async(conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + Ok(()) + } + /// Deletes a disk from the database. 
pub async fn physical_disk_delete( &self, opctx: &OpContext, - vendor: String, - serial: String, - model: String, - sled_id: Uuid, + id: PhysicalDiskUuid, ) -> DeleteResult { opctx.authorize(authz::Action::Read, &authz::FLEET).await?; let now = Utc::now(); use db::schema::physical_disk::dsl; diesel::update(dsl::physical_disk) - .filter(dsl::vendor.eq(vendor)) - .filter(dsl::serial.eq(serial)) - .filter(dsl::model.eq(model)) - .filter(dsl::sled_id.eq(sled_id)) + .filter(dsl::id.eq(id.into_untyped_uuid())) .filter(dsl::time_deleted.is_null()) .set(dsl::time_deleted.eq(now)) .execute_async(&*self.pool_connection_authorized(opctx).await?) @@ -451,8 +465,9 @@ mod test { let sled = create_test_sled(&datastore).await; // Insert a disk + let disk_id = PhysicalDiskUuid::new_v4(); let disk = PhysicalDisk::new( - Uuid::new_v4(), + disk_id.into_untyped_uuid(), String::from("Oxide"), String::from("123"), String::from("FakeDisk"), @@ -472,13 +487,7 @@ mod test { // Delete the inserted disk datastore - .physical_disk_delete( - &opctx, - disk.vendor.clone(), - disk.serial.clone(), - disk.model.clone(), - disk.sled_id, - ) + .physical_disk_delete(&opctx, disk_id) .await .expect("Failed to delete disk"); let disks = datastore @@ -489,13 +498,7 @@ mod test { // Deleting again should not throw an error datastore - .physical_disk_delete( - &opctx, - disk.vendor, - disk.serial, - disk.model, - disk.sled_id, - ) + .physical_disk_delete(&opctx, disk_id) .await .expect("Failed to delete disk"); @@ -520,8 +523,9 @@ mod test { let sled_b = create_test_sled(&datastore).await; // Insert a disk + let disk_id = PhysicalDiskUuid::new_v4(); let disk = PhysicalDisk::new( - Uuid::new_v4(), + disk_id.into_untyped_uuid(), String::from("Oxide"), String::from("123"), String::from("FakeDisk"), @@ -546,13 +550,7 @@ mod test { // Delete the inserted disk datastore - .physical_disk_delete( - &opctx, - disk.vendor, - disk.serial, - disk.model, - disk.sled_id, - ) + .physical_disk_delete(&opctx, disk_id) .await .expect("Failed to delete disk"); let disks = datastore @@ -567,8 +565,9 @@ mod test { assert!(disks.is_empty()); // Attach the disk to the second sled + let disk_id = PhysicalDiskUuid::new_v4(); let disk = PhysicalDisk::new( - Uuid::new_v4(), + disk_id.into_untyped_uuid(), String::from("Oxide"), String::from("123"), String::from("FakeDisk"), @@ -613,8 +612,9 @@ mod test { let sled_b = create_test_sled(&datastore).await; // Insert a disk + let disk_id = PhysicalDiskUuid::new_v4(); let disk = PhysicalDisk::new( - Uuid::new_v4(), + disk_id.into_untyped_uuid(), String::from("Oxide"), String::from("123"), String::from("FakeDisk"), @@ -639,13 +639,7 @@ mod test { // Remove the disk from the first sled datastore - .physical_disk_delete( - &opctx, - disk.vendor.clone(), - disk.serial.clone(), - disk.model.clone(), - disk.sled_id, - ) + .physical_disk_delete(&opctx, disk_id) .await .expect("Failed to delete disk"); diff --git a/nexus/db-queries/src/db/datastore/zpool.rs b/nexus/db-queries/src/db/datastore/zpool.rs index a771202387..a314b9d371 100644 --- a/nexus/db-queries/src/db/datastore/zpool.rs +++ b/nexus/db-queries/src/db/datastore/zpool.rs @@ -15,6 +15,7 @@ use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::identity::Asset; use crate::db::model::PhysicalDisk; +use crate::db::model::PhysicalDiskState; use crate::db::model::Sled; use crate::db::model::Zpool; use crate::db::pagination::paginated; @@ -27,10 +28,13 @@ use diesel::upsert::excluded; use nexus_db_model::PhysicalDiskKind; 
use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; +use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::ZpoolUuid; use uuid::Uuid; impl DataStore { @@ -135,4 +139,135 @@ impl DataStore { Ok(zpools) } + + /// Returns all (non-deleted) zpools on decommissioned (or deleted) disks + pub async fn zpool_on_decommissioned_disk_list( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::physical_disk::dsl as physical_disk_dsl; + use db::schema::zpool::dsl as zpool_dsl; + + paginated(zpool_dsl::zpool, zpool_dsl::id, pagparams) + .filter(zpool_dsl::time_deleted.is_null()) + // Note the LEFT JOIN here -- we want to see zpools where the + // physical disk has been deleted too. + .left_join( + physical_disk_dsl::physical_disk + .on(physical_disk_dsl::id.eq(zpool_dsl::physical_disk_id)), + ) + .filter( + // The physical disk has been either explicitly decommissioned, + // or has been deleted altogether. + physical_disk_dsl::disk_state + .eq(PhysicalDiskState::Decommissioned) + .or(physical_disk_dsl::id.is_null()) + .or( + // NOTE: We should probably get rid of this altogether + // (it's kinda implied by "Decommissioned", being a terminal + // state) but this is an extra cautious statement. + physical_disk_dsl::time_deleted.is_not_null(), + ), + ) + .select(Zpool::as_select()) + .load_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// Soft-deletes the Zpools and cleans up all associated DB resources. + /// + /// This should only be called for zpools on physical disks which + /// have been decommissioned. 
+ /// + /// In order: + /// - Finds all datasets within the zpool + /// - Ensures that no regions nor region snapshots are using these datasets + /// - Soft-deletes the datasets within the zpool + /// - Soft-deletes the zpool itself + pub async fn zpool_delete_self_and_all_datasets( + &self, + opctx: &OpContext, + zpool_id: ZpoolUuid, + ) -> DeleteResult { + let conn = &*self.pool_connection_authorized(&opctx).await?; + Self::zpool_delete_self_and_all_datasets_on_connection( + &conn, opctx, zpool_id, + ) + .await + } + + /// See: [Self::zpool_delete_self_and_all_datasets] + pub(crate) async fn zpool_delete_self_and_all_datasets_on_connection( + conn: &async_bb8_diesel::Connection, + opctx: &OpContext, + zpool_id: ZpoolUuid, + ) -> DeleteResult { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + let now = Utc::now(); + use db::schema::dataset::dsl as dataset_dsl; + use db::schema::zpool::dsl as zpool_dsl; + + let zpool_id = *zpool_id.as_untyped_uuid(); + + // Get the IDs of all datasets to-be-deleted + let dataset_ids: Vec = dataset_dsl::dataset + .filter(dataset_dsl::time_deleted.is_null()) + .filter(dataset_dsl::pool_id.eq(zpool_id)) + .select(dataset_dsl::id) + .load_async(conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + // Verify that there are no regions nor region snapshots using this dataset + use db::schema::region::dsl as region_dsl; + let region_count = region_dsl::region + .filter(region_dsl::dataset_id.eq_any(dataset_ids.clone())) + .count() + .first_async::(conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + if region_count > 0 { + return Err(Error::unavail(&format!( + "Cannot delete this zpool; it has {region_count} regions" + ))); + } + + use db::schema::region_snapshot::dsl as region_snapshot_dsl; + let region_snapshot_count = region_snapshot_dsl::region_snapshot + .filter(region_snapshot_dsl::dataset_id.eq_any(dataset_ids)) + .count() + .first_async::(conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + if region_snapshot_count > 0 { + return Err( + Error::unavail(&format!("Cannot delete this zpool; it has {region_snapshot_count} region snapshots")) + ); + } + + // Ensure the datasets are deleted + diesel::update(dataset_dsl::dataset) + .filter(dataset_dsl::time_deleted.is_null()) + .filter(dataset_dsl::pool_id.eq(zpool_id)) + .set(dataset_dsl::time_deleted.eq(now)) + .execute_async(conn) + .await + .map(|_rows_modified| ()) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + // Ensure the zpool is deleted + diesel::update(zpool_dsl::zpool) + .filter(zpool_dsl::id.eq(zpool_id)) + .filter(zpool_dsl::time_deleted.is_null()) + .set(zpool_dsl::time_deleted.eq(now)) + .execute_async(conn) + .await + .map(|_rows_modified| ()) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(()) + } } diff --git a/nexus/examples/config-second.toml b/nexus/examples/config-second.toml index ef67749a4b..40f5d95a5f 100644 --- a/nexus/examples/config-second.toml +++ b/nexus/examples/config-second.toml @@ -122,6 +122,7 @@ inventory.nkeep = 5 inventory.disable = false phantom_disks.period_secs = 30 physical_disk_adoption.period_secs = 30 +decommissioned_disk_cleaner.period_secs = 60 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 blueprints.period_secs_collect_crdb_node_ids = 180 diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 6ec80359ab..b194ecf1b6 100644 --- a/nexus/examples/config.toml +++ 
b/nexus/examples/config.toml @@ -108,6 +108,7 @@ inventory.nkeep = 5 inventory.disable = false phantom_disks.period_secs = 30 physical_disk_adoption.period_secs = 30 +decommissioned_disk_cleaner.period_secs = 60 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 blueprints.period_secs_collect_crdb_node_ids = 180 diff --git a/nexus/reconfigurator/execution/src/cockroachdb.rs b/nexus/reconfigurator/execution/src/cockroachdb.rs index 5a8710a1c5..498944598d 100644 --- a/nexus/reconfigurator/execution/src/cockroachdb.rs +++ b/nexus/reconfigurator/execution/src/cockroachdb.rs @@ -90,7 +90,10 @@ mod test { ); // Record the zpools so we don't fail to ensure datasets (unrelated to // crdb settings) during blueprint execution. - crate::tests::insert_zpool_records(datastore, &opctx, &blueprint).await; + crate::tests::create_disks_for_zones_using_datasets( + datastore, &opctx, &blueprint, + ) + .await; // Execute the initial blueprint. let overrides = Overridables::for_test(cptestctx); crate::realize_blueprint_with_overrides( diff --git a/nexus/reconfigurator/execution/src/datasets.rs b/nexus/reconfigurator/execution/src/datasets.rs index 139c94c53f..6b7e30a738 100644 --- a/nexus/reconfigurator/execution/src/datasets.rs +++ b/nexus/reconfigurator/execution/src/datasets.rs @@ -153,7 +153,10 @@ mod tests { // Record the sleds and zpools. crate::tests::insert_sled_records(datastore, &blueprint).await; - crate::tests::insert_zpool_records(datastore, opctx, &blueprint).await; + crate::tests::create_disks_for_zones_using_datasets( + datastore, opctx, &blueprint, + ) + .await; // Prior to ensuring datasets exist, there should be none. assert_eq!( diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index f3210a12aa..bdcfc66bad 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -1182,7 +1182,10 @@ mod test { // Record the zpools so we don't fail to ensure datasets (unrelated to // DNS) during blueprint execution. - crate::tests::insert_zpool_records(datastore, &opctx, &blueprint).await; + crate::tests::create_disks_for_zones_using_datasets( + datastore, &opctx, &blueprint, + ) + .await; // Now, execute the initial blueprint. let overrides = Overridables::for_test(cptestctx); diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs index 8cdbd46265..e3d2019230 100644 --- a/nexus/reconfigurator/execution/src/lib.rs +++ b/nexus/reconfigurator/execution/src/lib.rs @@ -227,6 +227,12 @@ where ) .await?; + // This depends on the "deploy_disks" call earlier -- disk expungement is a + // statement of policy, but we need to be assured that the Sled Agent has + // stopped using that disk before we can mark its state as decommissioned. + omicron_physical_disks::decommission_expunged_disks(&opctx, datastore) + .await?; + // This is likely to error if any cluster upgrades are in progress (which // can take some time), so it should remain at the end so that other parts // of the blueprint can progress normally. 
@@ -241,6 +247,8 @@ where mod tests { use super::*; use nexus_db_model::Generation; + use nexus_db_model::PhysicalDisk; + use nexus_db_model::PhysicalDiskKind; use nexus_db_model::SledBaseboard; use nexus_db_model::SledSystemHardware; use nexus_db_model::SledUpdate; @@ -290,7 +298,7 @@ mod tests { // tests expect to be able to realize the the blueprint created from an // initial collection, and ensuring the zones' datasets exist requires first // inserting the sled and zpool records. - pub(crate) async fn insert_zpool_records( + pub(crate) async fn create_disks_for_zones_using_datasets( datastore: &DataStore, opctx: &OpContext, blueprint: &Blueprint, @@ -304,12 +312,27 @@ mod tests { continue; }; + let physical_disk_id = Uuid::new_v4(); let pool_id = dataset.dataset.pool_name.id(); + + let disk = PhysicalDisk::new( + physical_disk_id, + String::from("Oxide"), + format!("PhysDisk of {}", pool_id), + String::from("FakeDisk"), + PhysicalDiskKind::U2, + sled_id.into_untyped_uuid(), + ); + datastore + .physical_disk_insert(&opctx, disk.clone()) + .await + .expect("failed to upsert physical disk"); + if pool_inserted.insert(pool_id) { let zpool = Zpool::new( pool_id.into_untyped_uuid(), sled_id.into_untyped_uuid(), - Uuid::new_v4(), // physical_disk_id + physical_disk_id, ); datastore .zpool_insert(opctx, zpool) diff --git a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs index d7d8604e7e..9ae72d2049 100644 --- a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs +++ b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs @@ -10,6 +10,7 @@ use anyhow::Context; use futures::stream; use futures::StreamExt; use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; use nexus_types::deployment::BlueprintPhysicalDisksConfig; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SledUuid; @@ -97,26 +98,58 @@ pub(crate) async fn deploy_disks( } } +/// Decommissions all disks which are currently expunged +pub(crate) async fn decommission_expunged_disks( + opctx: &OpContext, + datastore: &DataStore, +) -> Result<(), Vec> { + datastore + .physical_disk_decommission_all_expunged(&opctx) + .await + .map_err(|e| vec![anyhow!(e)])?; + Ok(()) +} + #[cfg(test)] mod test { use super::deploy_disks; + + use crate::DataStore; use crate::Sled; + use async_bb8_diesel::AsyncRunQueryDsl; + use diesel::ExpressionMethods; + use diesel::QueryDsl; use httptest::matchers::{all_of, json_decoded, request}; use httptest::responders::json_encoded; use httptest::responders::status_code; use httptest::Expectation; + use nexus_db_model::Dataset; + use nexus_db_model::DatasetKind; + use nexus_db_model::PhysicalDisk; + use nexus_db_model::PhysicalDiskKind; + use nexus_db_model::PhysicalDiskPolicy; + use nexus_db_model::PhysicalDiskState; + use nexus_db_model::Region; + use nexus_db_model::Zpool; use nexus_db_queries::context::OpContext; + use nexus_db_queries::db; + use nexus_test_utils::SLED_AGENT_UUID; use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::{ Blueprint, BlueprintPhysicalDiskConfig, BlueprintPhysicalDisksConfig, - BlueprintTarget, CockroachDbPreserveDowngrade, + BlueprintTarget, CockroachDbPreserveDowngrade, DiskFilter, }; + use nexus_types::identity::Asset; + use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Generation; use omicron_common::disk::DiskIdentity; + use omicron_uuid_kinds::GenericUuid; + use omicron_uuid_kinds::PhysicalDiskUuid; use 
omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use std::collections::BTreeMap; use std::net::SocketAddr; + use std::str::FromStr; use uuid::Uuid; type ControlPlaneTestContext = @@ -350,4 +383,227 @@ mod test { s1.verify_and_clear(); s2.verify_and_clear(); } + + async fn make_disk_in_db( + datastore: &DataStore, + opctx: &OpContext, + i: usize, + sled_id: SledUuid, + ) -> PhysicalDiskUuid { + let id = PhysicalDiskUuid::from_untyped_uuid(Uuid::new_v4()); + let physical_disk = PhysicalDisk::new( + id.into_untyped_uuid(), + "v".into(), + format!("s-{i})"), + "m".into(), + PhysicalDiskKind::U2, + sled_id.into_untyped_uuid(), + ); + datastore + .physical_disk_insert(&opctx, physical_disk.clone()) + .await + .unwrap(); + id + } + + async fn add_zpool_dataset_and_region( + datastore: &DataStore, + opctx: &OpContext, + id: PhysicalDiskUuid, + sled_id: SledUuid, + ) { + let zpool = datastore + .zpool_insert( + opctx, + Zpool::new( + Uuid::new_v4(), + sled_id.into_untyped_uuid(), + id.into_untyped_uuid(), + ), + ) + .await + .unwrap(); + + let dataset = datastore + .dataset_upsert(Dataset::new( + Uuid::new_v4(), + zpool.id(), + Some(std::net::SocketAddrV6::new( + std::net::Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + )), + DatasetKind::Crucible, + )) + .await + .unwrap(); + + // There isn't a great API to insert regions (we normally allocate!) + // so insert the record manually here. + let region = { + let volume_id = Uuid::new_v4(); + Region::new( + dataset.id(), + volume_id, + 512_i64.try_into().unwrap(), + 10, + 10, + 1, + ) + }; + let conn = datastore.pool_connection_for_tests().await.unwrap(); + use nexus_db_model::schema::region::dsl; + diesel::insert_into(dsl::region) + .values(region) + .execute_async(&*conn) + .await + .unwrap(); + } + + async fn get_pools( + datastore: &DataStore, + id: PhysicalDiskUuid, + ) -> Vec { + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + use db::schema::zpool::dsl; + dsl::zpool + .filter(dsl::time_deleted.is_null()) + .filter(dsl::physical_disk_id.eq(id.into_untyped_uuid())) + .select(dsl::id) + .load_async::(&*conn) + .await + .map(|ids| { + ids.into_iter() + .map(|id| ZpoolUuid::from_untyped_uuid(id)) + .collect() + }) + .unwrap() + } + + async fn get_datasets(datastore: &DataStore, id: ZpoolUuid) -> Vec { + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + use db::schema::dataset::dsl; + dsl::dataset + .filter(dsl::time_deleted.is_null()) + .filter(dsl::pool_id.eq(id.into_untyped_uuid())) + .select(dsl::id) + .load_async(&*conn) + .await + .unwrap() + } + + async fn get_regions(datastore: &DataStore, id: Uuid) -> Vec { + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + use db::schema::region::dsl; + dsl::region + .filter(dsl::dataset_id.eq(id.into_untyped_uuid())) + .select(dsl::id) + .load_async(&*conn) + .await + .unwrap() + } + + #[nexus_test] + async fn test_decommission_expunged_disks( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let sled_id = SledUuid::from_untyped_uuid( + Uuid::from_str(&SLED_AGENT_UUID).unwrap(), + ); + + // Create a couple disks in the database + let disks = [ + make_disk_in_db(&datastore, &opctx, 0, sled_id).await, + make_disk_in_db(&datastore, &opctx, 1, sled_id).await, + ]; + + // Add a zpool, dataset, and region to each disk. 
+ for disk_id in disks { + add_zpool_dataset_and_region(&datastore, &opctx, disk_id, sled_id) + .await; + } + + let disk_to_decommission = disks[0]; + let other_disk = disks[1]; + + // Expunge one of the disks + datastore + .physical_disk_update_policy( + &opctx, + disk_to_decommission.into_untyped_uuid(), + PhysicalDiskPolicy::Expunged, + ) + .await + .unwrap(); + + // Verify that the state of both disks is "active" + let all_disks = datastore + .physical_disk_list( + &opctx, + &DataPageParams::max_page(), + DiskFilter::All, + ) + .await + .unwrap() + .into_iter() + .map(|disk| (disk.id(), disk)) + .collect::>(); + let d = &all_disks[&disk_to_decommission.into_untyped_uuid()]; + assert_eq!(d.disk_state, PhysicalDiskState::Active); + assert_eq!(d.disk_policy, PhysicalDiskPolicy::Expunged); + let d = &all_disks[&other_disk.into_untyped_uuid()]; + assert_eq!(d.disk_state, PhysicalDiskState::Active); + assert_eq!(d.disk_policy, PhysicalDiskPolicy::InService); + + super::decommission_expunged_disks(&opctx, &datastore).await.unwrap(); + + // After decommissioning, we see the expunged disk become + // decommissioned. The other disk remains in-service. + let all_disks = datastore + .physical_disk_list( + &opctx, + &DataPageParams::max_page(), + DiskFilter::All, + ) + .await + .unwrap() + .into_iter() + .map(|disk| (disk.id(), disk)) + .collect::>(); + let d = &all_disks[&disk_to_decommission.into_untyped_uuid()]; + assert_eq!(d.disk_state, PhysicalDiskState::Decommissioned); + assert_eq!(d.disk_policy, PhysicalDiskPolicy::Expunged); + let d = &all_disks[&other_disk.into_untyped_uuid()]; + assert_eq!(d.disk_state, PhysicalDiskState::Active); + assert_eq!(d.disk_policy, PhysicalDiskPolicy::InService); + + // Even though we've decommissioned this disk, the pools, datasets, and + // regions should remain. Refer to the "decommissioned_disk_cleaner" + // for how these get eventually cleared up. + let pools = get_pools(&datastore, disk_to_decommission).await; + assert_eq!(pools.len(), 1); + let datasets = get_datasets(&datastore, pools[0]).await; + assert_eq!(datasets.len(), 1); + let regions = get_regions(&datastore, datasets[0]).await; + assert_eq!(regions.len(), 1); + + // Similarly, the "other disk" should still exist. 
+ let pools = get_pools(&datastore, other_disk).await; + assert_eq!(pools.len(), 1); + let datasets = get_datasets(&datastore, pools[0]).await; + assert_eq!(datasets.len(), 1); + let regions = get_regions(&datastore, datasets[0]).await; + assert_eq!(regions.len(), 1); + } } diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 4a5d792c80..2f1c4cd738 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -93,6 +93,7 @@ use super::tasks::bfd; use super::tasks::blueprint_execution; use super::tasks::blueprint_load; use super::tasks::crdb_node_id_collector; +use super::tasks::decommissioned_disk_cleaner; use super::tasks::dns_config; use super::tasks::dns_propagation; use super::tasks::dns_servers; @@ -142,6 +143,7 @@ pub struct BackgroundTasks { pub task_bfd_manager: Activator, pub task_inventory_collection: Activator, pub task_physical_disk_adoption: Activator, + pub task_decommissioned_disk_cleaner: Activator, pub task_phantom_disks: Activator, pub task_blueprint_loader: Activator, pub task_blueprint_executor: Activator, @@ -221,6 +223,7 @@ impl BackgroundTasksInitializer { task_bfd_manager: Activator::new(), task_inventory_collection: Activator::new(), task_physical_disk_adoption: Activator::new(), + task_decommissioned_disk_cleaner: Activator::new(), task_phantom_disks: Activator::new(), task_blueprint_loader: Activator::new(), task_blueprint_executor: Activator::new(), @@ -280,6 +283,7 @@ impl BackgroundTasksInitializer { task_bfd_manager, task_inventory_collection, task_physical_disk_adoption, + task_decommissioned_disk_cleaner, task_phantom_disks, task_blueprint_loader, task_blueprint_executor, @@ -511,6 +515,23 @@ impl BackgroundTasksInitializer { activator: task_physical_disk_adoption, }); + driver.register(TaskDefinition { + name: "decommissioned_disk_cleaner", + description: + "deletes DB records for decommissioned disks, after regions \ + and region snapshots have been replaced", + period: config.decommissioned_disk_cleaner.period_secs, + task_impl: Box::new( + decommissioned_disk_cleaner::DecommissionedDiskCleaner::new( + datastore.clone(), + config.decommissioned_disk_cleaner.disable, + ), + ), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_decommissioned_disk_cleaner, + }); + driver.register(TaskDefinition { name: "service_zone_nat_tracker", description: diff --git a/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs b/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs new file mode 100644 index 0000000000..cb3ef9a569 --- /dev/null +++ b/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs @@ -0,0 +1,458 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Cleans up old database state from decommissioned disks. +//! +//! This cannot happen at decommissioning time, because it depends on region +//! (and snapshot) replacement, which happens in the background. +//! +//! Cleanup involves deleting database records for disks (datasets, zpools) +//! that are no longer viable after the physical disk has been decommissioned. 
+ +use crate::app::background::BackgroundTask; +use anyhow::Context; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_model::Zpool; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::pagination::Paginator; +use nexus_db_queries::db::DataStore; +use nexus_types::identity::Asset; +use omicron_common::api::external::Error; +use omicron_uuid_kinds::{GenericUuid, ZpoolUuid}; +use std::num::NonZeroU32; +use std::sync::Arc; + +/// Background task that cleans decommissioned disk DB records. +pub struct DecommissionedDiskCleaner { + datastore: Arc, + disable: bool, +} + +#[derive(Debug, Default)] +struct ActivationResults { + found: usize, + not_ready_to_be_deleted: usize, + deleted: usize, + error_count: usize, +} + +const MAX_BATCH: NonZeroU32 = unsafe { + // Safety: last time I checked, 100 was greater than zero. + NonZeroU32::new_unchecked(100) +}; + +impl DecommissionedDiskCleaner { + pub fn new(datastore: Arc, disable: bool) -> Self { + Self { datastore, disable } + } + + async fn clean_all( + &mut self, + results: &mut ActivationResults, + opctx: &OpContext, + ) -> Result<(), anyhow::Error> { + slog::info!(opctx.log, "Decommissioned disk cleaner running"); + + let mut paginator = Paginator::new(MAX_BATCH); + let mut last_err = Ok(()); + while let Some(p) = paginator.next() { + let zpools = self + .datastore + .zpool_on_decommissioned_disk_list( + opctx, + &p.current_pagparams(), + ) + .await + .context("failed to list zpools on decommissioned disks")?; + paginator = p.found_batch(&zpools, &|zpool| zpool.id()); + self.clean_batch(results, &mut last_err, opctx, &zpools).await; + } + + last_err + } + + async fn clean_batch( + &mut self, + results: &mut ActivationResults, + last_err: &mut Result<(), anyhow::Error>, + opctx: &OpContext, + zpools: &[Zpool], + ) { + results.found += zpools.len(); + slog::debug!(opctx.log, "Found zpools on decommissioned disks"; "count" => zpools.len()); + + for zpool in zpools { + let zpool_id = ZpoolUuid::from_untyped_uuid(zpool.id()); + slog::trace!(opctx.log, "Deleting Zpool"; "zpool" => %zpool_id); + + match self + .datastore + .zpool_delete_self_and_all_datasets(opctx, zpool_id) + .await + { + Ok(_) => { + slog::info!( + opctx.log, + "Deleted zpool and datasets within"; + "zpool" => %zpool_id, + ); + results.deleted += 1; + } + Err(Error::ServiceUnavailable { internal_message }) => { + slog::trace!( + opctx.log, + "Zpool on decommissioned disk not ready for deletion"; + "zpool" => %zpool_id, + "error" => internal_message, + ); + results.not_ready_to_be_deleted += 1; + } + Err(e) => { + slog::warn!( + opctx.log, + "Failed to zpool on decommissioned disk"; + "zpool" => %zpool_id, + "error" => %e, + ); + results.error_count += 1; + *last_err = Err(e).with_context(|| { + format!("failed to delete zpool record {zpool_id}") + }); + } + } + } + } +} + +impl BackgroundTask for DecommissionedDiskCleaner { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async move { + let mut results = ActivationResults::default(); + + let error = if !self.disable { + match self.clean_all(&mut results, opctx).await { + Ok(_) => { + slog::info!(opctx.log, "Cleaned decommissioned zpools"; + "found" => results.found, + "not_ready_to_be_deleted" => results.not_ready_to_be_deleted, + "deleted" => results.deleted, + "error_count" => results.error_count, + ); + None + } + Err(err) => { + slog::error!(opctx.log, "Failed to clean decommissioned zpools"; + "error" => %err, + "found" => results.found, 
+ "not_ready_to_be_deleted" => results.not_ready_to_be_deleted, + "deleted" => results.deleted, + "error_count" => results.error_count, + ); + Some(err.to_string()) + } + } + } else { + slog::info!(opctx.log, "Decommissioned Disk Cleaner disabled"); + None + }; + serde_json::json!({ + "found": results.found, + "not_ready_to_be_deleted": results.not_ready_to_be_deleted, + "deleted": results.deleted, + "error_count": results.error_count, + "error": error, + }) + } + .boxed() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use async_bb8_diesel::AsyncRunQueryDsl; + use diesel::ExpressionMethods; + use diesel::QueryDsl; + use nexus_db_model::Dataset; + use nexus_db_model::DatasetKind; + use nexus_db_model::PhysicalDisk; + use nexus_db_model::PhysicalDiskKind; + use nexus_db_model::PhysicalDiskPolicy; + use nexus_db_model::Region; + use nexus_test_utils::SLED_AGENT_UUID; + use nexus_test_utils_macros::nexus_test; + use omicron_uuid_kinds::{ + DatasetUuid, PhysicalDiskUuid, RegionUuid, SledUuid, + }; + use std::str::FromStr; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + async fn make_disk_in_db( + datastore: &DataStore, + opctx: &OpContext, + i: usize, + sled_id: SledUuid, + ) -> PhysicalDiskUuid { + let id = PhysicalDiskUuid::from_untyped_uuid(Uuid::new_v4()); + let physical_disk = PhysicalDisk::new( + id.into_untyped_uuid(), + "v".into(), + format!("s-{i})"), + "m".into(), + PhysicalDiskKind::U2, + sled_id.into_untyped_uuid(), + ); + datastore + .physical_disk_insert(&opctx, physical_disk.clone()) + .await + .unwrap(); + id + } + + async fn add_zpool_dataset_and_region( + datastore: &DataStore, + opctx: &OpContext, + id: PhysicalDiskUuid, + sled_id: SledUuid, + ) -> (ZpoolUuid, DatasetUuid, RegionUuid) { + let zpool = datastore + .zpool_insert( + opctx, + Zpool::new( + Uuid::new_v4(), + sled_id.into_untyped_uuid(), + id.into_untyped_uuid(), + ), + ) + .await + .unwrap(); + + let dataset = datastore + .dataset_upsert(Dataset::new( + Uuid::new_v4(), + zpool.id(), + Some(std::net::SocketAddrV6::new( + std::net::Ipv6Addr::LOCALHOST, + 0, + 0, + 0, + )), + DatasetKind::Crucible, + )) + .await + .unwrap(); + + // There isn't a great API to insert regions (we normally allocate!) + // so insert the record manually here. 
+        let region = {
+            let volume_id = Uuid::new_v4();
+            Region::new(
+                dataset.id(),
+                volume_id,
+                512_i64.try_into().unwrap(),
+                10,
+                10,
+                1,
+            )
+        };
+        let region_id = region.id();
+        let conn = datastore.pool_connection_for_tests().await.unwrap();
+        use nexus_db_model::schema::region::dsl;
+        diesel::insert_into(dsl::region)
+            .values(region)
+            .execute_async(&*conn)
+            .await
+            .unwrap();
+
+        (
+            ZpoolUuid::from_untyped_uuid(zpool.id()),
+            DatasetUuid::from_untyped_uuid(dataset.id()),
+            RegionUuid::from_untyped_uuid(region_id),
+        )
+    }
+
+    struct TestFixture {
+        zpool_id: ZpoolUuid,
+        dataset_id: DatasetUuid,
+        region_id: RegionUuid,
+    }
+
+    impl TestFixture {
+        async fn setup(datastore: &Arc<DataStore>, opctx: &OpContext) -> Self {
+            let sled_id = SledUuid::from_untyped_uuid(
+                Uuid::from_str(&SLED_AGENT_UUID).unwrap(),
+            );
+
+            let disk_id = make_disk_in_db(datastore, opctx, 0, sled_id).await;
+            let (zpool_id, dataset_id, region_id) =
+                add_zpool_dataset_and_region(
+                    &datastore, &opctx, disk_id, sled_id,
+                )
+                .await;
+            datastore
+                .physical_disk_update_policy(
+                    &opctx,
+                    disk_id.into_untyped_uuid(),
+                    PhysicalDiskPolicy::Expunged,
+                )
+                .await
+                .unwrap();
+
+            Self { zpool_id, dataset_id, region_id }
+        }
+
+        async fn delete_region(&self, datastore: &DataStore) {
+            let conn = datastore.pool_connection_for_tests().await.unwrap();
+            use nexus_db_model::schema::region::dsl;
+
+            diesel::delete(
+                dsl::region
+                    .filter(dsl::id.eq(self.region_id.into_untyped_uuid())),
+            )
+            .execute_async(&*conn)
+            .await
+            .unwrap();
+        }
+
+        async fn has_been_cleaned(&self, datastore: &DataStore) -> bool {
+            use async_bb8_diesel::AsyncRunQueryDsl;
+            use diesel::{
+                ExpressionMethods, OptionalExtension, QueryDsl,
+                SelectableHelper,
+            };
+            use nexus_db_queries::db::schema::zpool::dsl as zpool_dsl;
+
+            let conn = datastore.pool_connection_for_tests().await.unwrap();
+            let fetched_zpool = zpool_dsl::zpool
+                .filter(zpool_dsl::id.eq(self.zpool_id.into_untyped_uuid()))
+                .filter(zpool_dsl::time_deleted.is_null())
+                .select(Zpool::as_select())
+                .first_async(&*conn)
+                .await
+                .optional()
+                .expect("Zpool query should succeed");
+
+            use nexus_db_queries::db::schema::dataset::dsl as dataset_dsl;
+            let fetched_dataset = dataset_dsl::dataset
+                .filter(dataset_dsl::id.eq(self.dataset_id.into_untyped_uuid()))
+                .filter(dataset_dsl::time_deleted.is_null())
+                .select(Dataset::as_select())
+                .first_async(&*conn)
+                .await
+                .optional()
+                .expect("Dataset query should succeed");
+
+            match (fetched_zpool, fetched_dataset) {
+                (Some(_), Some(_)) => false,
+                (None, None) => true,
+                _ => panic!("If zpool and dataset were cleaned, they should be cleaned together"),
+            }
+        }
+    }
+
+    #[nexus_test(server = crate::Server)]
+    async fn test_disk_cleanup_ignores_active_disks(
+        cptestctx: &ControlPlaneTestContext,
+    ) {
+        let nexus = &cptestctx.server.server_context().nexus;
+        let datastore = nexus.datastore();
+        let opctx = OpContext::for_tests(
+            cptestctx.logctx.log.clone(),
+            datastore.clone(),
+        );
+        let fixture = TestFixture::setup(datastore, &opctx).await;
+
+        let mut task = DecommissionedDiskCleaner::new(datastore.clone(), false);
+
+        // Setup: Disk is expunged, not decommissioned.
+        // Expectation: We ignore it.
+ + let mut results = ActivationResults::default(); + dbg!(task.clean_all(&mut results, &opctx).await) + .expect("activation completes successfully"); + dbg!(&results); + assert_eq!(results.found, 0); + assert_eq!(results.not_ready_to_be_deleted, 0); + assert_eq!(results.deleted, 0); + assert_eq!(results.error_count, 0); + + assert!(!fixture.has_been_cleaned(&datastore).await); + } + + #[nexus_test(server = crate::Server)] + async fn test_disk_cleanup_does_not_clean_disks_with_regions( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + let fixture = TestFixture::setup(datastore, &opctx).await; + + let mut task = DecommissionedDiskCleaner::new(datastore.clone(), false); + + datastore + .physical_disk_decommission_all_expunged(&opctx) + .await + .unwrap(); + + // Setup: Disk is decommissioned, but has a region. + // Expectation: We ignore it. + + let mut results = ActivationResults::default(); + dbg!(task.clean_all(&mut results, &opctx).await) + .expect("activation completes successfully"); + dbg!(&results); + assert_eq!(results.found, 1); + assert_eq!(results.not_ready_to_be_deleted, 1); + assert_eq!(results.deleted, 0); + assert_eq!(results.error_count, 0); + + assert!(!fixture.has_been_cleaned(&datastore).await); + } + + #[nexus_test(server = crate::Server)] + async fn test_disk_cleanup_cleans_disks_with_no_regions( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + let fixture = TestFixture::setup(datastore, &opctx).await; + + let mut task = DecommissionedDiskCleaner::new(datastore.clone(), false); + + datastore + .physical_disk_decommission_all_expunged(&opctx) + .await + .unwrap(); + fixture.delete_region(&datastore).await; + + // Setup: Disk is decommissioned and has no regions. + // Expectation: We clean it. + + let mut results = ActivationResults::default(); + dbg!(task.clean_all(&mut results, &opctx).await) + .expect("activation completes successfully"); + dbg!(&results); + assert_eq!(results.found, 1); + assert_eq!(results.not_ready_to_be_deleted, 0); + assert_eq!(results.deleted, 1); + assert_eq!(results.error_count, 0); + + assert!(fixture.has_been_cleaned(&datastore).await); + } +} diff --git a/nexus/src/app/background/tasks/mod.rs b/nexus/src/app/background/tasks/mod.rs index a5204588d8..5062799bdb 100644 --- a/nexus/src/app/background/tasks/mod.rs +++ b/nexus/src/app/background/tasks/mod.rs @@ -9,6 +9,7 @@ pub mod bfd; pub mod blueprint_execution; pub mod blueprint_load; pub mod crdb_node_id_collector; +pub mod decommissioned_disk_cleaner; pub mod dns_config; pub mod dns_propagation; pub mod dns_servers; diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 415727693b..e231f665fa 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -104,6 +104,9 @@ phantom_disks.period_secs = 30 physical_disk_adoption.period_secs = 30 # Disable automatic disk adoption to avoid interfering with tests. physical_disk_adoption.disable = true +decommissioned_disk_cleaner.period_secs = 60 +# Disable disk decommissioning cleanup to avoid interfering with tests. 
+decommissioned_disk_cleaner.disable = true blueprints.period_secs_load = 100 blueprints.period_secs_execute = 600 blueprints.period_secs_collect_crdb_node_ids = 600 diff --git a/nexus/tests/integration_tests/sleds.rs b/nexus/tests/integration_tests/sleds.rs index bd8a8877fc..aa1ff3a667 100644 --- a/nexus/tests/integration_tests/sleds.rs +++ b/nexus/tests/integration_tests/sleds.rs @@ -20,6 +20,8 @@ use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::views::SledInstance; use nexus_types::external_api::views::{PhysicalDisk, Sled}; use omicron_sled_agent::sim; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; use std::str::FromStr; use uuid::Uuid; @@ -128,7 +130,7 @@ async fn test_physical_disk_create_list_delete( let disks = physical_disks_list(&external_client, &disks_url).await; assert_eq!(disks.len(), disks_initial.len() + 1); - let _new_disk = disks + let new_disk = disks .iter() .find(|found_disk| { found_disk.vendor == "v" @@ -141,10 +143,7 @@ async fn test_physical_disk_create_list_delete( datastore .physical_disk_delete( &opctx, - "v".into(), - "s".into(), - "m".into(), - sled_id, + PhysicalDiskUuid::from_untyped_uuid(new_disk.identity.id), ) .await .expect("Failed to upsert physical disk"); diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index a8f3989da4..a5feff067a 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -347,6 +347,9 @@ pub enum DiskFilter { /// All disks which are in-service. InService, + + /// All disks which are expunged but still active. + ExpungedButActive, } impl DiskFilter { @@ -366,10 +369,12 @@ impl PhysicalDiskPolicy { PhysicalDiskPolicy::InService => match filter { DiskFilter::All => true, DiskFilter::InService => true, + DiskFilter::ExpungedButActive => false, }, PhysicalDiskPolicy::Expunged => match filter { DiskFilter::All => true, DiskFilter::InService => false, + DiskFilter::ExpungedButActive => true, }, } } @@ -391,10 +396,12 @@ impl PhysicalDiskState { PhysicalDiskState::Active => match filter { DiskFilter::All => true, DiskFilter::InService => true, + DiskFilter::ExpungedButActive => true, }, PhysicalDiskState::Decommissioned => match filter { DiskFilter::All => true, DiskFilter::InService => false, + DiskFilter::ExpungedButActive => false, }, } } diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index 50f9bf646e..396e3615b2 100644 --- a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -50,6 +50,7 @@ inventory.nkeep = 3 inventory.disable = false phantom_disks.period_secs = 30 physical_disk_adoption.period_secs = 30 +decommissioned_disk_cleaner.period_secs = 60 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 blueprints.period_secs_collect_crdb_node_ids = 180 diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index 31db278616..df49476eed 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -50,6 +50,7 @@ inventory.nkeep = 3 inventory.disable = false phantom_disks.period_secs = 30 physical_disk_adoption.period_secs = 30 +decommissioned_disk_cleaner.period_secs = 60 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 blueprints.period_secs_collect_crdb_node_ids = 180 diff --git a/uuid-kinds/src/lib.rs b/uuid-kinds/src/lib.rs index 
fb8a6f6fa9..8f695d2399 100644 --- a/uuid-kinds/src/lib.rs +++ b/uuid-kinds/src/lib.rs @@ -50,6 +50,7 @@ macro_rules! impl_typed_uuid_kind { impl_typed_uuid_kind! { Collection => "collection", + Dataset => "dataset", Downstairs => "downstairs", DownstairsRegion => "downstairs_region", ExternalIp => "external_ip", @@ -60,6 +61,7 @@ impl_typed_uuid_kind! { Propolis => "propolis", RackInit => "rack_init", RackReset => "rack_reset", + Region => "region", Sled => "sled", TufRepo => "tuf_repo", Upstairs => "upstairs",
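
For reference, the `Dataset` and `Region` kinds registered in `impl_typed_uuid_kind!` above generate `DatasetUuid` and `RegionUuid` wrapper types. The snippet below is a minimal illustrative sketch, not part of the patch, showing the typed/untyped conversions via the `GenericUuid` trait that the cleaner's tests rely on:

// Illustrative only; mirrors the conversions used in the tests above.
use omicron_uuid_kinds::{DatasetUuid, GenericUuid, RegionUuid};
use uuid::Uuid;

fn main() {
    // Wrap raw UUIDs (e.g. values read from the database) in their typed counterparts.
    let dataset_id = DatasetUuid::from_untyped_uuid(Uuid::new_v4());
    let region_id = RegionUuid::from_untyped_uuid(Uuid::new_v4());

    // Convert back to plain `Uuid`s when handing them to untyped APIs.
    let raw: Uuid = dataset_id.into_untyped_uuid();
    println!("dataset {dataset_id}, region {region_id}, raw {raw}");
}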