diff --git a/common/src/lib.rs b/common/src/lib.rs index dd4b633de9..de9eef9fd2 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -61,44 +61,6 @@ impl slog::KV for FileKv { pub const OMICRON_DPD_TAG: &str = "omicron"; -use crate::api::external::Error; -use crate::progenitor_operation_retry::ProgenitorOperationRetry; -use crate::progenitor_operation_retry::ProgenitorOperationRetryError; -use std::future::Future; - -/// Retry a progenitor client operation until a known result is returned. -/// -/// See [`ProgenitorOperationRetry`] for more information. -// TODO mark this deprecated, `never_bail` is a bad idea -pub async fn retry_until_known_result( - log: &slog::Logger, - f: F, -) -> Result> -where - F: FnMut() -> Fut, - Fut: Future>>, - E: std::fmt::Debug, -{ - match ProgenitorOperationRetry::new(f, never_bail).run(log).await { - Ok(v) => Ok(v), - - Err(e) => match e { - ProgenitorOperationRetryError::ProgenitorError(e) => Err(e), - - ProgenitorOperationRetryError::Gone - | ProgenitorOperationRetryError::GoneCheckError(_) => { - // ProgenitorOperationRetry::new called with `never_bail` as the - // bail check should never return these variants! - unreachable!(); - } - }, - } -} - -async fn never_bail() -> Result { - Ok(false) -} - /// A wrapper struct that does nothing other than elide the inner value from /// [`std::fmt::Debug`] output. /// diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 9e9d8d1828..3afff7543e 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -99,6 +99,18 @@ impl DataStore { Ok((sled, was_modified)) } + /// Confirms that a sled exists and is in-service. + pub async fn check_sled_in_service( + &self, + opctx: &OpContext, + sled_id: Uuid, + ) -> Result<(), Error> { + let conn = &*self.pool_connection_authorized(&opctx).await?; + Self::check_sled_in_service_on_connection(conn, sled_id) + .await + .map_err(From::from) + } + /// Confirms that a sled exists and is in-service. /// /// This function may be called from a transaction context. diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index dd10fa267b..8381b0f148 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -104,7 +104,6 @@ use nexus_db_model::Generation; use nexus_db_queries::db::identity::{Asset, Resource}; use nexus_db_queries::db::lookup::LookupPath; use omicron_common::api::external::Error; -use omicron_common::retry_until_known_result; use omicron_common::{ api::external, progenitor_operation_retry::ProgenitorOperationRetry, }; @@ -116,6 +115,7 @@ use sled_agent_client::types::CrucibleOpts; use sled_agent_client::types::VmmIssueDiskSnapshotRequestBody; use sled_agent_client::types::VolumeConstructionRequest; use slog::info; +use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::collections::VecDeque; use std::net::SocketAddrV6; @@ -860,7 +860,7 @@ async fn ssc_send_snapshot_request_to_sled_agent( .await .map_err(ActionError::action_failed)?; - retry_until_known_result(log, || async { + let snapshot_operation = || async { sled_agent_client .vmm_issue_disk_snapshot_request( &PropolisUuid::from_untyped_uuid(propolis_id), @@ -868,10 +868,23 @@ async fn ssc_send_snapshot_request_to_sled_agent( &VmmIssueDiskSnapshotRequestBody { snapshot_id }, ) .await - }) - .await - .map_err(|e| e.to_string()) - .map_err(ActionError::action_failed)?; + }; + let gone_check = || async { + osagactx.datastore().check_sled_in_service(&opctx, sled_id).await?; + // `check_sled_in_service` returns an error if the sled is no longer in + // service; if it succeeds, the sled is not gone. + Ok(false) + }; + + ProgenitorOperationRetry::new(snapshot_operation, gone_check) + .run(log) + .await + .map_err(|e| { + ActionError::action_failed(format!( + "failed to issue VMM disk snapshot request: {}", + InlineErrorChain::new(&e) + )) + })?; Ok(()) }