Skip to content

Commit

Permalink
make instance-migrate sagas just set migration IDs
Browse files Browse the repository at this point in the history
  • Loading branch information
hawkw committed Jun 20, 2024
1 parent aba9f19 commit 9ea089b
Show file tree
Hide file tree
Showing 5 changed files with 232 additions and 119 deletions.
158 changes: 158 additions & 0 deletions nexus/db-queries/src/db/datastore/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,164 @@ impl DataStore {
Ok(updated)
}

/// Updates an instance record by setting the instance's migration ID.
pub async fn instance_set_migration_ids(
&self,
opctx: &OpContext,
instance_id: InstanceUuid,
src_propolis_id: PropolisUuid,
migration_id: Uuid,
target_propolis_id: PropolisUuid,
) -> Result<bool, Error> {
use db::schema::instance::dsl;

let instance_id = instance_id.into_untyped_uuid();
let target_propolis_id = target_propolis_id.into_untyped_uuid();
let src_propolis_id = src_propolis_id.into_untyped_uuid();
let updated = diesel::update(dsl::instance)
.filter(dsl::time_deleted.is_null())
.filter(dsl::id.eq(instance_id))
.filter(dsl::migration_id.is_null())
.filter(dsl::target_propolis_id.is_null())
.filter(dsl::active_propolis_id.eq(src_propolis_id))
.set((
dsl::migration_id.eq(Some(migration_id)),
dsl::target_propolis_id.eq(Some(target_propolis_id)),
// advance the generation
dsl::state_generation.eq(dsl::state_generation + 1),
dsl::time_state_updated.eq(Utc::now()),
))
.check_if_exists::<Instance>(instance_id.into_untyped_uuid())
.execute_and_check(&*self.pool_connection_authorized(&opctx).await?)
.await
.map_err(|e| {
public_error_from_diesel(
e,
ErrorHandler::NotFoundByLookup(
ResourceType::Instance,
LookupType::ById(instance_id),
),
)
})?;

match updated {
// If we updated the instance, that's great! Good job team!
UpdateAndQueryResult { status: UpdateStatus::Updated, .. } => {
Ok(true)
}
// No update was performed because the migration ID has already been
// set to the ID we were trying to set it to. That's fine, count it
// as a success.
UpdateAndQueryResult {
found: Instance { runtime_state, .. },
..
} if runtime_state.migration_id == Some(migration_id) => {
debug_assert_eq!(
runtime_state.dst_propolis_id,
Some(target_propolis_id)
);
debug_assert_eq!(
runtime_state.propolis_id,
Some(src_propolis_id)
);
Ok(false)
}

// On the other hand, if there was already a different migration ID,
// that means another migrate saga has already started a migration.
// Guess I'll die!
UpdateAndQueryResult {
found:
Instance {
runtime_state:
InstanceRuntimeState {
migration_id: Some(actual_migration_id),
..
},
..
},
..
} => {
slog::info!(
opctx.log,
"failed to set instance migration IDs: a different migration ID was already set";
"instance_id" => %instance_id,
"desired_migration_id" => %migration_id,
"actual_migration_id" => %actual_migration_id,
);
Err(Error::conflict("instance is already migrating"))
}
// If one of the other filters didn't match, our understanding of
// the instance's state is clearly pretty wromg.
UpdateAndQueryResult {
found: Instance { runtime_state, .. },
..
} => {
slog::warn!(
opctx.log,
"failed to set instance migration IDs: one of its Propolis IDs was what way we anticipated!";
"instance_id" => %instance_id,
"desired_migration_id" => %migration_id,
"desired_active_propolis_id" => %src_propolis_id,
"desired_target_propolis_id" => %target_propolis_id,
"actual_migration_id" => ?runtime_state.migration_id,
"actual_active_propolis_id" => ?runtime_state.propolis_id,
"actual_target_propolis_id" => ?runtime_state.dst_propolis_id,
);
Err(Error::conflict(
"instance snapshot didn't match actual state",
))
}
}
}

/// Unsets the migration IDs set by
/// [`DataStore::instance_set_migration_ids`].
///
/// This method will only unset the instance's migration IDs if they match
/// the provided ones.
pub async fn instance_unset_migration_ids(
&self,
opctx: &OpContext,
instance_id: InstanceUuid,
migration_id: Uuid,
target_propolis_id: PropolisUuid,
) -> Result<bool, Error> {
use db::schema::instance::dsl;

let instance_id = instance_id.into_untyped_uuid();
let target_propolis_id = target_propolis_id.into_untyped_uuid();
let updated = diesel::update(dsl::instance)
.filter(dsl::time_deleted.is_null())
.filter(dsl::id.eq(instance_id))
.filter(dsl::migration_id.eq(migration_id))
.filter(dsl::target_propolis_id.eq(target_propolis_id))
.set((
dsl::migration_id.eq(None::<Uuid>),
dsl::target_propolis_id.eq(None::<Uuid>),
// advance the generation
dsl::state_generation.eq(dsl::state_generation + 1),
dsl::time_state_updated.eq(Utc::now()),
))
.check_if_exists::<Instance>(instance_id.into_untyped_uuid())
.execute_and_check(&*self.pool_connection_authorized(&opctx).await?)
.await
.map(|r| match r.status {
UpdateStatus::Updated => true,
UpdateStatus::NotUpdatedButExists => false,
})
.map_err(|e| {
public_error_from_diesel(
e,
ErrorHandler::NotFoundByLookup(
ResourceType::Instance,
LookupType::ById(instance_id),
),
)
})?;
Ok(updated)
}

/// Updates an instance record and a VMM record with a single database
/// command.
///
Expand Down
22 changes: 11 additions & 11 deletions nexus/db-queries/src/db/datastore/migration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,24 +38,24 @@ impl DataStore {
.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
}

/// Marks a migration record as deleted if and only if both sides of the
/// migration are in a terminal state.
pub async fn migration_terminate(
/// Marks a migration record as failed.
pub async fn migration_mark_failed(
&self,
opctx: &OpContext,
migration_id: Uuid,
) -> UpdateResult<bool> {
const TERMINAL_STATES: &[MigrationState] = &[
MigrationState(nexus::MigrationState::Completed),
MigrationState(nexus::MigrationState::Failed),
];

let failed = MigrationState(nexus::MigrationState::Failed);
diesel::update(dsl::migration)
.filter(dsl::id.eq(migration_id))
.filter(dsl::time_deleted.is_null())
.filter(dsl::source_state.eq_any(TERMINAL_STATES))
.filter(dsl::target_state.eq_any(TERMINAL_STATES))
.set(dsl::time_deleted.eq(Utc::now()))
.set((
dsl::source_state.eq(failed),
dsl::source_gen.eq(dsl::source_gen + 1),
dsl::time_source_updated.eq(Utc::now()),
dsl::target_state.eq(failed),
dsl::target_gen.eq(dsl::target_gen + 1),
dsl::time_target_updated.eq(Utc::now()),
))
.check_if_exists::<Migration>(migration_id)
.execute_and_check(&*self.pool_connection_authorized(opctx).await?)
.await
Expand Down
73 changes: 0 additions & 73 deletions nexus/src/app/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,8 @@ use propolis_client::support::InstanceSerialConsoleHelper;
use propolis_client::support::WSClientOffset;
use propolis_client::support::WebSocketStream;
use sagas::instance_common::ExternalIpAttach;
use sled_agent_client::types::InstanceMigrationSourceParams;
use sled_agent_client::types::InstanceMigrationTargetParams;
use sled_agent_client::types::InstanceProperties;
use sled_agent_client::types::InstancePutMigrationIdsBody;
use sled_agent_client::types::InstancePutStateBody;
use std::matches;
use std::net::SocketAddr;
Expand Down Expand Up @@ -519,77 +517,6 @@ impl super::Nexus {
self.db_datastore.instance_fetch_with_vmm(opctx, &authz_instance).await
}

/// Attempts to set the migration IDs for the supplied instance via the
/// sled specified in `db_instance`.
///
/// The caller is assumed to have fetched the current instance record from
/// the DB and verified that the record has no migration IDs.
///
/// Returns `Ok` and the updated instance record if this call successfully
/// updated the instance with the sled agent and that update was
/// successfully reflected into CRDB. Returns `Err` with an appropriate
/// error otherwise.
///
/// # Panics
///
/// Asserts that `db_instance` has no migration ID or destination Propolis
/// ID set.
pub(crate) async fn instance_set_migration_ids(
&self,
opctx: &OpContext,
instance_id: InstanceUuid,
src_propolis_id: PropolisUuid,
prev_instance_runtime: &db::model::InstanceRuntimeState,
migration_params: InstanceMigrationSourceParams,
) -> UpdateResult<db::model::Instance> {
assert!(prev_instance_runtime.migration_id.is_none());
assert!(prev_instance_runtime.dst_propolis_id.is_none());

let (.., authz_instance) = LookupPath::new(opctx, &self.db_datastore)
.instance_id(instance_id.into_untyped_uuid())
.lookup_for(authz::Action::Modify)
.await?;

let instance_updated = todo!("eliza: do this transition purely in nexus rather than in sled-agent...");

if instance_updated {
Ok(self
.db_datastore
.instance_refetch(opctx, &authz_instance)
.await?)
} else {
Err(Error::conflict(
"instance is already migrating, or underwent an operation that \
prevented this migration from proceeding"
))
}
}

/// Attempts to clear the migration IDs for the supplied instance via the
/// sled specified in `db_instance`.
///
/// The supplied instance record must contain valid migration IDs.
///
/// Returns `Ok` if sled agent accepted the request to clear migration IDs
/// and the resulting attempt to write instance runtime state back to CRDB
/// succeeded. This routine returns `Ok` even if the update was not actually
/// applied (due to a separate generation number change).
///
/// # Panics
///
/// Asserts that `db_instance` has a migration ID and destination Propolis
/// ID set.
pub(crate) async fn instance_clear_migration_ids(
&self,
instance_id: InstanceUuid,
prev_instance_runtime: &db::model::InstanceRuntimeState,
) -> Result<(), Error> {
assert!(prev_instance_runtime.migration_id.is_some());
assert!(prev_instance_runtime.dst_propolis_id.is_some());

todo!("eliza: do this transition in the DB rather than in sled-agent")
}

/// Reboot the specified instance.
pub(crate) async fn instance_reboot(
&self,
Expand Down
Loading

0 comments on commit 9ea089b

Please sign in to comment.