Skip to content

Commit

Permalink
Add instance ID column to migration table (#5909)
Browse files Browse the repository at this point in the history
Currently, the `migration` table does not contain a way to directly
associate migrations with instances. An `instance` row's `migration_id`
is a foreign key into the `migration` table pointing at the currently
active migration, if one exists, but past migrations cannot be easily
associated with migrations. This is unfortunate, as it seems useful to
be able to easily list completed/failed migrations for an instance. This
is particularly important as we would like to change the behavior around
marking migration records as deleted, so that deleting an instance also
deletes any corresponding migration records. It may also be useful for
debugging tools and UIs that expose a migration's history.

This branch adds an instance ID column to the migration table. To avoid
having `NULL`s in this column, I've added a migration[^1] that deletes
the old `migration` table and creates a new one, rather than backfilling
migration IDs by trying to figure them out using the `vmm` records for
the migration records. This seemed fine, because (1) the `migration`
table hasn't been released to customers yet, and (2), we don't actually
use the data in that table for anything yet, so nuking it should be
okay. I've also added a query for listing migration records by instance
ID.

Now, we no longer mark migrations as deleted when they complete or fail,
and instead mark all migrations for an instance as deleted when the
instance itself is deleted.

Migration records are still marked as deleted when an `instance-migrate`
saga unwinds. I'm on the fence as to whether this is the Right Thing or
not, but I think it's fine, as those migration states won't be needed by
the `instance-update` saga (AFAIK so far). If that doesn't end up being
the case, we might consider adding a `SagaUnwound` state, similar to the
one for VMMs, so that we can distinguish between migrations that failed
because a VMM failed and migrations whose sagas unwound...

[^1]: A database migration, not a live migration migration. :)
  • Loading branch information
hawkw authored Jun 28, 2024
1 parent a14b41a commit 659130e
Show file tree
Hide file tree
Showing 12 changed files with 328 additions and 44 deletions.
6 changes: 6 additions & 0 deletions nexus/db-model/src/migration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::MigrationState;
use chrono::DateTime;
use chrono::Utc;
use omicron_common::api::internal::nexus;
use omicron_uuid_kinds::{GenericUuid, InstanceUuid};
use serde::Deserialize;
use serde::Serialize;
use uuid::Uuid;
Expand All @@ -32,6 +33,9 @@ pub struct Migration {
/// `instance` table's `migration_id` field.
pub id: Uuid,

/// The instance that was migrated.
pub instance_id: Uuid,

/// The time at which this migration record was created.
pub time_created: DateTime<Utc>,

Expand Down Expand Up @@ -66,11 +70,13 @@ pub struct Migration {
impl Migration {
pub fn new(
migration_id: Uuid,
instance_id: InstanceUuid,
source_propolis_id: Uuid,
target_propolis_id: Uuid,
) -> Self {
Self {
id: migration_id,
instance_id: instance_id.into_untyped_uuid(),
time_created: Utc::now(),
time_deleted: None,
source_state: nexus::MigrationState::Pending.into(),
Expand Down
1 change: 1 addition & 0 deletions nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1762,6 +1762,7 @@ table! {
table! {
migration (id) {
id -> Uuid,
instance_id -> Uuid,
time_created -> Timestamptz,
time_deleted -> Nullable<Timestamptz>,
source_state -> crate::MigrationStateEnum,
Expand Down
4 changes: 2 additions & 2 deletions nexus/db-model/src/schema_versions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ use std::collections::BTreeMap;
///
/// This must be updated when you change the database schema. Refer to
/// schema/crdb/README.adoc in the root of this repository for details.
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(79, 0, 0);

pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(80, 0, 0);
/// List of all past database schema versions, in *reverse* order
///
/// If you want to change the Omicron database schema, you must update this.
Expand All @@ -29,6 +28,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
// | leaving the first copy as an example for the next person.
// v
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
KnownVersion::new(80, "add-instance-id-to-migrations"),
KnownVersion::new(79, "nic-spoof-allow"),
KnownVersion::new(78, "vpc-subnet-routing"),
KnownVersion::new(77, "remove-view-for-v2p-mappings"),
Expand Down
15 changes: 9 additions & 6 deletions nexus/db-queries/src/db/datastore/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -698,11 +698,9 @@ impl DataStore {
}
})?;

self.instance_ssh_keys_delete(
opctx,
InstanceUuid::from_untyped_uuid(authz_instance.id()),
)
.await?;
let instance_id = InstanceUuid::from_untyped_uuid(authz_instance.id());
self.instance_ssh_keys_delete(opctx, instance_id).await?;
self.instance_mark_migrations_deleted(opctx, instance_id).await?;

Ok(())
}
Expand Down Expand Up @@ -1343,7 +1341,12 @@ mod tests {
let migration = datastore
.migration_insert(
&opctx,
Migration::new(Uuid::new_v4(), active_vmm.id, target_vmm.id),
Migration::new(
Uuid::new_v4(),
instance_id,
active_vmm.id,
target_vmm.id,
),
)
.await
.expect("migration should be inserted successfully!");
Expand Down
237 changes: 237 additions & 0 deletions nexus/db-queries/src/db/datastore/migration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,20 @@ use crate::context::OpContext;
use crate::db::error::public_error_from_diesel;
use crate::db::error::ErrorHandler;
use crate::db::model::{Migration, MigrationState};
use crate::db::pagination::paginated;
use crate::db::schema::migration::dsl;
use crate::db::update_and_check::UpdateAndCheck;
use crate::db::update_and_check::UpdateStatus;
use async_bb8_diesel::AsyncRunQueryDsl;
use chrono::Utc;
use diesel::prelude::*;
use omicron_common::api::external::CreateResult;
use omicron_common::api::external::DataPageParams;
use omicron_common::api::external::ListResultVec;
use omicron_common::api::external::UpdateResult;
use omicron_common::api::internal::nexus;
use omicron_uuid_kinds::GenericUuid;
use omicron_uuid_kinds::InstanceUuid;
use uuid::Uuid;

impl DataStore {
Expand All @@ -38,6 +43,39 @@ impl DataStore {
.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
}

/// List all migrations associated with the provided instance ID.
pub async fn instance_list_migrations(
&self,
opctx: &OpContext,
instance_id: InstanceUuid,
pagparams: &DataPageParams<'_, Uuid>,
) -> ListResultVec<Migration> {
paginated(dsl::migration, dsl::id, pagparams)
.filter(dsl::instance_id.eq(instance_id.into_untyped_uuid()))
.filter(dsl::time_deleted.is_null())
.select(Migration::as_select())
.load_async(&*self.pool_connection_authorized(opctx).await?)
.await
.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
}

/// Mark *all* migrations for the provided instance as deleted.
///
/// This should be called when deleting an instance.
pub(crate) async fn instance_mark_migrations_deleted(
&self,
opctx: &OpContext,
instance_id: InstanceUuid,
) -> UpdateResult<usize> {
diesel::update(dsl::migration)
.filter(dsl::instance_id.eq(instance_id.into_untyped_uuid()))
.filter(dsl::time_deleted.is_null())
.set(dsl::time_deleted.eq(Utc::now()))
.execute_async(&*self.pool_connection_authorized(opctx).await?)
.await
.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
}

/// Marks a migration record as deleted if and only if both sides of the
/// migration are in a terminal state.
pub async fn migration_terminate(
Expand Down Expand Up @@ -90,3 +128,202 @@ impl DataStore {
.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
}
}

#[cfg(test)]
mod tests {
use super::*;
use crate::authz;
use crate::db::datastore::test_utils::datastore_test;
use crate::db::lookup::LookupPath;
use crate::db::model::Instance;
use nexus_db_model::Project;
use nexus_test_utils::db::test_setup_database;
use nexus_types::external_api::params;
use omicron_common::api::external::ByteCount;
use omicron_common::api::external::IdentityMetadataCreateParams;
use omicron_test_utils::dev;
use omicron_uuid_kinds::GenericUuid;
use omicron_uuid_kinds::InstanceUuid;

async fn create_test_instance(
datastore: &DataStore,
opctx: &OpContext,
) -> authz::Instance {
let silo_id = *nexus_db_fixed_data::silo::DEFAULT_SILO_ID;
let project_id = Uuid::new_v4();
let instance_id = InstanceUuid::new_v4();

let (authz_project, _project) = datastore
.project_create(
&opctx,
Project::new_with_id(
project_id,
silo_id,
params::ProjectCreate {
identity: IdentityMetadataCreateParams {
name: "stuff".parse().unwrap(),
description: "Where I keep my stuff".into(),
},
},
),
)
.await
.expect("project must be created successfully");
let _ = datastore
.project_create_instance(
&opctx,
&authz_project,
Instance::new(
instance_id,
project_id,
&params::InstanceCreate {
identity: IdentityMetadataCreateParams {
name: "myinstance".parse().unwrap(),
description: "It's an instance".into(),
},
ncpus: 2i64.try_into().unwrap(),
memory: ByteCount::from_gibibytes_u32(16),
hostname: "myhostname".try_into().unwrap(),
user_data: Vec::new(),
network_interfaces:
params::InstanceNetworkInterfaceAttachment::None,
external_ips: Vec::new(),
disks: Vec::new(),
ssh_public_keys: None,
start: false,
},
),
)
.await
.expect("instance must be created successfully");

let (.., authz_instance) = LookupPath::new(&opctx, &datastore)
.instance_id(instance_id.into_untyped_uuid())
.lookup_for(authz::Action::Modify)
.await
.expect("instance must exist");
authz_instance
}

async fn insert_migration(
datastore: &DataStore,
opctx: &OpContext,
instance_id: InstanceUuid,
) -> Migration {
let migration = Migration::new(
Uuid::new_v4(),
instance_id,
Uuid::new_v4(),
Uuid::new_v4(),
);

datastore
.migration_insert(&opctx, migration.clone())
.await
.expect("must insert migration successfully");

migration
}

#[tokio::test]
async fn test_migration_query_by_instance() {
// Setup
let logctx = dev::test_setup_log("test_migration_query_by_instance");
let mut db = test_setup_database(&logctx.log).await;
let (opctx, datastore) = datastore_test(&logctx, &db).await;
let authz_instance = create_test_instance(&datastore, &opctx).await;
let instance_id = InstanceUuid::from_untyped_uuid(authz_instance.id());

let migration1 =
insert_migration(&datastore, &opctx, instance_id).await;
let migration2 =
insert_migration(&datastore, &opctx, instance_id).await;

let list = datastore
.instance_list_migrations(
&opctx,
instance_id,
&DataPageParams::max_page(),
)
.await
.expect("must list migrations");
assert_all_migrations_found(&[&migration1, &migration2], &list[..]);

let migration3 =
insert_migration(&datastore, &opctx, instance_id).await;

let list = datastore
.instance_list_migrations(
&opctx,
instance_id,
&DataPageParams::max_page(),
)
.await
.expect("must list migrations");
assert_all_migrations_found(
&[&migration1, &migration2, &migration3],
&list[..],
);

datastore
.migration_mark_deleted(&opctx, migration3.id)
.await
.expect("must delete migration");
let list = datastore
.instance_list_migrations(
&opctx,
instance_id,
&DataPageParams::max_page(),
)
.await
.expect("must list migrations");
assert_all_migrations_found(&[&migration1, &migration2], &list[..]);

let deleted = datastore
.instance_mark_migrations_deleted(&opctx, instance_id)
.await
.expect("must delete remaining migrations");
assert_eq!(
deleted, 2,
"should not delete migration that was already marked as deleted"
);

let list = datastore
.instance_list_migrations(
&opctx,
instance_id,
&DataPageParams::max_page(),
)
.await
.expect("list must succeed");
assert!(list.is_empty(), "all migrations must be deleted");

let deleted = datastore
.instance_mark_migrations_deleted(&opctx, instance_id)
.await
.expect("must delete remaining migrations");
assert_eq!(
deleted, 0,
"should not delete migration that was already marked as deleted"
);

// Clean up.
db.cleanup().await.unwrap();
logctx.cleanup_successful();
}

#[track_caller]
fn assert_all_migrations_found(
expected: &[&Migration],
actual: &[Migration],
) {
assert_eq!(expected.len(), actual.len());
for migration in expected {
assert!(
actual.iter().any(|m| m.id == migration.id),
"couldn't find migration {:?} in {actual:#?}",
migration.id,
);
}
}
}
34 changes: 0 additions & 34 deletions nexus/src/app/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2111,40 +2111,6 @@ pub(crate) async fn notify_instance_updated(
)
.await;

// Has a migration terminated? If so,mark the migration record as deleted if
// and only if both sides of the migration are in a terminal state.
if let Some(nexus::MigrationRuntimeState {
migration_id,
state,
role,
..
}) = new_runtime_state.migration_state
{
if state.is_terminal() {
info!(
log,
"migration has terminated, trying to delete it...";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id,
"migration_id" => %propolis_id,
"migration_state" => %state,
"migration_role" => %role,
);
if !datastore.migration_terminate(opctx, migration_id).await? {
info!(
log,
"did not mark migration record as deleted (the other half \
may not yet have reported termination)";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id,
"migration_id" => %propolis_id,
"migration_state" => %state,
"migration_role" => %role,
);
}
}
}

// If the VMM is now in a terminal state, make sure its resources get
// cleaned up.
//
Expand Down
1 change: 1 addition & 0 deletions nexus/src/app/sagas/instance_migrate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@ async fn sim_create_migration_record(
&opctx,
db::model::Migration::new(
migration_id,
InstanceUuid::from_untyped_uuid(params.instance.id()),
source_propolis_id,
target_propolis_id,
),
Expand Down
Loading

0 comments on commit 659130e

Please sign in to comment.