diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index eac1cb2118a..f1f1cb63bbf 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -1258,11 +1258,12 @@ fn print_task_blueprint_planner(details: &serde_json::Value) { but could not make it the target: {error}" ); } - BlueprintPlannerStatus::Targeted { blueprint_id, .. } => { + BlueprintPlannerStatus::Targeted { blueprint_id, report, .. } => { println!( " planned new blueprint {blueprint_id}, \ and made it the current target" ); + println!("{report}"); } } } diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 96b3d22bfb9..49a4163d9e0 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -1615,6 +1615,9 @@ parent: PENDING MGS-MANAGED UPDATES: 0 +Nothing to report on planning for blueprint .............. + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ @@ -1735,6 +1738,9 @@ parent: PENDING MGS-MANAGED UPDATES: 0 +Nothing to report on planning for blueprint .............. + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ diff --git a/dev-tools/reconfigurator-cli/src/lib.rs b/dev-tools/reconfigurator-cli/src/lib.rs index d33d00d5661..5ac893cff40 100644 --- a/dev-tools/reconfigurator-cli/src/lib.rs +++ b/dev-tools/reconfigurator-cli/src/lib.rs @@ -1754,14 +1754,14 @@ fn cmd_blueprint_plan( &planning_input, creator, collection, + rng, ) - .context("creating planner")? - .with_rng(rng); + .context("creating planner")?; let blueprint = planner.plan().context("generating blueprint")?; let rv = format!( - "generated blueprint {} based on parent blueprint {}", - blueprint.id, parent_blueprint.id, + "generated blueprint {} based on parent blueprint {}\n{}", + blueprint.id, parent_blueprint.id, blueprint.report, ); system.add_blueprint(blueprint)?; @@ -1799,9 +1799,9 @@ fn cmd_blueprint_edit( &planning_input, &latest_collection, creator, + rng, ) .context("creating blueprint builder")?; - builder.set_rng(rng); if let Some(comment) = args.comment { builder.comment(comment); diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-add-sled-no-disks-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-add-sled-no-disks-stdout index 4feccb11e4c..adb9680a62c 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-add-sled-no-disks-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-add-sled-no-disks-stdout @@ -37,21 +37,15 @@ generated inventory collection eb0796d5-ab8a-4f7b-a884-b4aeacb8ab51 from configu > # we added has no disks. 
> blueprint-plan dbcbd3d6-41ff-48ae-ac0b-1becc9b2fd21 eb0796d5-ab8a-4f7b-a884-b4aeacb8ab51 INFO skipping noop image source check for all sleds, reason: no target release is currently set -INFO skipping sled (no zpools in service), sled_id: 00320471-945d-413c-85e7-03e091a70b3c -INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 -INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient ClickhouseServer zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CockroachDb zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CruciblePantry zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0 WARN cannot issue more SP updates (no current artifacts) -INFO all zones up-to-date -INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify generated blueprint 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1 based on parent blueprint dbcbd3d6-41ff-48ae-ac0b-1becc9b2fd21 +Planning report for blueprint 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1: +Chicken switches: + add zones with mupdate override: false + +* No zpools in service for NTP zones on sleds: 00320471-945d-413c-85e7-03e091a70b3c +* Discretionary zone placement waiting for NTP zones on sleds: 00320471-945d-413c-85e7-03e091a70b3c + > blueprint-show 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1 blueprint 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1 @@ -283,4 +277,12 @@ parent: dbcbd3d6-41ff-48ae-ac0b-1becc9b2fd21 PENDING MGS-MANAGED UPDATES: 0 +Planning report for blueprint 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1: +Chicken switches: + add zones with mupdate override: false + +* No zpools in service for NTP zones on sleds: 00320471-945d-413c-85e7-03e091a70b3c +* Discretionary zone placement waiting for NTP zones on sleds: 00320471-945d-413c-85e7-03e091a70b3c + + diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout index e6f97ae6104..d6aa9f36e59 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-example-stdout @@ -396,6 +396,9 @@ parent: 02697f74-b14a-4418-90f0-c28b2a3a6aa9 PENDING MGS-MANAGED UPDATES: 0 +Nothing to report on planning for blueprint ade5749d-bdf3-4fab-a8ae-00bea01b3a5a. + + > inventory-generate @@ -506,6 +509,9 @@ parent: 02697f74-b14a-4418-90f0-c28b2a3a6aa9 PENDING MGS-MANAGED UPDATES: 0 +Nothing to report on planning for blueprint ade5749d-bdf3-4fab-a8ae-00bea01b3a5a. + + > # Exercise `blueprint-diff` arguments. 
@@ -532,21 +538,20 @@ T ENA ID PARENT > blueprint-plan ade5749d-bdf3-4fab-a8ae-00bea01b3a5a INFO skipping noop image source check for all sleds, reason: no target release is currently set -INFO found sled missing NTP zone (will add one), sled_id: 89d02b1b-478c-401a-8e28-7a26f74fa41b -INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 -WARN failed to place all new desired Clickhouse zones, placed: 0, wanted_to_place: 1 -INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient ClickhouseServer zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CockroachDb zones exist in plan, desired_count: 0, current_count: 0 -WARN failed to place all new desired CruciblePantry zones, placed: 0, wanted_to_place: 3 -WARN failed to place all new desired InternalDns zones, placed: 0, wanted_to_place: 3 -INFO sufficient ExternalDns zones exist in plan, desired_count: 0, current_count: 0 -WARN failed to place all new desired Nexus zones, placed: 0, wanted_to_place: 3 -INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0 WARN cannot issue more SP updates (no current artifacts) INFO some zones not yet up-to-date, sled_id: 89d02b1b-478c-401a-8e28-7a26f74fa41b, zones_currently_updating: [ZoneCurrentlyUpdating { zone_id: b3c9c041-d2f0-4767-bdaf-0e52e9d7a013 (service), zone_kind: InternalNtp, reason: MissingInInventory { bp_image_source: InstallDataset } }] -INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify generated blueprint 86db3308-f817-4626-8838-4085949a6a41 based on parent blueprint ade5749d-bdf3-4fab-a8ae-00bea01b3a5a +Planning report for blueprint 86db3308-f817-4626-8838-4085949a6a41: +Chicken switches: + add zones with mupdate override: false + +* Discretionary zone placement waiting for NTP zones on sleds: 89d02b1b-478c-401a-8e28-7a26f74fa41b +* Missing NTP zone on sled 89d02b1b-478c-401a-8e28-7a26f74fa41b +* Only placed 0/1 desired clickhouse zones +* Only placed 0/3 desired crucible_pantry zones +* Only placed 0/3 desired internal_dns zones +* Only placed 0/3 desired nexus zones + > blueprint-list T ENA ID PARENT TIME_CREATED @@ -1018,6 +1023,9 @@ parent: 02697f74-b14a-4418-90f0-c28b2a3a6aa9 PENDING MGS-MANAGED UPDATES: 0 +Nothing to report on planning for blueprint ade5749d-bdf3-4fab-a8ae-00bea01b3a5a. + + > inventory-show latest all collection: 9e187896-7809-46d0-9210-d75be1b3c4d4 @@ -1531,20 +1539,10 @@ COCKROACH STATUS > # sled to be expunged. 
> blueprint-plan latest INFO skipping noop image source check for all sleds, reason: no target release is currently set -INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 -INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient ClickhouseServer zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CockroachDb zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CruciblePantry zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0 WARN cannot issue more SP updates (no current artifacts) -INFO all zones up-to-date -INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify generated blueprint 86db3308-f817-4626-8838-4085949a6a41 based on parent blueprint ade5749d-bdf3-4fab-a8ae-00bea01b3a5a +Nothing to report on planning for blueprint 86db3308-f817-4626-8838-4085949a6a41. + > blueprint-diff ade5749d-bdf3-4fab-a8ae-00bea01b3a5a latest from: blueprint ade5749d-bdf3-4fab-a8ae-00bea01b3a5a diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-external-dns-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-external-dns-stdout index a1876121bed..c38933dede6 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-external-dns-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-external-dns-stdout @@ -334,6 +334,9 @@ parent: 06c88262-f435-410e-ba98-101bed41ec27 PENDING MGS-MANAGED UPDATES: 0 +Nothing to report on planning for blueprint 3f00b694-1b16-4aaa-8f78-e6b3a527b434. + + > blueprint-edit 3f00b694-1b16-4aaa-8f78-e6b3a527b434 expunge-zone 8429c772-07e8-40a6-acde-2ed47d16cf84 blueprint 366b0b68-d80e-4bc1-abd3-dc69837847e0 created from blueprint 3f00b694-1b16-4aaa-8f78-e6b3a527b434: expunged zone 8429c772-07e8-40a6-acde-2ed47d16cf84 from sled 711ac7f8-d19e-4572-bdb9-e9b50f6e362a @@ -1022,24 +1025,23 @@ parent: 3f00b694-1b16-4aaa-8f78-e6b3a527b434 PENDING MGS-MANAGED UPDATES: 0 +Nothing to report on planning for blueprint 366b0b68-d80e-4bc1-abd3-dc69837847e0. + + > # blueprint-plan will place a new external DNS zone, diff DNS to see the new zone has `ns` and NS records. 
> blueprint-plan 366b0b68-d80e-4bc1-abd3-dc69837847e0 INFO skipping noop image source check for all sleds, reason: no target release is currently set -INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 -INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient ClickhouseServer zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CockroachDb zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CruciblePantry zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO added zone to sled, sled_id: 711ac7f8-d19e-4572-bdb9-e9b50f6e362a, kind: ExternalDns -INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0 WARN cannot issue more SP updates (no current artifacts) -INFO some zones not yet up-to-date, sled_id: 711ac7f8-d19e-4572-bdb9-e9b50f6e362a, zones_currently_updating: [ZoneCurrentlyUpdating { zone_id: fe2d5287-24e3-4071-b214-2640b097a759 (service), zone_kind: ExternalDns, reason: MissingInInventory { bp_image_source: InstallDataset } }] -INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify generated blueprint 9c998c1d-1a7b-440a-ae0c-40f781dea6e2 based on parent blueprint 366b0b68-d80e-4bc1-abd3-dc69837847e0 +Planning report for blueprint 9c998c1d-1a7b-440a-ae0c-40f781dea6e2: +Chicken switches: + add zones with mupdate override: false + +* Discretionary zones placed: + * 1 zone on sled 711ac7f8-d19e-4572-bdb9-e9b50f6e362a: external_dns +* Zone updates waiting on discretionary zones + > blueprint-diff 366b0b68-d80e-4bc1-abd3-dc69837847e0 9c998c1d-1a7b-440a-ae0c-40f781dea6e2 from: blueprint 366b0b68-d80e-4bc1-abd3-dc69837847e0 @@ -1726,6 +1728,15 @@ parent: 366b0b68-d80e-4bc1-abd3-dc69837847e0 PENDING MGS-MANAGED UPDATES: 0 +Planning report for blueprint 9c998c1d-1a7b-440a-ae0c-40f781dea6e2: +Chicken switches: + add zones with mupdate override: false + +* Discretionary zones placed: + * 1 zone on sled 711ac7f8-d19e-4572-bdb9-e9b50f6e362a: external_dns +* Zone updates waiting on discretionary zones + + > # expunging the new zone should work, then diff again to see the new zone also have its DNS records removed. > blueprint-edit 9c998c1d-1a7b-440a-ae0c-40f781dea6e2 expunge-zone 8c0a1969-15b6-4165-ba6d-a27c24151037 diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-internal-dns-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-internal-dns-stdout index ade6db21966..9f7081101e6 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-internal-dns-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-expunge-newly-added-internal-dns-stdout @@ -332,6 +332,9 @@ parent: 184f10b3-61cb-41ef-9b93-3489b2bac559 PENDING MGS-MANAGED UPDATES: 0 +Nothing to report on planning for blueprint dbcbd3d6-41ff-48ae-ac0b-1becc9b2fd21. + + > # Expunge an internal DNS zone > blueprint-edit dbcbd3d6-41ff-48ae-ac0b-1becc9b2fd21 expunge-zone 99e2f30b-3174-40bf-a78a-90da8abba8ca @@ -1043,20 +1046,16 @@ external DNS: > # Planning a new blueprint will now replace the expunged zone, with new records for its replacement. 
> blueprint-plan 58d5e830-0884-47d8-a7cd-b2b3751adeb4 INFO skipping noop image source check for all sleds, reason: no target release is currently set -INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 -INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient ClickhouseServer zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CockroachDb zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CruciblePantry zones exist in plan, desired_count: 3, current_count: 3 -INFO added zone to sled, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, kind: InternalDns -INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0 WARN cannot issue more SP updates (no current artifacts) -INFO some zones not yet up-to-date, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, zones_currently_updating: [ZoneCurrentlyUpdating { zone_id: e375dd21-320b-43b7-bc92-a2c3dac9d9e1 (service), zone_kind: InternalDns, reason: MissingInInventory { bp_image_source: InstallDataset } }] -INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify generated blueprint af934083-59b5-4bf6-8966-6fb5292c29e1 based on parent blueprint 58d5e830-0884-47d8-a7cd-b2b3751adeb4 +Planning report for blueprint af934083-59b5-4bf6-8966-6fb5292c29e1: +Chicken switches: + add zones with mupdate override: false + +* Discretionary zones placed: + * 1 zone on sled 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c: internal_dns +* Zone updates waiting on discretionary zones + > blueprint-diff 58d5e830-0884-47d8-a7cd-b2b3751adeb4 af934083-59b5-4bf6-8966-6fb5292c29e1 from: blueprint 58d5e830-0884-47d8-a7cd-b2b3751adeb4 diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-host-phase-2-source-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-host-phase-2-source-stdout index 2490e7f32d7..e5215838f7d 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-host-phase-2-source-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-host-phase-2-source-stdout @@ -712,6 +712,9 @@ parent: 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1 PENDING MGS-MANAGED UPDATES: 0 +Nothing to report on planning for blueprint 58d5e830-0884-47d8-a7cd-b2b3751adeb4. + + > # Restore A to "current contents" @@ -1419,4 +1422,7 @@ parent: af934083-59b5-4bf6-8966-6fb5292c29e1 PENDING MGS-MANAGED UPDATES: 0 +Nothing to report on planning for blueprint df06bb57-ad42-4431-9206-abff322896c7. 
+ + diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-noop-image-source-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-noop-image-source-stdout index bd344107868..59dca2b7702 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-noop-image-source-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-noop-image-source-stdout @@ -165,23 +165,20 @@ INFO install dataset artifact hash not found in TUF repo, ignoring for noop chec INFO install dataset artifact hash not found in TUF repo, ignoring for noop checks, sled_id: b82ede02-399c-48c6-a1de-411df4fa49a7, zone_id: ecbe0b3d-1acc-44b2-b6d4-f4d2770516e4, kind: crucible, file_name: crucible.tar.gz, expected_hash: 866f6a7c2e51c056fb722b5113e80181cc9cd8b712a0d3dbf1edc4ce29e5229e INFO skipped noop image source check on sled, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, reason: remove_mupdate_override is set in the blueprint (ffffffff-ffff-ffff-ffff-ffffffffffff) INFO skipped noop image source check on sled, sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e, reason: sled not found in inventory -INFO noop converting 6/6 install-dataset zones to artifact store, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, num_total: 6, num_already_artifact: 0 -INFO noop converting 5/6 install-dataset zones to artifact store, sled_id: aff6c093-197d-42c5-ad80-9f10ba051a34, num_total: 6, num_already_artifact: 0 -INFO parent blueprint contains NTP zone, but it's not in inventory yet, sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e -INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 -INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient ClickhouseServer zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CockroachDb zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CruciblePantry zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0 INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 0, sp_type: Sled, serial_number: serial0, part_number: model0 INFO reached maximum number of pending SP updates, max: 1 -INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify generated blueprint 58d5e830-0884-47d8-a7cd-b2b3751adeb4 based on parent blueprint 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1 +Planning report for blueprint 58d5e830-0884-47d8-a7cd-b2b3751adeb4: +Chicken switches: + add zones with mupdate override: false + +* Noop converting 6/6 install-dataset zones to artifact store on sled 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6 +* Noop converting 5/6 install-dataset zones to artifact store on sled aff6c093-197d-42c5-ad80-9f10ba051a34 +* 1 pending MGS update: + * model0:serial0: Sp { expected_active_version: ArtifactVersion("0.0.1"), expected_inactive_version: NoValidVersion } +* Waiting for NTP zones to appear in inventory on sleds: e96e226f-4ed9-4c01-91b9-69a9cd076c9e +* Zone updates waiting on pending 
MGS updates (RoT / SP / Host OS / etc.) + > # This diff should show expected changes to the blueprint. @@ -537,21 +534,19 @@ INFO install dataset artifact hash not found in TUF repo, ignoring for noop chec INFO install dataset artifact hash not found in TUF repo, ignoring for noop checks, sled_id: b82ede02-399c-48c6-a1de-411df4fa49a7, zone_id: ecbe0b3d-1acc-44b2-b6d4-f4d2770516e4, kind: crucible, file_name: crucible.tar.gz, expected_hash: 866f6a7c2e51c056fb722b5113e80181cc9cd8b712a0d3dbf1edc4ce29e5229e INFO skipped noop image source check on sled, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, reason: remove_mupdate_override is set in the blueprint (ffffffff-ffff-ffff-ffff-ffffffffffff) INFO performed noop image source checks on sled, sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e, num_total: 2, num_already_artifact: 0, num_eligible: 2, num_ineligible: 0 -INFO noop converting 2/2 install-dataset zones to artifact store, sled_id: e96e226f-4ed9-4c01-91b9-69a9cd076c9e, num_total: 2, num_already_artifact: 0 -INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 -INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient ClickhouseServer zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CockroachDb zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CruciblePantry zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0 INFO SP update not yet completed (will keep it), artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 0, sp_type: Sled, serial_number: serial0, part_number: model0 INFO reached maximum number of pending SP updates, max: 1 -INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify generated blueprint af934083-59b5-4bf6-8966-6fb5292c29e1 based on parent blueprint 58d5e830-0884-47d8-a7cd-b2b3751adeb4 +Planning report for blueprint af934083-59b5-4bf6-8966-6fb5292c29e1: +Chicken switches: + add zones with mupdate override: false + +* Skipping noop image source check on sled 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6: all 6 zones are already from artifacts +* Noop converting 2/2 install-dataset zones to artifact store on sled e96e226f-4ed9-4c01-91b9-69a9cd076c9e +* 1 pending MGS update: + * model0:serial0: Sp { expected_active_version: ArtifactVersion("0.0.1"), expected_inactive_version: NoValidVersion } +* Zone updates waiting on pending MGS updates (RoT / SP / Host OS / etc.) + > # This diff should show changes to the sled that's back in inventory. 
diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-set-mgs-updates-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-set-mgs-updates-stdout index c524f6e980f..70ed671898f 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-set-mgs-updates-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-set-mgs-updates-stdout @@ -208,6 +208,9 @@ parent: 6ccc786b-17f1-4562-958f-5a7d9a5a15fd PENDING MGS-MANAGED UPDATES: 0 +Nothing to report on planning for blueprint ad97e762-7bf1-45a6-a98f-60afb7e491c0. + + > # Configure an MGS-managed update to one of the SPs. @@ -422,6 +425,9 @@ parent: ad97e762-7bf1-45a6-a98f-60afb7e491c0 sled 2 model2 serial2 e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 1.1.0 Sp { expected_active_version: ArtifactVersion("1.0.0"), expected_inactive_version: Version(ArtifactVersion("1.0.1")) } +Nothing to report on planning for blueprint cca24b71-09b5-4042-9185-b33e9f2ebba0. + + > blueprint-diff ad97e762-7bf1-45a6-a98f-60afb7e491c0 cca24b71-09b5-4042-9185-b33e9f2ebba0 from: blueprint ad97e762-7bf1-45a6-a98f-60afb7e491c0 @@ -966,6 +972,9 @@ parent: cca24b71-09b5-4042-9185-b33e9f2ebba0 sled 2 model2 serial2 e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 newest Sp { expected_active_version: ArtifactVersion("newer"), expected_inactive_version: Version(ArtifactVersion("older")) } +Nothing to report on planning for blueprint 5bf974f3-81f9-455b-b24e-3099f765664c. + + > blueprint-diff cca24b71-09b5-4042-9185-b33e9f2ebba0 5bf974f3-81f9-455b-b24e-3099f765664c from: blueprint cca24b71-09b5-4042-9185-b33e9f2ebba0 @@ -1514,6 +1523,9 @@ parent: 5bf974f3-81f9-455b-b24e-3099f765664c sled 2 model2 serial2 e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 newest Sp { expected_active_version: ArtifactVersion("newer"), expected_inactive_version: Version(ArtifactVersion("older")) } +Nothing to report on planning for blueprint 1b837a27-3be1-4fcb-8499-a921c839e1d0. + + > blueprint-diff 5bf974f3-81f9-455b-b24e-3099f765664c 1b837a27-3be1-4fcb-8499-a921c839e1d0 from: blueprint 5bf974f3-81f9-455b-b24e-3099f765664c @@ -1892,6 +1904,9 @@ parent: 1b837a27-3be1-4fcb-8499-a921c839e1d0 sled 0 model0 serial0 e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 three Sp { expected_active_version: ArtifactVersion("two"), expected_inactive_version: NoValidVersion } +Nothing to report on planning for blueprint 3682a71b-c6ca-4b7e-8f84-16df80c85960. + + > blueprint-diff 1b837a27-3be1-4fcb-8499-a921c839e1d0 3682a71b-c6ca-4b7e-8f84-16df80c85960 from: blueprint 1b837a27-3be1-4fcb-8499-a921c839e1d0 diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-set-remove-mupdate-override-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-set-remove-mupdate-override-stdout index 149410242ab..7f49b91c6e1 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-set-remove-mupdate-override-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-set-remove-mupdate-override-stdout @@ -277,6 +277,9 @@ parent: df06bb57-ad42-4431-9206-abff322896c7 PENDING MGS-MANAGED UPDATES: 0 +Nothing to report on planning for blueprint 7f976e0d-d2a5-4eeb-9e82-c82bc2824aba. + + > # Now make another blueprint, starting by adding a new sled and removing sled 6. @@ -665,6 +668,9 @@ parent: afb09faf-a586-4483-9289-04d4f1d8ba23 PENDING MGS-MANAGED UPDATES: 0 +Nothing to report on planning for blueprint ce365dff-2cdb-4f35-a186-b15e20e1e700. 
+ + > blueprint-diff afb09faf-a586-4483-9289-04d4f1d8ba23 latest from: blueprint afb09faf-a586-4483-9289-04d4f1d8ba23 diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-set-zone-images-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-set-zone-images-stdout index 0054dcc9852..bc716c9f26b 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-set-zone-images-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-set-zone-images-stdout @@ -110,6 +110,9 @@ parent: 1b013011-2062-4b48-b544-a32b23bce83a PENDING MGS-MANAGED UPDATES: 0 +Nothing to report on planning for blueprint 971eeb12-1830-4fa0-a699-98ea0164505c. + + > # Set a couple zones' image sources to specific artifacts. @@ -225,6 +228,9 @@ parent: 9766ca20-38d4-4380-b005-e7c43c797e7c PENDING MGS-MANAGED UPDATES: 0 +Nothing to report on planning for blueprint f714e6ea-e85a-4d7d-93c2-a018744fe176. + + > blueprint-diff 971eeb12-1830-4fa0-a699-98ea0164505c f714e6ea-e85a-4d7d-93c2-a018744fe176 from: blueprint 971eeb12-1830-4fa0-a699-98ea0164505c @@ -544,6 +550,9 @@ parent: bb128f06-a2e1-44c1-8874-4f789d0ff896 PENDING MGS-MANAGED UPDATES: 0 +Nothing to report on planning for blueprint d9c572a1-a68c-4945-b1ec-5389bd588fe9. + + > blueprint-diff f714e6ea-e85a-4d7d-93c2-a018744fe176 d9c572a1-a68c-4945-b1ec-5389bd588fe9 from: blueprint f714e6ea-e85a-4d7d-93c2-a018744fe176 diff --git a/dev-tools/reconfigurator-cli/tests/output/cmds-target-release-stdout b/dev-tools/reconfigurator-cli/tests/output/cmds-target-release-stdout index 51bc5998419..11bf0485b3a 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmds-target-release-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmds-target-release-stdout @@ -203,20 +203,17 @@ f45ba181-4b56-42cc-a762-874d90184a43 0 INFO performed noop image source checks on sled, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, num_total: 9, num_already_artifact: 0, num_eligible: 0, num_ineligible: 9 INFO performed noop image source checks on sled, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 INFO performed noop image source checks on sled, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 -INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 -INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient ClickhouseServer zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CockroachDb zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CruciblePantry zones exist in plan, desired_count: 0, current_count: 3 -INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0 INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 0, sp_type: Sled, serial_number: serial0, part_number: model0 INFO reached maximum number of pending SP updates, max: 1 -INFO will ensure cockroachdb setting, setting: 
cluster.preserve_downgrade_option, value: DoNotModify generated blueprint 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1 based on parent blueprint dbcbd3d6-41ff-48ae-ac0b-1becc9b2fd21 +Planning report for blueprint 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1: +Chicken switches: + add zones with mupdate override: false + +* 1 pending MGS update: + * model0:serial0: Sp { expected_active_version: ArtifactVersion("0.0.1"), expected_inactive_version: NoValidVersion } +* Zone updates waiting on pending MGS updates (RoT / SP / Host OS / etc.) + > blueprint-diff dbcbd3d6-41ff-48ae-ac0b-1becc9b2fd21 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1 from: blueprint dbcbd3d6-41ff-48ae-ac0b-1becc9b2fd21 @@ -388,20 +385,17 @@ external DNS: INFO performed noop image source checks on sled, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, num_total: 9, num_already_artifact: 0, num_eligible: 0, num_ineligible: 9 INFO performed noop image source checks on sled, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 INFO performed noop image source checks on sled, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 -INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 -INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient ClickhouseServer zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CockroachDb zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CruciblePantry zones exist in plan, desired_count: 0, current_count: 3 -INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0 INFO SP update not yet completed (will keep it), artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 0, sp_type: Sled, serial_number: serial0, part_number: model0 INFO reached maximum number of pending SP updates, max: 1 -INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify generated blueprint 58d5e830-0884-47d8-a7cd-b2b3751adeb4 based on parent blueprint 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1 +Planning report for blueprint 58d5e830-0884-47d8-a7cd-b2b3751adeb4: +Chicken switches: + add zones with mupdate override: false + +* 1 pending MGS update: + * model0:serial0: Sp { expected_active_version: ArtifactVersion("0.0.1"), expected_inactive_version: NoValidVersion } +* Zone updates waiting on pending MGS updates (RoT / SP / Host OS / etc.) 
+ > blueprint-diff 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1 58d5e830-0884-47d8-a7cd-b2b3751adeb4 from: blueprint 8da82a8e-bf97-4fbd-8ddd-9f6462732cf1 @@ -574,22 +568,19 @@ generated inventory collection eb0796d5-ab8a-4f7b-a884-b4aeacb8ab51 from configu INFO performed noop image source checks on sled, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, num_total: 9, num_already_artifact: 0, num_eligible: 0, num_ineligible: 9 INFO performed noop image source checks on sled, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 INFO performed noop image source checks on sled, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 -INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 -INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient ClickhouseServer zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CockroachDb zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CruciblePantry zones exist in plan, desired_count: 0, current_count: 3 -INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0 INFO SP update completed (will remove it and re-evaluate board), artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 0, sp_type: Sled, serial_number: serial0, part_number: model0 INFO skipping board for SP update, serial_number: serial0, part_number: model0 INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 1, sp_type: Sled, serial_number: serial1, part_number: model1 INFO reached maximum number of pending SP updates, max: 1 -INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify generated blueprint af934083-59b5-4bf6-8966-6fb5292c29e1 based on parent blueprint 58d5e830-0884-47d8-a7cd-b2b3751adeb4 +Planning report for blueprint af934083-59b5-4bf6-8966-6fb5292c29e1: +Chicken switches: + add zones with mupdate override: false + +* 1 pending MGS update: + * model1:serial1: Sp { expected_active_version: ArtifactVersion("0.0.1"), expected_inactive_version: NoValidVersion } +* Zone updates waiting on pending MGS updates (RoT / SP / Host OS / etc.) 
+ > blueprint-diff 58d5e830-0884-47d8-a7cd-b2b3751adeb4 af934083-59b5-4bf6-8966-6fb5292c29e1 from: blueprint 58d5e830-0884-47d8-a7cd-b2b3751adeb4 @@ -770,21 +761,18 @@ generated inventory collection 61f451b3-2121-4ed6-91c7-a550054f6c21 from configu INFO performed noop image source checks on sled, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, num_total: 9, num_already_artifact: 0, num_eligible: 0, num_ineligible: 9 INFO performed noop image source checks on sled, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 INFO performed noop image source checks on sled, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 -INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 -INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient ClickhouseServer zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CockroachDb zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CruciblePantry zones exist in plan, desired_count: 0, current_count: 3 -INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0 INFO SP update impossible (will remove it and re-evaluate board), artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 1, sp_type: Sled, serial_number: serial1, part_number: model1 INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: Version(ArtifactVersion("0.5.0")), expected_active_version: 0.0.1, component: sp, sp_slot: 1, sp_type: Sled, serial_number: serial1, part_number: model1 INFO reached maximum number of pending SP updates, max: 1 -INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify generated blueprint df06bb57-ad42-4431-9206-abff322896c7 based on parent blueprint af934083-59b5-4bf6-8966-6fb5292c29e1 +Planning report for blueprint df06bb57-ad42-4431-9206-abff322896c7: +Chicken switches: + add zones with mupdate override: false + +* 1 pending MGS update: + * model1:serial1: Sp { expected_active_version: ArtifactVersion("0.0.1"), expected_inactive_version: Version(ArtifactVersion("0.5.0")) } +* Zone updates waiting on pending MGS updates (RoT / SP / Host OS / etc.) 
+ > blueprint-diff af934083-59b5-4bf6-8966-6fb5292c29e1 df06bb57-ad42-4431-9206-abff322896c7 from: blueprint af934083-59b5-4bf6-8966-6fb5292c29e1 @@ -964,23 +952,20 @@ generated inventory collection b1bda47d-2c19-4fba-96e3-d9df28db7436 from configu INFO performed noop image source checks on sled, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, num_total: 9, num_already_artifact: 0, num_eligible: 0, num_ineligible: 9 INFO performed noop image source checks on sled, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 INFO performed noop image source checks on sled, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 -INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 -INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient ClickhouseServer zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CockroachDb zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CruciblePantry zones exist in plan, desired_count: 0, current_count: 3 -INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0 INFO SP update completed (will remove it and re-evaluate board), artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: Version(ArtifactVersion("0.5.0")), expected_active_version: 0.0.1, component: sp, sp_slot: 1, sp_type: Sled, serial_number: serial1, part_number: model1 INFO skipping board for SP update, serial_number: serial1, part_number: model1 INFO skipping board for SP update, serial_number: serial0, part_number: model0 INFO configuring SP update, artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 2, sp_type: Sled, serial_number: serial2, part_number: model2 INFO ran out of boards for SP update -INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify generated blueprint 7f976e0d-d2a5-4eeb-9e82-c82bc2824aba based on parent blueprint df06bb57-ad42-4431-9206-abff322896c7 +Planning report for blueprint 7f976e0d-d2a5-4eeb-9e82-c82bc2824aba: +Chicken switches: + add zones with mupdate override: false + +* 1 pending MGS update: + * model2:serial2: Sp { expected_active_version: ArtifactVersion("0.0.1"), expected_inactive_version: NoValidVersion } +* Zone updates waiting on pending MGS updates (RoT / SP / Host OS / etc.) 
+ > blueprint-diff df06bb57-ad42-4431-9206-abff322896c7 7f976e0d-d2a5-4eeb-9e82-c82bc2824aba from: blueprint df06bb57-ad42-4431-9206-abff322896c7 @@ -1160,24 +1145,20 @@ generated inventory collection a71f7a73-35a6-45e8-acbe-f1c5925eed69 from configu INFO performed noop image source checks on sled, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, num_total: 9, num_already_artifact: 0, num_eligible: 0, num_ineligible: 9 INFO performed noop image source checks on sled, sled_id: 98e6b7c2-2efa-41ca-b20a-0a4d61102fe6, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 INFO performed noop image source checks on sled, sled_id: d81c6a84-79b8-4958-ae41-ea46c9b19763, num_total: 8, num_already_artifact: 0, num_eligible: 0, num_ineligible: 8 -INFO sufficient BoundaryNtp zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient Clickhouse zones exist in plan, desired_count: 1, current_count: 1 -INFO sufficient ClickhouseKeeper zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient ClickhouseServer zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CockroachDb zones exist in plan, desired_count: 0, current_count: 0 -INFO sufficient CruciblePantry zones exist in plan, desired_count: 0, current_count: 3 -INFO sufficient InternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient ExternalDns zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Nexus zones exist in plan, desired_count: 3, current_count: 3 -INFO sufficient Oximeter zones exist in plan, desired_count: 0, current_count: 0 INFO SP update completed (will remove it and re-evaluate board), artifact_version: 1.0.0, artifact_hash: 7e6667e646ad001b54c8365a3d309c03f89c59102723d38d01697ee8079fe670, expected_inactive_version: NoValidVersion, expected_active_version: 0.0.1, component: sp, sp_slot: 2, sp_type: Sled, serial_number: serial2, part_number: model2 INFO skipping board for SP update, serial_number: serial2, part_number: model2 INFO skipping board for SP update, serial_number: serial0, part_number: model0 INFO skipping board for SP update, serial_number: serial1, part_number: model1 INFO ran out of boards for SP update -INFO updating zone image source in-place, sled_id: 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, zone_id: 353b3b65-20f7-48c3-88f7-495bd5d31545, kind: Clickhouse, image_source: artifact: version 1.0.0 -INFO will ensure cockroachdb setting, setting: cluster.preserve_downgrade_option, value: DoNotModify generated blueprint 9034c710-3e57-45f3-99e5-4316145e87ac based on parent blueprint 7f976e0d-d2a5-4eeb-9e82-c82bc2824aba +Planning report for blueprint 9034c710-3e57-45f3-99e5-4316145e87ac: +Chicken switches: + add zones with mupdate override: false + +* 1 out-of-date zone updated in-place: + * sled 2b8f0cb3-0295-4b3c-bc58-4fe88b57112c, zone 353b3b65-20f7-48c3-88f7-495bd5d31545 (clickhouse) +* 25 remaining out-of-date zones + > blueprint-diff 7f976e0d-d2a5-4eeb-9e82-c82bc2824aba 9034c710-3e57-45f3-99e5-4316145e87ac from: blueprint 7f976e0d-d2a5-4eeb-9e82-c82bc2824aba diff --git a/live-tests/tests/common/reconfigurator.rs b/live-tests/tests/common/reconfigurator.rs index 040bbec81f8..c34ebf6124f 100644 --- a/live-tests/tests/common/reconfigurator.rs +++ b/live-tests/tests/common/reconfigurator.rs @@ -7,6 +7,7 @@ use anyhow::{Context, ensure}; use nexus_client::types::BlueprintTargetSet; use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; +use nexus_reconfigurator_planning::planner::PlannerRng; use 
nexus_types::deployment::{Blueprint, PlanningInput}; use nexus_types::inventory::Collection; use omicron_uuid_kinds::GenericUuid; @@ -73,6 +74,7 @@ pub async fn blueprint_edit_current_target( &planning_input, &collection, "test-suite", + PlannerRng::from_entropy(), ) .context("creating BlueprintBuilder")?; diff --git a/live-tests/tests/test_nexus_add_remove.rs b/live-tests/tests/test_nexus_add_remove.rs index ff8a1f75e5d..045472b5ff0 100644 --- a/live-tests/tests/test_nexus_add_remove.rs +++ b/live-tests/tests/test_nexus_add_remove.rs @@ -16,6 +16,7 @@ use nexus_client::types::SagaState; use nexus_inventory::CollectionBuilder; use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; use nexus_reconfigurator_planning::planner::Planner; +use nexus_reconfigurator_planning::planner::PlannerRng; use nexus_reconfigurator_preparation::PlanningInputFromDb; use nexus_sled_agent_shared::inventory::ZoneKind; use nexus_types::deployment::BlueprintZoneDisposition; @@ -267,6 +268,7 @@ async fn test_nexus_add_remove(lc: &LiveTestContext) { &planning_input, "live test suite", &latest_collection, + PlannerRng::from_entropy(), ) .expect("constructing planner"); let new_blueprint = planner.plan().expect("creating blueprint"); diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index f6ba7e911d2..a0e1f93252f 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -80,6 +80,7 @@ use nexus_types::deployment::ExpectedVersion; use nexus_types::deployment::OximeterReadMode; use nexus_types::deployment::PendingMgsUpdateDetails; use nexus_types::deployment::PendingMgsUpdates; +use nexus_types::deployment::PlanningReport; use nexus_types::inventory::BaseboardId; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; @@ -1683,6 +1684,9 @@ impl DataStore { )?; } + // FIXME: Once reports are stored in the database, read them out here. 
+ let report = PlanningReport::new(blueprint_id); + Ok(Blueprint { id: blueprint_id, pending_mgs_updates, @@ -1699,6 +1703,7 @@ impl DataStore { time_created, creator, comment, + report, }) } @@ -2758,6 +2763,7 @@ mod tests { use nexus_reconfigurator_planning::blueprint_builder::EnsureMultiple; use nexus_reconfigurator_planning::example::ExampleSystemBuilder; use nexus_reconfigurator_planning::example::example; + use nexus_reconfigurator_planning::planner::PlannerRng; use nexus_types::deployment::BlueprintArtifactVersion; use nexus_types::deployment::BlueprintHostPhase2DesiredContents; use nexus_types::deployment::BlueprintHostPhase2DesiredSlots; @@ -3149,6 +3155,7 @@ mod tests { &planning_input, &collection, "test", + PlannerRng::from_entropy(), ) .expect("failed to create builder"); @@ -3490,6 +3497,7 @@ mod tests { &planning_input, &collection, "dummy", + PlannerRng::from_entropy(), ) .expect("failed to create builder"); @@ -3539,6 +3547,7 @@ mod tests { &planning_input, &collection, "dummy", + PlannerRng::from_entropy(), ) .expect("failed to create builder"); @@ -3631,6 +3640,7 @@ mod tests { &EMPTY_PLANNING_INPUT, &collection, "test2", + PlannerRng::from_entropy(), ) .expect("failed to create builder") .build(); @@ -3640,6 +3650,7 @@ mod tests { &EMPTY_PLANNING_INPUT, &collection, "test3", + PlannerRng::from_entropy(), ) .expect("failed to create builder") .build(); @@ -3740,6 +3751,7 @@ mod tests { &EMPTY_PLANNING_INPUT, &collection, "test3", + PlannerRng::from_entropy(), ) .expect("failed to create builder") .build(); @@ -3785,6 +3797,7 @@ mod tests { &EMPTY_PLANNING_INPUT, &collection, "test2", + PlannerRng::from_entropy(), ) .expect("failed to create builder") .build(); @@ -4017,6 +4030,7 @@ mod tests { &example_system.input, &example_system.collection, &format!("{test_name}-2"), + PlannerRng::from_entropy(), ) .expect("failed to create builder") .build(); diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index 816b132aedd..ce0e4f72244 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -1020,7 +1020,7 @@ mod test { }; use nexus_types::deployment::{ BlueprintZoneDisposition, BlueprintZoneImageSource, - OmicronZoneExternalSnatIp, OximeterReadMode, + OmicronZoneExternalSnatIp, OximeterReadMode, PlanningReport, }; use nexus_types::external_api::shared::SiloIdentityMode; use nexus_types::external_api::views::SledState; @@ -1050,11 +1050,12 @@ mod test { // easily specify just the parts that they want. 
impl Default for RackInit { fn default() -> Self { + let blueprint_id = BlueprintUuid::new_v4(); RackInit { rack_id: Uuid::parse_str(nexus_test_utils::RACK_UUID).unwrap(), rack_subnet: nexus_test_utils::RACK_SUBNET.parse().unwrap(), blueprint: Blueprint { - id: BlueprintUuid::new_v4(), + id: blueprint_id, sleds: BTreeMap::new(), pending_mgs_updates: PendingMgsUpdates::new(), cockroachdb_setting_preserve_downgrade: @@ -1070,6 +1071,7 @@ mod test { time_created: Utc::now(), creator: "test suite".to_string(), comment: "test suite".to_string(), + report: PlanningReport::new(blueprint_id), }, physical_disks: vec![], zpools: vec![], @@ -1545,8 +1547,9 @@ mod test { .into_iter() .collect(), ); + let blueprint_id = BlueprintUuid::new_v4(); let blueprint = Blueprint { - id: BlueprintUuid::new_v4(), + id: blueprint_id, sleds: make_sled_config_only_zones(blueprint_zones), pending_mgs_updates: PendingMgsUpdates::new(), cockroachdb_setting_preserve_downgrade: @@ -1562,6 +1565,7 @@ mod test { time_created: now_db_precision(), creator: "test suite".to_string(), comment: "test blueprint".to_string(), + report: PlanningReport::new(blueprint_id), }; let rack = datastore @@ -1806,8 +1810,9 @@ mod test { HashMap::from([("api.sys".to_string(), external_records.clone())]), ); + let blueprint_id = BlueprintUuid::new_v4(); let blueprint = Blueprint { - id: BlueprintUuid::new_v4(), + id: blueprint_id, sleds: make_sled_config_only_zones(blueprint_zones), pending_mgs_updates: PendingMgsUpdates::new(), cockroachdb_setting_preserve_downgrade: @@ -1823,6 +1828,7 @@ mod test { time_created: now_db_precision(), creator: "test suite".to_string(), comment: "test blueprint".to_string(), + report: PlanningReport::new(blueprint_id), }; let rack = datastore @@ -2016,8 +2022,9 @@ mod test { .into_iter() .collect::>(), ); + let blueprint_id = BlueprintUuid::new_v4(); let blueprint = Blueprint { - id: BlueprintUuid::new_v4(), + id: blueprint_id, sleds: make_sled_config_only_zones(blueprint_zones), pending_mgs_updates: PendingMgsUpdates::new(), cockroachdb_setting_preserve_downgrade: @@ -2033,6 +2040,7 @@ mod test { time_created: now_db_precision(), creator: "test suite".to_string(), comment: "test blueprint".to_string(), + report: PlanningReport::new(blueprint_id), }; let result = datastore @@ -2156,8 +2164,9 @@ mod test { .collect::>(), ); + let blueprint_id = BlueprintUuid::new_v4(); let blueprint = Blueprint { - id: BlueprintUuid::new_v4(), + id: blueprint_id, sleds: make_sled_config_only_zones(blueprint_zones), pending_mgs_updates: PendingMgsUpdates::new(), cockroachdb_setting_preserve_downgrade: @@ -2173,6 +2182,7 @@ mod test { time_created: now_db_precision(), creator: "test suite".to_string(), comment: "test blueprint".to_string(), + report: PlanningReport::new(blueprint_id), }; let result = datastore diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 6e3a1bef151..44f24fa942f 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -2967,6 +2967,7 @@ mod tests { use nexus_db_fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; use nexus_db_model::IncompleteNetworkInterface; use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; + use nexus_reconfigurator_planning::planner::PlannerRng; use nexus_reconfigurator_planning::system::SledBuilder; use nexus_reconfigurator_planning::system::SystemDescription; use nexus_types::deployment::Blueprint; @@ -3315,6 +3316,7 @@ mod tests { &planning_input, &collection, "test", + 
PlannerRng::from_entropy(), ) .expect("created blueprint builder"); for &sled_id in &sled_ids { @@ -3399,6 +3401,7 @@ mod tests { &planning_input, &collection, "test", + PlannerRng::from_entropy(), ) .expect("created blueprint builder"); for &sled_id in &sled_ids { diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 04176bcf8ec..85bc01a30ac 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -323,6 +323,7 @@ mod test { use nexus_inventory::now_db_precision; use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; use nexus_reconfigurator_planning::example::ExampleSystemBuilder; + use nexus_reconfigurator_planning::planner::PlannerRng; use nexus_reconfigurator_preparation::PlanningInputFromDb; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; @@ -350,6 +351,7 @@ mod test { use nexus_types::deployment::OximeterReadPolicy; use nexus_types::deployment::PendingMgsUpdates; use nexus_types::deployment::PlannerChickenSwitches; + use nexus_types::deployment::PlanningReport; use nexus_types::deployment::SledFilter; use nexus_types::deployment::TufRepoPolicy; use nexus_types::deployment::blueprint_zone_type; @@ -707,8 +709,9 @@ mod test { let dns_empty = dns_config_empty(); let initial_dns_generation = dns_empty.generation; + let blueprint_id = BlueprintUuid::new_v4(); let mut blueprint = Blueprint { - id: BlueprintUuid::new_v4(), + id: blueprint_id, sleds: blueprint_sleds, pending_mgs_updates: PendingMgsUpdates::new(), cockroachdb_setting_preserve_downgrade: @@ -724,6 +727,7 @@ mod test { time_created: now_db_precision(), creator: "test-suite".to_string(), comment: "test blueprint".to_string(), + report: PlanningReport::new(blueprint_id), }; // To make things slightly more interesting, let's add a zone that's @@ -1524,6 +1528,7 @@ mod test { &planning_input, &collection, "test suite", + PlannerRng::from_entropy(), ) .unwrap(); let sled_id = diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 3ec4aef2ef7..9b30cee3539 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -45,6 +45,7 @@ use nexus_types::deployment::OmicronZoneExternalSnatIp; use nexus_types::deployment::OximeterReadMode; use nexus_types::deployment::PendingMgsUpdates; use nexus_types::deployment::PlanningInput; +use nexus_types::deployment::PlanningReport; use nexus_types::deployment::SledFilter; use nexus_types::deployment::SledResources; use nexus_types::deployment::TufRepoContentsError; @@ -64,6 +65,7 @@ use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; use omicron_common::disk::M2Slot; use omicron_common::policy::INTERNAL_DNS_REDUNDANCY; +use omicron_uuid_kinds::BlueprintUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::MupdateOverrideUuid; use omicron_uuid_kinds::OmicronZoneUuid; @@ -411,6 +413,9 @@ pub struct BlueprintBuilder<'a> { /// The latest inventory collection collection: &'a Collection, + /// The ID that the completed blueprint will have + new_blueprint_id: BlueprintUuid, + // These fields are used to allocate resources for sleds. 
input: &'a PlanningInput, @@ -430,13 +435,14 @@ pub struct BlueprintBuilder<'a> { sled_editors: BTreeMap, cockroachdb_setting_preserve_downgrade: CockroachDbPreserveDowngrade, target_release_minimum_generation: Generation, + report: Option, creator: String, operations: Vec, comments: Vec, pending_mgs_updates: PendingMgsUpdates, - // Random number generator for new UUIDs + /// Random number generator for new UUIDs rng: PlannerRng, } @@ -486,8 +492,10 @@ impl<'a> BlueprintBuilder<'a> { .collect::>(); let num_sleds = sleds.len(); + let id = rng.next_blueprint(); + let report = PlanningReport::new(id); Blueprint { - id: rng.next_blueprint(), + id, sleds, pending_mgs_updates: PendingMgsUpdates::new(), parent_blueprint_id: None, @@ -503,6 +511,7 @@ impl<'a> BlueprintBuilder<'a> { time_created: now_db_precision(), creator: creator.to_owned(), comment: format!("starting blueprint with {num_sleds} empty sleds"), + report, } } @@ -514,6 +523,7 @@ impl<'a> BlueprintBuilder<'a> { input: &'a PlanningInput, inventory: &'a Collection, creator: &str, + mut rng: PlannerRng, ) -> anyhow::Result> { let log = log.new(o!( "component" => "BlueprintBuilder", @@ -564,6 +574,7 @@ impl<'a> BlueprintBuilder<'a> { log, parent_blueprint, collection: inventory, + new_blueprint_id: rng.next_blueprint(), input, resource_allocator: OnceCell::new(), sled_editors, @@ -572,10 +583,11 @@ impl<'a> BlueprintBuilder<'a> { pending_mgs_updates: parent_blueprint.pending_mgs_updates.clone(), target_release_minimum_generation: parent_blueprint .target_release_minimum_generation, + report: None, creator: creator.to_owned(), operations: Vec::new(), comments: Vec::new(), - rng: PlannerRng::from_entropy(), + rng, }) } @@ -583,6 +595,10 @@ impl<'a> BlueprintBuilder<'a> { &self.parent_blueprint } + pub fn new_blueprint_id(&self) -> BlueprintUuid { + self.new_blueprint_id + } + fn resource_allocator( &mut self, ) -> Result<&mut BlueprintResourceAllocator, Error> { @@ -646,7 +662,7 @@ impl<'a> BlueprintBuilder<'a> { /// Assemble a final [`Blueprint`] based on the contents of the builder pub fn build(mut self) -> Blueprint { - let blueprint_id = self.rng.next_blueprint(); + let blueprint_id = self.new_blueprint_id(); // Collect the Omicron zones config for all sleds, including sleds that // are no longer in service and need expungement work. @@ -761,6 +777,9 @@ impl<'a> BlueprintBuilder<'a> { .chain(self.operations.iter().map(|op| op.to_string())) .collect::>() .join(", "), + report: self + .report + .unwrap_or_else(|| PlanningReport::new(blueprint_id)), } } @@ -791,12 +810,9 @@ impl<'a> BlueprintBuilder<'a> { .map_err(|err| Error::SledEditError { sled_id, err }) } - /// Within tests, set an RNG for deterministic results. - /// - /// This will ensure that tests that use this builder will produce the same - /// results each time they are run. - pub fn set_rng(&mut self, rng: PlannerRng) -> &mut Self { - self.rng = rng; + /// Set the planning report for this blueprint. 
+ pub fn set_report(&mut self, report: PlanningReport) -> &mut Self { + self.report = Some(report); self } @@ -2283,7 +2299,6 @@ pub mod test { fn test_basic() { static TEST_NAME: &str = "blueprint_builder_test_basic"; let logctx = test_setup_log(TEST_NAME); - let mut rng = SimRngState::from_seed(TEST_NAME); let (mut example, blueprint1) = ExampleSystemBuilder::new_with_rng( &logctx.log, @@ -2298,6 +2313,7 @@ pub mod test { &example.input, &example.collection, "test_basic", + rng.next_planner_rng(), ) .expect("failed to create builder"); @@ -2349,6 +2365,7 @@ pub mod test { &input, &example.collection, "test_basic", + rng.next_planner_rng(), ) .expect("failed to create builder"); let new_sled_resources = &input @@ -2466,6 +2483,7 @@ pub mod test { fn test_decommissioned_sleds() { static TEST_NAME: &str = "blueprint_builder_test_decommissioned_sleds"; let logctx = test_setup_log(TEST_NAME); + let mut rng = SimRngState::from_seed(TEST_NAME); let (collection, input, mut blueprint1) = example(&logctx.log, TEST_NAME); verify_blueprint(&blueprint1); @@ -2516,6 +2534,7 @@ pub mod test { &input, &collection, "test_decommissioned_sleds", + rng.next_planner_rng(), ) .expect("created builder") .build(); @@ -2553,6 +2572,7 @@ pub mod test { &input, &collection, "test_decommissioned_sleds", + rng.next_planner_rng(), ) .expect("created builder") .build(); @@ -2569,6 +2589,7 @@ pub mod test { fn test_add_physical_disks() { static TEST_NAME: &str = "blueprint_builder_test_add_physical_disks"; let logctx = test_setup_log(TEST_NAME); + let mut rng = SimRngState::from_seed(TEST_NAME); // Start with an empty system (sleds with no zones). However, we leave // the disks around so that `sled_add_disks` can add them. @@ -2588,6 +2609,7 @@ pub mod test { &input, &collection, "test", + rng.next_planner_rng(), ) .expect("failed to create builder"); @@ -2673,6 +2695,7 @@ pub mod test { fn test_datasets_for_zpools_and_zones() { static TEST_NAME: &str = "test_datasets_for_zpools_and_zones"; let logctx = test_setup_log(TEST_NAME); + let mut rng = SimRngState::from_seed(TEST_NAME); let (collection, input, blueprint) = example(&logctx.log, TEST_NAME); // Creating the "example" blueprint should already invoke @@ -2687,6 +2710,7 @@ pub mod test { &input, &collection, "test", + rng.next_planner_rng(), ) .expect("failed to create builder"); @@ -2741,6 +2765,7 @@ pub mod test { &input, &collection, "test", + rng.next_planner_rng(), ) .expect("failed to create builder"); @@ -2780,6 +2805,7 @@ pub mod test { &input, &collection, "test", + rng.next_planner_rng(), ) .expect("failed to create builder"); @@ -2802,6 +2828,7 @@ pub mod test { static TEST_NAME: &str = "blueprint_builder_test_add_nexus_with_no_existing_nexus_zones"; let logctx = test_setup_log(TEST_NAME); + let mut rng = SimRngState::from_seed(TEST_NAME); // Start with an empty system (sleds with no zones). 
let (example, parent) = @@ -2820,6 +2847,7 @@ pub mod test { &input, &collection, "test", + rng.next_planner_rng(), ) .expect("failed to create builder"); @@ -2847,6 +2875,7 @@ pub mod test { fn test_add_nexus_error_cases() { static TEST_NAME: &str = "blueprint_builder_test_add_nexus_error_cases"; let logctx = test_setup_log(TEST_NAME); + let mut rng = SimRngState::from_seed(TEST_NAME); let (mut collection, mut input, mut parent) = example(&logctx.log, TEST_NAME); @@ -2926,6 +2955,7 @@ pub mod test { &input, &collection, "test", + rng.next_planner_rng(), ) .expect("failed to create builder"); builder @@ -2946,6 +2976,7 @@ pub mod test { &input, &collection, "test", + rng.next_planner_rng(), ) .expect("failed to create builder"); for _ in 0..3 { @@ -2985,6 +3016,7 @@ pub mod test { &input, &collection, "test", + rng.next_planner_rng(), ) .expect("failed to create builder"); let err = builder @@ -3020,6 +3052,7 @@ pub mod test { fn test_ensure_cockroachdb() { static TEST_NAME: &str = "blueprint_builder_test_ensure_cockroachdb"; let logctx = test_setup_log(TEST_NAME); + let mut rng = SimRngState::from_seed(TEST_NAME); // Start with an example system (no CRDB zones). let (example, parent) = @@ -3060,6 +3093,7 @@ pub mod test { &input, &collection, "test", + rng.next_planner_rng(), ) .expect("constructed builder"); for _ in 0..num_sled_zpools { @@ -3102,6 +3136,7 @@ pub mod test { &input, &collection, "test", + rng.next_planner_rng(), ) .expect("constructed builder"); for _ in 0..num_sled_zpools { @@ -3135,6 +3170,7 @@ pub mod test { static TEST_NAME: &str = "builder_zone_image_source_change_diff"; let logctx = test_setup_log(TEST_NAME); let log = logctx.log.clone(); + let mut rng = SimRngState::from_seed(TEST_NAME); // Use our example system. let (system, blueprint1) = @@ -3147,9 +3183,9 @@ pub mod test { &system.input, &system.collection, TEST_NAME, + rng.next_planner_rng(), ) .expect("built blueprint builder"); - blueprint_builder.set_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))); let sled_id = system .input diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index 237702f47ba..ce793980b2c 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -433,9 +433,9 @@ impl ExampleSystemBuilder { &base_input, &collection, "test suite", + rng.blueprint2_rng, ) .unwrap(); - builder.set_rng(rng.blueprint2_rng); // Add as many external IPs as is necessary for external DNS zones. We // pick addresses in the TEST-NET-2 (RFC 5737) range. 
diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 3975c944536..d3e9f1f1d06 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -37,6 +37,13 @@ use nexus_types::deployment::SledDetails; use nexus_types::deployment::SledFilter; use nexus_types::deployment::TufRepoContentsError; use nexus_types::deployment::ZpoolFilter; +use nexus_types::deployment::{ + CockroachdbUnsafeToShutdown, PlanningAddStepReport, + PlanningCockroachdbSettingsStepReport, PlanningDecommissionStepReport, + PlanningExpungeStepReport, PlanningMgsUpdatesStepReport, + PlanningNoopImageSourceStepReport, PlanningReport, + PlanningZoneUpdatesStepReport, ZoneUnsafeToShutdown, ZoneUpdatesWaitingOn, +}; use nexus_types::external_api::views::PhysicalDiskPolicy; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledState; @@ -47,8 +54,6 @@ use omicron_common::policy::INTERNAL_DNS_REDUNDANCY; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; -use slog::debug; -use slog::error; use slog::{Logger, info, warn}; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; @@ -91,10 +96,8 @@ pub(crate) mod rng; /// services, etc.). const NUM_CONCURRENT_MGS_UPDATES: usize = 1; -enum UpdateStepResult { - ContinueToNextStep, - Waiting, -} +/// A receipt that `check_input_validity` has been run prior to planning. +struct InputChecked; pub struct Planner<'a> { log: Logger, @@ -121,6 +124,7 @@ impl<'a> Planner<'a> { // NOTE: Right now, we just assume that this is the latest inventory // collection. See the comment on the corresponding field in `Planner`. inventory: &'a Collection, + rng: PlannerRng, ) -> anyhow::Result> { let blueprint = BlueprintBuilder::new_based_on( &log, @@ -128,59 +132,71 @@ impl<'a> Planner<'a> { input, inventory, creator, + rng, )?; Ok(Planner { log, input, blueprint, inventory }) } - /// Within tests, set a seeded RNG for deterministic results. - /// - /// This will ensure that tests that use this builder will produce the same - /// results each time they are run. - pub fn with_rng(mut self, rng: PlannerRng) -> Self { - // This is an owned builder (self rather than &mut self) because it is - // almost never going to be conditional. 
- self.blueprint.set_rng(rng); - self - } - pub fn plan(mut self) -> Result { - debug!( - self.log, - "running planner with chicken switches"; - self.input.chicken_switches(), - ); - self.check_input_validity()?; - self.do_plan()?; + let checked = self.check_input_validity()?; + let report = self.do_plan(checked)?; + self.blueprint.set_report(report); Ok(self.blueprint.build()) } - fn check_input_validity(&self) -> Result<(), Error> { + fn check_input_validity(&self) -> Result { if self.input.target_internal_dns_zone_count() > INTERNAL_DNS_REDUNDANCY { return Err(Error::PolicySpecifiesTooManyInternalDnsServers); } - Ok(()) + Ok(InputChecked) } - fn do_plan(&mut self) -> Result<(), Error> { - self.do_plan_expunge()?; - self.do_plan_decommission()?; - - let noop_info = - NoopConvertInfo::new(self.input, self.inventory, &self.blueprint)?; - noop_info.log_to(&self.log); - - self.do_plan_noop_image_source(noop_info)?; - self.do_plan_add()?; - if let UpdateStepResult::ContinueToNextStep = self.do_plan_mgs_updates() - { - self.do_plan_zone_updates()?; - } - self.do_plan_cockroachdb_settings(); - Ok(()) + fn do_plan( + &mut self, + _checked: InputChecked, + ) -> Result { + // Run the planning steps, recording their step reports as we go. + let expunge = self.do_plan_expunge()?; + let decommission = self.do_plan_decommission()?; + let noop_image_source = self.do_plan_noop_image_source()?; + let mgs_updates = self.do_plan_mgs_updates(); + let add = self.do_plan_add(&mgs_updates)?; + let zone_updates = if add.any_discretionary_zones_placed() { + // Do not update any zones if we've added any discretionary zones + // (e.g., in response to policy changes) ... + PlanningZoneUpdatesStepReport::waiting_on( + ZoneUpdatesWaitingOn::DiscretionaryZones, + ) + } else if !mgs_updates.is_empty() { + // ... or if there are still pending updates for the RoT / SP / + // Host OS / etc. + // TODO This is not quite right. See oxidecomputer/omicron#8285. + PlanningZoneUpdatesStepReport::waiting_on( + ZoneUpdatesWaitingOn::PendingMgsUpdates, + ) + } else { + self.do_plan_zone_updates(&mgs_updates)? + }; + let cockroachdb_settings = self.do_plan_cockroachdb_settings(); + Ok(PlanningReport { + blueprint_id: self.blueprint.new_blueprint_id(), + chicken_switches: *self.input.chicken_switches(), + expunge, + decommission, + noop_image_source, + add, + mgs_updates, + zone_updates, + cockroachdb_settings, + }) } - fn do_plan_decommission(&mut self) -> Result<(), Error> { + fn do_plan_decommission( + &mut self, + ) -> Result { + let mut report = PlanningDecommissionStepReport::new(); + // Check for any sleds that are currently commissioned but can be // decommissioned. Our gates for decommissioning are: // @@ -215,15 +231,10 @@ impl<'a> Planner<'a> { continue; } // If the sled is already decommissioned it... why is it showing - // up when we ask for commissioned sleds? Warn, but don't try to + // up when we ask for commissioned sleds? Report, but don't try to // decommission it again. 
(SledPolicy::Expunged, SledState::Decommissioned) => { - error!( - self.log, - "decommissioned sled returned by \ - SledFilter::Commissioned"; - "sled_id" => %sled_id, - ); + report.zombie_sleds.push(sled_id); continue; } // The sled is expunged but not yet decommissioned; fall through @@ -263,7 +274,7 @@ impl<'a> Planner<'a> { } } - Ok(()) + Ok(report) } fn do_plan_decommission_expunged_disks_for_in_service_sled( @@ -313,17 +324,22 @@ impl<'a> Planner<'a> { self.blueprint.sled_decommission_disks(sled_id, disks_to_decommission) } - fn do_plan_expunge(&mut self) -> Result<(), Error> { - let mut commissioned_sled_ids = BTreeSet::new(); + fn do_plan_expunge(&mut self) -> Result { + let mut report = PlanningExpungeStepReport::new(); // Remove services from sleds marked expunged. We use // `SledFilter::Commissioned` and have a custom `needs_zone_expungement` // function that allows us to produce better errors. + let mut commissioned_sled_ids = BTreeSet::new(); for (sled_id, sled_details) in self.input.all_sleds(SledFilter::Commissioned) { commissioned_sled_ids.insert(sled_id); - self.do_plan_expunge_for_commissioned_sled(sled_id, sled_details)?; + self.do_plan_expunge_for_commissioned_sled( + sled_id, + sled_details, + &mut report, + )?; } // Check for any decommissioned sleds (i.e., sleds for which our @@ -354,13 +370,14 @@ impl<'a> Planner<'a> { } } - Ok(()) + Ok(report) } fn do_plan_expunge_for_commissioned_sled( &mut self, sled_id: SledUuid, sled_details: &SledDetails, + report: &mut PlanningExpungeStepReport, ) -> Result<(), Error> { match sled_details.policy { SledPolicy::InService { .. } => { @@ -397,14 +414,8 @@ impl<'a> Planner<'a> { // isn't in the blueprint at all (e.g., a disk could // have been added and then expunged since our // parent blueprint was created). We don't want to - // fail in this case, but will issue a warning. - warn!( - self.log, - "planning input contained expunged disk not \ - present in parent blueprint"; - "sled_id" => %sled_id, - "disk" => ?disk, - ); + // fail in this case, but will report it. + report.orphan_disks.insert(sled_id, disk.disk_id); } Err(err) => return Err(err), } @@ -519,11 +530,17 @@ impl<'a> Planner<'a> { fn do_plan_noop_image_source( &mut self, - noop_info: NoopConvertInfo, - ) -> Result<(), Error> { + ) -> Result { + use nexus_types::deployment::PlanningNoopImageSourceSkipSledReason as SkipSledReason; + let mut report = PlanningNoopImageSourceStepReport::new(); + + let noop_info = + NoopConvertInfo::new(self.input, self.inventory, &self.blueprint)?; + noop_info.log_to(&self.log); + let sleds = match noop_info { NoopConvertInfo::GlobalEligible { sleds } => sleds, - NoopConvertInfo::GlobalIneligible { .. } => return Ok(()), + NoopConvertInfo::GlobalIneligible { .. 
} => return Ok(report), }; for sled in sleds { let eligible = match &sled.status { @@ -533,23 +550,19 @@ impl<'a> Planner<'a> { let zone_counts = eligible.zone_counts(); if zone_counts.num_install_dataset() == 0 { - debug!( - self.log, - "all zones are already Artifact, so \ - no noop image source action required"; - "num_total" => zone_counts.num_total, + report.skip_sled( + sled.sled_id, + SkipSledReason::AllZonesAlreadyArtifact { + num_total: zone_counts.num_total, + }, ); continue; } if zone_counts.num_eligible > 0 { - info!( - self.log, - "noop converting {}/{} install-dataset zones to artifact store", + report.converted_zones( + sled.sled_id, zone_counts.num_eligible, - zone_counts.num_install_dataset(); - "sled_id" => %sled.sled_id, - "num_total" => zone_counts.num_total, - "num_already_artifact" => zone_counts.num_already_artifact, + zone_counts.num_install_dataset(), ); } @@ -577,10 +590,15 @@ impl<'a> Planner<'a> { } } - Ok(()) + Ok(report) } - fn do_plan_add(&mut self) -> Result<(), Error> { + fn do_plan_add( + &mut self, + mgs_updates: &PlanningMgsUpdatesStepReport, + ) -> Result { + let mut report = PlanningAddStepReport::new(); + // Internal DNS is a prerequisite for bringing up all other zones. At // this point, we assume that internal DNS (as a service) is already // functioning. @@ -594,8 +612,6 @@ impl<'a> Planner<'a> { // We will not mark sleds getting Crucible zones as ineligible; other // control plane service zones starting concurrently with Crucible zones // is fine. - let mut sleds_waiting_for_ntp_zone = BTreeSet::new(); - for (sled_id, sled_resources) in self.input.all_sled_resources(SledFilter::InService) { @@ -642,12 +658,8 @@ impl<'a> Planner<'a> { .next() .is_none() { - info!( - self.log, - "skipping sled (no zpools in service)"; - "sled_id" => %sled_id, - ); - sleds_waiting_for_ntp_zone.insert(sled_id); + report.sleds_without_zpools_for_ntp_zones.insert(sled_id); + report.sleds_waiting_for_ntp_zone.insert(sled_id); continue; } @@ -657,14 +669,13 @@ impl<'a> Planner<'a> { // provision anything else. if self.blueprint.sled_ensure_zone_ntp( sled_id, - self.image_source_for_new_zone(ZoneKind::InternalNtp)?, + self.image_source_for_new_zone( + ZoneKind::InternalNtp, + mgs_updates, + )?, )? == Ensure::Added { - info!( - &self.log, - "found sled missing NTP zone (will add one)"; - "sled_id" => %sled_id - ); + report.sleds_missing_ntp_zone.insert(sled_id); self.blueprint.record_operation(Operation::AddZone { sled_id, kind: ZoneKind::InternalNtp, @@ -692,14 +703,11 @@ impl<'a> Planner<'a> { .requires_timesync() }) { - info!( - &self.log, - "sled getting NTP zone has other services already; \ - considering it eligible for discretionary zones"; - "sled_id" => %sled_id, - ); + report + .sleds_getting_ntp_and_discretionary_zones + .insert(sled_id); } else { - sleds_waiting_for_ntp_zone.insert(sled_id); + report.sleds_waiting_for_ntp_zone.insert(sled_id); continue; } } @@ -744,12 +752,7 @@ impl<'a> Planner<'a> { }) .unwrap_or(false); if !has_ntp_inventory { - info!( - &self.log, - "parent blueprint contains NTP zone, but it's not in \ - inventory yet"; - "sled_id" => %sled_id, - ); + report.sleds_without_ntp_zones_in_inventory.insert(sled_id); continue; } @@ -760,15 +763,13 @@ impl<'a> Planner<'a> { if self.blueprint.sled_ensure_zone_crucible( sled_id, *zpool_id, - self.image_source_for_new_zone(ZoneKind::Crucible)?, + self.image_source_for_new_zone( + ZoneKind::Crucible, + mgs_updates, + )?, )? 
== Ensure::Added { - info!( - &self.log, - "found sled zpool missing Crucible zone (will add one)"; - "sled_id" => ?sled_id, - "zpool_id" => ?zpool_id, - ); + report.missing_crucible_zone(sled_id, *zpool_id); ncrucibles_added += 1; } } @@ -788,16 +789,19 @@ impl<'a> Planner<'a> { } } - self.do_plan_add_discretionary_zones(&sleds_waiting_for_ntp_zone)?; + self.do_plan_add_discretionary_zones(mgs_updates, &mut report)?; // Now that we've added all the disks and zones we plan on adding, // ensure that all sleds have the datasets they need to have. - self.do_plan_datasets()?; + self.do_plan_datasets(&mut report)?; - Ok(()) + Ok(report) } - fn do_plan_datasets(&mut self) -> Result<(), Error> { + fn do_plan_datasets( + &mut self, + _report: &mut PlanningAddStepReport, + ) -> Result<(), Error> { for sled_id in self.input.all_sled_ids(SledFilter::InService) { if let EnsureMultiple::Changed { added, @@ -829,7 +833,8 @@ impl<'a> Planner<'a> { fn do_plan_add_discretionary_zones( &mut self, - sleds_waiting_for_ntp_zone: &BTreeSet, + mgs_updates: &PlanningMgsUpdatesStepReport, + report: &mut PlanningAddStepReport, ) -> Result<(), Error> { // We usually don't need to construct an `OmicronZonePlacement` to add // discretionary zones, so defer its creation until it's needed. @@ -847,7 +852,8 @@ impl<'a> Planner<'a> { DiscretionaryOmicronZone::Nexus, DiscretionaryOmicronZone::Oximeter, ] { - let num_zones_to_add = self.num_additional_zones_needed(zone_kind); + let num_zones_to_add = + self.num_additional_zones_needed(zone_kind, report); if num_zones_to_add == 0 { continue; } @@ -864,7 +870,7 @@ impl<'a> Planner<'a> { .input .all_sled_resources(SledFilter::Discretionary) .filter(|(sled_id, _)| { - !sleds_waiting_for_ntp_zone.contains(&sled_id) + !report.sleds_waiting_for_ntp_zone.contains(&sled_id) }) .map(|(sled_id, sled_resources)| { OmicronZonePlacementSledState { @@ -892,17 +898,20 @@ impl<'a> Planner<'a> { zone_placement, zone_kind, num_zones_to_add, + mgs_updates, + report, )?; } Ok(()) } - // Given the current blueprint state and policy, returns the number of - // additional zones needed of the given `zone_kind` to satisfy the policy. + /// Given the current blueprint state and policy, returns the number of + /// additional zones needed of the given `zone_kind` to satisfy the policy. fn num_additional_zones_needed( &mut self, zone_kind: DiscretionaryOmicronZone, + report: &mut PlanningAddStepReport, ) -> usize { // Count the number of `kind` zones on all in-service sleds. This // will include sleds that are in service but not eligible for new @@ -965,30 +974,32 @@ impl<'a> Planner<'a> { }; // TODO-correctness What should we do if we have _too many_ - // `zone_kind` zones? For now, just log it the number of zones any - // time we have at least the minimum number. + // `zone_kind` zones? For now, just report the number of zones + // any time we have at least the minimum number. let num_zones_to_add = target_count.saturating_sub(num_existing_kind_zones); if num_zones_to_add == 0 { - info!( - self.log, "sufficient {zone_kind:?} zones exist in plan"; - "desired_count" => target_count, - "current_count" => num_existing_kind_zones, + report.sufficient_zones_exist( + ZoneKind::from(zone_kind).report_str(), + target_count, + num_existing_kind_zones, ); } num_zones_to_add } - // Attempts to place `num_zones_to_add` new zones of `kind`. 
- // - // It is not an error if there are too few eligible sleds to start a - // sufficient number of zones; instead, we'll log a warning and start as - // many as we can (up to `num_zones_to_add`). + /// Attempts to place `num_zones_to_add` new zones of `kind`. + /// + /// It is not an error if there are too few eligible sleds to start a + /// sufficient number of zones; instead, we'll report it and start as + /// many as we can (up to `num_zones_to_add`). fn add_discretionary_zones( &mut self, zone_placement: &mut OmicronZonePlacement, kind: DiscretionaryOmicronZone, num_zones_to_add: usize, + mgs_updates: &PlanningMgsUpdatesStepReport, + report: &mut PlanningAddStepReport, ) -> Result<(), Error> { for i in 0..num_zones_to_add { let sled_id = match zone_placement.place_zone(kind) { @@ -998,18 +1009,17 @@ impl<'a> Planner<'a> { // (albeit unlikely?) we're in a weird state where we need // more sleds or disks to come online, and we may need to be // able to produce blueprints to achieve that status. - warn!( - self.log, - "failed to place all new desired {kind:?} zones"; - "placed" => i, - "wanted_to_place" => num_zones_to_add, + report.out_of_eligible_sleds( + ZoneKind::from(kind).report_str(), + i, + num_zones_to_add, ); - break; } }; - let image_source = self.image_source_for_new_zone(kind.into())?; + let image_source = + self.image_source_for_new_zone(kind.into(), mgs_updates)?; match kind { DiscretionaryOmicronZone::BoundaryNtp => { self.blueprint.sled_promote_internal_ntp_to_boundary_ntp( @@ -1045,10 +1055,9 @@ impl<'a> Planner<'a> { .blueprint .sled_add_zone_oximeter(sled_id, image_source)?, }; - info!( - self.log, "added zone to sled"; - "sled_id" => %sled_id, - "kind" => ?kind, + report.discretionary_zone_placed( + sled_id, + ZoneKind::from(kind).report_str(), ); } @@ -1057,7 +1066,7 @@ impl<'a> Planner<'a> { /// Update at most one MGS-managed device (SP, RoT, etc.), if any are out of /// date. - fn do_plan_mgs_updates(&mut self) -> UpdateStepResult { + fn do_plan_mgs_updates(&mut self) -> PlanningMgsUpdatesStepReport { // Determine which baseboards we will consider updating. // // Sleds may be present but not adopted as part of the control plane. @@ -1101,19 +1110,18 @@ impl<'a> Planner<'a> { current_artifacts, NUM_CONCURRENT_MGS_UPDATES, ); + self.blueprint.pending_mgs_updates_replace_all(next.clone()); - // TODO This is not quite right. See oxidecomputer/omicron#8285. - let rv = if next.is_empty() { - UpdateStepResult::ContinueToNextStep - } else { - UpdateStepResult::Waiting - }; - self.blueprint.pending_mgs_updates_replace_all(next); - rv + PlanningMgsUpdatesStepReport::new(next) } /// Update at most one existing zone to use a new image source. - fn do_plan_zone_updates(&mut self) -> Result<(), Error> { + fn do_plan_zone_updates( + &mut self, + mgs_updates: &PlanningMgsUpdatesStepReport, + ) -> Result { + let mut report = PlanningZoneUpdatesStepReport::new(); + // We are only interested in non-decommissioned sleds. let sleds = self .input @@ -1229,14 +1237,14 @@ impl<'a> Planner<'a> { "sled_id" => %sled_id, "zones_currently_updating" => ?zones_currently_updating, ); - return Ok(()); + return Ok(report); } } // Find out of date zones, as defined by zones whose image source does // not match what it should be based on our current target release. 
let target_release = self.input.tuf_repo().description(); - let mut out_of_date_zones = sleds + let out_of_date_zones = sleds .into_iter() .flat_map(|sled_id| { let log = &self.log; @@ -1264,28 +1272,30 @@ impl<'a> Planner<'a> { } }; if zone.image_source != desired_image_source { - Some((sled_id, zone, desired_image_source)) + Some((sled_id, zone.clone(), desired_image_source)) } else { None } }) }) - .peekable(); + .collect::>(); - // Before we filter out zones that can't be updated, do we have any out - // of date zones at all? We need this to explain why we didn't update - // any zones below, if we don't. - let have_out_of_date_zones = out_of_date_zones.peek().is_some(); + for (sled_id, zone, desired_image) in out_of_date_zones.iter() { + report.out_of_date_zone(*sled_id, zone, desired_image.clone()); + } // Of the out-of-date zones, filter out zones that can't be updated yet, // either because they're not ready or because it wouldn't be safe to // bounce them. - let mut updateable_zones = - out_of_date_zones.filter(|(_sled_id, zone, _new_image_source)| { - if !self.can_zone_be_shut_down_safely(zone) { + let mut updateable_zones = out_of_date_zones.iter().filter( + |(_sled_id, zone, _new_image_source)| { + if !self.can_zone_be_shut_down_safely(zone, &mut report) { return false; } - match self.is_zone_ready_for_update(zone.zone_type.kind()) { + match self.is_zone_ready_for_update( + zone.zone_type.kind(), + mgs_updates, + ) { Ok(true) => true, Ok(false) => false, Err(err) => { @@ -1300,35 +1310,22 @@ impl<'a> Planner<'a> { false } } - }); + }, + ); - // Update the first out-of-date zone. if let Some((sled_id, zone, new_image_source)) = updateable_zones.next() { - // Borrow check workaround: `self.update_or_expunge_zone` needs - // `&mut self`, but `self` is borrowed in the `updateable_zones` - // iterator. Clone the one zone we want to update, then drop the - // iterator; now we can call `&mut self` methods. - let zone = zone.clone(); - std::mem::drop(updateable_zones); - - return self.update_or_expunge_zone( - sled_id, - &zone, - new_image_source, - ); - } - - if have_out_of_date_zones { - info!( - self.log, - "not all zones up-to-date, but no zones can be updated now" - ); + // Update the first out-of-date zone. + self.update_or_expunge_zone( + *sled_id, + zone, + new_image_source.clone(), + report, + ) } else { - info!(self.log, "all zones up-to-date"); + // No zones to update. 
+ Ok(report) } - - Ok(()) } /// Update a zone to use a new image source, either in-place or by @@ -1338,7 +1335,8 @@ impl<'a> Planner<'a> { sled_id: SledUuid, zone: &BlueprintZoneConfig, new_image_source: BlueprintZoneImageSource, - ) -> Result<(), Error> { + mut report: PlanningZoneUpdatesStepReport, + ) -> Result { let zone_kind = zone.zone_type.kind(); // We're called by `do_plan_zone_updates()`, which guarantees the @@ -1351,18 +1349,12 @@ impl<'a> Planner<'a> { | ZoneKind::ClickhouseKeeper | ZoneKind::ClickhouseServer | ZoneKind::CockroachDb => { - info!( - self.log, "updating zone image source in-place"; - "sled_id" => %sled_id, - "zone_id" => %zone.id, - "kind" => ?zone.zone_type.kind(), - "image_source" => %new_image_source, - ); self.blueprint.comment(format!( "updating {:?} zone {} in-place", zone.zone_type.kind(), zone.id )); + report.updated_zone(sled_id, &zone); self.blueprint.sled_set_zone_source( sled_id, zone.id, @@ -1376,25 +1368,24 @@ impl<'a> Planner<'a> { | ZoneKind::InternalNtp | ZoneKind::Nexus | ZoneKind::Oximeter => { - info!( - self.log, "expunging out-of-date zone"; - "sled_id" => %sled_id, - "zone_id" => %zone.id, - "kind" => ?zone.zone_type.kind(), - ); self.blueprint.comment(format!( "expunge {:?} zone {} for update", zone.zone_type.kind(), zone.id )); + report.expunged_zone(sled_id, zone); self.blueprint.sled_expunge_zone(sled_id, zone.id)?; } } - Ok(()) + Ok(report) } - fn do_plan_cockroachdb_settings(&mut self) { + fn do_plan_cockroachdb_settings( + &mut self, + ) -> PlanningCockroachdbSettingsStepReport { + let mut report = PlanningCockroachdbSettingsStepReport::new(); + // Figure out what we should set the CockroachDB "preserve downgrade // option" setting to based on the planning input. // @@ -1472,12 +1463,8 @@ impl<'a> Planner<'a> { Err(_) => CockroachDbPreserveDowngrade::DoNotModify, }; self.blueprint.cockroachdb_preserve_downgrade(value); - info!( - &self.log, - "will ensure cockroachdb setting"; - "setting" => "cluster.preserve_downgrade_option", - "value" => ?value, - ); + report.preserve_downgrade = value; + report // Hey! Listen! // @@ -1492,12 +1479,14 @@ impl<'a> Planner<'a> { fn image_source_for_new_zone( &self, zone_kind: ZoneKind, + mgs_updates: &PlanningMgsUpdatesStepReport, ) -> Result { - let source_repo = if self.is_zone_ready_for_update(zone_kind)? { - self.input.tuf_repo().description() - } else { - self.input.old_repo().description() - }; + let source_repo = + if self.is_zone_ready_for_update(zone_kind, mgs_updates)? { + self.input.tuf_repo().description() + } else { + self.input.old_repo().description() + }; source_repo.zone_image_source(zone_kind) } @@ -1506,10 +1495,14 @@ impl<'a> Planner<'a> { fn is_zone_ready_for_update( &self, zone_kind: ZoneKind, + mgs_updates: &PlanningMgsUpdatesStepReport, ) -> Result { - // TODO-correctness: We should return false regardless of `zone_kind` if - // there are still pending updates for components earlier in the update - // ordering than zones: RoT bootloader / RoT / SP / Host OS. + // We return false regardless of `zone_kind` if there are still + // pending updates for components earlier in the update ordering + // than zones: RoT bootloader / RoT / SP / Host OS. + if !mgs_updates.is_empty() { + return Ok(false); + } match zone_kind { ZoneKind::Nexus => { @@ -1558,55 +1551,69 @@ impl<'a> Planner<'a> { /// because the underlying disk / sled has been expunged" case. In this /// case, we have no choice but to reconcile with the fact that the zone is /// now gone. 
- fn can_zone_be_shut_down_safely(&self, zone: &BlueprintZoneConfig) -> bool { + fn can_zone_be_shut_down_safely( + &self, + zone: &BlueprintZoneConfig, + report: &mut PlanningZoneUpdatesStepReport, + ) -> bool { + use ZoneUnsafeToShutdown::*; match zone.zone_type.kind() { ZoneKind::CockroachDb => { - debug!(self.log, "Checking if Cockroach node can shut down"); + use CockroachdbUnsafeToShutdown::*; + // We must hear from all nodes let all_statuses = &self.inventory.cockroach_status; if all_statuses.len() < COCKROACHDB_REDUNDANCY { - warn!(self.log, "Not enough nodes"); + report.unsafe_zone( + zone, + Cockroachdb { reason: NotEnoughNodes }, + ); return false; } // All nodes must report: "We have the necessary redundancy, and // have observed no underreplicated ranges". - for (node_id, status) in all_statuses { - let log = self.log.new(slog::o!( - "operation" => "Checking Cockroach node status for shutdown safety", - "node_id" => node_id.to_string() - )); + for (_node_id, status) in all_statuses { let Some(ranges_underreplicated) = status.ranges_underreplicated else { - warn!(log, "Missing underreplicated stat"); + report.unsafe_zone( + zone, + Cockroachdb { reason: MissingUnderreplicatedStat }, + ); return false; }; if ranges_underreplicated != 0 { - warn!(log, "Underreplicated ranges != 0"; "ranges_underreplicated" => ranges_underreplicated); + report.unsafe_zone( + zone, + Cockroachdb { + reason: UnderreplicatedRanges { + n: ranges_underreplicated, + }, + }, + ); return false; } let Some(live_nodes) = status.liveness_live_nodes else { - warn!(log, "Missing live_nodes"); + report.unsafe_zone( + zone, + Cockroachdb { reason: MissingLiveNodesStat }, + ); return false; }; if live_nodes < COCKROACHDB_REDUNDANCY as u64 { - warn!(log, "Live nodes < COCKROACHDB_REDUNDANCY"; "live_nodes" => live_nodes); + report.unsafe_zone( + zone, + Cockroachdb { + reason: NotEnoughLiveNodes { live_nodes }, + }, + ); return false; } - info!( - log, - "CockroachDB Node status looks ready for shutdown" - ); } true } ZoneKind::BoundaryNtp => { - debug!( - self.log, - "Checking if boundary NTP zone can be shut down" - ); - // Find all boundary NTP zones expected to be in-service by our // blueprint. 
let mut boundary_ntp_zones = std::collections::HashSet::new(); @@ -1636,16 +1643,18 @@ impl<'a> Planner<'a> { } } - let can_shutdown = - synchronized_boundary_ntp_count >= BOUNDARY_NTP_REDUNDANCY; - info!( - self.log, - "Boundary NTP zone shutdown check"; - "total_boundary_ntp_zones" => boundary_ntp_zones.len(), - "synchronized_count" => synchronized_boundary_ntp_count, - "can_shutdown" => can_shutdown - ); - can_shutdown + if synchronized_boundary_ntp_count < BOUNDARY_NTP_REDUNDANCY { + report.unsafe_zone( + zone, + BoundaryNtp { + total_boundary_ntp_zones: boundary_ntp_zones.len(), + synchronized_count: synchronized_boundary_ntp_count, + }, + ); + false + } else { + true + } } _ => true, // other zone kinds have no special safety checks } @@ -1750,6 +1759,7 @@ pub(crate) mod test { &input, test_name, &collection, + PlannerRng::from_entropy(), ) .expect("created planner"); let child_blueprint = planner.plan().expect("planning succeeded"); @@ -1790,9 +1800,9 @@ pub(crate) mod test { &example.input, "no-op?", &example.collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("failed to plan"); @@ -1827,9 +1837,9 @@ pub(crate) mod test { &input, "test: add NTP?", &example.collection, + PlannerRng::from_seed((TEST_NAME, "bp3")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp3"))) .plan() .expect("failed to plan"); @@ -1871,9 +1881,9 @@ pub(crate) mod test { &input, "test: add nothing more", &example.collection, + PlannerRng::from_seed((TEST_NAME, "bp4")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp4"))) .plan() .expect("failed to plan"); let summary = blueprint4.diff_since_blueprint(&blueprint3); @@ -1909,9 +1919,9 @@ pub(crate) mod test { &input, "test: add Crucible zones?", &collection, + PlannerRng::from_seed((TEST_NAME, "bp5")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp5"))) .plan() .expect("failed to plan"); @@ -2012,9 +2022,9 @@ pub(crate) mod test { &input, "test_blueprint2", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("failed to plan"); @@ -2093,9 +2103,9 @@ pub(crate) mod test { &input, "test_blueprint2", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("failed to plan"); @@ -2181,6 +2191,7 @@ pub(crate) mod test { &builder.build(), "test_blueprint2", &collection, + PlannerRng::from_entropy(), ) .expect("created planner") .plan() @@ -2221,9 +2232,9 @@ pub(crate) mod test { &input, "test_blueprint2", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("failed to plan"); @@ -2300,9 +2311,9 @@ pub(crate) mod test { &input, "test_blueprint2", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("failed to plan"); @@ -2341,9 +2352,9 @@ pub(crate) mod test { &input, "test_blueprint3", &collection, + PlannerRng::from_seed((TEST_NAME, "bp3")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp3"))) .plan() .expect("failed to plan"); @@ -2401,6 +2412,7 @@ 
pub(crate) mod test { &input, &collection, TEST_NAME, + PlannerRng::from_entropy(), ) .expect("failed to build blueprint builder"); let sled_id = builder.sled_ids_with_zones().next().expect("no sleds"); @@ -2419,6 +2431,7 @@ pub(crate) mod test { &input, &collection, TEST_NAME, + PlannerRng::from_entropy(), ) .expect("failed to build blueprint builder"); @@ -2481,9 +2494,9 @@ pub(crate) mod test { &input, "test_blueprint2", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("failed to plan"); @@ -2512,9 +2525,9 @@ pub(crate) mod test { &input, "test_blueprint3", &collection, + PlannerRng::from_seed((TEST_NAME, "bp3")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp3"))) .plan() .expect("failed to re-plan"); @@ -2630,9 +2643,9 @@ pub(crate) mod test { &input, "test: some new disks", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("failed to plan"); @@ -2716,9 +2729,9 @@ pub(crate) mod test { &input, "test: fix a dataset", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("failed to plan"); @@ -2797,9 +2810,9 @@ pub(crate) mod test { &input, "test: expunge a disk", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("failed to plan"); @@ -2857,9 +2870,9 @@ pub(crate) mod test { &input, "test: decommission a disk", &collection, + PlannerRng::from_seed((TEST_NAME, "bp3")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp3"))) .plan() .expect("failed to plan"); @@ -2913,9 +2926,9 @@ pub(crate) mod test { &input, "test: expunge and decommission all disks", &collection, + PlannerRng::from_seed((TEST_NAME, "bp4")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp4"))) .plan() .expect("failed to plan"); @@ -3003,9 +3016,9 @@ pub(crate) mod test { &input, "test: expunge a disk", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("failed to plan"); @@ -3173,9 +3186,9 @@ pub(crate) mod test { &input, "test: expunge a disk with a zone on top", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("failed to plan"); @@ -3350,9 +3363,9 @@ pub(crate) mod test { &input, "test_blueprint2", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("failed to plan"); @@ -3597,9 +3610,9 @@ pub(crate) mod test { &input, "test_blueprint2", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("failed to plan"); @@ -3641,9 +3654,9 @@ pub(crate) mod test { &input, "test_blueprint3", &collection, + PlannerRng::from_seed((TEST_NAME, "bp3")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp3"))) .plan() .expect("succeeded in planner"); @@ -3692,9 +3705,9 @@ pub(crate) mod test { 
&input, "test_blueprint4", &collection, + PlannerRng::from_seed((TEST_NAME, "bp4")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp4"))) .plan() .expect("succeeded in planner"); @@ -3752,9 +3765,9 @@ pub(crate) mod test { &builder.clone().build(), "initial settings", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("failed to plan"); assert_eq!(bp2.cockroachdb_fingerprint, "bp2"); @@ -3779,9 +3792,9 @@ pub(crate) mod test { &builder.clone().build(), "initial settings", &collection, + PlannerRng::from_seed((TEST_NAME, "bp3")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp3"))) .plan() .expect("failed to plan"); assert_eq!(bp3.cockroachdb_fingerprint, "bp3"); @@ -3804,9 +3817,9 @@ pub(crate) mod test { &builder.clone().build(), "after ensure", &collection, + PlannerRng::from_seed((TEST_NAME, "bp4")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp4"))) .plan() .expect("failed to plan"); assert_eq!(bp4.cockroachdb_fingerprint, "bp4"); @@ -3833,12 +3846,12 @@ pub(crate) mod test { &builder.clone().build(), "unknown version", &collection, + PlannerRng::from_seed(( + TEST_NAME, + format!("bp5-{}", preserve_downgrade), + )), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed(( - TEST_NAME, - format!("bp5-{}", preserve_downgrade), - ))) .plan() .expect("failed to plan"); assert_eq!(bp5.cockroachdb_fingerprint, "bp5"); @@ -3892,9 +3905,9 @@ pub(crate) mod test { &input, "test_blueprint2", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("failed to re-plan"); @@ -3962,9 +3975,9 @@ pub(crate) mod test { &input, "test_blueprint2", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("failed to create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("failed to re-plan"); @@ -4026,9 +4039,9 @@ pub(crate) mod test { &input, "test_blueprint2", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("plan"); @@ -4100,9 +4113,9 @@ pub(crate) mod test { &input, "test_blueprint3", &collection, + PlannerRng::from_seed((TEST_NAME, "bp3")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp3"))) .plan() .expect("plan"); @@ -4141,9 +4154,9 @@ pub(crate) mod test { &input, "test_blueprint4", &collection, + PlannerRng::from_seed((TEST_NAME, "bp4")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp4"))) .plan() .expect("plan"); @@ -4189,9 +4202,9 @@ pub(crate) mod test { &input, "test_blueprint5", &collection, + PlannerRng::from_seed((TEST_NAME, "bp5")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp5"))) .plan() .expect("plan"); @@ -4236,9 +4249,9 @@ pub(crate) mod test { &input, "test_blueprint6", &collection, + PlannerRng::from_seed((TEST_NAME, "bp6")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp6"))) .plan() .expect("plan"); @@ -4273,9 +4286,9 @@ pub(crate) mod test { &input, "test_blueprint7", &collection, + PlannerRng::from_seed((TEST_NAME, "bp7")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp7"))) .plan() .expect("plan"); @@ -4316,9 +4329,9 @@ 
pub(crate) mod test { &input, "test_blueprint8", &collection, + PlannerRng::from_seed((TEST_NAME, "bp8")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp8"))) .plan() .expect("plan"); @@ -4369,9 +4382,9 @@ pub(crate) mod test { &input, "test_blueprint2", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("plan"); @@ -4427,9 +4440,9 @@ pub(crate) mod test { &input, "test_blueprint3", &collection, + PlannerRng::from_seed((TEST_NAME, "bp3")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp3"))) .plan() .expect("plan"); @@ -4464,9 +4477,9 @@ pub(crate) mod test { &input, "test_blueprint4", &collection, + PlannerRng::from_seed((TEST_NAME, "bp4")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp4"))) .plan() .expect("plan"); @@ -4495,9 +4508,9 @@ pub(crate) mod test { &input, "test_blueprint5", &collection, + PlannerRng::from_seed((TEST_NAME, "bp5")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp5"))) .plan() .expect("plan"); @@ -4528,9 +4541,9 @@ pub(crate) mod test { &input, "test_blueprint6", &collection, + PlannerRng::from_seed((TEST_NAME, "bp6")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp6"))) .plan() .expect("plan"); @@ -4588,9 +4601,9 @@ pub(crate) mod test { &input, "test_blueprint2", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("plan"); @@ -4635,9 +4648,9 @@ pub(crate) mod test { &input, "test_blueprint3", &collection, + PlannerRng::from_seed((TEST_NAME, "bp3")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp3"))) .plan() .expect("plan"); @@ -4663,9 +4676,9 @@ pub(crate) mod test { &input, "test_blueprint4", &collection, + PlannerRng::from_seed((TEST_NAME, "bp4")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp4"))) .plan() .expect("plan"); @@ -4764,9 +4777,9 @@ pub(crate) mod test { &input, "expunge disk", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("planned"); @@ -4911,9 +4924,9 @@ pub(crate) mod test { &input, "removed Nexus zone from inventory", &collection, + PlannerRng::from_seed((TEST_NAME, "bp3")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp3"))) .plan() .expect("planned"); @@ -4991,9 +5004,9 @@ pub(crate) mod test { &input, "expunge disk", &collection, + PlannerRng::from_seed((TEST_NAME, "bp2")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))) .plan() .expect("planned"); @@ -5064,9 +5077,9 @@ pub(crate) mod test { &input, "removed Nexus zone from inventory", &collection, + PlannerRng::from_seed((TEST_NAME, "bp3")), ) .expect("created planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp3"))) .plan() .expect("planned"); @@ -5292,9 +5305,9 @@ pub(crate) mod test { &input, "test_blueprint3", &example.collection, + PlannerRng::from_seed((TEST_NAME, "bp3")), ) .expect("can't create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp3"))) .plan() .expect("can't re-plan for new Nexus zone"); { @@ -5326,9 +5339,9 @@ pub(crate) mod test { &input, &blueprint_name, &example.collection, + PlannerRng::from_seed((TEST_NAME, &blueprint_name)), ) 
.expect("can't create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, &blueprint_name))) .plan() .unwrap_or_else(|_| panic!("can't re-plan after {i} iterations")); @@ -5408,9 +5421,9 @@ pub(crate) mod test { &input, &blueprint_name, &example.collection, + PlannerRng::from_seed((TEST_NAME, &blueprint_name)), ) .expect("can't create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, &blueprint_name))) .plan() .unwrap_or_else(|_| panic!("can't re-plan after {i} iterations")); @@ -5489,9 +5502,9 @@ pub(crate) mod test { &example.input, &blueprint_name, &example.collection, + PlannerRng::from_seed((TEST_NAME, &blueprint_name)), ) .expect("can't create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, &blueprint_name))) .plan() .unwrap_or_else(|_| panic!("can't plan to include Cockroach nodes")); @@ -5683,9 +5696,9 @@ pub(crate) mod test { &example.input, &format!("test_blueprint_cockroach_{i}"), &example.collection, + PlannerRng::from_seed((TEST_NAME, "bp_crdb")), ) .expect("can't create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp_crdb"))) .plan() .expect("plan for trivial TUF repo"); @@ -5868,9 +5881,9 @@ pub(crate) mod test { &example.input, &blueprint_name, &example.collection, + rng.next_planner_rng(), ) .expect("can't create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, &blueprint_name))) .plan() .unwrap_or_else(|err| { panic!("can't plan to include boundary NTP: {err}") @@ -5908,6 +5921,7 @@ pub(crate) mod test { &example.input, TEST_NAME, &example.collection, + rng.next_planner_rng(), ) .expect("can't create planner"); let new_blueprint = planner.plan().expect("planning succeeded"); @@ -6131,9 +6145,9 @@ pub(crate) mod test { &example.input, "test_blueprint_expunge_old_boundary_ntp", &example.collection, + rng.next_planner_rng(), ) .expect("can't create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp_ntp"))) .plan() .expect("plan for trivial TUF repo"); { @@ -6142,6 +6156,7 @@ pub(crate) mod test { "diff between blueprints (should be expunging boundary NTP using install dataset):\n{}", summary.display() ); + eprintln!("{}", new_blueprint.report); assert_eq!(summary.total_zones_added(), 0); assert_eq!(summary.total_zones_removed(), 0); @@ -6179,9 +6194,9 @@ pub(crate) mod test { &example.input, "test_blueprint_boundary_ntp_add_internal_and_promote_one", &example.collection, + rng.next_planner_rng(), ) .expect("can't create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp_ntp"))) .plan() .expect("plan for trivial TUF repo"); { @@ -6190,6 +6205,7 @@ pub(crate) mod test { "diff between blueprints (should be adding one internal NTP and promoting another to boundary):\n{}", summary.display() ); + eprintln!("{}", new_blueprint.report); assert_eq!(summary.total_zones_added(), 2); assert_eq!(summary.total_zones_removed(), 0); @@ -6218,9 +6234,9 @@ pub(crate) mod test { &example.input, "test_blueprint_boundary_ntp_expunge_the_other_one", &example.collection, + rng.next_planner_rng(), ) .expect("can't create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp_ntp"))) .plan() .expect("plan for trivial TUF repo"); { @@ -6229,6 +6245,7 @@ pub(crate) mod test { "diff between blueprints (should be expunging another boundary NTP):\n{}", summary.display() ); + eprintln!("{}", new_blueprint.report); assert_eq!(summary.total_zones_added(), 0); assert_eq!(summary.total_zones_removed(), 0); @@ -6258,9 +6275,9 @@ pub(crate) mod test { &example.input, "test_blueprint_boundary_ntp_promotion", &example.collection, + 
rng.next_planner_rng(), ) .expect("can't create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp_ntp"))) .plan() .expect("plan for trivial TUF repo"); { @@ -6269,6 +6286,7 @@ pub(crate) mod test { "diff between blueprints (should be adding promoting internal -> boundary NTP):\n{}", summary.display() ); + eprintln!("{}", new_blueprint.report); assert_eq!(summary.total_zones_added(), 2); assert_eq!(summary.total_zones_removed(), 0); @@ -6294,9 +6312,9 @@ pub(crate) mod test { &example.input, "test_blueprint_boundary_ntp_finish_expunging", &example.collection, + rng.next_planner_rng(), ) .expect("can't create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, "bp_ntp"))) .plan() .expect("plan for trivial TUF repo"); { @@ -6305,6 +6323,7 @@ pub(crate) mod test { "diff between blueprints (should be adding wrapping up internal NTP expungement):\n{}", summary.display() ); + eprintln!("{}", new_blueprint.report); assert_eq!(summary.total_zones_added(), 0); assert_eq!(summary.total_zones_removed(), 0); @@ -6435,12 +6454,16 @@ pub(crate) mod test { &input, &blueprint_name, &example.collection, + PlannerRng::from_seed((TEST_NAME, &blueprint_name)), ) .expect("can't create planner") - .with_rng(PlannerRng::from_seed((TEST_NAME, &blueprint_name))) .plan() .unwrap_or_else(|_| panic!("can't re-plan after {i} iterations")); + assert_eq!(blueprint.report.blueprint_id, blueprint.id); + eprintln!("{}\n", blueprint.report); + // TODO: more report testing + { let summary = blueprint.diff_since_blueprint(&parent); if summary.total_zones_added() == 0 diff --git a/nexus/reconfigurator/planning/tests/output/example_builder_zone_counts_blueprint.txt b/nexus/reconfigurator/planning/tests/output/example_builder_zone_counts_blueprint.txt index 262bdeabd5d..9123be5312c 100644 --- a/nexus/reconfigurator/planning/tests/output/example_builder_zone_counts_blueprint.txt +++ b/nexus/reconfigurator/planning/tests/output/example_builder_zone_counts_blueprint.txt @@ -533,3 +533,6 @@ parent: e35b2fdd-354d-48d9-acb5-703b2c269a54 target release min gen: 1 PENDING MGS-MANAGED UPDATES: 0 + +Nothing to report on planning for blueprint 4a0b8410-b14f-41e7-85e7-3c0fe7050ccc. 
+ diff --git a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt index aaefd04e153..2fd2c97679d 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt @@ -324,3 +324,13 @@ parent: 516e80a3-b362-4fac-bd3c-4559717120dd target release min gen: 1 PENDING MGS-MANAGED UPDATES: 0 + +Planning report for blueprint 1ac2d88f-27dd-4506-8585-6b2be832528e: +Chicken switches: + add zones with mupdate override: false + +* Discretionary zones placed: + * 2 zones on sled d67ce8f0-a691-4010-b414-420d82e80527: crucible_pantry, nexus + * 2 zones on sled fefcf4cf-f7e7-46b3-b629-058526ce440e: clickhouse, internal_dns +* Zone updates waiting on discretionary zones + diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt index 1295f3ff2ea..a82490611cb 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt @@ -512,3 +512,13 @@ parent: 4d4e6c38-cd95-4c4e-8f45-6af4d686964b target release min gen: 1 PENDING MGS-MANAGED UPDATES: 0 + +Planning report for blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6: +Chicken switches: + add zones with mupdate override: false + +* Discretionary zones placed: + * 3 zones on sled 75bc286f-2b4b-482c-9431-59272af529da: nexus, nexus, nexus + * 3 zones on sled affab35f-600a-4109-8ea0-34a067a4e0bc: nexus, nexus, nexus +* Zone updates waiting on discretionary zones + diff --git a/nexus/reconfigurator/planning/tests/output/zone_image_source_change_1.txt b/nexus/reconfigurator/planning/tests/output/zone_image_source_change_1.txt index 57cf92543a6..440e7e28e51 100644 --- a/nexus/reconfigurator/planning/tests/output/zone_image_source_change_1.txt +++ b/nexus/reconfigurator/planning/tests/output/zone_image_source_change_1.txt @@ -1,5 +1,5 @@ from: blueprint 11d7ef8b-adcd-4a37-9b4e-69faa3c242b1 -to: blueprint 665dc34a-dbf2-4d13-9ceb-9542d434ab0e +to: blueprint 1481141d-a5cf-4103-8344-738967e0f110 MODIFIED SLEDS: diff --git a/nexus/src/app/background/tasks/blueprint_execution.rs b/nexus/src/app/background/tasks/blueprint_execution.rs index 88fea70e7a1..c8c16456a96 100644 --- a/nexus/src/app/background/tasks/blueprint_execution.rs +++ b/nexus/src/app/background/tasks/blueprint_execution.rs @@ -196,7 +196,7 @@ mod test { BlueprintTarget, BlueprintZoneConfig, BlueprintZoneDisposition, BlueprintZoneImageSource, BlueprintZoneType, CockroachDbPreserveDowngrade, OximeterReadMode, PendingMgsUpdates, - blueprint_zone_type, + PlanningReport, blueprint_zone_type, }; use nexus_types::external_api::views::SledState; use omicron_common::api::external; @@ -276,6 +276,7 @@ mod test { time_created: chrono::Utc::now(), creator: "test".to_string(), comment: "test blueprint".to_string(), + report: PlanningReport::new(id), }; datastore diff --git a/nexus/src/app/background/tasks/blueprint_load.rs b/nexus/src/app/background/tasks/blueprint_load.rs index 7f6f0aa5ba4..d2d9c7c380e 100644 --- a/nexus/src/app/background/tasks/blueprint_load.rs +++ b/nexus/src/app/background/tasks/blueprint_load.rs @@ -195,7 +195,7 @@ mod test { use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::{ Blueprint, BlueprintTarget, CockroachDbPreserveDowngrade, - OximeterReadMode, 
PendingMgsUpdates, + OximeterReadMode, PendingMgsUpdates, PlanningReport, }; use omicron_common::api::external::Generation; use omicron_uuid_kinds::BlueprintUuid; @@ -232,6 +232,7 @@ mod test { time_created: now_db_precision(), creator: "test".to_string(), comment: "test blueprint".to_string(), + report: PlanningReport::new(id), }, ) } diff --git a/nexus/src/app/background/tasks/blueprint_planner.rs b/nexus/src/app/background/tasks/blueprint_planner.rs index f94a72e3597..e01f9cdf346 100644 --- a/nexus/src/app/background/tasks/blueprint_planner.rs +++ b/nexus/src/app/background/tasks/blueprint_planner.rs @@ -11,6 +11,7 @@ use nexus_auth::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use nexus_reconfigurator_planning::planner::Planner; +use nexus_reconfigurator_planning::planner::PlannerRng; use nexus_reconfigurator_preparation::PlanningInputFromDb; use nexus_types::deployment::ReconfiguratorChickenSwitchesView; use nexus_types::deployment::{Blueprint, BlueprintTarget}; @@ -140,6 +141,7 @@ impl BlueprintPlanner { &input, "blueprint_planner", &collection, + PlannerRng::from_entropy(), ) { Ok(planner) => planner, Err(error) => { @@ -249,8 +251,13 @@ impl BlueprintPlanner { } // We have a new target! + let report = blueprint.report.clone(); self.tx_blueprint.send_replace(Some(Arc::new((target, blueprint)))); - BlueprintPlannerStatus::Targeted { parent_blueprint_id, blueprint_id } + BlueprintPlannerStatus::Targeted { + parent_blueprint_id, + blueprint_id, + report, + } } } @@ -347,8 +354,10 @@ mod test { BlueprintPlannerStatus::Targeted { parent_blueprint_id, blueprint_id, + report, } if parent_blueprint_id == initial_blueprint.id - && blueprint_id != initial_blueprint.id => + && blueprint_id != initial_blueprint.id + && blueprint_id == report.blueprint_id => { blueprint_id } diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index 890974d125e..43f6f558a4b 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -7,6 +7,7 @@ use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_reconfigurator_planning::planner::Planner; +use nexus_reconfigurator_planning::planner::PlannerRng; use nexus_reconfigurator_preparation::PlanningInputFromDb; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintMetadata; @@ -188,6 +189,7 @@ impl super::Nexus { &planning_context.planning_input, &planning_context.creator, &inventory, + PlannerRng::from_entropy(), ) .map_err(|error| { Error::internal_error(&format!( diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 9a76249fb12..c6c666432d1 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -56,6 +56,7 @@ use nexus_types::deployment::OmicronZoneExternalFloatingAddr; use nexus_types::deployment::OmicronZoneExternalFloatingIp; use nexus_types::deployment::OmicronZoneExternalSnatIp; use nexus_types::deployment::OximeterReadMode; +use nexus_types::deployment::PlanningReport; use nexus_types::deployment::blueprint_zone_type; use nexus_types::external_api::views::SledState; use nexus_types::internal_api::params::DnsConfigParams; @@ -956,8 +957,9 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { .blueprint_sleds .take() .expect("should have already made blueprint sled configs"); + let id = BlueprintUuid::new_v4(); let blueprint = Blueprint { - id: BlueprintUuid::new_v4(), + id, sleds, pending_mgs_updates: PendingMgsUpdates::new(), parent_blueprint_id: None, @@ -975,6 +977,7 @@ 
impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { time_created: Utc::now(), creator: "nexus-test-utils".to_string(), comment: "initial test blueprint".to_string(), + report: PlanningReport::new(id), }; self.initial_blueprint_id = Some(blueprint.id); diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 739100f3dcd..e6ec79c937f 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -70,6 +70,7 @@ mod clickhouse; pub mod execution; mod network_resources; mod planning_input; +mod planning_report; mod zone_type; use crate::inventory::BaseboardId; @@ -120,6 +121,19 @@ pub use planning_input::TargetReleaseDescription; pub use planning_input::TufRepoContentsError; pub use planning_input::TufRepoPolicy; pub use planning_input::ZpoolFilter; +pub use planning_report::CockroachdbUnsafeToShutdown; +pub use planning_report::PlanningAddStepReport; +pub use planning_report::PlanningCockroachdbSettingsStepReport; +pub use planning_report::PlanningDecommissionStepReport; +pub use planning_report::PlanningExpungeStepReport; +pub use planning_report::PlanningMgsUpdatesStepReport; +pub use planning_report::PlanningNoopImageSourceSkipSledReason; +pub use planning_report::PlanningNoopImageSourceSkipZoneReason; +pub use planning_report::PlanningNoopImageSourceStepReport; +pub use planning_report::PlanningReport; +pub use planning_report::PlanningZoneUpdatesStepReport; +pub use planning_report::ZoneUnsafeToShutdown; +pub use planning_report::ZoneUpdatesWaitingOn; pub use zone_type::BlueprintZoneType; pub use zone_type::DurableDataset; pub use zone_type::blueprint_zone_type; @@ -233,12 +247,17 @@ pub struct Blueprint { /// when this blueprint was generated (for debugging) #[daft(ignore)] pub time_created: chrono::DateTime, + /// identity of the component that generated the blueprint (for debugging) /// This would generally be the Uuid of a Nexus instance. pub creator: String, + /// human-readable string describing why this blueprint was created /// (for debugging) pub comment: String, + + /// Report on the planning session that resulted in this blueprint + pub report: PlanningReport, } impl Blueprint { @@ -632,6 +651,7 @@ impl fmt::Display for BlueprintDisplay<'_> { time_created: _, creator: _, comment: _, + report, } = self.blueprint; writeln!(f, "blueprint {}", id)?; @@ -744,6 +764,8 @@ impl fmt::Display for BlueprintDisplay<'_> { )?; } + writeln!(f, "\n{report}")?; + Ok(()) } } diff --git a/nexus/types/src/deployment/blueprint_diff.rs b/nexus/types/src/deployment/blueprint_diff.rs index 79aed0ed89b..a29cb57317f 100644 --- a/nexus/types/src/deployment/blueprint_diff.rs +++ b/nexus/types/src/deployment/blueprint_diff.rs @@ -76,6 +76,7 @@ impl<'a> BlueprintDiffSummary<'a> { oximeter_read_mode, creator: _, comment: _, + report: _, } = &self.diff; // Did we modify, add, or remove any sleds? diff --git a/nexus/types/src/deployment/planning_report.rs b/nexus/types/src/deployment/planning_report.rs new file mode 100644 index 00000000000..48bcadd2951 --- /dev/null +++ b/nexus/types/src/deployment/planning_report.rs @@ -0,0 +1,947 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types representing a report on a planning run that produced a blueprint. 
+ +use super::ArtifactHash; +use super::BlueprintZoneConfig; +use super::BlueprintZoneImageSource; +use super::CockroachDbPreserveDowngrade; +use super::PendingMgsUpdates; +use super::PlannerChickenSwitches; + +use daft::Diffable; +use omicron_common::policy::COCKROACHDB_REDUNDANCY; +use omicron_uuid_kinds::BlueprintUuid; +use omicron_uuid_kinds::MupdateOverrideUuid; +use omicron_uuid_kinds::OmicronZoneUuid; +use omicron_uuid_kinds::PhysicalDiskUuid; +use omicron_uuid_kinds::SledUuid; +use omicron_uuid_kinds::ZpoolUuid; +use schemars::JsonSchema; +use serde::Deserialize; +use serde::Serialize; + +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::fmt; + +/// A full blueprint planning report. Other than the blueprint ID, each +/// field corresponds to a step in the update planner, i.e., a subroutine +/// of `omicron_nexus::reconfigurator::planning::Planner::do_plan`. +/// +/// The intent of a planning report is to capture information useful to an +/// operator or developer about the planning process itself, especially if +/// it has become "stuck" (unable to proceed with an update). It is *not* a +/// summary of the plan (blueprint), but rather a description of non-fatal +/// conditions the planner is waiting on, unexpected or invalid +/// configurations encountered during planning, etc. The planner may make +/// internal decisions based on the step reports; the intent is that an +/// operator may make administrative decisions based on the full report. +/// +/// Only successful planning runs are currently covered by this report. +/// Failures to plan (i.e., to generate a valid blueprint) are represented +/// by `nexus-reconfigurator-planning::blueprint_builder::Error`. +#[derive( + Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Diffable, JsonSchema, +)] +#[must_use = "an unread report is not actionable"] +pub struct PlanningReport { + /// The blueprint produced by the planning run this report describes. + pub blueprint_id: BlueprintUuid, + + /// The set of "chicken switches" in effect for this planning run. + pub chicken_switches: PlannerChickenSwitches, + + // Step reports. 
+ pub expunge: PlanningExpungeStepReport, + pub decommission: PlanningDecommissionStepReport, + pub noop_image_source: PlanningNoopImageSourceStepReport, + pub mgs_updates: PlanningMgsUpdatesStepReport, + pub add: PlanningAddStepReport, + pub zone_updates: PlanningZoneUpdatesStepReport, + pub cockroachdb_settings: PlanningCockroachdbSettingsStepReport, +} + +impl PlanningReport { + pub fn new(blueprint_id: BlueprintUuid) -> Self { + Self { + blueprint_id, + chicken_switches: PlannerChickenSwitches::default(), + expunge: PlanningExpungeStepReport::new(), + decommission: PlanningDecommissionStepReport::new(), + noop_image_source: PlanningNoopImageSourceStepReport::new(), + mgs_updates: PlanningMgsUpdatesStepReport::new( + PendingMgsUpdates::new(), + ), + add: PlanningAddStepReport::new(), + zone_updates: PlanningZoneUpdatesStepReport::new(), + cockroachdb_settings: PlanningCockroachdbSettingsStepReport::new(), + } + } + + pub fn is_empty(&self) -> bool { + self.expunge.is_empty() + && self.decommission.is_empty() + && self.noop_image_source.is_empty() + && self.mgs_updates.is_empty() + && self.add.is_empty() + && self.zone_updates.is_empty() + && self.cockroachdb_settings.is_empty() + } +} + +impl fmt::Display for PlanningReport { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.is_empty() { + writeln!( + f, + "Nothing to report on planning for blueprint {}.", + self.blueprint_id, + )?; + } else { + let Self { + blueprint_id, + chicken_switches, + expunge, + decommission, + noop_image_source, + mgs_updates, + add, + zone_updates, + cockroachdb_settings, + } = self; + writeln!(f, "Planning report for blueprint {blueprint_id}:")?; + if *chicken_switches != PlannerChickenSwitches::default() { + writeln!( + f, + "Chicken switches:\n{}", + chicken_switches.display() + )?; + } + expunge.fmt(f)?; + decommission.fmt(f)?; + noop_image_source.fmt(f)?; + mgs_updates.fmt(f)?; + add.fmt(f)?; + zone_updates.fmt(f)?; + cockroachdb_settings.fmt(f)?; + } + Ok(()) + } +} + +#[derive( + Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Diffable, JsonSchema, +)] +pub struct PlanningExpungeStepReport { + /// Expunged disks not present in the parent blueprint. + pub orphan_disks: BTreeMap, +} + +impl PlanningExpungeStepReport { + pub fn new() -> Self { + Self { orphan_disks: BTreeMap::new() } + } + + pub fn is_empty(&self) -> bool { + self.orphan_disks.is_empty() + } +} + +impl fmt::Display for PlanningExpungeStepReport { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let Self { orphan_disks } = self; + if !orphan_disks.is_empty() { + writeln!( + f, + "* planning input contained expunged disks \ + not present in parent blueprint:", + )?; + for (sled, disk) in orphan_disks.iter() { + writeln!(f, " * sled {sled}, disk {disk}",)?; + } + } + Ok(()) + } +} + +#[derive( + Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Diffable, JsonSchema, +)] +pub struct PlanningDecommissionStepReport { + /// Decommissioned sleds that unexpectedly appeared as commissioned. 
+ pub zombie_sleds: Vec, +} + +impl PlanningDecommissionStepReport { + pub fn new() -> Self { + Self { zombie_sleds: Vec::new() } + } + + pub fn is_empty(&self) -> bool { + self.zombie_sleds.is_empty() + } +} + +impl fmt::Display for PlanningDecommissionStepReport { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let Self { zombie_sleds } = self; + if !zombie_sleds.is_empty() { + let (n, s) = plural_vec(zombie_sleds); + writeln!( + f, + "* {n} decommissioned sled{s} returned by `SledFilter::Commissioned`: {}", + zombie_sleds + .iter() + .map(|sled_id| format!("{sled_id}")) + .collect::>() + .join(", ") + )?; + } + Ok(()) + } +} + +/// How many of the total install-dataset zones were noop-converted to use +/// the artifact store on a particular sled. +#[derive( + Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Diffable, JsonSchema, +)] +pub struct PlanningNoopImageSourceConvertedZones { + pub num_eligible: usize, + pub num_dataset: usize, +} + +#[derive( + Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Diffable, JsonSchema, +)] +pub struct PlanningNoopImageSourceStepReport { + pub no_target_release: bool, + pub skipped_sleds: + BTreeMap, + pub skipped_zones: + BTreeMap, + pub converted_zones: + BTreeMap, +} + +impl PlanningNoopImageSourceStepReport { + pub fn new() -> Self { + Self { + no_target_release: false, + skipped_sleds: BTreeMap::new(), + skipped_zones: BTreeMap::new(), + converted_zones: BTreeMap::new(), + } + } + + pub fn is_empty(&self) -> bool { + !self.no_target_release + && self.skipped_sleds.is_empty() + && self.skipped_zones.is_empty() + && self.converted_zones.is_empty() + } + + pub fn skip_sled( + &mut self, + sled_id: SledUuid, + reason: PlanningNoopImageSourceSkipSledReason, + ) { + self.skipped_sleds.insert(sled_id, reason); + } + + pub fn skip_zone( + &mut self, + zone_id: OmicronZoneUuid, + reason: PlanningNoopImageSourceSkipZoneReason, + ) { + self.skipped_zones.insert(zone_id, reason); + } + + pub fn converted_zones( + &mut self, + sled_id: SledUuid, + num_eligible: usize, + num_dataset: usize, + ) { + self.converted_zones.insert( + sled_id, + PlanningNoopImageSourceConvertedZones { num_eligible, num_dataset }, + ); + } +} + +impl fmt::Display for PlanningNoopImageSourceStepReport { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let Self { + no_target_release, + skipped_sleds, + skipped_zones: _, + converted_zones, + } = self; + + if *no_target_release { + return writeln!( + f, + "* Skipping noop image source check for all sleds (no current TUF repo)", + ); + } + + for (sled_id, reason) in skipped_sleds.iter() { + writeln!( + f, + "* Skipping noop image source check on sled {sled_id}: {reason}" + )?; + } + + for ( + sled_id, + PlanningNoopImageSourceConvertedZones { num_eligible, num_dataset }, + ) in converted_zones.iter() + { + if *num_eligible > 0 && *num_dataset > 0 { + writeln!( + f, + "* Noop converting {num_eligible}/{num_dataset} install-dataset zones \ + to artifact store on sled {sled_id}", + )?; + } + } + + Ok(()) + } +} + +#[derive( + Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Diffable, JsonSchema, +)] +#[serde(rename_all = "snake_case", tag = "type")] +pub enum PlanningNoopImageSourceSkipSledReason { + AllZonesAlreadyArtifact { num_total: usize }, + SledNotInInventory, + ErrorRetrievingZoneManifest { error: String }, + RemoveMupdateOverride { id: MupdateOverrideUuid }, +} + +impl fmt::Display for PlanningNoopImageSourceSkipSledReason { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + 
Self::AllZonesAlreadyArtifact { num_total } => { + write!(f, "all {num_total} zones are already from artifacts") + } + Self::SledNotInInventory => { + write!(f, "sled not present in latest inventory collection") + } + Self::ErrorRetrievingZoneManifest { error } => { + write!( + f, + "sled-agent encountered error retrieving zone manifest \ + (this is abnormal): {error}" + ) + } + Self::RemoveMupdateOverride { id } => { + write!( + f, + "blueprint has get_remove_mupdate_override set for sled: {id}", + ) + } + } + } +} + +#[derive( + Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Diffable, JsonSchema, +)] +#[serde(rename_all = "snake_case", tag = "type")] +pub enum PlanningNoopImageSourceSkipZoneReason { + ZoneNotInManifest { + zone_kind: String, + file_name: String, + }, + InvalidArtifact { + zone_kind: String, + file_name: String, + error: String, + }, + ArtifactNotInRepo { + artifact_hash: ArtifactHash, + zone_kind: String, + file_name: String, + }, +} + +impl fmt::Display for PlanningNoopImageSourceSkipZoneReason { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::ZoneNotInManifest { file_name, .. } => { + write!(f, "artifact not found in zone manifest: {file_name}") + } + Self::InvalidArtifact { error, .. } => { + write!( + f, + "zone manifest inventory indicated install dataset artifact \ + is invalid, not using artifact (this is abnormal): {error}" + ) + } + Self::ArtifactNotInRepo { .. } => { + write!(f, "install dataset artifact hash not found in TUF repo") + } + } + } +} + +#[derive( + Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Diffable, JsonSchema, +)] +pub struct PlanningMgsUpdatesStepReport { + pub pending_mgs_updates: PendingMgsUpdates, +} + +impl PlanningMgsUpdatesStepReport { + pub fn new(pending_mgs_updates: PendingMgsUpdates) -> Self { + Self { pending_mgs_updates } + } + + pub fn is_empty(&self) -> bool { + self.pending_mgs_updates.is_empty() + } +} + +impl fmt::Display for PlanningMgsUpdatesStepReport { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let Self { pending_mgs_updates } = self; + if !pending_mgs_updates.is_empty() { + let n = pending_mgs_updates.len(); + let s = plural(n); + writeln!(f, "* {n} pending MGS update{s}:")?; + for update in pending_mgs_updates.iter() { + writeln!( + f, + " * {}: {:?}", + update.baseboard_id, update.details + )?; + } + } + Ok(()) + } +} + +/// How many discretionary zones we actually placed out of how many we +/// wanted to place. +#[derive( + Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Diffable, JsonSchema, +)] +pub struct PlanningAddOutOfEligibleSleds { + pub placed: usize, + pub wanted_to_place: usize, +} + +/// We have at least the minimum required number of zones of a given kind. 
+#[derive( + Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Diffable, JsonSchema, +)] +pub struct PlanningAddSufficientZonesExist { + pub target_count: usize, + pub num_existing: usize, +} + +#[derive( + Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Diffable, JsonSchema, +)] +pub struct PlanningAddStepReport { + pub sleds_without_ntp_zones_in_inventory: BTreeSet, + pub sleds_without_zpools_for_ntp_zones: BTreeSet, + pub sleds_waiting_for_ntp_zone: BTreeSet, + pub sleds_getting_ntp_and_discretionary_zones: BTreeSet, + pub sleds_missing_ntp_zone: BTreeSet, + pub sleds_missing_crucible_zone: BTreeMap>, + + /// Discretionary zone kind → (placed, wanted to place) + pub out_of_eligible_sleds: BTreeMap, + + /// Discretionary zone kind → (wanted to place, num existing) + pub sufficient_zones_exist: + BTreeMap, + + /// Sled ID → kinds of discretionary zones placed there + // TODO: make `sled_add_zone_*` methods return the added zone config + // so that we can report it here. + pub discretionary_zones_placed: BTreeMap>, +} + +impl PlanningAddStepReport { + pub fn new() -> Self { + Self { + sleds_without_ntp_zones_in_inventory: BTreeSet::new(), + sleds_without_zpools_for_ntp_zones: BTreeSet::new(), + sleds_waiting_for_ntp_zone: BTreeSet::new(), + sleds_getting_ntp_and_discretionary_zones: BTreeSet::new(), + sleds_missing_ntp_zone: BTreeSet::new(), + sleds_missing_crucible_zone: BTreeMap::new(), + out_of_eligible_sleds: BTreeMap::new(), + sufficient_zones_exist: BTreeMap::new(), + discretionary_zones_placed: BTreeMap::new(), + } + } + + pub fn is_empty(&self) -> bool { + self.sleds_without_ntp_zones_in_inventory.is_empty() + && self.sleds_without_zpools_for_ntp_zones.is_empty() + && self.sleds_waiting_for_ntp_zone.is_empty() + && self.sleds_getting_ntp_and_discretionary_zones.is_empty() + && self.sleds_missing_ntp_zone.is_empty() + && self.sleds_missing_crucible_zone.is_empty() + && self.out_of_eligible_sleds.is_empty() + && self.discretionary_zones_placed.is_empty() + } + + pub fn any_discretionary_zones_placed(&self) -> bool { + !self.discretionary_zones_placed.is_empty() + } + + pub fn missing_crucible_zone( + &mut self, + sled_id: SledUuid, + zpool_id: ZpoolUuid, + ) { + self.sleds_missing_crucible_zone + .entry(sled_id) + .and_modify(|pools| pools.push(zpool_id)) + .or_insert_with(|| vec![zpool_id]); + } + + pub fn out_of_eligible_sleds( + &mut self, + zone_kind: &str, + placed: usize, + wanted_to_place: usize, + ) { + self.out_of_eligible_sleds.insert( + zone_kind.to_owned(), + PlanningAddOutOfEligibleSleds { placed, wanted_to_place }, + ); + } + + pub fn sufficient_zones_exist( + &mut self, + zone_kind: &str, + target_count: usize, + num_existing: usize, + ) { + self.sufficient_zones_exist.insert( + zone_kind.to_owned(), + PlanningAddSufficientZonesExist { target_count, num_existing }, + ); + } + + pub fn discretionary_zone_placed( + &mut self, + sled_id: SledUuid, + zone_kind: &str, + ) { + self.discretionary_zones_placed + .entry(sled_id) + .and_modify(|kinds| kinds.push(zone_kind.to_owned())) + .or_insert_with(|| vec![zone_kind.to_owned()]); + } +} + +impl fmt::Display for PlanningAddStepReport { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let Self { + sleds_without_ntp_zones_in_inventory, + sleds_without_zpools_for_ntp_zones, + sleds_waiting_for_ntp_zone, + sleds_getting_ntp_and_discretionary_zones, + sleds_missing_ntp_zone, + sleds_missing_crucible_zone, + out_of_eligible_sleds, + sufficient_zones_exist: _, + discretionary_zones_placed, + } = self; + + if 
!sleds_without_ntp_zones_in_inventory.is_empty() { + writeln!( + f, + "* Waiting for NTP zones to appear in inventory on sleds: {}", + sleds_without_ntp_zones_in_inventory + .iter() + .map(|sled_id| format!("{sled_id}")) + .collect::>() + .join(", ") + )?; + } + + if !sleds_without_zpools_for_ntp_zones.is_empty() { + writeln!( + f, + "* No zpools in service for NTP zones on sleds: {}", + sleds_without_zpools_for_ntp_zones + .iter() + .map(|sled_id| format!("{sled_id}")) + .collect::>() + .join(", ") + )?; + } + + if !sleds_waiting_for_ntp_zone.is_empty() { + writeln!( + f, + "* Discretionary zone placement waiting for NTP zones on sleds: {}", + sleds_waiting_for_ntp_zone + .iter() + .map(|sled_id| format!("{sled_id}")) + .collect::>() + .join(", ") + )?; + } + + if !sleds_getting_ntp_and_discretionary_zones.is_empty() { + writeln!( + f, + "* Sleds getting NTP zones and which have other services already, \ + making them eligible for discretionary zones: {}", + sleds_getting_ntp_and_discretionary_zones + .iter() + .map(|sled_id| format!("{sled_id}")) + .collect::>() + .join(", ") + )?; + } + + for sled_id in sleds_missing_ntp_zone { + writeln!(f, "* Missing NTP zone on sled {sled_id}",)?; + } + + for (sled_id, zpools) in sleds_missing_crucible_zone { + for zpool_id in zpools { + writeln!( + f, + "* Missing Crucible zone for sled {sled_id}, zpool {zpool_id}", + )?; + } + } + + for (kind, PlanningAddOutOfEligibleSleds { placed, wanted_to_place }) in + out_of_eligible_sleds.iter() + { + writeln!( + f, + "* Only placed {placed}/{wanted_to_place} desired {kind} zones" + )?; + } + + if !discretionary_zones_placed.is_empty() { + writeln!(f, "* Discretionary zones placed:")?; + for (sled_id, kinds) in discretionary_zones_placed.iter() { + let (n, s) = plural_vec(kinds); + writeln!( + f, + " * {n} zone{s} on sled {sled_id}: {}", + kinds.join(", ") + )?; + } + } + + Ok(()) + } +} + +/// We have at least the minimum required number of zones of a given kind. +#[derive( + Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Diffable, JsonSchema, +)] +pub struct PlanningOutOfDateZone { + pub zone_config: BlueprintZoneConfig, + pub desired_image_source: BlueprintZoneImageSource, +} + +#[derive( + Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Diffable, JsonSchema, +)] +pub struct PlanningZoneUpdatesStepReport { + /// What are we waiting on to start zone updates? 
+ pub waiting_on: Option, + + pub out_of_date_zones: BTreeMap>, + pub expunged_zones: BTreeMap>, + pub updated_zones: BTreeMap>, + pub unsafe_zones: BTreeMap, +} + +impl PlanningZoneUpdatesStepReport { + pub fn new() -> Self { + Self { + waiting_on: None, + out_of_date_zones: BTreeMap::new(), + expunged_zones: BTreeMap::new(), + updated_zones: BTreeMap::new(), + unsafe_zones: BTreeMap::new(), + } + } + + pub fn waiting_on(waiting_on: ZoneUpdatesWaitingOn) -> Self { + let mut new = Self::new(); + new.waiting_on = Some(waiting_on); + new + } + + pub fn is_empty(&self) -> bool { + self.waiting_on.is_none() + && self.out_of_date_zones.is_empty() + && self.expunged_zones.is_empty() + && self.updated_zones.is_empty() + && self.unsafe_zones.is_empty() + } + + pub fn out_of_date_zone( + &mut self, + sled_id: SledUuid, + zone_config: &BlueprintZoneConfig, + desired_image_source: BlueprintZoneImageSource, + ) { + let out_of_date = PlanningOutOfDateZone { + zone_config: zone_config.to_owned(), + desired_image_source, + }; + self.out_of_date_zones + .entry(sled_id) + .and_modify(|zones| zones.push(out_of_date.clone())) + .or_insert_with(|| vec![out_of_date]); + } + + pub fn expunged_zone( + &mut self, + sled_id: SledUuid, + zone_config: &BlueprintZoneConfig, + ) { + self.expunged_zones + .entry(sled_id) + .and_modify(|zones| zones.push(zone_config.to_owned())) + .or_insert_with(|| vec![zone_config.to_owned()]); + } + + pub fn updated_zone( + &mut self, + sled_id: SledUuid, + zone_config: &BlueprintZoneConfig, + ) { + self.updated_zones + .entry(sled_id) + .and_modify(|zones| zones.push(zone_config.to_owned())) + .or_insert_with(|| vec![zone_config.to_owned()]); + } + + pub fn unsafe_zone( + &mut self, + zone: &BlueprintZoneConfig, + reason: ZoneUnsafeToShutdown, + ) { + self.unsafe_zones.insert(zone.clone(), reason); + } +} + +impl fmt::Display for PlanningZoneUpdatesStepReport { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let Self { + waiting_on, + out_of_date_zones, + expunged_zones, + updated_zones, + unsafe_zones, + } = self; + + if let Some(waiting_on) = waiting_on { + writeln!(f, "* Zone updates waiting on {}", waiting_on.as_str())?; + } + + if !expunged_zones.is_empty() { + let (n, s) = plural_map_of_vec(expunged_zones); + writeln!(f, "* {n} out-of-date zone{s} expunged:")?; + for (sled_id, zones) in expunged_zones.iter() { + for zone in zones { + writeln!( + f, + " * sled {}, zone {} ({})", + sled_id, + zone.id, + zone.zone_type.kind().report_str(), + )?; + } + } + } + + if !updated_zones.is_empty() { + let (n, s) = plural_map_of_vec(updated_zones); + writeln!(f, "* {n} out-of-date zone{s} updated in-place:")?; + for (sled_id, zones) in updated_zones.iter() { + for zone in zones { + writeln!( + f, + " * sled {}, zone {} ({})", + sled_id, + zone.id, + zone.zone_type.kind().report_str(), + )?; + } + } + } + + if !out_of_date_zones.is_empty() { + let (n, s) = plural_map_of_vec(out_of_date_zones); + writeln!(f, "* {n} remaining out-of-date zone{s}")?; + } + + if !unsafe_zones.is_empty() { + let (n, s) = plural_map(unsafe_zones); + writeln!(f, "* {n} zone{s} not ready to shut down safely:")?; + for (zone, reason) in unsafe_zones.iter() { + writeln!( + f, + " * zone {} ({}): {}", + zone.id, + zone.zone_type.kind().report_str(), + reason, + )?; + } + } + + Ok(()) + } +} + +#[derive( + Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Diffable, JsonSchema, +)] +#[serde(rename_all = "snake_case", tag = "type")] +pub enum ZoneUpdatesWaitingOn { + /// Waiting on discretionary zone 
placement. + DiscretionaryZones, + + /// Waiting on updates to RoT / SP / Host OS / etc. + PendingMgsUpdates, +} + +impl ZoneUpdatesWaitingOn { + pub fn as_str(&self) -> &'static str { + match self { + Self::DiscretionaryZones => "discretionary zones", + Self::PendingMgsUpdates => { + "pending MGS updates (RoT / SP / Host OS / etc.)" + } + } + } +} + +#[derive( + Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Diffable, JsonSchema, +)] +#[serde(rename_all = "snake_case", tag = "type")] +pub enum ZoneUnsafeToShutdown { + Cockroachdb { reason: CockroachdbUnsafeToShutdown }, + BoundaryNtp { total_boundary_ntp_zones: usize, synchronized_count: usize }, +} + +impl fmt::Display for ZoneUnsafeToShutdown { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Cockroachdb { reason } => write!(f, "{reason}"), + Self::BoundaryNtp { + total_boundary_ntp_zones: t, + synchronized_count: s, + } => write!(f, "only {s}/{t} boundary NTP zones are synchronized"), + } + } +} + +#[derive( + Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Diffable, JsonSchema, +)] +#[serde(rename_all = "snake_case", tag = "type")] +pub enum CockroachdbUnsafeToShutdown { + MissingLiveNodesStat, + MissingUnderreplicatedStat, + NotEnoughLiveNodes { live_nodes: u64 }, + NotEnoughNodes, + UnderreplicatedRanges { n: u64 }, +} + +impl fmt::Display for CockroachdbUnsafeToShutdown { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::MissingLiveNodesStat => write!(f, "missing live_nodes stat"), + Self::MissingUnderreplicatedStat => { + write!(f, "missing ranges_underreplicated stat") + } + Self::NotEnoughLiveNodes { live_nodes } => { + write!( + f, + "not enough live nodes: {live_nodes} < {COCKROACHDB_REDUNDANCY}" + ) + } + Self::NotEnoughNodes => write!(f, "not enough nodes"), + Self::UnderreplicatedRanges { n } => { + if *n > 0 { + write!(f, "{n} > 0 underreplicated ranges") + } else { + write!( + f, + "no underreplicated ranges (this shouldn't happen)" + ) + } + } + } + } +} + +#[derive( + Clone, Debug, Deserialize, Serialize, PartialEq, Eq, Diffable, JsonSchema, +)] +pub struct PlanningCockroachdbSettingsStepReport { + pub preserve_downgrade: CockroachDbPreserveDowngrade, +} + +impl PlanningCockroachdbSettingsStepReport { + pub fn new() -> Self { + Self { preserve_downgrade: CockroachDbPreserveDowngrade::DoNotModify } + } + + pub fn is_empty(&self) -> bool { + self.preserve_downgrade == CockroachDbPreserveDowngrade::DoNotModify + } +} + +impl fmt::Display for PlanningCockroachdbSettingsStepReport { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if !self.is_empty() { + let PlanningCockroachdbSettingsStepReport { preserve_downgrade } = + self; + writeln!( + f, + "* Will ensure cockroachdb setting: {preserve_downgrade}" + )?; + } + Ok(()) + } +} + +fn plural(n: usize) -> &'static str { + if n == 1 { "" } else { "s" } +} + +fn plural_vec(vec: &Vec) -> (usize, &'static str) { + let n = vec.len(); + (n, plural(n)) +} + +fn plural_map(map: &BTreeMap) -> (usize, &'static str) { + let n = map.len(); + (n, plural(n)) +} + +fn plural_map_of_vec(map: &BTreeMap>) -> (usize, &'static str) { + let n = map.values().map(|v| v.len()).sum(); + (n, plural(n)) +} diff --git a/nexus/types/src/internal_api/background.rs b/nexus/types/src/internal_api/background.rs index 1395d9c7131..e2ca1c4ee99 100644 --- a/nexus/types/src/internal_api/background.rs +++ b/nexus/types/src/internal_api/background.rs @@ -2,6 +2,7 @@ // License, v. 2.0. 
If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use crate::deployment::PlanningReport; use crate::external_api::views; use chrono::DateTime; use chrono::Utc; @@ -460,6 +461,7 @@ impl slog::KV for DebugDatasetsRendezvousStats { } /// The status of a `blueprint_planner` background task activation. +#[allow(clippy::large_enum_variant)] #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)] pub enum BlueprintPlannerStatus { /// Automatic blueprint planning has been explicitly disabled @@ -479,7 +481,11 @@ pub enum BlueprintPlannerStatus { /// Planing succeeded, and we saved and made the new blueprint the /// current target. - Targeted { parent_blueprint_id: BlueprintUuid, blueprint_id: BlueprintUuid }, + Targeted { + parent_blueprint_id: BlueprintUuid, + blueprint_id: BlueprintUuid, + report: PlanningReport, + }, } /// The status of a `alert_dispatcher` background task activation. diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 7f927ff8e88..4e4dcd2fbc3 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -2561,6 +2561,14 @@ } ] }, + "report": { + "description": "Report on the planning session that resulted in this blueprint", + "allOf": [ + { + "$ref": "#/components/schemas/PlanningReport" + } + ] + }, "sleds": { "description": "A map of sled id -> desired configuration of the sled.", "type": "object", @@ -2593,6 +2601,7 @@ "oximeter_read_mode", "oximeter_read_version", "pending_mgs_updates", + "report", "sleds", "target_release_minimum_generation", "time_created" @@ -3771,6 +3780,92 @@ } ] }, + "CockroachdbUnsafeToShutdown": { + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "missing_live_nodes_stat" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "missing_underreplicated_stat" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "live_nodes": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "type": { + "type": "string", + "enum": [ + "not_enough_live_nodes" + ] + } + }, + "required": [ + "live_nodes", + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "not_enough_nodes" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "n": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "type": { + "type": "string", + "enum": [ + "underreplicated_ranges" + ] + } + }, + "required": [ + "n", + "type" + ] + } + ] + }, "CompletedAttempt": { "description": "externally-exposed status for a completed attempt", "type": "object", @@ -6294,6 +6389,510 @@ "add_zones_with_mupdate_override" ] }, + "PlanningAddOutOfEligibleSleds": { + "description": "How many discretionary zones we actually placed out of how many we wanted to place.", + "type": "object", + "properties": { + "placed": { + "type": "integer", + "format": "uint", + "minimum": 0 + }, + "wanted_to_place": { + "type": "integer", + "format": "uint", + "minimum": 0 + } + }, + "required": [ + "placed", + "wanted_to_place" + ] + }, + "PlanningAddStepReport": { + "type": "object", + "properties": { + "discretionary_zones_placed": { + "description": "Sled ID → kinds of discretionary zones placed there", + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "type": "string" + } + } + }, + 
"out_of_eligible_sleds": { + "description": "Discretionary zone kind → (placed, wanted to place)", + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/PlanningAddOutOfEligibleSleds" + } + }, + "sleds_getting_ntp_and_discretionary_zones": { + "type": "array", + "items": { + "$ref": "#/components/schemas/TypedUuidForSledKind" + }, + "uniqueItems": true + }, + "sleds_missing_crucible_zone": { + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/components/schemas/TypedUuidForZpoolKind" + } + } + }, + "sleds_missing_ntp_zone": { + "type": "array", + "items": { + "$ref": "#/components/schemas/TypedUuidForSledKind" + }, + "uniqueItems": true + }, + "sleds_waiting_for_ntp_zone": { + "type": "array", + "items": { + "$ref": "#/components/schemas/TypedUuidForSledKind" + }, + "uniqueItems": true + }, + "sleds_without_ntp_zones_in_inventory": { + "type": "array", + "items": { + "$ref": "#/components/schemas/TypedUuidForSledKind" + }, + "uniqueItems": true + }, + "sleds_without_zpools_for_ntp_zones": { + "type": "array", + "items": { + "$ref": "#/components/schemas/TypedUuidForSledKind" + }, + "uniqueItems": true + }, + "sufficient_zones_exist": { + "description": "Discretionary zone kind → (wanted to place, num existing)", + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/PlanningAddSufficientZonesExist" + } + } + }, + "required": [ + "discretionary_zones_placed", + "out_of_eligible_sleds", + "sleds_getting_ntp_and_discretionary_zones", + "sleds_missing_crucible_zone", + "sleds_missing_ntp_zone", + "sleds_waiting_for_ntp_zone", + "sleds_without_ntp_zones_in_inventory", + "sleds_without_zpools_for_ntp_zones", + "sufficient_zones_exist" + ] + }, + "PlanningAddSufficientZonesExist": { + "description": "We have at least the minimum required number of zones of a given kind.", + "type": "object", + "properties": { + "num_existing": { + "type": "integer", + "format": "uint", + "minimum": 0 + }, + "target_count": { + "type": "integer", + "format": "uint", + "minimum": 0 + } + }, + "required": [ + "num_existing", + "target_count" + ] + }, + "PlanningCockroachdbSettingsStepReport": { + "type": "object", + "properties": { + "preserve_downgrade": { + "$ref": "#/components/schemas/CockroachDbPreserveDowngrade" + } + }, + "required": [ + "preserve_downgrade" + ] + }, + "PlanningDecommissionStepReport": { + "type": "object", + "properties": { + "zombie_sleds": { + "description": "Decommissioned sleds that unexpectedly appeared as commissioned.", + "type": "array", + "items": { + "$ref": "#/components/schemas/TypedUuidForSledKind" + } + } + }, + "required": [ + "zombie_sleds" + ] + }, + "PlanningExpungeStepReport": { + "type": "object", + "properties": { + "orphan_disks": { + "description": "Expunged disks not present in the parent blueprint.", + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/TypedUuidForPhysicalDiskKind" + } + } + }, + "required": [ + "orphan_disks" + ] + }, + "PlanningMgsUpdatesStepReport": { + "type": "object", + "properties": { + "pending_mgs_updates": { + "$ref": "#/components/schemas/PendingMgsUpdates" + } + }, + "required": [ + "pending_mgs_updates" + ] + }, + "PlanningNoopImageSourceConvertedZones": { + "description": "How many of the total install-dataset zones were noop-converted to use the artifact store on a particular sled.", + "type": "object", + "properties": { + "num_dataset": { + "type": "integer", + "format": "uint", + "minimum": 0 + }, + "num_eligible": { 
+ "type": "integer", + "format": "uint", + "minimum": 0 + } + }, + "required": [ + "num_dataset", + "num_eligible" + ] + }, + "PlanningNoopImageSourceSkipSledReason": { + "oneOf": [ + { + "type": "object", + "properties": { + "num_total": { + "type": "integer", + "format": "uint", + "minimum": 0 + }, + "type": { + "type": "string", + "enum": [ + "all_zones_already_artifact" + ] + } + }, + "required": [ + "num_total", + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "sled_not_in_inventory" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "error": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "error_retrieving_zone_manifest" + ] + } + }, + "required": [ + "error", + "type" + ] + }, + { + "type": "object", + "properties": { + "id": { + "$ref": "#/components/schemas/TypedUuidForMupdateOverrideKind" + }, + "type": { + "type": "string", + "enum": [ + "remove_mupdate_override" + ] + } + }, + "required": [ + "id", + "type" + ] + } + ] + }, + "PlanningNoopImageSourceSkipZoneReason": { + "oneOf": [ + { + "type": "object", + "properties": { + "file_name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "zone_not_in_manifest" + ] + }, + "zone_kind": { + "type": "string" + } + }, + "required": [ + "file_name", + "type", + "zone_kind" + ] + }, + { + "type": "object", + "properties": { + "error": { + "type": "string" + }, + "file_name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "invalid_artifact" + ] + }, + "zone_kind": { + "type": "string" + } + }, + "required": [ + "error", + "file_name", + "type", + "zone_kind" + ] + }, + { + "type": "object", + "properties": { + "artifact_hash": { + "type": "string", + "format": "hex string (32 bytes)" + }, + "file_name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "artifact_not_in_repo" + ] + }, + "zone_kind": { + "type": "string" + } + }, + "required": [ + "artifact_hash", + "file_name", + "type", + "zone_kind" + ] + } + ] + }, + "PlanningNoopImageSourceStepReport": { + "type": "object", + "properties": { + "converted_zones": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/PlanningNoopImageSourceConvertedZones" + } + }, + "no_target_release": { + "type": "boolean" + }, + "skipped_sleds": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/PlanningNoopImageSourceSkipSledReason" + } + }, + "skipped_zones": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/PlanningNoopImageSourceSkipZoneReason" + } + } + }, + "required": [ + "converted_zones", + "no_target_release", + "skipped_sleds", + "skipped_zones" + ] + }, + "PlanningOutOfDateZone": { + "description": "We have at least the minimum required number of zones of a given kind.", + "type": "object", + "properties": { + "desired_image_source": { + "$ref": "#/components/schemas/BlueprintZoneImageSource" + }, + "zone_config": { + "$ref": "#/components/schemas/BlueprintZoneConfig" + } + }, + "required": [ + "desired_image_source", + "zone_config" + ] + }, + "PlanningReport": { + "description": "A full blueprint planning report. 
Other than the blueprint ID, each field corresponds to a step in the update planner, i.e., a subroutine of `omicron_nexus::reconfigurator::planning::Planner::do_plan`.\n\nThe intent of a planning report is to capture information useful to an operator or developer about the planning process itself, especially if it has become \"stuck\" (unable to proceed with an update). It is *not* a summary of the plan (blueprint), but rather a description of non-fatal conditions the planner is waiting on, unexpected or invalid configurations encountered during planning, etc. The planner may make internal decisions based on the step reports; the intent is that an operator may make administrative decisions based on the full report.\n\nOnly successful planning runs are currently covered by this report. Failures to plan (i.e., to generate a valid blueprint) are represented by `nexus-reconfigurator-planning::blueprint_builder::Error`.", + "type": "object", + "properties": { + "add": { + "$ref": "#/components/schemas/PlanningAddStepReport" + }, + "blueprint_id": { + "description": "The blueprint produced by the planning run this report describes.", + "allOf": [ + { + "$ref": "#/components/schemas/TypedUuidForBlueprintKind" + } + ] + }, + "chicken_switches": { + "description": "The set of \"chicken switches\" in effect for this planning run.", + "allOf": [ + { + "$ref": "#/components/schemas/PlannerChickenSwitches" + } + ] + }, + "cockroachdb_settings": { + "$ref": "#/components/schemas/PlanningCockroachdbSettingsStepReport" + }, + "decommission": { + "$ref": "#/components/schemas/PlanningDecommissionStepReport" + }, + "expunge": { + "$ref": "#/components/schemas/PlanningExpungeStepReport" + }, + "mgs_updates": { + "$ref": "#/components/schemas/PlanningMgsUpdatesStepReport" + }, + "noop_image_source": { + "$ref": "#/components/schemas/PlanningNoopImageSourceStepReport" + }, + "zone_updates": { + "$ref": "#/components/schemas/PlanningZoneUpdatesStepReport" + } + }, + "required": [ + "add", + "blueprint_id", + "chicken_switches", + "cockroachdb_settings", + "decommission", + "expunge", + "mgs_updates", + "noop_image_source", + "zone_updates" + ] + }, + "PlanningZoneUpdatesStepReport": { + "type": "object", + "properties": { + "expunged_zones": { + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BlueprintZoneConfig" + } + } + }, + "out_of_date_zones": { + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/components/schemas/PlanningOutOfDateZone" + } + } + }, + "unsafe_zones": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/ZoneUnsafeToShutdown" + } + }, + "updated_zones": { + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BlueprintZoneConfig" + } + } + }, + "waiting_on": { + "nullable": true, + "description": "What are we waiting on to start zone updates?", + "allOf": [ + { + "$ref": "#/components/schemas/ZoneUpdatesWaitingOn" + } + ] + } + }, + "required": [ + "expunged_zones", + "out_of_date_zones", + "unsafe_zones", + "updated_zones" + ] + }, "PortConfigV2": { "type": "object", "properties": { @@ -8091,6 +8690,88 @@ "zone_type" ] }, + "ZoneUnsafeToShutdown": { + "oneOf": [ + { + "type": "object", + "properties": { + "reason": { + "$ref": "#/components/schemas/CockroachdbUnsafeToShutdown" + }, + "type": { + "type": "string", + "enum": [ + "cockroachdb" + ] + } + }, + "required": [ + "reason", + "type" + ] + }, + { + 
"type": "object", + "properties": { + "synchronized_count": { + "type": "integer", + "format": "uint", + "minimum": 0 + }, + "total_boundary_ntp_zones": { + "type": "integer", + "format": "uint", + "minimum": 0 + }, + "type": { + "type": "string", + "enum": [ + "boundary_ntp" + ] + } + }, + "required": [ + "synchronized_count", + "total_boundary_ntp_zones", + "type" + ] + } + ] + }, + "ZoneUpdatesWaitingOn": { + "oneOf": [ + { + "description": "Waiting on discretionary zone placement.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "discretionary_zones" + ] + } + }, + "required": [ + "type" + ] + }, + { + "description": "Waiting on updates to RoT / SP / Host OS / etc.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "pending_mgs_updates" + ] + } + }, + "required": [ + "type" + ] + } + ] + }, "ZpoolName": { "title": "The name of a Zpool", "description": "Zpool names are of the format ox{i,p}_. They are either Internal or External, and should be unique", diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 508733d4f2e..c6c357088fe 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -95,11 +95,9 @@ use nexus_sled_agent_shared::inventory::{ }; use nexus_types::deployment::{ Blueprint, BlueprintDatasetConfig, BlueprintDatasetDisposition, - BlueprintHostPhase2DesiredSlots, BlueprintZoneType, - CockroachDbPreserveDowngrade, blueprint_zone_type, -}; -use nexus_types::deployment::{ - BlueprintSledConfig, OximeterReadMode, PendingMgsUpdates, + BlueprintHostPhase2DesiredSlots, BlueprintSledConfig, BlueprintZoneType, + CockroachDbPreserveDowngrade, OximeterReadMode, PendingMgsUpdates, + PlanningReport, blueprint_zone_type, }; use nexus_types::external_api::views::SledState; use ntp_admin_client::{ @@ -1621,8 +1619,9 @@ pub(crate) fn build_initial_blueprint_from_sled_configs( ); } + let id = BlueprintUuid::new_v4(); Ok(Blueprint { - id: BlueprintUuid::new_v4(), + id, sleds: blueprint_sleds, pending_mgs_updates: PendingMgsUpdates::new(), parent_blueprint_id: None, @@ -1646,6 +1645,7 @@ pub(crate) fn build_initial_blueprint_from_sled_configs( time_created: Utc::now(), creator: "RSS".to_string(), comment: "initial blueprint from rack setup".to_string(), + report: PlanningReport::new(id), }) }