From c9fc67441f776815959f048e869ffc020ce79295 Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 11 Oct 2024 18:47:56 -0700 Subject: [PATCH] [7/n] [reconfigurator] combine inventory and omicron zones (#6739) Currently, the collection process makes two separate queries to sled-agent: one to obtain basic inventory, and the other to obtain Omicron zones. With this approach, it is possible for one of the two queries to fail -- in that case, the `sled_agents` and `omicron_zones` maps will not have the same set of keys. In general, consumers of collections must be able to handle these kinds of failing cases, because collections contain information from a variety of sources, not all of which are guaranteed to work all the time. But this specific case (sled-agent inventory vs Omicron zones) is not intrinsic to the system -- it is an artifact of our APIs. Change the API so that the `/inventory` endpoint also returns Omicron zones. The format remains the same, so that `PUT /omicron-zones` mirrors `GET /inventory | jq .omicron_zones`. I had to make a few changes to our tests, since we now have a unified notion of Omicron zones and sled agents. But this exposed what feels to me like a modeling deficiency in the way our tests work. I hope to fix that up at some point soon. There is also one behavior change to the reconfigurator CLI -- now, `inventory-generate` preserves zone information passed into it from the system. This matches the behavior of inventory generation within tests themselves. 
--- dev-tools/omdb/src/bin/omdb/db.rs | 38 ++-- dev-tools/reconfigurator-cli/src/main.rs | 21 +- .../tests/output/cmd-example-stdout | 88 ++++---- nexus-sled-agent-shared/src/inventory.rs | 1 + nexus/db-model/src/inventory.rs | 34 +--- .../db-queries/src/db/datastore/deployment.rs | 6 +- .../db-queries/src/db/datastore/inventory.rs | 192 +++++++++++------- .../src/db/datastore/physical_disk.rs | 6 +- nexus/inventory/src/builder.rs | 36 +--- nexus/inventory/src/collector.rs | 44 ++-- nexus/inventory/src/examples.rs | 60 +++--- .../reconfigurator/execution/src/datasets.rs | 2 +- nexus/reconfigurator/execution/src/dns.rs | 7 +- .../planning/src/blueprint_builder/builder.rs | 10 +- nexus/reconfigurator/planning/src/example.rs | 13 +- nexus/reconfigurator/planning/src/planner.rs | 45 ++-- nexus/reconfigurator/planning/src/system.rs | 35 ++++ .../background/tasks/sync_service_zone_nat.rs | 4 +- nexus/types/src/deployment.rs | 6 +- nexus/types/src/inventory.rs | 14 +- openapi/sled-agent.json | 25 +-- schema/crdb/dbinit.sql | 2 + sled-agent/api/src/lib.rs | 8 - sled-agent/src/http_entrypoints.rs | 7 - sled-agent/src/rack_setup/service.rs | 7 +- sled-agent/src/sim/http_entrypoints.rs | 7 - sled-agent/src/sim/sled_agent.rs | 1 + sled-agent/src/sled_agent.rs | 11 +- 28 files changed, 339 insertions(+), 391 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index ac41d751bc..29aa0f9376 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -5180,31 +5180,23 @@ fn inv_collection_print_sleds(collection: &Collection) { println!(" reservation: {reservation:?}, quota: {quota:?}"); } - if let Some(zones) = collection.omicron_zones.get(&sled.sled_id) { - println!( - " zones collected from {} at {}", - zones.source, zones.time_collected, - ); - println!( - " zones generation: {} (count: {})", - zones.zones.generation, - zones.zones.zones.len() - ); + println!( + " zones generation: {} (count: {})", + 
sled.omicron_zones.generation, + sled.omicron_zones.zones.len(), + ); - if zones.zones.zones.is_empty() { - continue; - } + if sled.omicron_zones.zones.is_empty() { + continue; + } - println!(" ZONES FOUND"); - for z in &zones.zones.zones { - println!( - " zone {} (type {})", - z.id, - z.zone_type.kind().report_str() - ); - } - } else { - println!(" warning: no zone information found"); + println!(" ZONES FOUND"); + for z in &sled.omicron_zones.zones { + println!( + " zone {} (type {})", + z.id, + z.zone_type.kind().report_str() + ); } } } diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index 4cc7eafcde..f70ac2fc23 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -21,7 +21,6 @@ use nexus_reconfigurator_planning::planner::Planner; use nexus_reconfigurator_planning::system::{ SledBuilder, SledHwInventory, SystemDescription, }; -use nexus_sled_agent_shared::inventory::OmicronZonesConfig; use nexus_sled_agent_shared::inventory::ZoneKind; use nexus_types::deployment::execution; use nexus_types::deployment::execution::blueprint_external_dns_config; @@ -739,25 +738,11 @@ fn cmd_inventory_list( fn cmd_inventory_generate( sim: &mut ReconfiguratorSim, ) -> anyhow::Result> { - let mut builder = + let builder = sim.system.to_collection_builder().context("generating inventory")?; - // For an inventory we just generated from thin air, pretend like each sled - // has no zones on it. - let planning_input = - sim.system.to_planning_input_builder().unwrap().build(); - for sled_id in planning_input.all_sled_ids(SledFilter::Commissioned) { - builder - .found_sled_omicron_zones( - "fake sled agent", - sled_id, - OmicronZonesConfig { - generation: Generation::new(), - zones: vec![], - }, - ) - .context("recording Omicron zones")?; - } + // sim.system carries around Omicron zones, which will make their way into + // the inventory. 
let mut inventory = builder.build(); // Assign collection IDs from the RNG. This enables consistent results when // callers have explicitly seeded the RNG (e.g., in tests). diff --git a/dev-tools/reconfigurator-cli/tests/output/cmd-example-stdout b/dev-tools/reconfigurator-cli/tests/output/cmd-example-stdout index a228c13d3e..b281d0c256 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmd-example-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmd-example-stdout @@ -328,7 +328,7 @@ generated inventory collection b32394d8-7d79-486f-8657-fd5219508181 from configu > blueprint-diff-inventory b32394d8-7d79-486f-8657-fd5219508181 ade5749d-bdf3-4fab-a8ae-00bea01b3a5a from: collection b32394d8-7d79-486f-8657-fd5219508181 to: blueprint ade5749d-bdf3-4fab-a8ae-00bea01b3a5a - MODIFIED SLEDS: + UNCHANGED SLEDS: sled 2eb69596-f081-4e2d-9425-9994926e0832 (active): @@ -348,24 +348,24 @@ to: blueprint ade5749d-bdf3-4fab-a8ae-00bea01b3a5a fake-vendor fake-model serial-d792c8cb-7490-40cb-bb1c-d4917242edf4 - omicron zones generation 1 -> 2: + omicron zones at generation 2: ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ -+ clickhouse fe79023f-c5d5-4be5-ad2c-da4e9e9237e4 in service fd00:1122:3344:102::23 -+ crucible 054f64a5-182c-4c28-8994-d2e082550201 in service fd00:1122:3344:102::26 -+ crucible 3b5bffea-e5ed-44df-8468-fd4fa69757d8 in service fd00:1122:3344:102::27 -+ crucible 53dd7fa4-899e-49ed-9fc2-48222db3e20d in service fd00:1122:3344:102::2a -+ crucible 7db307d4-a6ed-4c47-bddf-6759161bf64a in service fd00:1122:3344:102::2c -+ crucible 95ad9a1d-4063-4874-974c-2fc92830be27 in service fd00:1122:3344:102::29 -+ crucible bc095417-e2f0-4e95-b390-9cc3fc6e3c6d in service fd00:1122:3344:102::28 -+ crucible d90401f1-fbc2-42cb-bf17-309ee0f922fe in service fd00:1122:3344:102::2b -+ crucible 
e8f994c0-0a1b-40e6-8db1-40a8ca89e503 in service fd00:1122:3344:102::2d -+ crucible eaec16c0-0d44-4847-b2d6-31a5151bae52 in service fd00:1122:3344:102::24 -+ crucible f97aa057-6485-45d0-9cb4-4af5b0831d48 in service fd00:1122:3344:102::25 -+ internal_dns 8b8f7c02-7a18-4268-b045-2e286b464c5d in service fd00:1122:3344:1::1 -+ internal_ntp c67dd9a4-0d6c-4e9f-b28d-20003f211f7d in service fd00:1122:3344:102::21 -+ nexus 94b45ce9-d3d8-413a-a76b-865da1f67930 in service fd00:1122:3344:102::22 + clickhouse fe79023f-c5d5-4be5-ad2c-da4e9e9237e4 in service fd00:1122:3344:102::23 + crucible 054f64a5-182c-4c28-8994-d2e082550201 in service fd00:1122:3344:102::26 + crucible 3b5bffea-e5ed-44df-8468-fd4fa69757d8 in service fd00:1122:3344:102::27 + crucible 53dd7fa4-899e-49ed-9fc2-48222db3e20d in service fd00:1122:3344:102::2a + crucible 7db307d4-a6ed-4c47-bddf-6759161bf64a in service fd00:1122:3344:102::2c + crucible 95ad9a1d-4063-4874-974c-2fc92830be27 in service fd00:1122:3344:102::29 + crucible bc095417-e2f0-4e95-b390-9cc3fc6e3c6d in service fd00:1122:3344:102::28 + crucible d90401f1-fbc2-42cb-bf17-309ee0f922fe in service fd00:1122:3344:102::2b + crucible e8f994c0-0a1b-40e6-8db1-40a8ca89e503 in service fd00:1122:3344:102::2d + crucible eaec16c0-0d44-4847-b2d6-31a5151bae52 in service fd00:1122:3344:102::24 + crucible f97aa057-6485-45d0-9cb4-4af5b0831d48 in service fd00:1122:3344:102::25 + internal_dns 8b8f7c02-7a18-4268-b045-2e286b464c5d in service fd00:1122:3344:1::1 + internal_ntp c67dd9a4-0d6c-4e9f-b28d-20003f211f7d in service fd00:1122:3344:102::21 + nexus 94b45ce9-d3d8-413a-a76b-865da1f67930 in service fd00:1122:3344:102::22 sled 32d8d836-4d8a-4e54-8fa9-f31d79c42646 (active): @@ -386,23 +386,23 @@ to: blueprint ade5749d-bdf3-4fab-a8ae-00bea01b3a5a fake-vendor fake-model serial-d7410a1c-e01d-49a4-be9c-f861f086760a - omicron zones generation 1 -> 2: + omicron zones at generation 2: ------------------------------------------------------------------------------------------ zone 
type zone id disposition underlay IP ------------------------------------------------------------------------------------------ -+ crucible 728db429-8621-4e1e-9915-282aadfa27d1 in service fd00:1122:3344:103::24 -+ crucible a999e5fa-3edc-4dac-919a-d7b554cdae58 in service fd00:1122:3344:103::27 -+ crucible b416f299-c23c-46c8-9820-be2b66ffea0a in service fd00:1122:3344:103::28 -+ crucible b5d5491d-b3aa-4727-8b55-f66e0581ea4f in service fd00:1122:3344:103::2c -+ crucible cc1dc86d-bd6f-4929-aa4a-9619012e9393 in service fd00:1122:3344:103::25 -+ crucible cd3bb540-e605-465f-8c62-177ac482d850 in service fd00:1122:3344:103::2a -+ crucible e7dd3e98-7fe7-4827-be7f-395ff9a5f542 in service fd00:1122:3344:103::23 -+ crucible e8971ab3-fb7d-4ad8-aae3-7f2fe87c51f3 in service fd00:1122:3344:103::26 -+ crucible f52aa245-7e1b-46c0-8a31-e09725f02caf in service fd00:1122:3344:103::2b -+ crucible fae49024-6cec-444d-a6c4-83658ab015a4 in service fd00:1122:3344:103::29 -+ internal_dns c8aa84a5-a802-46c9-adcd-d61e9c8393c9 in service fd00:1122:3344:2::1 -+ internal_ntp e9bf481e-323e-466e-842f-8107078c7137 in service fd00:1122:3344:103::21 -+ nexus 4f2eb088-7d28-4c4e-a27c-746400ec65ba in service fd00:1122:3344:103::22 + crucible 728db429-8621-4e1e-9915-282aadfa27d1 in service fd00:1122:3344:103::24 + crucible a999e5fa-3edc-4dac-919a-d7b554cdae58 in service fd00:1122:3344:103::27 + crucible b416f299-c23c-46c8-9820-be2b66ffea0a in service fd00:1122:3344:103::28 + crucible b5d5491d-b3aa-4727-8b55-f66e0581ea4f in service fd00:1122:3344:103::2c + crucible cc1dc86d-bd6f-4929-aa4a-9619012e9393 in service fd00:1122:3344:103::25 + crucible cd3bb540-e605-465f-8c62-177ac482d850 in service fd00:1122:3344:103::2a + crucible e7dd3e98-7fe7-4827-be7f-395ff9a5f542 in service fd00:1122:3344:103::23 + crucible e8971ab3-fb7d-4ad8-aae3-7f2fe87c51f3 in service fd00:1122:3344:103::26 + crucible f52aa245-7e1b-46c0-8a31-e09725f02caf in service fd00:1122:3344:103::2b + crucible fae49024-6cec-444d-a6c4-83658ab015a4 in 
service fd00:1122:3344:103::29 + internal_dns c8aa84a5-a802-46c9-adcd-d61e9c8393c9 in service fd00:1122:3344:2::1 + internal_ntp e9bf481e-323e-466e-842f-8107078c7137 in service fd00:1122:3344:103::21 + nexus 4f2eb088-7d28-4c4e-a27c-746400ec65ba in service fd00:1122:3344:103::22 sled 89d02b1b-478c-401a-8e28-7a26f74fa41b (active): @@ -423,23 +423,23 @@ to: blueprint ade5749d-bdf3-4fab-a8ae-00bea01b3a5a fake-vendor fake-model serial-fe1d5b9f-8db7-4e2d-bf17-c4b80e1f897c - omicron zones generation 1 -> 2: + omicron zones at generation 2: ------------------------------------------------------------------------------------------ zone type zone id disposition underlay IP ------------------------------------------------------------------------------------------ -+ crucible 315a3670-d019-425c-b7a6-c9429428b671 in service fd00:1122:3344:101::25 -+ crucible 413d3e02-e19f-400a-9718-a662347538f0 in service fd00:1122:3344:101::26 -+ crucible 6cb330f9-4609-4d6c-98ad-b5cc34245813 in service fd00:1122:3344:101::2b -+ crucible 6d725df0-0189-4429-b270-3eeb891d39c8 in service fd00:1122:3344:101::2a -+ crucible 8b47e1e8-0396-4e44-a4a5-ea891405c9f2 in service fd00:1122:3344:101::24 -+ crucible b43ce109-90d6-46f9-9df0-8c68bfe6d4a0 in service fd00:1122:3344:101::23 -+ crucible b5443ebd-1f5b-448c-8edc-b4ca25c25db1 in service fd00:1122:3344:101::27 -+ crucible bb55534c-1042-4af4-ad2f-9590803695ac in service fd00:1122:3344:101::29 -+ crucible e135441d-637e-4de9-8023-5ea0096347f3 in service fd00:1122:3344:101::28 -+ crucible fee71ee6-da42-4a7f-a00e-f56b6a3327ce in service fd00:1122:3344:101::2c -+ internal_dns cbe91cdc-cbb6-4760-aece-6ce08b67e85a in service fd00:1122:3344:3::1 -+ internal_ntp 09937ebb-bb6a-495b-bc97-b58076b70a78 in service fd00:1122:3344:101::21 -+ nexus f3628f0a-2301-4fc8-bcbf-961199771731 in service fd00:1122:3344:101::22 + crucible 315a3670-d019-425c-b7a6-c9429428b671 in service fd00:1122:3344:101::25 + crucible 413d3e02-e19f-400a-9718-a662347538f0 in service 
fd00:1122:3344:101::26 + crucible 6cb330f9-4609-4d6c-98ad-b5cc34245813 in service fd00:1122:3344:101::2b + crucible 6d725df0-0189-4429-b270-3eeb891d39c8 in service fd00:1122:3344:101::2a + crucible 8b47e1e8-0396-4e44-a4a5-ea891405c9f2 in service fd00:1122:3344:101::24 + crucible b43ce109-90d6-46f9-9df0-8c68bfe6d4a0 in service fd00:1122:3344:101::23 + crucible b5443ebd-1f5b-448c-8edc-b4ca25c25db1 in service fd00:1122:3344:101::27 + crucible bb55534c-1042-4af4-ad2f-9590803695ac in service fd00:1122:3344:101::29 + crucible e135441d-637e-4de9-8023-5ea0096347f3 in service fd00:1122:3344:101::28 + crucible fee71ee6-da42-4a7f-a00e-f56b6a3327ce in service fd00:1122:3344:101::2c + internal_dns cbe91cdc-cbb6-4760-aece-6ce08b67e85a in service fd00:1122:3344:3::1 + internal_ntp 09937ebb-bb6a-495b-bc97-b58076b70a78 in service fd00:1122:3344:101::21 + nexus f3628f0a-2301-4fc8-bcbf-961199771731 in service fd00:1122:3344:101::22 COCKROACHDB SETTINGS: diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index bda102c38e..2ed654b944 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -99,6 +99,7 @@ pub struct Inventory { pub usable_hardware_threads: u32, pub usable_physical_ram: ByteCount, pub reservoir_size: ByteCount, + pub omicron_zones: OmicronZonesConfig, pub disks: Vec, pub zpools: Vec, pub datasets: Vec, diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index ae0afea3bb..95f7e8b5f7 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -31,9 +31,7 @@ use diesel::serialize::ToSql; use diesel::{serialize, sql_types}; use ipnetwork::IpNetwork; use nexus_sled_agent_shared::inventory::OmicronZoneDataset; -use nexus_sled_agent_shared::inventory::{ - OmicronZoneConfig, OmicronZoneType, OmicronZonesConfig, -}; +use nexus_sled_agent_shared::inventory::{OmicronZoneConfig, OmicronZoneType}; use nexus_types::inventory::{ BaseboardId, 
Caboose, Collection, NvmeFirmware, PowerState, RotPage, RotSlot, @@ -1178,7 +1176,11 @@ impl From for nexus_types::inventory::Dataset { } } -/// See [`nexus_types::inventory::OmicronZonesFound`]. +/// Information about a sled's Omicron zones, part of +/// [`nexus_types::inventory::SledAgent`]. +/// +/// TODO: This table is vestigial and can be combined with `InvSledAgent`. See +/// [issue #6770](https://github.com/oxidecomputer/omicron/issues/6770). #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_sled_omicron_zones)] pub struct InvSledOmicronZones { @@ -1192,28 +1194,14 @@ pub struct InvSledOmicronZones { impl InvSledOmicronZones { pub fn new( inv_collection_id: CollectionUuid, - zones_found: &nexus_types::inventory::OmicronZonesFound, + sled_agent: &nexus_types::inventory::SledAgent, ) -> InvSledOmicronZones { InvSledOmicronZones { inv_collection_id: inv_collection_id.into(), - time_collected: zones_found.time_collected, - source: zones_found.source.clone(), - sled_id: zones_found.sled_id.into(), - generation: Generation(zones_found.zones.generation), - } - } - - pub fn into_uninit_zones_found( - self, - ) -> nexus_types::inventory::OmicronZonesFound { - nexus_types::inventory::OmicronZonesFound { - time_collected: self.time_collected, - source: self.source, - sled_id: self.sled_id.into(), - zones: OmicronZonesConfig { - generation: *self.generation, - zones: Vec::new(), - }, + time_collected: sled_agent.time_collected, + source: sled_agent.source.clone(), + sled_id: sled_agent.sled_id.into(), + generation: Generation(sled_agent.omicron_zones.generation), } } } diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 1b58b50bbe..e43a50a291 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -1882,10 +1882,6 @@ mod tests { &mut collection.sled_agents, &mut base_collection.sled_agents, ); - mem::swap( - 
&mut collection.omicron_zones, - &mut base_collection.omicron_zones, - ); // Treat this blueprint as the initial blueprint for the system. blueprint.parent_blueprint_id = None; @@ -1999,7 +1995,7 @@ mod tests { ); assert_eq!( blueprint1.blueprint_zones.len(), - collection.omicron_zones.len() + collection.sled_agents.len() ); assert_eq!( blueprint1.all_omicron_zones(BlueprintZoneFilter::All).count(), diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 93b5ab85c1..3fdf38f19a 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -62,6 +62,7 @@ use nexus_db_model::SqlU16; use nexus_db_model::SqlU32; use nexus_db_model::SwCaboose; use nexus_db_model::SwRotPage; +use nexus_sled_agent_shared::inventory::OmicronZonesConfig; use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Collection; use nexus_types::inventory::PhysicalDiskFirmware; @@ -162,6 +163,32 @@ impl DataStore { } } + // Pull Omicron zone-related metadata out of all sled agents. + // + // TODO: InvSledOmicronZones is a vestigial table kept for backwards + // compatibility -- the only unique data within it (the generation + // number) can be moved into `InvSledAgent` in the future. See + // oxidecomputer/omicron#6770. + let sled_omicron_zones = collection + .sled_agents + .values() + .map(|sled_agent| { + InvSledOmicronZones::new(collection_id, sled_agent) + }) + .collect::>(); + + // Pull Omicron zones out of all sled agents. 
+ let omicron_zones: Vec<_> = collection + .sled_agents + .iter() + .flat_map(|(sled_id, sled_agent)| { + sled_agent.omicron_zones.zones.iter().map(|zone| { + InvOmicronZone::new(collection_id, *sled_id, zone) + .map_err(|e| Error::internal_error(&e.to_string())) + }) + }) + .collect::, _>>()?; + // Pull disks out of all sled agents let disks: Vec<_> = collection .sled_agents @@ -214,30 +241,11 @@ impl DataStore { }) .collect::, Error>>()?; - let sled_omicron_zones = collection - .omicron_zones - .values() - .map(|found| InvSledOmicronZones::new(collection_id, found)) - .collect::>(); - let omicron_zones = collection - .omicron_zones - .values() - .flat_map(|found| { - found.zones.zones.iter().map(|found_zone| { - InvOmicronZone::new( - collection_id, - found.sled_id, - found_zone, - ) - .map_err(|e| Error::internal_error(&e.to_string())) - }) - }) - .collect::, Error>>()?; let omicron_zone_nics = collection - .omicron_zones + .sled_agents .values() - .flat_map(|found| { - found.zones.zones.iter().filter_map(|found_zone| { + .flat_map(|sled_agent| { + sled_agent.omicron_zones.zones.iter().filter_map(|found_zone| { InvOmicronZoneNic::new(collection_id, found_zone) .with_context(|| format!("zone {:?}", found_zone.id)) .map_err(|e| Error::internal_error(&format!("{:#}", e))) @@ -1899,56 +1907,6 @@ impl DataStore { }) }) .collect::, _>>()?; - let sled_agents: BTreeMap<_, _> = sled_agent_rows - .into_iter() - .map(|s: InvSledAgent| { - let sled_id = SledUuid::from(s.sled_id); - let baseboard_id = s - .hw_baseboard_id - .map(|id| { - baseboards_by_id.get(&id).cloned().ok_or_else(|| { - Error::internal_error( - "missing baseboard that we should have fetched", - ) - }) - }) - .transpose()?; - let sled_agent = nexus_types::inventory::SledAgent { - time_collected: s.time_collected, - source: s.source, - sled_id, - baseboard_id, - sled_agent_address: std::net::SocketAddrV6::new( - std::net::Ipv6Addr::from(s.sled_agent_ip), - u16::from(s.sled_agent_port), - 0, - 0, - ), - 
sled_role: s.sled_role.into(), - usable_hardware_threads: u32::from( - s.usable_hardware_threads, - ), - usable_physical_ram: s.usable_physical_ram.into(), - reservoir_size: s.reservoir_size.into(), - disks: physical_disks - .get(&sled_id) - .map(|disks| disks.to_vec()) - .unwrap_or_default(), - zpools: zpools - .get(sled_id.as_untyped_uuid()) - .map(|zpools| zpools.to_vec()) - .unwrap_or_default(), - datasets: datasets - .get(sled_id.as_untyped_uuid()) - .map(|datasets| datasets.to_vec()) - .unwrap_or_default(), - }; - Ok((sled_id, sled_agent)) - }) - .collect::, - Error, - >>()?; // Fetch records of cabooses found. let inv_caboose_rows = { @@ -2190,7 +2148,10 @@ impl DataStore { zones.extend(batch.into_iter().map(|sled_zones_config| { ( sled_zones_config.sled_id.into(), - sled_zones_config.into_uninit_zones_found(), + OmicronZonesConfig { + generation: sled_zones_config.generation.into(), + zones: Vec::new(), + }, ) })) } @@ -2298,7 +2259,7 @@ impl DataStore { .map_err(|e| { Error::internal_error(&format!("{:#}", e.to_string())) })?; - map.zones.zones.push(zone); + map.zones.push(zone); } // Now load the clickhouse keeper cluster memberships @@ -2338,6 +2299,88 @@ impl DataStore { omicron_zone_nics.keys() ); + // Finally, build up the sled-agent map using the sled agent and + // omicron zone rows. A for loop is easier to understand than into_iter + // + filter_map + return Result + collect. + let mut sled_agents = BTreeMap::new(); + for s in sled_agent_rows { + let sled_id = SledUuid::from(s.sled_id); + let baseboard_id = s + .hw_baseboard_id + .map(|id| { + baseboards_by_id.get(&id).cloned().ok_or_else(|| { + Error::internal_error( + "missing baseboard that we should have fetched", + ) + }) + }) + .transpose()?; + + // Look up the Omicron zones. + // + // Older versions of Nexus fetched the Omicron zones in a separate + // request from the other sled agent data. 
The database model stil + // accounts for the possibility that for a given (collection, sled) + // pair, one of those queries succeeded while the other failed. But + // this has since been changed to fetch all the data in a single + // query, which means that newer collections will either have both + // sets of data or neither of them. + // + // If it _is_ the case that one of the pieces of data is missing, + // log that as a warning and drop the sled from the collection. + // This should only happen for old collections, and only in the + // unlikely case that exactly one of the two related requests + // failed. + // + // TODO: Update the database model to reflect the new reality + // (oxidecomputer/omicron#6770). + let Some(omicron_zones) = omicron_zones.remove(&sled_id) else { + warn!( + self.log, + "no sled Omicron zone data present -- assuming that collection was done + by an old Nexus version and dropping sled from it"; + "collection" => %id, + "sled_id" => %sled_id, + ); + continue; + }; + + let sled_agent = nexus_types::inventory::SledAgent { + time_collected: s.time_collected, + source: s.source, + sled_id, + baseboard_id, + sled_agent_address: std::net::SocketAddrV6::new( + std::net::Ipv6Addr::from(s.sled_agent_ip), + u16::from(s.sled_agent_port), + 0, + 0, + ), + sled_role: s.sled_role.into(), + usable_hardware_threads: u32::from(s.usable_hardware_threads), + usable_physical_ram: s.usable_physical_ram.into(), + reservoir_size: s.reservoir_size.into(), + omicron_zones, + // For disks, zpools, and datasets, the map for a sled ID is + // only populated if there is at least one disk/zpool/dataset + // for that sled. The `unwrap_or_default` calls cover the case + // where there are no disks/zpools/datasets for a sled. 
+ disks: physical_disks + .get(&sled_id) + .map(|disks| disks.to_vec()) + .unwrap_or_default(), + zpools: zpools + .get(sled_id.as_untyped_uuid()) + .map(|zpools| zpools.to_vec()) + .unwrap_or_default(), + datasets: datasets + .get(sled_id.as_untyped_uuid()) + .map(|datasets| datasets.to_vec()) + .unwrap_or_default(), + }; + sled_agents.insert(sled_id, sled_agent); + } + Ok(Collection { id, errors, @@ -2352,7 +2395,6 @@ impl DataStore { cabooses_found, rot_pages_found, sled_agents, - omicron_zones, clickhouse_keeper_cluster_membership, }) } diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index 6326f385ad..61852e454e 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -330,7 +330,7 @@ mod test { use dropshot::PaginationOrder; use nexus_db_model::Generation; use nexus_sled_agent_shared::inventory::{ - Baseboard, Inventory, InventoryDisk, SledRole, + Baseboard, Inventory, InventoryDisk, OmicronZonesConfig, SledRole, }; use nexus_test_utils::db::test_setup_database; use nexus_types::identity::Asset; @@ -696,6 +696,10 @@ mod test { sled_id: SledUuid::from_untyped_uuid(sled.id()), usable_hardware_threads: 10, usable_physical_ram: ByteCount::from(1024 * 1024), + omicron_zones: OmicronZonesConfig { + generation: OmicronZonesConfig::INITIAL_GENERATION, + zones: vec![], + }, disks, zpools: vec![], datasets: vec![], diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index 792c6bac41..00c4b7045d 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -17,13 +17,11 @@ use gateway_client::types::SpState; use gateway_client::types::SpType; use nexus_sled_agent_shared::inventory::Baseboard; use nexus_sled_agent_shared::inventory::Inventory; -use nexus_sled_agent_shared::inventory::OmicronZonesConfig; use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Caboose; use 
nexus_types::inventory::CabooseFound; use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::Collection; -use nexus_types::inventory::OmicronZonesFound; use nexus_types::inventory::RotPage; use nexus_types::inventory::RotPageFound; use nexus_types::inventory::RotPageWhich; @@ -92,7 +90,6 @@ pub struct CollectionBuilder { rot_pages_found: BTreeMap, RotPageFound>>, sleds: BTreeMap, - omicron_zones: BTreeMap, clickhouse_keeper_cluster_membership: BTreeSet, @@ -122,7 +119,6 @@ impl CollectionBuilder { cabooses_found: BTreeMap::new(), rot_pages_found: BTreeMap::new(), sleds: BTreeMap::new(), - omicron_zones: BTreeMap::new(), clickhouse_keeper_cluster_membership: BTreeSet::new(), id_rng: TypedUuidRng::from_entropy(), } @@ -132,8 +128,8 @@ impl CollectionBuilder { pub fn build(mut self) -> Collection { // This is not strictly necessary. But for testing, it's helpful for // things to be in sorted order. - for v in self.omicron_zones.values_mut() { - v.zones.zones.sort_by(|a, b| a.id.cmp(&b.id)); + for v in self.sleds.values_mut() { + v.omicron_zones.zones.sort_by(|a, b| a.id.cmp(&b.id)); } Collection { @@ -150,7 +146,6 @@ impl CollectionBuilder { cabooses_found: self.cabooses_found, rot_pages_found: self.rot_pages_found, sled_agents: self.sleds, - omicron_zones: self.omicron_zones, clickhouse_keeper_cluster_membership: self .clickhouse_keeper_cluster_membership, } @@ -516,6 +511,7 @@ impl CollectionBuilder { reservoir_size: inventory.reservoir_size, time_collected, sled_id, + omicron_zones: inventory.omicron_zones, disks: inventory.disks.into_iter().map(|d| d.into()).collect(), zpools: inventory .zpools @@ -540,32 +536,6 @@ impl CollectionBuilder { } } - /// Record information about Omicron zones found on a sled - pub fn found_sled_omicron_zones( - &mut self, - source: &str, - sled_id: SledUuid, - zones: OmicronZonesConfig, - ) -> Result<(), anyhow::Error> { - if let Some(previous) = self.omicron_zones.get(&sled_id) { - Err(anyhow!( - "sled {sled_id} omicron 
zones: reported previously: {:?}", - previous - )) - } else { - self.omicron_zones.insert( - sled_id, - OmicronZonesFound { - time_collected: now_db_precision(), - source: source.to_string(), - sled_id, - zones, - }, - ); - Ok(()) - } - } - /// Record information about Keeper cluster membership learned from the /// clickhouse-admin service running in the keeper zones. pub fn found_clickhouse_keeper_cluster_membership( diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index eebc692019..8637765a48 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -353,24 +353,7 @@ impl<'a> Collector<'a> { } }; - let sled_id = inventory.sled_id; - self.in_progress.found_sled_inventory(&sled_agent_url, inventory)?; - - let maybe_config = - client.omicron_zones_get().await.with_context(|| { - format!("Sled Agent {:?}: omicron zones", &sled_agent_url) - }); - match maybe_config { - Err(error) => { - self.in_progress.found_error(InventoryError::from(error)); - Ok(()) - } - Ok(zones) => self.in_progress.found_sled_omicron_zones( - &sled_agent_url, - sled_id, - zones.into_inner(), - ), - } + self.in_progress.found_sled_inventory(&sled_agent_url, inventory) } /// Collect inventory from about keepers from all `ClickhouseAdminKeeper` @@ -522,24 +505,21 @@ mod test { write!(&mut s, " baseboard {:?}\n", sled_info.baseboard_id) .unwrap(); - if let Some(found_zones) = collection.omicron_zones.get(sled_id) { - assert_eq!(*sled_id, found_zones.sled_id); + write!( + &mut s, + " zone generation: {:?}\n", + sled_info.omicron_zones.generation + ) + .unwrap(); + write!(&mut s, " zones found:\n").unwrap(); + for zone in &sled_info.omicron_zones.zones { write!( &mut s, - " zone generation: {:?}\n", - found_zones.zones.generation + " zone {} type {}\n", + zone.id, + zone.zone_type.kind().report_str(), ) .unwrap(); - write!(&mut s, " zones found:\n").unwrap(); - for zone in &found_zones.zones.zones { - write!( - &mut s, - " zone {} type 
{}\n", - zone.id, - zone.zone_type.kind().report_str(), - ) - .unwrap(); - } } } diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index 9d9da8de4b..8279465475 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -25,6 +25,7 @@ use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::RotPage; use nexus_types::inventory::RotPageWhich; use omicron_common::api::external::ByteCount; +use omicron_common::api::external::Generation; use omicron_common::disk::DiskVariant; use omicron_uuid_kinds::SledUuid; use std::sync::Arc; @@ -276,6 +277,24 @@ pub fn representative() -> Representative { // We deliberately provide no RoT pages for sled2. + // Report a representative set of Omicron zones, used in the sled-agent + // constructors below. + // + // We've hand-selected a minimal set of files to cover each type of zone. + // These files were constructed by: + // + // (1) copying the "omicron zones" ledgers from the sleds in a working + // Omicron deployment + // (2) pretty-printing each one with `json --in-place --file FILENAME` + // (3) adjusting the format slightly with + // `jq '{ generation: .omicron_generation, zones: .zones }'` + let sled14_data = include_str!("../example-data/madrid-sled14.json"); + let sled16_data = include_str!("../example-data/madrid-sled16.json"); + let sled17_data = include_str!("../example-data/madrid-sled17.json"); + let sled14: OmicronZonesConfig = serde_json::from_str(sled14_data).unwrap(); + let sled16: OmicronZonesConfig = serde_json::from_str(sled16_data).unwrap(); + let sled17: OmicronZonesConfig = serde_json::from_str(sled17_data).unwrap(); + // Report some sled agents. // // This first one will match "sled1_bb"'s baseboard information. 
@@ -368,6 +387,7 @@ pub fn representative() -> Representative { revision: 0, }, SledRole::Gimlet, + sled14, disks, zpools, datasets, @@ -395,6 +415,7 @@ pub fn representative() -> Representative { revision: 0, }, SledRole::Scrimlet, + sled16, vec![], vec![], vec![], @@ -417,6 +438,7 @@ pub fn representative() -> Representative { model: String::from("fellofftruck"), }, SledRole::Gimlet, + sled17, vec![], vec![], vec![], @@ -437,6 +459,12 @@ pub fn representative() -> Representative { sled_agent_id_unknown, Baseboard::Unknown, SledRole::Gimlet, + // We only have omicron zones for three sleds so use empty zone + // info here. + OmicronZonesConfig { + generation: Generation::new(), + zones: Vec::new(), + }, vec![], vec![], vec![], @@ -444,36 +472,6 @@ pub fn representative() -> Representative { ) .unwrap(); - // Report a representative set of Omicron zones. - // - // We've hand-selected a minimal set of files to cover each type of zone. - // These files were constructed by: - // - // (1) copying the "omicron zones" ledgers from the sleds in a working - // Omicron deployment - // (2) pretty-printing each one with `json --in-place --file FILENAME` - // (3) adjusting the format slightly with - // `jq '{ generation: .omicron_generation, zones: .zones }'` - let sled14_data = include_str!("../example-data/madrid-sled14.json"); - let sled16_data = include_str!("../example-data/madrid-sled16.json"); - let sled17_data = include_str!("../example-data/madrid-sled17.json"); - let sled14: OmicronZonesConfig = serde_json::from_str(sled14_data).unwrap(); - let sled16: OmicronZonesConfig = serde_json::from_str(sled16_data).unwrap(); - let sled17: OmicronZonesConfig = serde_json::from_str(sled17_data).unwrap(); - - let sled14_id = "7612d745-d978-41c8-8ee0-84564debe1d2".parse().unwrap(); - builder - .found_sled_omicron_zones("fake sled 14 agent", sled14_id, sled14) - .unwrap(); - let sled16_id = "af56cb43-3422-4f76-85bf-3f229db5f39c".parse().unwrap(); - builder - 
.found_sled_omicron_zones("fake sled 15 agent", sled16_id, sled16) - .unwrap(); - let sled17_id = "6eb2a0d9-285d-4e03-afa1-090e4656314b".parse().unwrap(); - builder - .found_sled_omicron_zones("fake sled 15 agent", sled17_id, sled17) - .unwrap(); - builder.found_clickhouse_keeper_cluster_membership( ClickhouseKeeperClusterMembership { queried_keeper: KeeperId(1), @@ -558,6 +556,7 @@ pub fn sled_agent( sled_id: SledUuid, baseboard: Baseboard, sled_role: SledRole, + omicron_zones: OmicronZonesConfig, disks: Vec, zpools: Vec, datasets: Vec, @@ -570,6 +569,7 @@ pub fn sled_agent( sled_id, usable_hardware_threads: 10, usable_physical_ram: ByteCount::from(1024 * 1024), + omicron_zones, disks, zpools, datasets, diff --git a/nexus/reconfigurator/execution/src/datasets.rs b/nexus/reconfigurator/execution/src/datasets.rs index f7bb3ef83b..3b5cfeb564 100644 --- a/nexus/reconfigurator/execution/src/datasets.rs +++ b/nexus/reconfigurator/execution/src/datasets.rs @@ -223,7 +223,7 @@ mod tests { // Create another zpool on one of the sleds, so we can add new // zones that use it. let new_zpool_id = ZpoolUuid::new_v4(); - for &sled_id in collection.omicron_zones.keys().take(1) { + for &sled_id in collection.sled_agents.keys().take(1) { let zpool = Zpool::new( new_zpool_id.into_untyped_uuid(), sled_id.into_untyped_uuid(), diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index eea54a246c..d3c75fd673 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -634,13 +634,12 @@ mod test { // Also assume any sled in the collection is active. 
let mut sled_state = BTreeMap::new(); - for (sled_id, zones_config) in collection.omicron_zones { + for (sled_id, sa) in collection.sled_agents { blueprint_zones.insert( sled_id, BlueprintZonesConfig { - generation: zones_config.zones.generation, - zones: zones_config - .zones + generation: sa.omicron_zones.generation, + zones: sa.omicron_zones .zones .into_iter() .map(|config| -> BlueprintZoneConfig { diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 25e0fa1967..d11108ea54 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -2480,7 +2480,7 @@ pub mod test { let err = builder .sled_ensure_zone_multiple_nexus( collection - .omicron_zones + .sled_agents .keys() .next() .copied() @@ -2509,10 +2509,10 @@ pub mod test { // `sled_id`. let sled_id = { let mut selected_sled_id = None; - for (sled_id, zones) in &mut collection.omicron_zones { - let nzones_before_retain = zones.zones.zones.len(); - zones.zones.zones.retain(|z| !z.zone_type.is_nexus()); - if zones.zones.zones.len() < nzones_before_retain { + for (sled_id, sa) in &mut collection.sled_agents { + let nzones_before_retain = sa.omicron_zones.zones.len(); + sa.omicron_zones.zones.retain(|z| !z.zone_type.is_nexus()); + if sa.omicron_zones.zones.len() < nzones_before_retain { selected_sled_id = Some(*sled_id); // Also remove this zone from the blueprint. 
let mut removed_nexus = None; diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index df6c4d0e52..16d7a00684 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -316,10 +316,6 @@ impl ExampleSystemBuilder { } let blueprint = builder.build(); - let mut builder = - system.to_collection_builder().expect("failed to build collection"); - builder.set_rng_seed((&self.test_name, "ExampleSystem collection")); - for sled_id in blueprint.sleds() { let Some(zones) = blueprint.blueprint_zones.get(&sled_id) else { continue; @@ -350,9 +346,8 @@ impl ExampleSystemBuilder { } for (sled_id, zones) in &blueprint.blueprint_zones { - builder - .found_sled_omicron_zones( - "fake sled agent", + system + .sled_set_omicron_zones( *sled_id, zones.to_omicron_zones_config( BlueprintZoneFilter::ShouldBeRunning, @@ -361,6 +356,10 @@ impl ExampleSystemBuilder { .unwrap(); } + let mut builder = + system.to_collection_builder().expect("failed to build collection"); + builder.set_rng_seed((&self.test_name, "ExampleSystem collection")); + // The blueprint evolves separately from the system -- so it's returned // as a separate value. let example = ExampleSystem { diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index c1163d9ef5..ed59689218 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -290,10 +290,14 @@ impl<'a> Planner<'a> { // problem here. 
let has_ntp_inventory = self .inventory - .omicron_zones + .sled_agents .get(&sled_id) - .map(|sled_zones| { - sled_zones.zones.zones.iter().any(|z| z.zone_type.is_ntp()) + .map(|sled_agent| { + sled_agent + .omicron_zones + .zones + .iter() + .any(|z| z.zone_type.is_ntp()) }) .unwrap_or(false); if !has_ntp_inventory { @@ -801,7 +805,6 @@ mod test { use clickhouse_admin_types::ClickhouseKeeperClusterMembership; use clickhouse_admin_types::KeeperId; use expectorate::assert_contents; - use nexus_inventory::now_db_precision; use nexus_sled_agent_shared::inventory::ZoneKind; use nexus_types::deployment::blueprint_zone_type; use nexus_types::deployment::BlueprintDiff; @@ -818,7 +821,6 @@ mod test { use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; use nexus_types::external_api::views::SledState; - use nexus_types::inventory::OmicronZonesFound; use omicron_common::api::external::Generation; use omicron_common::disk::DiskIdentity; use omicron_test_utils::dev::test_setup_log; @@ -935,25 +937,24 @@ mod test { verify_blueprint(&blueprint4); // Now update the inventory to have the requested NTP zone. - let mut collection = example.collection.clone(); - assert!(collection - .omicron_zones - .insert( + // + // TODO: mutating example.system doesn't automatically update + // example.collection -- this should be addressed via API improvements. 
+ example + .system + .sled_set_omicron_zones( new_sled_id, - OmicronZonesFound { - time_collected: now_db_precision(), - source: String::from("test suite"), - sled_id: new_sled_id, - zones: blueprint4 - .blueprint_zones - .get(&new_sled_id) - .expect("blueprint should contain zones for new sled") - .to_omicron_zones_config( - BlueprintZoneFilter::ShouldBeRunning - ) - } + blueprint4 + .blueprint_zones + .get(&new_sled_id) + .expect("blueprint should contain zones for new sled") + .to_omicron_zones_config( + BlueprintZoneFilter::ShouldBeRunning, + ), ) - .is_none()); + .unwrap(); + let collection = + example.system.to_collection_builder().unwrap().build(); // Check that the next step is to add Crucible zones let blueprint5 = Planner::new_based_on( diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index 5a9607b54d..39d25fc2de 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -13,6 +13,7 @@ use nexus_inventory::CollectionBuilder; use nexus_sled_agent_shared::inventory::Baseboard; use nexus_sled_agent_shared::inventory::Inventory; use nexus_sled_agent_shared::inventory::InventoryDisk; +use nexus_sled_agent_shared::inventory::OmicronZonesConfig; use nexus_sled_agent_shared::inventory::SledRole; use nexus_types::deployment::ClickhousePolicy; use nexus_types::deployment::CockroachDbClusterVersion; @@ -279,6 +280,7 @@ impl SystemDescription { sled.unique, sled.hardware, hardware_slot, + sled.omicron_zones, sled.npools, ); self.sleds.insert(sled_id, sled); @@ -324,6 +326,29 @@ impl SystemDescription { !self.sleds.is_empty() } + /// Set Omicron zones for a sled. + /// + /// The zones will be reported in the collection generated by + /// [`Self::to_collection_builder`]. + /// + /// Returns an error if the sled is not found. 
+ /// + /// # Notes + /// + /// It is okay to call `sled_set_omicron_zones` in ways that wouldn't + /// happen in production, such as to test illegal states. + pub fn sled_set_omicron_zones( + &mut self, + sled_id: SledUuid, + omicron_zones: OmicronZonesConfig, + ) -> anyhow::Result<&mut Self> { + let sled = self.sleds.get_mut(&sled_id).with_context(|| { + format!("attempted to access sled {} not found in system", sled_id) + })?; + sled.inventory_sled_agent.omicron_zones = omicron_zones; + Ok(self) + } + pub fn to_collection_builder(&self) -> anyhow::Result { let collector_label = self .collector @@ -408,6 +433,7 @@ pub struct SledBuilder { hardware: SledHardware, hardware_slot: Option, sled_role: SledRole, + omicron_zones: OmicronZonesConfig, npools: u8, } @@ -425,6 +451,11 @@ impl SledBuilder { hardware: SledHardware::Gimlet, hardware_slot: None, sled_role: SledRole::Gimlet, + omicron_zones: OmicronZonesConfig { + // The initial generation is the one with no zones. + generation: OmicronZonesConfig::INITIAL_GENERATION, + zones: Vec::new(), + }, npools: Self::DEFAULT_NPOOLS, } } @@ -505,6 +536,7 @@ struct Sled { impl Sled { /// Create a `Sled` using faked-up information based on a `SledBuilder` + #[allow(clippy::too_many_arguments)] fn new_simulated( sled_id: SledUuid, sled_subnet: Ipv6Subnet, @@ -512,6 +544,7 @@ impl Sled { unique: Option, hardware: SledHardware, hardware_slot: u16, + omicron_zones: OmicronZonesConfig, nzpools: u8, ) -> Sled { use typed_rng::TypedUuidRng; @@ -598,6 +631,7 @@ impl Sled { sled_id, usable_hardware_threads: 10, usable_physical_ram: ByteCount::from(1024 * 1024), + omicron_zones, // Populate disks, appearing like a real device. 
disks: zpools .values() @@ -753,6 +787,7 @@ impl Sled { sled_id, usable_hardware_threads: inv_sled_agent.usable_hardware_threads, usable_physical_ram: inv_sled_agent.usable_physical_ram, + omicron_zones: inv_sled_agent.omicron_zones.clone(), disks: vec![], zpools: vec![], datasets: vec![], diff --git a/nexus/src/app/background/tasks/sync_service_zone_nat.rs b/nexus/src/app/background/tasks/sync_service_zone_nat.rs index 6ef11e608d..972c9702cf 100644 --- a/nexus/src/app/background/tasks/sync_service_zone_nat.rs +++ b/nexus/src/app/background/tasks/sync_service_zone_nat.rs @@ -107,7 +107,7 @@ impl BackgroundTask for ServiceZoneNatTracker { let mut nexus_count = 0; let mut dns_count = 0; - for (sled_id, zones_found) in collection.omicron_zones { + for (sled_id, sa) in collection.sled_agents { let (_, sled) = match LookupPath::new(opctx, &self.datastore) .sled_id(sled_id.into_untyped_uuid()) .fetch() @@ -128,7 +128,7 @@ impl BackgroundTask for ServiceZoneNatTracker { let sled_address = oxnet::Ipv6Net::host_net(*sled.ip); - let zones_config: OmicronZonesConfig = zones_found.zones; + let zones_config: OmicronZonesConfig = sa.omicron_zones; let zones: Vec = zones_config.zones; for zone in zones { diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 891e30b41b..eabe4e5a3b 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -291,11 +291,9 @@ impl Blueprint { .collect(); let before_zones = before - .omicron_zones + .sled_agents .iter() - .map(|(sled_id, zones_found)| { - (*sled_id, zones_found.zones.clone().into()) - }) + .map(|(sled_id, sa)| (*sled_id, sa.omicron_zones.clone().into())) .collect(); let before_disks = before diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 7b3c1ee660..85958f518a 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -116,9 +116,6 @@ pub struct Collection { /// Sled Agent information, by *sled* id pub sled_agents: BTreeMap, - /// 
Omicron zones found, by *sled* id - pub omicron_zones: BTreeMap, - /// The raft configuration (cluster membership) of the clickhouse keeper /// cluster as returned from each available keeper via `clickhouse-admin` in /// the `ClickhouseKeeper` zone @@ -170,7 +167,7 @@ impl Collection { pub fn all_omicron_zones( &self, ) -> impl Iterator { - self.omicron_zones.values().flat_map(|z| z.zones.zones.iter()) + self.sled_agents.values().flat_map(|sa| sa.omicron_zones.zones.iter()) } /// Iterate over the sled ids of sleds identified as Scrimlets @@ -518,15 +515,8 @@ pub struct SledAgent { pub usable_hardware_threads: u32, pub usable_physical_ram: ByteCount, pub reservoir_size: ByteCount, + pub omicron_zones: OmicronZonesConfig, pub disks: Vec, pub zpools: Vec, pub datasets: Vec, } - -#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] -pub struct OmicronZonesFound { - pub time_collected: DateTime, - pub source: String, - pub sled_id: SledUuid, - pub zones: OmicronZonesConfig, -} diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 59ec25c253..3ebd833c51 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -431,27 +431,6 @@ } }, "/omicron-zones": { - "get": { - "operationId": "omicron_zones_get", - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/OmicronZonesConfig" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - }, "put": { "operationId": "omicron_zones_put", "requestBody": { @@ -3441,6 +3420,9 @@ "$ref": "#/components/schemas/InventoryDisk" } }, + "omicron_zones": { + "$ref": "#/components/schemas/OmicronZonesConfig" + }, "reservoir_size": { "$ref": "#/components/schemas/ByteCount" }, @@ -3472,6 +3454,7 @@ "baseboard", "datasets", "disks", + "omicron_zones", "reservoir_size", "sled_agent_address", "sled_id", diff --git 
a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 582fa5737d..f78e034721 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3391,6 +3391,8 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_dataset ( PRIMARY KEY (inv_collection_id, sled_id, name) ); +-- TODO: This table is vestigial and can be combined with `inv_sled_agent`. See +-- https://github.com/oxidecomputer/omicron/issues/6770. CREATE TABLE IF NOT EXISTS omicron.public.inv_sled_omicron_zones ( -- where this observation came from -- (foreign key into `inv_collection` table) diff --git a/sled-agent/api/src/lib.rs b/sled-agent/api/src/lib.rs index dbd8709088..e0d76a857b 100644 --- a/sled-agent/api/src/lib.rs +++ b/sled-agent/api/src/lib.rs @@ -154,14 +154,6 @@ pub trait SledAgentApi { rqctx: RequestContext, ) -> Result>, HttpError>; - #[endpoint { - method = GET, - path = "/omicron-zones", - }] - async fn omicron_zones_get( - rqctx: RequestContext, - ) -> Result, HttpError>; - #[endpoint { method = PUT, path = "/omicron-zones", diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index f60b343b47..489fc9ab0d 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -258,13 +258,6 @@ impl SledAgentApi for SledAgentImpl { sa.zones_list().await.map(HttpResponseOk).map_err(HttpError::from) } - async fn omicron_zones_get( - rqctx: RequestContext, - ) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_zones_list().await)) - } - async fn omicron_zones_put( rqctx: RequestContext, body: TypedBody, diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index fd1d2bd55b..5124096c1e 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -1593,7 +1593,8 @@ mod test { use super::{Config, OmicronZonesConfigGenerator}; use crate::rack_setup::plan::service::{Plan as ServicePlan, SledInfo}; use nexus_sled_agent_shared::inventory::{ - 
Baseboard, Inventory, InventoryDisk, OmicronZoneType, SledRole, + Baseboard, Inventory, InventoryDisk, OmicronZoneType, + OmicronZonesConfig, SledRole, }; use omicron_common::{ address::{get_sled_address, Ipv6Subnet, SLED_PREFIX}, @@ -1620,6 +1621,10 @@ mod test { usable_hardware_threads: 32, usable_physical_ram: ByteCount::from_gibibytes_u32(16), reservoir_size: ByteCount::from_gibibytes_u32(0), + omicron_zones: OmicronZonesConfig { + generation: Generation::new(), + zones: vec![], + }, disks: (0..u2_count) .map(|i| InventoryDisk { identity: DiskIdentity { diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index 7e16e4eee9..ca7f5e3410 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -326,13 +326,6 @@ impl SledAgentApi for SledAgentSimImpl { Ok(HttpResponseOk(sa.omicron_physical_disks_list().await?)) } - async fn omicron_zones_get( - rqctx: RequestContext, - ) -> Result, HttpError> { - let sa = rqctx.context(); - Ok(HttpResponseOk(sa.omicron_zones_list().await)) - } - async fn omicron_zones_put( rqctx: RequestContext, body: TypedBody, diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 248e4ea995..321e9cc34f 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -839,6 +839,7 @@ impl SledAgent { self.config.hardware.reservoir_ram, ) .context("reservoir_size")?, + omicron_zones: self.fake_zones.lock().await.clone(), disks: storage .physical_disks() .values() diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index d21c3b8d39..4a4be08f76 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -917,11 +917,6 @@ impl SledAgent { Ok(disk_result) } - /// List the Omicron zone configuration that's currently running - pub async fn omicron_zones_list(&self) -> OmicronZonesConfig { - self.inner.services.omicron_zones_list().await - } - /// Ensures that the specific set of 
Omicron zones are running as configured /// (and that no other zones are running) pub async fn omicron_zones_ensure( @@ -1261,7 +1256,10 @@ impl SledAgent { let mut disks = vec![]; let mut zpools = vec![]; let mut datasets = vec![]; - let all_disks = self.storage().get_latest_disks().await; + let (all_disks, omicron_zones) = tokio::join!( + self.storage().get_latest_disks(), + self.inner.services.omicron_zones_list() + ); for (identity, variant, slot, firmware) in all_disks.iter_all() { disks.push(InventoryDisk { identity: identity.clone(), @@ -1345,6 +1343,7 @@ impl SledAgent { usable_hardware_threads, usable_physical_ram: ByteCount::try_from(usable_physical_ram)?, reservoir_size, + omicron_zones, disks, zpools, datasets,