Skip to content

Commit

Permalink
Expose uninitialized sleds and "add sled" via internal API and omdb (#…
Browse files Browse the repository at this point in the history
…5204)

This will let us drive the add sled process from a support context.
(Prior to this change adding a sled required interaction with the
external API.)

I want to test this before merging, but it's small enough that if
there's anything that needs tweaking it's unlikely to affect any review
comments. One change is that we had two `Baseboard` structs that were
identical except for a minor difference in field names (`{serial,part}`
vs `{serial_number,part_number}`). I removed the internal one and
replaced it with the external one to avoid unnecessary churn in the
public API, but if I'm missing something intentional that led to having
both, please say so.

Closes #5133.
  • Loading branch information
jgallagher authored Mar 7, 2024
1 parent dcdf9bb commit 6a0d1af
Show file tree
Hide file tree
Showing 12 changed files with 302 additions and 36 deletions.
13 changes: 13 additions & 0 deletions dev-tools/omdb/src/bin/omdb/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,24 @@ struct Omdb {
#[arg(env = "OMDB_DNS_SERVER", long)]
dns_server: Option<SocketAddr>,

/// allow potentially-destructive subcommands
#[arg(short = 'w', long = "destructive")]
allow_destructive: bool,

#[command(subcommand)]
command: OmdbCommands,
}

impl Omdb {
/// Returns an error unless the `-w` / `--destructive` flag was given.
///
/// Intended to be called before running a subcommand that is considered
/// potentially destructive.
fn check_allow_destructive(&self) -> anyhow::Result<()> {
    if !self.allow_destructive {
        anyhow::bail!(
            "This command is potentially destructive. \
             Pass the `-w` / `--destructive` flag to allow it."
        );
    }
    Ok(())
}

async fn dns_lookup_all(
&self,
log: slog::Logger,
Expand Down
111 changes: 107 additions & 4 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use nexus_client::types::ActivationReason;
use nexus_client::types::BackgroundTask;
use nexus_client::types::CurrentStatus;
use nexus_client::types::LastResult;
use nexus_client::types::UninitializedSledId;
use serde::Deserialize;
use slog_error_chain::InlineErrorChain;
use std::collections::BTreeMap;
Expand All @@ -38,8 +39,10 @@ pub struct NexusArgs {
enum NexusCommands {
/// print information about background tasks
BackgroundTasks(BackgroundTasksArgs),
/// print information about blueprints
/// interact with blueprints
Blueprints(BlueprintsArgs),
/// interact with sleds
Sleds(SledsArgs),
}

#[derive(Debug, Args)]
Expand Down Expand Up @@ -116,6 +119,28 @@ enum BlueprintTargetCommands {
Set(BlueprintIdArgs),
}

// Arguments for `omdb nexus sleds`: a thin wrapper holding the subcommand to
// run. (Written as a plain `//` comment on purpose: doc comments on
// clap-derived types may end up in generated help output.)
#[derive(Debug, Args)]
struct SledsArgs {
#[command(subcommand)]
command: SledsCommands,
}

// Subcommands available under `omdb nexus sleds`. The `///` comment on each
// variant is the help text clap shows for that subcommand, so treat those
// lines as user-facing strings rather than ordinary comments.
#[derive(Debug, Subcommand)]
enum SledsCommands {
/// List all uninitialized sleds
ListUninitialized,
/// Add an uninitialized sled
Add(SledAddArgs),
}

// Arguments for `omdb nexus sleds add`: the serial and part number that
// identify the sled to add. Neither field carries an `#[arg(...)]` attribute,
// so clap presumably treats both as positional, in declaration order
// (serial first, then part) — confirm against the generated usage text.
#[derive(Debug, Args)]
struct SledAddArgs {
/// sled's serial number
serial: String,
/// sled's part number
part: String,
}

impl NexusArgs {
/// Run a `omdb nexus` subcommand.
pub(crate) async fn run_cmd(
Expand Down Expand Up @@ -167,7 +192,10 @@ impl NexusArgs {
}) => cmd_nexus_blueprints_diff(&client, args).await,
NexusCommands::Blueprints(BlueprintsArgs {
command: BlueprintsCommands::Delete(args),
}) => cmd_nexus_blueprints_delete(&client, args).await,
}) => {
omdb.check_allow_destructive()?;
cmd_nexus_blueprints_delete(&client, args).await
}
NexusCommands::Blueprints(BlueprintsArgs {
command:
BlueprintsCommands::Target(BlueprintsTargetArgs {
Expand All @@ -179,16 +207,33 @@ impl NexusArgs {
BlueprintsCommands::Target(BlueprintsTargetArgs {
command: BlueprintTargetCommands::Set(args),
}),
}) => cmd_nexus_blueprints_target_set(&client, args).await,
}) => {
omdb.check_allow_destructive()?;
cmd_nexus_blueprints_target_set(&client, args).await
}
NexusCommands::Blueprints(BlueprintsArgs {
command: BlueprintsCommands::Regenerate,
}) => cmd_nexus_blueprints_regenerate(&client).await,
}) => {
omdb.check_allow_destructive()?;
cmd_nexus_blueprints_regenerate(&client).await
}
NexusCommands::Blueprints(BlueprintsArgs {
command: BlueprintsCommands::GenerateFromCollection(args),
}) => {
omdb.check_allow_destructive()?;
cmd_nexus_blueprints_generate_from_collection(&client, args)
.await
}

NexusCommands::Sleds(SledsArgs {
command: SledsCommands::ListUninitialized,
}) => cmd_nexus_sleds_list_uninitialized(&client).await,
NexusCommands::Sleds(SledsArgs {
command: SledsCommands::Add(args),
}) => {
omdb.check_allow_destructive()?;
cmd_nexus_sled_add(&client, args).await
}
}
}
}
Expand Down Expand Up @@ -946,3 +991,61 @@ async fn cmd_nexus_blueprints_regenerate(
eprintln!("generated new blueprint {}", blueprint.id);
Ok(())
}

/// Runs `omdb nexus sleds list-uninitialized`
///
/// Asks Nexus for its uninitialized sleds and prints them to stdout as a
/// table ordered by cubby. Only the items in the returned page are shown; a
/// warning goes to stderr if the response carries a `next_page` token.
async fn cmd_nexus_sleds_list_uninitialized(
    client: &nexus_client::Client,
) -> Result<(), anyhow::Error> {
    // One output row per uninitialized sled.
    #[derive(Tabled)]
    #[tabled(rename_all = "SCREAMING_SNAKE_CASE")]
    struct UninitializedSledRow {
        rack_id: Uuid,
        cubby: u16,
        serial: String,
        part: String,
        revision: i64,
    }

    let page = client
        .sled_list_uninitialized()
        .await
        .context("listing uninitialized sleds")?
        .into_inner();

    // We do not follow pagination tokens; let the operator know if the
    // listing may be incomplete.
    if page.next_page.is_some() {
        eprintln!(
            "warning: response includes next_page token; \
             pagination not implemented"
        );
    }

    let mut uninitialized = page.items;
    uninitialized.sort_by_key(|entry| entry.cubby);

    let rows = uninitialized.into_iter().map(|entry| {
        let baseboard = entry.baseboard;
        UninitializedSledRow {
            rack_id: entry.rack_id,
            cubby: entry.cubby,
            serial: baseboard.serial,
            part: baseboard.part,
            revision: baseboard.revision,
        }
    });

    let rendered = tabled::Table::new(rows)
        .with(tabled::settings::Style::empty())
        .with(tabled::settings::Padding::new(0, 1, 0, 0))
        .to_string();
    println!("{}", rendered);

    Ok(())
}

/// Runs `omdb nexus sleds add`
async fn cmd_nexus_sled_add(
client: &nexus_client::Client,
args: &SledAddArgs,
) -> Result<(), anyhow::Error> {
client
.sled_add(&UninitializedSledId {
part: args.part.clone(),
serial: args.serial.clone(),
})
.await
.context("adding sled")?;
eprintln!("added sled {} ({})", args.serial, args.part);
Ok(())
}
7 changes: 6 additions & 1 deletion dev-tools/omdb/tests/usage_errors.out
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Commands:
Options:
--log-level <LOG_LEVEL> log level filter [env: LOG_LEVEL=] [default: warn]
--dns-server <DNS_SERVER> [env: OMDB_DNS_SERVER=]
-w, --destructive allow potentially-destructive subcommands
-h, --help Print help (see more with '--help')
=============================================
EXECUTING COMMAND: omdb ["--help"]
Expand Down Expand Up @@ -50,6 +51,9 @@ Options:
--dns-server <DNS_SERVER>
[env: OMDB_DNS_SERVER=]

-w, --destructive
allow potentially-destructive subcommands

-h, --help
Print help (see a summary with '-h')
---------------------------------------------
Expand Down Expand Up @@ -294,7 +298,8 @@ Usage: omdb nexus [OPTIONS] <COMMAND>

Commands:
background-tasks print information about background tasks
blueprints print information about blueprints
blueprints interact with blueprints
sleds interact with sleds
help Print this message or the help of the given subcommand(s)

Options:
Expand Down
4 changes: 2 additions & 2 deletions nexus/src/app/sled.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ impl super::Nexus {
id,
info.sa_address,
db::model::SledBaseboard {
serial_number: info.baseboard.serial_number,
part_number: info.baseboard.part_number,
serial_number: info.baseboard.serial,
part_number: info.baseboard.part,
revision: info.baseboard.revision,
},
db::model::SledSystemHardware {
Expand Down
4 changes: 2 additions & 2 deletions nexus/src/app/switch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ impl super::Nexus {
) -> Result<Switch, Error> {
let switch = db::model::Switch::new(
id,
request.baseboard.serial_number,
request.baseboard.part_number,
request.baseboard.serial,
request.baseboard.part,
request.baseboard.revision,
request.rack_id,
);
Expand Down
47 changes: 47 additions & 0 deletions nexus/src/internal_api/http_entrypoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ use nexus_types::deployment::Blueprint;
use nexus_types::deployment::BlueprintMetadata;
use nexus_types::deployment::BlueprintTarget;
use nexus_types::deployment::BlueprintTargetSet;
use nexus_types::external_api::params::UninitializedSledId;
use nexus_types::external_api::shared::UninitializedSled;
use nexus_types::internal_api::params::SwitchPutRequest;
use nexus_types::internal_api::params::SwitchPutResponse;
use nexus_types::internal_api::views::to_list;
Expand Down Expand Up @@ -88,6 +90,9 @@ pub(crate) fn internal_api() -> NexusApiDescription {
api.register(blueprint_generate_from_collection)?;
api.register(blueprint_regenerate)?;

api.register(sled_list_uninitialized)?;
api.register(sled_add)?;

Ok(())
}

Expand Down Expand Up @@ -795,3 +800,45 @@ async fn blueprint_regenerate(
};
apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await
}

/// List uninitialized sleds
#[endpoint {
    method = GET,
    path = "/sleds/uninitialized",
}]
async fn sled_list_uninitialized(
    rqctx: RequestContext<Arc<ServerContext>>,
) -> Result<HttpResponseOk<ResultsPage<UninitializedSled>>, HttpError> {
    let apictx = rqctx.context();
    let nexus = &apictx.nexus;
    let handler = async {
        let opctx = crate::context::op_context_for_internal_api(&rqctx).await;
        let items = nexus.sled_list_uninitialized(&opctx).await?;
        // All results go out in a single page, so no continuation token is
        // ever set.
        let page = ResultsPage { items, next_page: None };
        Ok(HttpResponseOk(page))
    };
    apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await
}

/// Add sled to initialized rack
//
// TODO: In the future this should really be a PUT request, once we resolve
// https://github.com/oxidecomputer/omicron/issues/4494. It should also
// explicitly be tied to a rack via a `rack_id` path param. For now we assume
// we are only operating on single rack systems.
#[endpoint {
    method = POST,
    path = "/sleds/add",
}]
async fn sled_add(
    rqctx: RequestContext<Arc<ServerContext>>,
    sled: TypedBody<UninitializedSledId>,
) -> Result<HttpResponseUpdatedNoContent, HttpError> {
    let apictx = rqctx.context();
    let handler = async {
        let nexus = &apictx.nexus;
        let opctx = crate::context::op_context_for_internal_api(&rqctx).await;
        // Unwrap the request body into the sled identifier it carries.
        let sled_id = sled.into_inner();
        nexus.sled_add(&opctx, sled_id).await?;
        Ok(HttpResponseUpdatedNoContent())
    };
    apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await
}
6 changes: 3 additions & 3 deletions nexus/test-utils/src/resource_helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ use nexus_types::external_api::params;
use nexus_types::external_api::params::PhysicalDiskKind;
use nexus_types::external_api::params::UserId;
use nexus_types::external_api::shared;
use nexus_types::external_api::shared::Baseboard;
use nexus_types::external_api::shared::IdentityType;
use nexus_types::external_api::shared::IpRange;
use nexus_types::external_api::views;
Expand All @@ -29,7 +30,6 @@ use nexus_types::external_api::views::User;
use nexus_types::external_api::views::{Project, Silo, Vpc, VpcRouter};
use nexus_types::identity::Resource;
use nexus_types::internal_api::params as internal_params;
use nexus_types::internal_api::params::Baseboard;
use omicron_common::api::external::ByteCount;
use omicron_common::api::external::Disk;
use omicron_common::api::external::IdentityMetadataCreateParams;
Expand Down Expand Up @@ -325,8 +325,8 @@ pub async fn create_switch(
"/switches",
&internal_params::SwitchPutRequest {
baseboard: Baseboard {
serial_number: serial.to_string(),
part_number: part.to_string(),
serial: serial.to_string(),
part: part.to_string(),
revision,
},
rack_id,
Expand Down
7 changes: 1 addition & 6 deletions nexus/tests/integration_tests/rack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ use nexus_test_utils_macros::nexus_test;
use nexus_types::external_api::params;
use nexus_types::external_api::shared::UninitializedSled;
use nexus_types::external_api::views::Rack;
use nexus_types::internal_api::params::Baseboard;
use nexus_types::internal_api::params::SledAgentStartupInfo;
use nexus_types::internal_api::params::SledRole;
use omicron_common::api::external::ByteCount;
Expand Down Expand Up @@ -112,11 +111,7 @@ async fn test_sled_list_uninitialized(cptestctx: &ControlPlaneTestContext) {
let sa = SledAgentStartupInfo {
sa_address: "[fd00:1122:3344:0100::1]:8080".parse().unwrap(),
role: SledRole::Gimlet,
baseboard: Baseboard {
serial_number: baseboard.serial,
part_number: baseboard.part,
revision: baseboard.revision,
},
baseboard,
usable_hardware_threads: 32,
usable_physical_ram: ByteCount::from_gibibytes_u32(100),
reservoir_size: ByteCount::from_mebibytes_u32(100),
Expand Down
10 changes: 1 addition & 9 deletions nexus/types/src/internal_api/params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

use crate::external_api::params::PhysicalDiskKind;
use crate::external_api::params::UserId;
use crate::external_api::shared::Baseboard;
use crate::external_api::shared::IpRange;
use omicron_common::api::external::ByteCount;
use omicron_common::api::external::MacAddr;
Expand Down Expand Up @@ -35,15 +36,6 @@ pub enum SledRole {
Scrimlet,
}

// TODO: We need a unified representation of these hardware identifiers
/// Describes properties that should uniquely identify Oxide manufactured hardware
//
// NOTE(review): per the accompanying commit description, the external API's
// `Baseboard` holds the same data under the field names `serial` / `part`;
// confirm whether both definitions are still needed.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
pub struct Baseboard {
pub serial_number: String,
pub part_number: String,
pub revision: i64,
}

/// Sent by a sled agent on startup to Nexus to request further instruction
#[derive(Serialize, Deserialize, Debug, JsonSchema)]
pub struct SledAgentStartupInfo {
Expand Down
Loading

0 comments on commit 6a0d1af

Please sign in to comment.