Skip to content

Commit

Permalink
omdb could try harder to find instances that are online (#5621)
Browse files Browse the repository at this point in the history
  • Loading branch information
davepacheco authored Apr 26, 2024
1 parent 05839a4 commit 0133faa
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 12 deletions.
64 changes: 64 additions & 0 deletions dev-tools/omdb/src/bin/omdb/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,16 @@
//! find strange things when debugging but we need our tools to tell us as
//! much as they can!)

use anyhow::anyhow;
use anyhow::ensure;
use anyhow::Context;
use clap::Parser;
use clap::Subcommand;
use futures::StreamExt;
use omicron_common::address::Ipv6Subnet;
use std::net::SocketAddr;
use std::net::SocketAddrV6;
use tokio::net::TcpSocket;

mod crucible_agent;
mod db;
Expand Down Expand Up @@ -117,6 +121,7 @@ mod check_allow_destructive {
}

impl Omdb {
/// Return the socket addresses of all instances of a service in DNS
async fn dns_lookup_all(
&self,
log: slog::Logger,
Expand All @@ -129,6 +134,65 @@ impl Omdb {
.with_context(|| format!("looking up {:?} in DNS", service_name))
}

/// Return the socket address of one instance of a service that we can at
/// least successfully connect to
async fn dns_lookup_one(
&self,
log: slog::Logger,
service_name: internal_dns::ServiceName,
) -> Result<SocketAddrV6, anyhow::Error> {
let addrs = self.dns_lookup_all(log, service_name).await?;
ensure!(
!addrs.is_empty(),
"expected at least one address from successful DNS lookup for {:?}",
service_name
);

// The caller is going to pick one of these addresses to connect to.
// Let's try to pick one that's at least not obviously broken by
// attempting to connect to whatever we found and returning any that we
// successfully connected to. It'd be nice if we could return the
// socket directly, but our callers are creating reqwest clients that
// cannot easily consume a socket directly.
//
// This approach scales poorly and there are many failure modes that
// this does not cover. But in the absence of better connection
// management, and with the risks in `omdb` being pretty low, and the
// value of it working pretty high, here we are. This approach should
// not be replicated elsewhere.
async fn try_connect(
sockaddr_v6: SocketAddrV6,
) -> Result<(), anyhow::Error> {
let _ = TcpSocket::new_v6()
.context("creating socket")?
.connect(SocketAddr::from(sockaddr_v6))
.await
.with_context(|| format!("connect \"{}\"", sockaddr_v6))?;
Ok(())
}

let mut socket_stream = futures::stream::iter(addrs)
.map(|sockaddr_v6| async move {
(sockaddr_v6, try_connect(sockaddr_v6).await)
})
.buffer_unordered(3);

while let Some((sockaddr, connect_result)) = socket_stream.next().await
{
match connect_result {
Ok(()) => return Ok(sockaddr),
Err(error) => {
eprintln!(
"warning: failed to connect to {:?} at {}: {:#}",
service_name, sockaddr, error
);
}
}
}

Err(anyhow!("failed to connect to any instances of {:?}", service_name))
}

async fn dns_resolver(
&self,
log: slog::Logger,
Expand Down
8 changes: 2 additions & 6 deletions dev-tools/omdb/src/bin/omdb/mgs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,16 +66,12 @@ impl MgsArgs {
eprintln!(
"note: MGS URL not specified. Will pick one from DNS."
);
let addrs = omdb
.dns_lookup_all(
let addr = omdb
.dns_lookup_one(
log.clone(),
internal_dns::ServiceName::ManagementGatewayService,
)
.await?;
let addr = addrs.into_iter().next().expect(
"expected at least one MGS address from \
successful DNS lookup",
);
format!("http://{}", addr)
}
};
Expand Down
8 changes: 2 additions & 6 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,16 +272,12 @@ impl NexusArgs {
eprintln!(
"note: Nexus URL not specified. Will pick one from DNS."
);
let addrs = omdb
.dns_lookup_all(
let addr = omdb
.dns_lookup_one(
log.clone(),
internal_dns::ServiceName::Nexus,
)
.await?;
let addr = addrs.into_iter().next().expect(
"expected at least one Nexus address from \
successful DNS lookup",
);
format!("http://{}", addr)
}
};
Expand Down

0 comments on commit 0133faa

Please sign in to comment.