diff --git a/dev-tools/omdb/src/bin/omdb/main.rs b/dev-tools/omdb/src/bin/omdb/main.rs index 00fd4beccd..53e21fdd92 100644 --- a/dev-tools/omdb/src/bin/omdb/main.rs +++ b/dev-tools/omdb/src/bin/omdb/main.rs @@ -33,12 +33,16 @@ //! find strange things when debugging but we need our tools to tell us as //! much as they can!) +use anyhow::anyhow; +use anyhow::ensure; use anyhow::Context; use clap::Parser; use clap::Subcommand; +use futures::StreamExt; use omicron_common::address::Ipv6Subnet; use std::net::SocketAddr; use std::net::SocketAddrV6; +use tokio::net::TcpSocket; mod crucible_agent; mod db; @@ -117,6 +121,7 @@ mod check_allow_destructive { } impl Omdb { + /// Return the socket addresses of all instances of a service in DNS async fn dns_lookup_all( &self, log: slog::Logger, @@ -129,6 +134,65 @@ impl Omdb { .with_context(|| format!("looking up {:?} in DNS", service_name)) } + /// Return the socket address of one instance of a service that we can at + /// least successfully connect to + async fn dns_lookup_one( + &self, + log: slog::Logger, + service_name: internal_dns::ServiceName, + ) -> Result { + let addrs = self.dns_lookup_all(log, service_name).await?; + ensure!( + !addrs.is_empty(), + "expected at least one address from successful DNS lookup for {:?}", + service_name + ); + + // The caller is going to pick one of these addresses to connect to. + // Let's try to pick one that's at least not obviously broken by + // attempting to connect to whatever we found and returning any that we + // successfully connected to. It'd be nice if we could return the + // socket directly, but our callers are creating reqwest clients that + // cannot easily consume a socket directly. + // + // This approach scales poorly and there are many failure modes that + // this does not cover. But in the absence of better connection + // management, and with the risks in `omdb` being pretty low, and the + // value of it working pretty high, here we are. This approach should + // not be replicated elsewhere. + async fn try_connect( + sockaddr_v6: SocketAddrV6, + ) -> Result<(), anyhow::Error> { + let _ = TcpSocket::new_v6() + .context("creating socket")? + .connect(SocketAddr::from(sockaddr_v6)) + .await + .with_context(|| format!("connect \"{}\"", sockaddr_v6))?; + Ok(()) + } + + let mut socket_stream = futures::stream::iter(addrs) + .map(|sockaddr_v6| async move { + (sockaddr_v6, try_connect(sockaddr_v6).await) + }) + .buffer_unordered(3); + + while let Some((sockaddr, connect_result)) = socket_stream.next().await + { + match connect_result { + Ok(()) => return Ok(sockaddr), + Err(error) => { + eprintln!( + "warning: failed to connect to {:?} at {}: {:#}", + service_name, sockaddr, error + ); + } + } + } + + Err(anyhow!("failed to connect to any instances of {:?}", service_name)) + } + async fn dns_resolver( &self, log: slog::Logger, diff --git a/dev-tools/omdb/src/bin/omdb/mgs.rs b/dev-tools/omdb/src/bin/omdb/mgs.rs index ece4c4f109..cef3ff2a2c 100644 --- a/dev-tools/omdb/src/bin/omdb/mgs.rs +++ b/dev-tools/omdb/src/bin/omdb/mgs.rs @@ -66,16 +66,12 @@ impl MgsArgs { eprintln!( "note: MGS URL not specified. Will pick one from DNS." ); - let addrs = omdb - .dns_lookup_all( + let addr = omdb + .dns_lookup_one( log.clone(), internal_dns::ServiceName::ManagementGatewayService, ) .await?; - let addr = addrs.into_iter().next().expect( - "expected at least one MGS address from \ - successful DNS lookup", - ); format!("http://{}", addr) } }; diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 67b91e0280..8813f910b0 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -272,16 +272,12 @@ impl NexusArgs { eprintln!( "note: Nexus URL not specified. Will pick one from DNS." ); - let addrs = omdb - .dns_lookup_all( + let addr = omdb + .dns_lookup_one( log.clone(), internal_dns::ServiceName::Nexus, ) .await?; - let addr = addrs.into_iter().next().expect( - "expected at least one Nexus address from \ - successful DNS lookup", - ); format!("http://{}", addr) } };