Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix race between sled-agent and zone-setup service #6152

Merged
merged 2 commits into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions illumos-utils/src/addrobj.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,17 @@
//! API for operating on addrobj objects.

/// The name provided to all link-local IPv6 addresses.
pub const IPV6_LINK_LOCAL_NAME: &str = "ll";
pub const IPV6_LINK_LOCAL_ADDROBJ_NAME: &str = "ll";

/// The name provided to all static IPv6 underlay addresses.
pub const IPV6_STATIC_ADDROBJ_NAME: &str = "omicron6";

/// The name provided to all static IPv4 addresses, usually for public OPTE
/// interfaces.
pub const IPV4_STATIC_ADDROBJ_NAME: &str = "omicron6";
bnaecker marked this conversation as resolved.
Show resolved Hide resolved

/// The name provided to DHCP-configured addresses, of either family.
pub const DHCP_ADDROBJ_NAME: &str = "omicron";
bnaecker marked this conversation as resolved.
Show resolved Hide resolved

/// Describes an "addrobj", which is the combination of an interface
/// with an associated name.
Expand Down Expand Up @@ -59,7 +69,7 @@ impl AddrObject {
/// Create a new addrobj on the same interface with the IPv6 link-local
/// name.
pub fn link_local_on_same_interface(&self) -> Result<Self, ParseError> {
self.on_same_interface(IPV6_LINK_LOCAL_NAME)
self.on_same_interface(IPV6_LINK_LOCAL_ADDROBJ_NAME)
}

pub fn new(interface: &str, name: &str) -> Result<Self, ParseError> {
Expand All @@ -76,7 +86,7 @@ impl AddrObject {

/// A link-local IPv6 addrobj over the provided interface.
pub fn link_local(interface: &str) -> Result<Self, ParseError> {
Self::new(interface, IPV6_LINK_LOCAL_NAME)
Self::new(interface, IPV6_LINK_LOCAL_ADDROBJ_NAME)
}

pub fn interface(&self) -> &str {
Expand Down
50 changes: 31 additions & 19 deletions illumos-utils/src/ipadm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,30 +4,37 @@

//! Utilities for managing IP interfaces.

use crate::addrobj::{IPV6_LINK_LOCAL_ADDROBJ_NAME, IPV6_STATIC_ADDROBJ_NAME};
use crate::zone::IPADM;
use crate::{execute, ExecutionError, PFEXEC};
use std::net::Ipv6Addr;

/// Wraps commands for interacting with interfaces.
pub struct Ipadm {}

/// Expected error message contents when showing an addrobj that doesn't exist.
const ADDROBJ_NOT_FOUND_ERR: &str = "Address object not found";

/// Expected error message when an interface already exists.
const INTERFACE_ALREADY_EXISTS: &str = "Interface already exists";

#[cfg_attr(any(test, feature = "testing"), mockall::automock)]
impl Ipadm {
// Remove current IP interface and create a new temporary one.
pub fn set_temp_interface_for_datalink(
/// Ensure that an IP interface exists on the provided datalink.
pub fn ensure_ip_interface_exists(
datalink: &str,
) -> Result<(), ExecutionError> {
let mut cmd = std::process::Command::new(PFEXEC);
let cmd = cmd.args(&[IPADM, "delete-if", datalink]);
// First we remove IP interface if it already exists. If it doesn't
// exist and the command returns an error we continue anyway as
// the next step is to create it.
let _ = execute(cmd);

let mut cmd = std::process::Command::new(PFEXEC);
let cmd = cmd.args(&[IPADM, "create-if", "-t", datalink]);
execute(cmd)?;
Ok(())
match execute(cmd) {
Ok(_) => Ok(()),
Err(ExecutionError::CommandFailure(info))
if info.stderr.contains(INTERFACE_ALREADY_EXISTS) =>
{
Ok(())
}
Err(e) => Err(e),
}
}

// Set MTU to 9000 on both IPv4 and IPv6
Expand Down Expand Up @@ -65,11 +72,13 @@ impl Ipadm {
listen_addr: &Ipv6Addr,
) -> Result<(), ExecutionError> {
// Create auto-configured address on the IP interface if it doesn't already exist
let addrobj = format!("{}/ll", datalink);
let addrobj = format!("{}/{}", datalink, IPV6_LINK_LOCAL_ADDROBJ_NAME);
let mut cmd = std::process::Command::new(PFEXEC);
let cmd = cmd.args(&[IPADM, "show-addr", &addrobj]);
match execute(cmd) {
Err(_) => {
Err(ExecutionError::CommandFailure(info))
if info.stderr.contains(ADDROBJ_NOT_FOUND_ERR) =>
{
let mut cmd = std::process::Command::new(PFEXEC);
let cmd = cmd.args(&[
IPADM,
Expand All @@ -81,15 +90,18 @@ impl Ipadm {
]);
execute(cmd)?;
}
Err(other) => return Err(other),
Ok(_) => (),
};

// Create static address on the IP interface if it doesn't already exist
let addrobj = format!("{}/omicron6", datalink);
let addrobj = format!("{}/{}", datalink, IPV6_STATIC_ADDROBJ_NAME);
let mut cmd = std::process::Command::new(PFEXEC);
let cmd = cmd.args(&[IPADM, "show-addr", &addrobj]);
match execute(cmd) {
Err(_) => {
Err(ExecutionError::CommandFailure(info))
if info.stderr.contains(ADDROBJ_NOT_FOUND_ERR) =>
{
let mut cmd = std::process::Command::new(PFEXEC);
let cmd = cmd.args(&[
IPADM,
Expand All @@ -101,11 +113,11 @@ impl Ipadm {
&listen_addr.to_string(),
&addrobj,
]);
execute(cmd)?;
execute(cmd).map(|_| ())
}
Ok(_) => (),
};
Ok(())
Err(other) => Err(other),
Ok(_) => Ok(()),
}
}

// Create gateway on the IP interface if it doesn't already exist
Expand Down
17 changes: 12 additions & 5 deletions illumos-utils/src/running_zone.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@

//! Utilities to manage running zones.

use crate::addrobj::AddrObject;
use crate::addrobj::{
AddrObject, DHCP_ADDROBJ_NAME, IPV4_STATIC_ADDROBJ_NAME,
IPV6_STATIC_ADDROBJ_NAME,
};
use crate::dladm::Etherstub;
use crate::link::{Link, VnicAllocator};
use crate::opte::{Port, PortTicket};
Expand Down Expand Up @@ -360,7 +363,11 @@ impl RunningZone {
}

pub fn control_interface(&self) -> AddrObject {
AddrObject::new(self.inner.get_control_vnic_name(), "omicron6").unwrap()
AddrObject::new(
self.inner.get_control_vnic_name(),
IPV6_STATIC_ADDROBJ_NAME,
)
.unwrap()
}

/// Runs a command within the Zone, return the output.
Expand Down Expand Up @@ -547,10 +554,10 @@ impl RunningZone {
addrtype: AddressRequest,
) -> Result<IpNetwork, EnsureAddressError> {
let name = match addrtype {
AddressRequest::Dhcp => "omicron",
AddressRequest::Dhcp => DHCP_ADDROBJ_NAME,
AddressRequest::Static(net) => match net.ip() {
std::net::IpAddr::V4(_) => "omicron4",
std::net::IpAddr::V6(_) => "omicron6",
std::net::IpAddr::V4(_) => IPV4_STATIC_ADDROBJ_NAME,
std::net::IpAddr::V6(_) => IPV6_STATIC_ADDROBJ_NAME,
},
};
self.ensure_address_with_name(addrtype, name).await
Expand Down
8 changes: 4 additions & 4 deletions sled-agent/src/services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ use camino::{Utf8Path, Utf8PathBuf};
use dpd_client::{types as DpdTypes, Client as DpdClient, Error as DpdError};
use dropshot::HandlerTaskMode;
use illumos_utils::addrobj::AddrObject;
use illumos_utils::addrobj::IPV6_LINK_LOCAL_NAME;
use illumos_utils::addrobj::IPV6_LINK_LOCAL_ADDROBJ_NAME;
use illumos_utils::dladm::{
Dladm, Etherstub, EtherstubVnic, GetSimnetError, PhysicalLink,
};
Expand Down Expand Up @@ -2879,7 +2879,7 @@ impl ServiceManager {
// cabled together.
AddrObject::new(
&format!("tfportrear{}_0", i),
IPV6_LINK_LOCAL_NAME,
IPV6_LINK_LOCAL_ADDROBJ_NAME,
)
.unwrap()
})
Expand All @@ -2891,7 +2891,7 @@ impl ServiceManager {
.map(|i| {
AddrObject::new(
&i.to_string(),
IPV6_LINK_LOCAL_NAME,
IPV6_LINK_LOCAL_ADDROBJ_NAME,
)
.unwrap()
})
Expand Down Expand Up @@ -3648,7 +3648,7 @@ impl ServiceManager {
}
}
Err(e) => {
info!(self.inner.log, "chronyc command failed: {}", e);
error!(self.inner.log, "chronyc command failed: {}", e);
Err(Error::NtpZoneNotReady)
}
}
Expand Down
17 changes: 11 additions & 6 deletions smf/zone-network-setup/manifest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,26 @@
<service name='oxide/zone-network-setup' type='service' version='1'>
<create_default_instance enabled='true' />

<!-- Run after the operating system's svc:/network/physical service is done. -->
<!-- Run after the zone's networking stack is up. -->
<dependency name='physical' grouping='require_all' restart_on='none'
bnaecker marked this conversation as resolved.
Show resolved Hide resolved
type='service'>
<service_fmri value='svc:/network/physical:default' />
<service_fmri value='svc:/milestone/network:default' />
</dependency>

<dependency name='multi_user' grouping='require_all' restart_on='none'
type='service'>
<service_fmri value='svc:/milestone/multi-user:default' />
<!-- The zone-setup binary is not ready to run until its initial properties
have been set by the sled-agent, which happens after the
`manifest-import` service is running.
-->
<dependency name='zone-network-setup-manifest-import' type='service' grouping='require_all' restart_on='none'>
bnaecker marked this conversation as resolved.
Show resolved Hide resolved
<service_fmri value='svc:/system/manifest-import:default' />
</dependency>

<exec_method type='method' name='start'
exec='/opt/oxide/zone-setup-cli/bin/zone-setup common-networking -d %{config/datalink} -s %{config/static_addr} -g %{config/gateway}'
timeout_seconds='0' />


<exec_method type='method' name='stop' exec=':true' timeout_seconds='0' />

<property_group name='startd' type='framework'>
<propval name='duration' type='astring' value='transient' />
</property_group>
Expand Down
24 changes: 13 additions & 11 deletions zone-setup/src/bin/zone-setup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

use anyhow::anyhow;
use clap::{arg, command, value_parser, Arg, ArgMatches, Command};
use illumos_utils::addrobj::{AddrObject, IPV6_LINK_LOCAL_NAME};
use illumos_utils::addrobj::{AddrObject, IPV6_LINK_LOCAL_ADDROBJ_NAME};
use illumos_utils::ipadm::Ipadm;
use illumos_utils::route::{Gateway, Route};
use illumos_utils::svcadm::Svcadm;
Expand Down Expand Up @@ -232,7 +232,7 @@ async fn do_run() -> Result<(), CmdError> {
)
.subcommand(
Command::new(CHRONY_SETUP_CMD)
.about("Sets up Chrony configuration for NTP zone")
.about("Sets up Chrony configuration for NTP zone")
bnaecker marked this conversation as resolved.
Show resolved Hide resolved
.arg(
arg!(-f --file <String> "Chrony configuration file")
.default_value(CHRONY_CONFIG_FILE)
Expand Down Expand Up @@ -328,7 +328,7 @@ async fn switch_zone_setup(
for link in &links {
Zones::ensure_has_link_local_v6_address(
None,
&AddrObject::new(link, IPV6_LINK_LOCAL_NAME).unwrap(),
&AddrObject::new(link, IPV6_LINK_LOCAL_ADDROBJ_NAME).unwrap(),
)
.map_err(|err| {
CmdError::Failure(anyhow!(
Expand Down Expand Up @@ -635,7 +635,7 @@ maxslewrate 2708.333
})?;

if old_file.clone().is_some_and(|f| f != new_config) {
info!(&log, "Chrony configuration file has changed";
info!(&log, "Chrony configuration file has changed";
"old configuration file" => ?old_file, "new configuration file" => ?new_config,);
}

Expand Down Expand Up @@ -663,13 +663,15 @@ async fn common_nw_set_up(
))
})?;

// TODO: remove when https://github.com/oxidecomputer/stlouis/issues/435 is
// addressed
info!(&log, "Ensuring a temporary IP interface is created"; "data link" => ?datalink);
Ipadm::set_temp_interface_for_datalink(&datalink)
info!(
&log,
"Ensuring IP interface exists on datalink";
"datalink" => datalink
);
Ipadm::ensure_ip_interface_exists(datalink)
.map_err(|err| CmdError::Failure(anyhow!(err)))?;

info!(&log, "Setting MTU to 9000 for IPv6 and IPv4"; "data link" => ?datalink);
info!(&log, "Setting MTU to 9000 for IPv6 and IPv4"; "datalink" => ?datalink);
Ipadm::set_interface_mtu(&datalink)
.map_err(|err| CmdError::Failure(anyhow!(err)))?;

Expand Down Expand Up @@ -705,11 +707,11 @@ async fn common_nw_set_up(
if gw.is_empty() {
info!(&log, "Underlay is not available yet. Not ensuring there is a default route");
} else {
// We can safely retrieve the first address only as the CLI only accepts a single item.
// We can safely retrieve the first address only as the CLI only accepts a single item.
let gw = gw.first().unwrap();

// Ensuring default route with gateway must happen after peer agents have been initialized.
// Omicron zones will be able ensure a default route with gateway immediately, but the
// Omicron zones will be able ensure a default route with gateway immediately, but the
// switch zone on the secondary scrimlet might need a few tries while it waits.
retry_notify(
retry_policy_local(),
Expand Down
Loading