diff --git a/src/action/macos/create_apfs_volume.rs b/src/action/macos/create_apfs_volume.rs index ea1c901c6..8fc6f5555 100644 --- a/src/action/macos/create_apfs_volume.rs +++ b/src/action/macos/create_apfs_volume.rs @@ -1,9 +1,10 @@ use std::path::{Path, PathBuf}; +use std::time::Duration; use tokio::process::Command; use tracing::{span, Span}; -use crate::action::{ActionError, ActionTag, StatefulAction}; +use crate::action::{ActionError, ActionErrorKind, ActionTag, StatefulAction}; use crate::execute_command; use crate::action::{Action, ActionDescription}; @@ -145,14 +146,42 @@ impl Action for CreateApfsVolume { tracing::debug!("Volume was already unmounted, can skip unmounting") } - execute_command( - Command::new("/usr/sbin/diskutil") - .process_group(0) - .args(["apfs", "deleteVolume", &self.name]) - .stdin(std::process::Stdio::null()), - ) - .await - .map_err(Self::error)?; + // NOTE(cole-h): We believe that, because we're running the unmount force -> deleteVolume + // commands in an automated fashion, there's a race condition where we're running them too + // close to each other, so the OS doesn't notice the volume has been unmounted / hasn't + // completed its "unmount the volume" tasks by the time we try to delete it. If that is the + // case (unfortunately, we have been unable to reproduce this issue on the machines we have + // access to!), then trying to delete the volume 10 times -- with 500ms of time between + // attempts -- should alleviate this. + // https://github.com/DeterminateSystems/nix-installer/issues/1303 + // https://github.com/DeterminateSystems/nix-installer/issues/1267 + // https://github.com/DeterminateSystems/nix-installer/issues/1085 + let mut retry_tokens: usize = 10; + loop { + let mut command = Command::new("/usr/sbin/diskutil"); + command.process_group(0); + command.args(["apfs", "deleteVolume", &self.name]); + command.stdin(std::process::Stdio::null()); + tracing::trace!(%retry_tokens, command = ?command.as_std(), "Waiting for volume deletion to succeed"); + + let output = command + .output() + .await + .map_err(|e| ActionErrorKind::command(&command, e)) + .map_err(Self::error)?; + + if output.status.success() { + break; + } else if retry_tokens == 0 { + return Err(Self::error(ActionErrorKind::command_output( + &command, output, + )))?; + } else { + retry_tokens = retry_tokens.saturating_sub(1); + } + + tokio::time::sleep(Duration::from_millis(500)).await; + } Ok(()) }