Skip to content

Commit

Permalink
feat(node): setupOS drive health check (#3737)
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewbattat authored Feb 26, 2025
1 parent 148f4a6 commit 6e64281
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 8 deletions.
31 changes: 31 additions & 0 deletions ic-os/components/setupos-scripts/check-hardware.sh
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,36 @@ function verify_disks() {
fi
}

###############################################################################
# Drive Health Verification
###############################################################################

# Runs a SMART overall-health check (`smartctl -H`) on every drive reported by
# get_large_drives and prints a per-drive verdict.
#
# Globals:   none
# Arguments: none
# Outputs:   progress lines and red (ANSI) warnings on stdout
# Returns:   0 in all cases; an unhealthy or uncheckable drive does not abort,
#            it only delays the caller by 5 minutes so the warning can be read.
function verify_drive_health() {
  echo "* Verifying drive health..."

  # Word-splitting is intentional here: get_large_drives is presumably printing
  # whitespace-separated device names (without the /dev/ prefix) — TODO confirm
  # against its definition.
  # shellcheck disable=SC2207
  local drives=($(get_large_drives))
  local warning_triggered=0
  local drive smartctl_output smartctl_status

  for drive in "${drives[@]}"; do
    echo "* Checking drive /dev/${drive} health..."

    # Capture the exit status explicitly: smartctl returns a bitmask, not a
    # plain success/failure flag.
    smartctl_status=0
    smartctl_output=$(smartctl -H "/dev/${drive}" 2>&1) || smartctl_status=$?

    if ((smartctl_status & 8)); then
      # Bit 3: the SMART status check itself returned "DISK FAILING". smartctl
      # ran fine; it is the drive that is unhealthy.
      printf '\033[1;31m%s\033[0m\n' "WARNING: Drive /dev/${drive} did not pass the SMART health check."
      warning_triggered=1
    elif ((smartctl_status != 0)); then
      # Any other non-zero status (bad command line, device open failure,
      # failed SMART command, ...) means no trustworthy verdict was obtained.
      printf '\033[1;31m%s\033[0m\n' "WARNING: Failed to run smartctl on /dev/${drive}."
      warning_triggered=1
    elif ! grep -qiE 'PASSED|SMART Health Status: OK' <<<"${smartctl_output}"; then
      # ATA/NVMe devices report "... test result: PASSED"; SCSI/SAS devices
      # report "SMART Health Status: OK". Anything else is treated as a failure.
      printf '\033[1;31m%s\033[0m\n' "WARNING: Drive /dev/${drive} did not pass the SMART health check."
      warning_triggered=1
    else
      echo "Drive /dev/${drive} health is OK."
    fi
  done

  if [ "${warning_triggered}" -eq 1 ]; then
    # Give the operator time to notice the warning before installation goes on.
    echo "Pausing for 5 minutes before continuing installation..."
    sleep 300
  fi
}

###############################################################################
# Deployment Path Verification
###############################################################################
Expand Down Expand Up @@ -279,6 +309,7 @@ main() {
verify_cpu
verify_memory
verify_disks
verify_drive_health
verify_deployment_path
else
echo "* Hardware checks skipped by request via kernel command line"
Expand Down
22 changes: 14 additions & 8 deletions ic-os/components/setupos-scripts/setup-disk.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,32 @@ source /opt/ic/bin/functions.sh
function purge_partitions() {
echo "* Purging partitions..."

# Destroy guest partitions
# Destroy guest partitions (for redeployments)
vgscan --mknodes
loop_device=$(losetup -P -f /dev/mapper/hostlvm-guestos --show)

if [ "${loop_device}" != "" ]; then
echo "Loop device detected: ${loop_device}. Wiping partitions."
wipefs --all --force "${loop_device}"*
if [ "${?}" -ne 0 ]; then
echo "Unable to purge GuestOS partitions"
echo "WARNING: Unable to purge GuestOS partitions on ${loop_device}"
fi
losetup -d "${loop_device}"
else
echo "Unable to detect GuestOS loop device (may not exist)"
fi

# Destroy host partitions
# Destroy host partitions (for redeployments)
wipefs --all --force "/dev/mapper/hostlvm"*
if [ "${?}" -ne 0 ]; then
echo "Unable to purge HostOS partitions"
echo "Unable to purge HostOS partitions (may not exist)"
fi
vgremove --force hostlvm

# Destroy master boot record and partition table
large_drives=($(get_large_drives))
for drive in "${large_drives[@]}"; do
echo "Wiping partitions on drive: /dev/${drive}."

for drive in $(echo ${large_drives[@]}); do
wipefs --all --force "/dev/${drive}"*
if [ "${?}" -ne 0 ]; then
echo "Unable to purge partitions on drive: /dev/${drive}"
Expand All @@ -42,11 +45,12 @@ function purge_partitions() {
}

function setup_storage() {
system_drive=$(find_first_drive)
echo "Starting storage setup..."

system_drive=$(find_first_drive)
# Create PVs on each additional drive
large_drives=($(get_large_drives))
for drive in $(echo ${large_drives[@]}); do
for drive in "${large_drives[@]}"; do
# Avoid creating PV on system drive
if [ "/dev/${drive}" == "/dev/${system_drive}" ]; then
continue
Expand All @@ -55,8 +59,10 @@ function setup_storage() {
test -b "/dev/${drive}"
log_and_halt_installation_on_error "${?}" "Drive '/dev/${drive}' not found. Are all drives correctly installed?"

echo "Creating physical volume on /dev/${drive}."
pvcreate "/dev/${drive}"
log_and_halt_installation_on_error "${?}" "Unable to setup PV on drive '/dev/${drive}'."
echo "Physical volume created on /dev/${drive}."
done
}

Expand Down
1 change: 1 addition & 0 deletions ic-os/setupos/context/packages.common
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ lvm2
net-tools
parted
python-is-python3
smartmontools
sudo
udev
usbutils
Expand Down

0 comments on commit 6e64281

Please sign in to comment.