# validate self-hosted runner status #37 (page title as shown in the GitHub web UI)
---
# NOTE: ipatch, self-hosted runners are removed from runner registry if they don't connect to github in 14 days
# 1. check if remote machine hosting the virtual machine running the self-hosted runner is online
# 2. if remote box is online, check to see if the vm service, i.e. vmmojave, is up and running.
# a. status the systemd service for the vm
# b. use `nc` to verify the online status of the vm using its static ip (ping is NOT an option)
# c. if the vm for the self-hosted runner is online, verify online status of the self-hosted runner service/process
# 3. if one of our checks fails we need to send an email followed by an action / step to try
# and bring the service online, and then move to the next step.
# 4. set up a specific group on the remote box and virtual machine, allowing an isolated $USER to run specific tasks for statusing and controlling runner-related services.
# the way of thinking of this is to,
# 1. first check if the runner is online by querying the github api endpoint
# a1. if the api returns a successful online response, take note
# b1. if runner is offline, run through the below steps/checks
# Workflow: asks the GitHub API for the status of each self-hosted runner.
# For a runner that is not "online" it probes the machine hosting the VMs,
# then the VM itself (over SSH from that host), restarts the VM's systemd
# service when the VM is unreachable, and e-mails the maintainer if a
# component is still down afterwards.
name: validate self-hosted runner status
on:
  # NOTE: run this action on request from github web UI
  workflow_dispatch:
  schedule:
    # min hour day-of-month month day-of-week, see crontab.guru
    # `*/12` in the day-of-month field fires on days 1, 13 and 25 of each
    # month, so the gap between runs never exceeds 12 days.
    - cron: '1 4 */12 * *'
jobs:
  validate-self-hosted-runners:
    # a fast booting github-hosted runner
    runs-on: ubuntu-latest
    env:
      # Static IPs of the runner VMs. Each key MUST equal the runner name:
      # the steps below resolve the IP with bash indirect expansion
      # ("${!runner}").
      vmmojave: '192.168.1.114'
      vmcatalina: '192.168.1.115'
      # NOTE(review): was 191.168.1.116, which looks like a typo next to the
      # two 192.168.1.x siblings - confirm the real address.
      vmbigsur: '192.168.1.116'
      # NOTE(review): the real address was redacted by e-mail obfuscation on
      # the page this file was copied from - restore it before use. Quoted
      # because an unquoted value starting with `[` is a YAML flow sequence.
      maintainer_email: '[email protected]'
      # REST endpoint listing the repository's self-hosted runners.
      RUNNERS_API_URL: https://api.github.com/repos/freecad/homebrew-freecad/actions/runners
    steps:
      # Writes the private key and an ssh_config alias (`shrunserver`) so the
      # later steps can simply run `ssh shrunserver <command>`.
      - name: Configure SSH for github hosted runner
        id: configure_ssh
        env:
          SSH_USER: ${{ secrets.ARCHBOX_SERVER_USER }}
          SSH_KEY: ${{ secrets.ACTIONS_SSH_PRIVATE_KEY }}
          SSH_HOST: ${{ secrets.ARCHBOX_SERVER_IP }}
        run: |
          # Restrictive umask first, so the key file is never readable by
          # others, not even between creation and chmod.
          umask 077
          mkdir -p ~/.ssh/
          printf '%s\n' "$SSH_KEY" > ~/.ssh/shrunserver.key
          chmod 600 ~/.ssh/shrunserver.key
          # NOTE(review): `StrictHostKeyChecking no` skips host-key
          # verification; consider pinning the host key via known_hosts.
          cat >>~/.ssh/config <<END
          Host shrunserver
            HostName $SSH_HOST
            User $SSH_USER
            IdentityFile ~/.ssh/shrunserver.key
            StrictHostKeyChecking no
          END

      # Prints the configured VM IPs (job-level env) for troubleshooting.
      - name: Debug runner IPs
        id: debug_runner_ips
        run: |
          for runner in vmmojave vmcatalina vmbigsur; do
            # ${!runner} reads the env var whose name is stored in $runner
            echo "$runner: ${!runner}"
          done

      # Read-only report: prints each runner's IP and the status GitHub
      # reports for it.
      # NOTE: HOMEBREW_GITHUB_API_TOKEN needed in repo secrets, use web UI to add token
      - name: test_runner_status
        id: test_runner_status
        env:
          # Passed through env rather than interpolated into the script text.
          GH_API_TOKEN: ${{ secrets.HOMEBREW_GITHUB_API_TOKEN }}
        run: |
          # One API call for all runners; -f makes HTTP errors fail the step
          # instead of being parsed as a runner list.
          runners_json=$(curl -fsS -H "Authorization: Bearer $GH_API_TOKEN" "$RUNNERS_API_URL")
          for runner in vmmojave vmcatalina vmbigsur; do
            # Access the IP address using indirect expansion
            runner_ip="${!runner}"
            echo "$runner_ip"
            # Collect matches into an array so that "no such runner" yields
            # null and the `//` fallback really prints "not found".
            status_selfhosted_runner=$(jq -r --arg name "$runner" \
              '[.runners[] | select(.name == $name) | .status][0] // "not found"' \
              <<<"$runners_json")
            echo "$runner is $status_selfhosted_runner"
          done

      # For every runner GitHub does not report as "online":
      #   1. probe the host machine on port 22 from this runner
      #   2. probe the VM on port 22 from the host machine (via ssh)
      #   3. if the VM is unreachable, restart its systemd service (named
      #      after the runner), wait, and probe again
      # Results are exported as <runner>_status through $GITHUB_ENV so the
      # following steps can read them ("online" or "down").
      - name: check self-hosted runners status for freecad/homebrew-freecad
        id: status_runners
        env:
          GH_API_TOKEN: ${{ secrets.HOMEBREW_GITHUB_API_TOKEN }}
          ARCHBOX_SERVER_IP: ${{ secrets.ARCHBOX_SERVER_IP }}
        run: |
          runners_json=$(curl -fsS -H "Authorization: Bearer $GH_API_TOKEN" "$RUNNERS_API_URL")
          for runner in vmmojave vmcatalina vmbigsur; do
            # Access the IP address using indirect expansion
            runner_ip="${!runner}"
            status_selfhosted_runner=$(jq -r --arg name "$runner" \
              '[.runners[] | select(.name == $name) | .status][0] // "not found"' \
              <<<"$runners_json")
            echo "$status_selfhosted_runner"

            if [[ "$status_selfhosted_runner" == "online" ]]; then
              echo "$runner is online ✅"
              echo "${runner}_status=online" >> "$GITHUB_ENV"
              continue
            fi
            echo "the github runner service for $runner is OFFLINE 🚫"

            # Check reachability of host machine
            if ! nc -v -z -w 5 "$ARCHBOX_SERVER_IP" 22; then
              echo "can not connect to host machine running $runner virtual machine"
              # An unreachable host is a failed check too: flag it so the
              # alert e-mail step fires.
              echo "${runner}_status=down" >> "$GITHUB_ENV"
              continue
            fi
            echo "Machine hosting the vm services for $runner is online ✅"

            # NOTE: ipatch, run command on remote computer via ssh from github hosted runner
            ssh shrunserver "date"

            # $runner_ip / $runner are expanded HERE, before ssh sends the
            # command: those variables do not exist in the remote shell.
            if ssh shrunserver "nc -v -z -w 5 ${runner_ip} 22 < /dev/null"; then
              echo "virtual machine $runner appears to be online"
              continue
            fi
            echo "could not reach the virtual machine $runner"

            # NOTE: the vm is unreachable, attempt to restart its systemd service
            ssh shrunserver "/usr/bin/sudo systemctl restart ${runner}" || echo "failed to restart service"
            # give the vm time to boot before probing again
            sleep 45
            if ssh shrunserver "nc -v -z -w 5 ${runner_ip} 22 < /dev/null"; then
              echo "$runner appears to have come back online ✅"
            else
              echo "$runner is still NOT online 🚫"
              echo "${runner}_status=down" >> "$GITHUB_ENV"
            fi
          done

      # Echoes the <runner>_status values exported by the previous step
      # (empty when that step recorded nothing for a runner).
      - name: Print vm_status variable
        id: print_vm_status
        run: |
          for runner in vmmojave vmcatalina vmbigsur; do
            status_var="${runner}_status"
            echo "$runner status is ${!status_var}"
          done

      # Alerts the maintainer when any runner was flagged "down".
      - name: Send email on failure to reach runner service
        id: send_alert_email
        if: >-
          ${{
          env.vmmojave_status == 'down' ||
          env.vmcatalina_status == 'down' ||
          env.vmbigsur_status == 'down'
          }}
        # NOTE(review): the action reference was redacted by e-mail
        # obfuscation on the page this file was copied from. The inputs below
        # match dawidd6/action-send-mail; the original pinned version is
        # unknown - confirm and pin it.
        uses: dawidd6/action-send-mail@v3
        with:
          server_address: smtp.gmail.com
          server_port: 587
          username: ${{ secrets.SMTP_USERNAME }}
          password: ${{ secrets.SMTP_PASSWORD }}
          subject: 'homebrew-freecad self-hosted runner is offline 🚫'
          from: ${{ secrets.SMTP_USERNAME }}
          to: ${{ env.maintainer_email }}
          body: >
            One of the components related to the homebrew-freecad self-hosted runners is offline,
            and can not be reached. Please follow up before runner expires from github registry.