Skip to content

Commit

Permalink
monitor
Browse files Browse the repository at this point in the history
  • Loading branch information
fewensa committed Jul 26, 2024
1 parent 394b05d commit bd41021
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 29 deletions.
55 changes: 55 additions & 0 deletions .github/workflows/check-ansible.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Pull-request workflow: optionally applies the essentials playbook, then runs
# every playbook listed in the matrix in Ansible check mode.
name: Check

# Triggered by pull requests whose target branch is main.
on:
pull_request:
branches: [main]

# Workflow-level environment: exposes the Slack webhook secret to the jobs below.
env:
SLACK_INCOMING_WEBHOOK_URL: ${{ secrets.SLACK_INCOMING_WEBHOOK_URL }}

jobs:
# Job 1: apply playbooks/essentials.yml, but only when essentials-related files changed.
deploy-essentials:
name: Deploy essentials
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
with:
# 0 fetches the full history instead of a shallow clone.
fetch-depth: 0

- name: Verify essentials file changed
# NOTE(review): the action reference below looks mangled by e-mail
# obfuscation in this capture ("[email protected]"); restore the real
# tj-actions action name and version from the repository before use.
uses: tj-actions/[email protected]
id: changed_files
with:
files: |
ansible/inventories/hosts.ini
ansible/playbooks/_essentials/*
ansible/playbooks/essentials.yml
- name: Deploy essentials
id: deploy-essentials
# Skipped unless the changed_files step reported a change in the paths above.
if: steps.changed_files.outputs.any_changed == 'true'
uses: dawidd6/action-ansible-playbook@v2
with:
directory: ansible
playbook: playbooks/essentials.yml
key: "${{ secrets.SSH_PRIVATE_KEY }}"
# NOTE(review): unlike the check-playbooks job, no --check is passed here,
# so this step applies changes from a pull request — confirm this is intended.
options: --user ansible

# Job 2: dry-run each playbook in the matrix once deploy-essentials has finished.
check-playbooks:
name: Check playbook
runs-on: ubuntu-latest
needs: [deploy-essentials]
strategy:
matrix:
# Each entry maps to playbooks/<entry>/playbook.yml in the "Run playbook" step.
playbook:
- monitor
steps:
- uses: actions/checkout@v2

- name: Run playbook
uses: dawidd6/action-ansible-playbook@v2
with:
directory: ansible
playbook: playbooks/${{ matrix.playbook }}/playbook.yml
key: "${{ secrets.SSH_PRIVATE_KEY }}"
# --check with --diff reports what would change without applying it.
options: --user ansible --verbose --diff --check
15 changes: 15 additions & 0 deletions ansible/roles/monitor/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@

# Default variables for the monitor role; the crawl.sh template reads them as monitor.<key>.
monitor:
# Directory the role creates and renders crawl.sh into.
workdir: /tmp/monitor
# Slack incoming-webhook URL the script posts alerts to; empty by default.
notify_slack_webhook: ''
# Value sent as the "channel" field of the Slack payload.
notify_slack_channel: 'darwinia-alert-notification'
# Name shown in alert titles; when empty the script falls back to the output of hostname.
server_name: ''
# Device-name prefixes matched against the first column of df output.
check_disks:
- /dev/sdb
# Alert thresholds, compared with ">" against the measured usage values.
# Exceeding a *_p2 value adds that metric to the alert; exceeding a *_p1 value
# sets the alert priority to P1.
# NOTE(review): "thread" in these names presumably means "threshold" — confirm before renaming.
alert_thread_cpu_p2: 90
alert_thread_cpu_p1: 98
alert_thread_ram_p2: 90
alert_thread_ram_p1: 98
alert_thread_disk_p2: 90
alert_thread_disk_p1: 98

15 changes: 15 additions & 0 deletions ansible/roles/monitor/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@

# Tasks for the monitor role: create the work directory, render crawl.sh into it, run it.

# Ensures the configured work directory exists.
- name: Creates workdir
file:
path: "{{ monitor.workdir }}"
state: directory

# Renders the crawl.sh template into the work directory.
# Mode 0644 leaves the file non-executable.
- name: Generate scripts file
template:
src: crawl.sh
dest: "{{ monitor.workdir }}/crawl.sh"
mode: "0644"

# Invokes the rendered script through bash explicitly, so no execute bit is required.
- name: Run snapshot
command: bash {{ monitor.workdir }}/crawl.sh

64 changes: 35 additions & 29 deletions ansible/roles/monitor/templates/crawl.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
#!/bin/bash
#
# crawl.sh - host resource snapshot with Slack alerting.
#
# This file is a Jinja2 template rendered by the monitor role. Every value in
# this header comes from the role's "monitor" variable at render time; string
# values go through the "quote" filter so the rendered file stays valid shell
# even when a value contains quotes, spaces or other shell metacharacters.

# Device-name prefixes matched against the first column of df output.
# Each entry is quoted separately so it stays a single array element.
DISKS_TO_MONITOR=({{ monitor.check_disks | map('quote') | join(' ') }})

# Name shown in alert titles; when empty the script falls back to $(hostname).
SERVER_NAME={{ monitor.server_name | quote }}
# Slack incoming-webhook URL and the channel named in the payload.
NOTIFY_SLACK_WEBHOOK={{ monitor.notify_slack_webhook | quote }}
NOTIFY_SLACK_CHANNEL={{ monitor.notify_slack_channel | quote }}

# Alert thresholds, compared with ">" against the measured usage.
# Above a *_P2 value the metric is included in the alert; above a *_P1 value
# the alert priority becomes P1.
ALERT_THREAD_CPU_P2={{ monitor.alert_thread_cpu_p2 }}
ALERT_THREAD_CPU_P1={{ monitor.alert_thread_cpu_p1 }}
ALERT_THREAD_RAM_P2={{ monitor.alert_thread_ram_p2 }}
ALERT_THREAD_RAM_P1={{ monitor.alert_thread_ram_p1 }}
ALERT_THREAD_DISK_P2={{ monitor.alert_thread_disk_p2 }}
ALERT_THREAD_DISK_P1={{ monitor.alert_thread_disk_p1 }}

timestamp() {
date +"%Y-%m-%d %H:%M:%S"
Expand All @@ -20,8 +27,8 @@ memory_usage() {

#######################################
# Report the used-space percentage of every monitored disk.
# Globals:   DISKS_TO_MONITOR (read) - array of device-name prefixes
# Arguments: none
# Outputs:   one "<disk> <percent>" line per entry, percent without the "%";
#            the percent part is empty when no filesystem matches
#######################################
disk_usage() {
  local disk usage
  for disk in "${DISKS_TO_MONITOR[@]}"; do
    # -P keeps each filesystem on a single line, so column 5 is always the
    # capacity column. index() == 1 is a literal prefix match on the device
    # name, so characters in the configured path are never read as a regex.
    usage=$(df -P | awk -v dev="$disk" 'index($1, dev) == 1 { sub(/%/, "", $5); print $5 }')
    echo "$disk $usage"
  done
}

Expand All @@ -36,23 +43,23 @@ generate_alert_message() {
local alert_message="[]"
local priority='P2'

if (( $(echo "$cpu > 95" | bc -l) )); then
if (( $(echo "$cpu > $ALERT_THREAD_CPU_P1" | bc -l) )); then
priority='P1'
fi
if (( $(echo "$ram > 95" | bc -l) )); then
if (( $(echo "$ram > $ALERT_THREAD_RAM_P1" | bc -l) )); then
priority='P1'
fi
if [[ "P1" == "$priority" ]]; then
priority_alert=$(jq -n --arg priority "${priority}" '[{"type":"mrkdwn","text":"*Priority*"},{"type":"plain_text","text":$priority}]')
alert_message=$(echo "$alert_message" | jq --argjson priority_alert "$priority_alert" '. += $priority_alert')
fi

if (( $(echo "$cpu > 80" | bc -l) )); then
if (( $(echo "$cpu > $ALERT_THREAD_CPU_P2" | bc -l) )); then
cpu_alert=$(jq -n --arg cpu "${cpu}%" '[{"type":"mrkdwn","text":"*CPU*"},{"type":"plain_text","text":$cpu}]')
alert_message=$(echo "$alert_message" | jq --argjson cpu_alert "$cpu_alert" '. += $cpu_alert')
fi

if (( $(echo "$ram > 80" | bc -l) )); then
if (( $(echo "$ram > $ALERT_THREAD_RAM_P2" | bc -l) )); then
ram_alert=$(jq -n --arg ram "${ram}%" '[{"type":"mrkdwn","text":"*RAM*"},{"type":"plain_text","text":$ram}]')
alert_message=$(echo "$alert_message" | jq --argjson ram_alert "$ram_alert" '. += $ram_alert')
fi
Expand All @@ -77,10 +84,10 @@ generate_disk_alert_message() {
continue
fi

if (( $(echo "$usage > 90" | bc -l) )); then
if (( $(echo "$usage > $ALERT_THREAD_DISK_P1" | bc -l) )); then
priority='P1'
fi
if (( $(echo "$usage > 80" | bc -l) )); then
if (( $(echo "$usage > $ALERT_THREAD_DISK_P2" | bc -l) )); then
disk_alert=$(jq -n --arg disk "*DISK* ($disk)" --arg usage "${usage}%" '[{"type":"mrkdwn","text":$disk},{"type":"plain_text","text":$usage}]')
alert_message=$(echo "$alert_message" | jq --argjson disk_alert "$disk_alert" '. += $disk_alert')
fi
Expand All @@ -98,14 +105,14 @@ generate_disk_alert_message() {
check_and_send_alert() {
local alert_message=$(generate_alert_message)
local disk_alert_message=$(generate_disk_alert_message)

local HOSTNAME=${SERVER_NAME:-$(hostname)}

local blocks="[]"

if [[ "$alert_message" != "[]" ]]; then
alert_block=$(
jq -n \
--arg warning "[*WARNING*]: New server alert > $SERVER_NAME" \
--arg warning "[*WARNING*]: New server alert > $HOSTNAME" \
--argjson msg "$alert_message" \
'{ "type": "section", "text": {"type": "mrkdwn", "text": $warning}, "fields": $msg }'
)
Expand All @@ -115,23 +122,25 @@ check_and_send_alert() {
if [[ "$disk_alert_message" != "[]" ]]; then
disk_block=$(
jq -n \
--arg warning "[*WARNING*]: New disk alert > $SERVER_NAME" \
--arg warning "[*WARNING*]: New disk alert > $HOSTNAME" \
--argjson msg "$disk_alert_message" \
'{ "type": "section", "text": {"type": "mrkdwn", "text": $warning}, "fields": $msg }'
)
blocks=$(echo "$blocks" | jq --argjson block "$disk_block" '. += [$block]')
fi

if [[ "$blocks" != "[]" ]]; then
local data=$(jq -n \
--arg channel "$NOTIFY_SLACK_CHANNEL" \
--argjson blocks "$blocks" \
'{
"username": "ServerBot",
"icon_emoji": ":loudspeaker:",
"channel": $channel,
"blocks": $blocks
}')
local data=$(
jq -n \
--arg channel "$NOTIFY_SLACK_CHANNEL" \
--argjson blocks "$blocks" \
'{
"username": "ServerBot",
"icon_emoji": ":loudspeaker:",
"channel": $channel,
"blocks": $blocks
}'
)

send_alert "$data"
fi
Expand All @@ -140,23 +149,20 @@ check_and_send_alert() {
#######################################
# Post a JSON payload to the configured Slack incoming webhook.
# Globals:   NOTIFY_SLACK_WEBHOOK (read)
# Arguments: $1 - JSON request body
# Outputs:   whatever curl prints; an error on stderr when no webhook is set
# Returns:   curl's exit status, or 1 when NOTIFY_SLACK_WEBHOOK is empty
#######################################
send_alert() {
  local message=$1

  # With an empty URL curl would only report a generic "no URL" usage error.
  if [[ -z "${NOTIFY_SLACK_WEBHOOK:-}" ]]; then
    echo "send_alert: NOTIFY_SLACK_WEBHOOK is not set; alert not sent" >&2
    return 1
  fi

  # The URL is quoted so it always reaches curl as exactly one argument.
  curl -X POST \
    -H "Content-type: application/json" \
    "$NOTIFY_SLACK_WEBHOOK" \
    --data "$message"
}

#######################################
# Script entry point: print one usage snapshot line, then evaluate alerts.
# Arguments: none
# Outputs:   "<timestamp> CPU: ..% RAM: ..% Disk: ..% Requests: .." on stdout,
#            followed by anything check_and_send_alert prints
# Returns:   the status of check_and_send_alert
#######################################
main() {
  # Declared separately from the assignments so that "local" does not mask
  # the exit status of the command substitutions.
  local cpu ram disk requests
  cpu=$(cpu_usage)
  ram=$(memory_usage)
  disk=$(disk_usage)
  requests=$(request_count)
  echo "$(timestamp) CPU: ${cpu}% RAM: ${ram}% Disk: ${disk}% Requests: ${requests}"

  check_and_send_alert
}

main

0 comments on commit bd41021

Please sign in to comment.