diff --git a/.github/workflows/check-ansible.yml b/.github/workflows/check-ansible.yml
new file mode 100644
index 0000000..bc88092
--- /dev/null
+++ b/.github/workflows/check-ansible.yml
@@ -0,0 +1,57 @@
+name: Check
+
+on:
+  pull_request:
+    branches: [main]
+
+env:
+  SLACK_INCOMING_WEBHOOK_URL: ${{ secrets.SLACK_INCOMING_WEBHOOK_URL }}
+
+jobs:
+  # Runs the essentials playbook only when the inventory or an essentials file changed.
+  deploy-essentials:
+    name: Deploy essentials
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Verify essentials file changed
+        uses: tj-actions/changed-files@v37.4.0
+        id: changed_files
+        with:
+          files: |
+            ansible/inventories/hosts.ini
+            ansible/playbooks/_essentials/*
+            ansible/playbooks/essentials.yml
+
+      - name: Deploy essentials
+        id: deploy-essentials
+        if: steps.changed_files.outputs.any_changed == 'true'
+        uses: dawidd6/action-ansible-playbook@v2
+        with:
+          directory: ansible
+          playbook: playbooks/essentials.yml
+          key: "${{ secrets.SSH_PRIVATE_KEY }}"
+          options: --user ansible
+
+  # Dry-runs (--check --diff) every playbook in the matrix after deploy-essentials.
+  check-playbooks:
+    name: Check playbook
+    runs-on: ubuntu-latest
+    needs: [deploy-essentials]
+    strategy:
+      matrix:
+        playbook:
+          - monitor
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Run playbook
+        uses: dawidd6/action-ansible-playbook@v2
+        with:
+          directory: ansible
+          playbook: playbooks/${{ matrix.playbook }}/playbook.yml
+          key: "${{ secrets.SSH_PRIVATE_KEY }}"
+          options: --user ansible --verbose --diff --check
diff --git a/ansible/roles/monitor/defaults/main.yml b/ansible/roles/monitor/defaults/main.yml
new file mode 100644
index 0000000..f1b5300
--- /dev/null
+++ b/ansible/roles/monitor/defaults/main.yml
@@ -0,0 +1,17 @@
+---
+monitor:
+  workdir: /tmp/monitor
+  notify_slack_webhook: ''
+  notify_slack_channel: 'darwinia-alert-notification'
+  # When empty, crawl.sh falls back to the output of `hostname` in alert titles.
+  server_name: ''
+  # Device names; crawl.sh matches each one against the start of `df -h` lines.
+  check_disks:
+    - /dev/sdb
+  # Usage percentages: above *_p2 an alert is raised, above *_p1 it is marked P1.
+  alert_thread_cpu_p2: 90
+  alert_thread_cpu_p1: 98
+  alert_thread_ram_p2: 90
+  alert_thread_ram_p1: 98
+  alert_thread_disk_p2: 90
+  alert_thread_disk_p1: 98
diff --git a/ansible/roles/monitor/tasks/main.yml b/ansible/roles/monitor/tasks/main.yml
new file mode 100644
index 0000000..489dfc4
--- /dev/null
+++ b/ansible/roles/monitor/tasks/main.yml
@@ -0,0 +1,20 @@
+---
+- name: Creates workdir
+  file:
+    path: "{{ monitor.workdir }}"
+    state: directory
+
+- name: Generate scripts file
+  template:
+    src: crawl.sh
+    dest: "{{ monitor.workdir }}/crawl.sh"
+    mode: "0644"
+
+# argv form keeps the script path a single argument even if workdir has spaces.
+- name: Run snapshot
+  command:
+    argv:
+      - bash
+      - "{{ monitor.workdir }}/crawl.sh"
+  # crawl.sh reports usage and posts alerts; it is not expected to modify the host.
+  changed_when: false
diff --git a/ansible/roles/monitor/templates/crawl.sh b/ansible/roles/monitor/templates/crawl.sh
index 8ae945d..b149b52 100755
--- a/ansible/roles/monitor/templates/crawl.sh
+++ b/ansible/roles/monitor/templates/crawl.sh
@@ -1,10 +1,17 @@
 #!/bin/bash
 
-DISKS_TO_MONITOR=("/dev/sda1" "/dev/nvme0n1p2")
+DISKS_TO_MONITOR=({{ monitor.check_disks | map('quote') | join(' ') }})
 
-SERVER_NAME='test-server'
-NOTIFY_SLACK_WEBHOOK='https://hooks.slack.com/services/xxx/xxx/xxx'
-NOTIFY_SLACK_CHANNEL='darwinia-alert-notification'
+SERVER_NAME={{ monitor.server_name | quote }}
+NOTIFY_SLACK_WEBHOOK={{ monitor.notify_slack_webhook | quote }}
+NOTIFY_SLACK_CHANNEL={{ monitor.notify_slack_channel | quote }}
+
+ALERT_THREAD_CPU_P2={{ monitor.alert_thread_cpu_p2 }}
+ALERT_THREAD_CPU_P1={{ monitor.alert_thread_cpu_p1 }}
+ALERT_THREAD_RAM_P2={{ monitor.alert_thread_ram_p2 }}
+ALERT_THREAD_RAM_P1={{ monitor.alert_thread_ram_p1 }}
+ALERT_THREAD_DISK_P2={{ monitor.alert_thread_disk_p2 }}
+ALERT_THREAD_DISK_P1={{ monitor.alert_thread_disk_p1 }}
 
 timestamp() {
   date +"%Y-%m-%d %H:%M:%S"
@@ -20,8 +27,8 @@ memory_usage() {
 
 disk_usage() {
   for disk in "${DISKS_TO_MONITOR[@]}"; do
-        usage=$(df -h | grep "^$disk" | awk '{print $5}' | sed 's/%//')
-        echo "$disk $usage"
+    usage=$(df -h | grep "^$disk" | awk '{print $5}' | sed 's/%//')
+    echo "$disk $usage"
   done
 }
 
@@ -36,10 +43,10 @@ generate_alert_message() {
   local alert_message="[]"
   local priority='P2'
 
-  if (( $(echo "$cpu > 95" | bc -l) )); then
+  if (( $(echo "$cpu > $ALERT_THREAD_CPU_P1" | bc -l) )); then
     priority='P1'
   fi
-  if (( $(echo "$ram > 95" | bc -l) )); then
+  if (( $(echo "$ram > $ALERT_THREAD_RAM_P1" | bc -l) )); then
     priority='P1'
   fi
   if [[ "P1" == "$priority" ]]; then
@@ -47,12 +54,12 @@ generate_alert_message() {
     alert_message=$(echo "$alert_message" | jq --argjson priority_alert "$priority_alert" '. += $priority_alert')
   fi
 
-  if (( $(echo "$cpu > 80" | bc -l) )); then
+  if (( $(echo "$cpu > $ALERT_THREAD_CPU_P2" | bc -l) )); then
     cpu_alert=$(jq -n --arg cpu "${cpu}%" '[{"type":"mrkdwn","text":"*CPU*"},{"type":"plain_text","text":$cpu}]')
     alert_message=$(echo "$alert_message" | jq --argjson cpu_alert "$cpu_alert" '. += $cpu_alert')
   fi
 
-  if (( $(echo "$ram > 80" | bc -l) )); then
+  if (( $(echo "$ram > $ALERT_THREAD_RAM_P2" | bc -l) )); then
     ram_alert=$(jq -n --arg ram "${ram}%" '[{"type":"mrkdwn","text":"*RAM*"},{"type":"plain_text","text":$ram}]')
     alert_message=$(echo "$alert_message" | jq --argjson ram_alert "$ram_alert" '. += $ram_alert')
   fi
@@ -77,10 +84,10 @@ generate_disk_alert_message() {
       continue
     fi
 
-    if (( $(echo "$usage > 90" | bc -l) )); then
+    if (( $(echo "$usage > $ALERT_THREAD_DISK_P1" | bc -l) )); then
       priority='P1'
     fi
-    if (( $(echo "$usage > 80" | bc -l) )); then
+    if (( $(echo "$usage > $ALERT_THREAD_DISK_P2" | bc -l) )); then
       disk_alert=$(jq -n --arg disk "*DISK* ($disk)" --arg usage "${usage}%" '[{"type":"mrkdwn","text":$disk},{"type":"plain_text","text":$usage}]')
       alert_message=$(echo "$alert_message" | jq --argjson disk_alert "$disk_alert" '. += $disk_alert')
     fi
@@ -98,14 +105,14 @@ generate_disk_alert_message() {
 check_and_send_alert() {
   local alert_message=$(generate_alert_message)
   local disk_alert_message=$(generate_disk_alert_message)
-
+  local server_label=${SERVER_NAME:-$(hostname)}
 
   local blocks="[]"
 
   if [[ "$alert_message" != "[]" ]]; then
     alert_block=$(
       jq -n \
-        --arg warning "[*WARNING*]: New server alert > $SERVER_NAME" \
+        --arg warning "[*WARNING*]: New server alert > $server_label" \
         --argjson msg "$alert_message" \
         '{ "type": "section", "text": {"type": "mrkdwn", "text": $warning}, "fields": $msg }'
     )
@@ -115,7 +122,7 @@ check_and_send_alert() {
   if [[ "$disk_alert_message" != "[]" ]]; then
     disk_block=$(
       jq -n \
-        --arg warning "[*WARNING*]: New disk alert > $SERVER_NAME" \
+        --arg warning "[*WARNING*]: New disk alert > $server_label" \
         --argjson msg "$disk_alert_message" \
         '{ "type": "section", "text": {"type": "mrkdwn", "text": $warning}, "fields": $msg }'
     )
@@ -123,15 +130,17 @@ check_and_send_alert() {
   fi
 
   if [[ "$blocks" != "[]" ]]; then
-    local data=$(jq -n \
-      --arg channel "$NOTIFY_SLACK_CHANNEL" \
-      --argjson blocks "$blocks" \
-      '{
-        "username": "ServerBot",
-        "icon_emoji": ":loudspeaker:",
-        "channel": $channel,
-        "blocks": $blocks
-      }')
+    local data=$(
+      jq -n \
+        --arg channel "$NOTIFY_SLACK_CHANNEL" \
+        --argjson blocks "$blocks" \
+        '{
+          "username": "ServerBot",
+          "icon_emoji": ":loudspeaker:",
+          "channel": $channel,
+          "blocks": $blocks
+        }'
+    )
 
     send_alert "$data"
   fi
@@ -140,15 +149,13 @@ check_and_send_alert() {
 send_alert() {
   local message=$1
 
-  echo $message
-
   curl -X POST \
     -H "Content-type: application/json" \
-    $NOTIFY_SLACK_WEBHOOK \
+    "$NOTIFY_SLACK_WEBHOOK" \
     --data "$message"
 }
 
-log_usage() {
+main() {
   local cpu=$(cpu_usage)
   local ram=$(memory_usage)
   local disk=$(disk_usage)
@@ -156,7 +163,6 @@ log_usage() {
 
   echo "$(timestamp) CPU: ${cpu}% RAM: ${ram}% Disk: ${disk}% Requests: ${requests}"
   check_and_send_alert
-
 }
 
-log_usage
+main