From 74ccf87bb8f6797f05369d3be2a8a8e6d89560d4 Mon Sep 17 00:00:00 2001 From: Dominic Robinson <65237317+drobinson-moj@users.noreply.github.com> Date: Thu, 16 Nov 2023 09:49:25 +0000 Subject: [PATCH] Nomis: DSOS-2359: fixngo connected refactor (#398) * Renamed fixngo connectivity role * fix * fix * Commit changes made by code formatters --------- Co-authored-by: github-actions[bot] --- ansible/group_vars/server_type_nomis_db.yml | 9 +++- ansible/group_vars/server_type_nomis_web.yml | 4 ++ ansible/group_vars/server_type_nomis_xtag.yml | 4 ++ .../collectd-connectivity-test/README.md | 12 +++++ .../defaults/main.yml | 6 +++ .../handlers/main.yml | 10 ++++ .../collectd-connectivity-test/meta/main.yml | 4 ++ .../tasks/configure_collectd.yml | 18 +++++++ .../collectd-connectivity-test/tasks/main.yml | 8 +++ .../collectd_connectivity_test.conf.j2 | 4 ++ .../collectd_connectivity_test.sh.j2 | 28 +++++++++++ .../collectd_oracle_db_connected.sh.j2 | 2 +- .../collectd_textfile_monitoring.sh.j2 | 24 ++++----- .../roles/fixngo-connected-metric/README.md | 3 -- .../fixngo-connected-metric/files/fixngo.conf | 4 -- .../fixngo-connected-metric/handlers/main.yml | 6 --- .../fixngo-connected-metric/meta/main.yml | 3 -- .../tasks/fixngo-connected-metric.yml | 46 ----------------- .../fixngo-connected-metric/tasks/main.yml | 6 --- .../templates/fixngo_connected_metrics.sh.j2 | 49 ------------------- .../fixngo-connected-metric/vars/main.yml | 2 - 21 files changed, 119 insertions(+), 133 deletions(-) create mode 100644 ansible/roles/collectd-connectivity-test/README.md create mode 100644 ansible/roles/collectd-connectivity-test/defaults/main.yml create mode 100644 ansible/roles/collectd-connectivity-test/handlers/main.yml create mode 100644 ansible/roles/collectd-connectivity-test/meta/main.yml create mode 100644 ansible/roles/collectd-connectivity-test/tasks/configure_collectd.yml create mode 100644 ansible/roles/collectd-connectivity-test/tasks/main.yml create mode 100644 
ansible/roles/collectd-connectivity-test/templates/collectd_connectivity_test.conf.j2 create mode 100644 ansible/roles/collectd-connectivity-test/templates/collectd_connectivity_test.sh.j2 delete mode 100644 ansible/roles/fixngo-connected-metric/README.md delete mode 100644 ansible/roles/fixngo-connected-metric/files/fixngo.conf delete mode 100644 ansible/roles/fixngo-connected-metric/handlers/main.yml delete mode 100644 ansible/roles/fixngo-connected-metric/meta/main.yml delete mode 100644 ansible/roles/fixngo-connected-metric/tasks/fixngo-connected-metric.yml delete mode 100644 ansible/roles/fixngo-connected-metric/tasks/main.yml delete mode 100644 ansible/roles/fixngo-connected-metric/templates/fixngo_connected_metrics.sh.j2 delete mode 100644 ansible/roles/fixngo-connected-metric/vars/main.yml diff --git a/ansible/group_vars/server_type_nomis_db.yml b/ansible/group_vars/server_type_nomis_db.yml index 4ec797ec6..5802f4b4b 100644 --- a/ansible/group_vars/server_type_nomis_db.yml +++ b/ansible/group_vars/server_type_nomis_db.yml @@ -6,12 +6,17 @@ server_type_roles_list: - set-ec2-hostname - domain-search - ansible-script + - epel - oracle-11g - oracle-secure-backup - oracle-db-backup + - collectd + - amazon-cloudwatch-agent + - amazon-cloudwatch-agent-collectd - collectd-service-metrics - # - oracle-db-restore - - fixngo-connected-metric + - collectd-connectivity-test + - collectd-oracle-db-connected + - collectd-textfile-monitoring - nomis-misload # the below vars are defined in multiple groups. 
Keep the values the same to avoid unexpected behaviour diff --git a/ansible/group_vars/server_type_nomis_web.yml b/ansible/group_vars/server_type_nomis_web.yml index 4ec0a8e2a..0f663d2e7 100644 --- a/ansible/group_vars/server_type_nomis_web.yml +++ b/ansible/group_vars/server_type_nomis_web.yml @@ -6,8 +6,12 @@ server_type_roles_list: - set-ec2-hostname - domain-search - ansible-script + - epel - nomis-weblogic - nomis-release-deployment + - collectd + - amazon-cloudwatch-agent + - amazon-cloudwatch-agent-collectd - collectd-service-metrics - autoscale-group-hooks-state diff --git a/ansible/group_vars/server_type_nomis_xtag.yml b/ansible/group_vars/server_type_nomis_xtag.yml index 38e646407..be165a5c4 100644 --- a/ansible/group_vars/server_type_nomis_xtag.yml +++ b/ansible/group_vars/server_type_nomis_xtag.yml @@ -7,7 +7,11 @@ server_type_roles_list: - set-ec2-hostname - domain-search - ansible-script + - epel - nomis-xtag-weblogic + - collectd + - amazon-cloudwatch-agent + - amazon-cloudwatch-agent-collectd - collectd-service-metrics - autoscale-group-hooks-state diff --git a/ansible/roles/collectd-connectivity-test/README.md b/ansible/roles/collectd-connectivity-test/README.md new file mode 100644 index 000000000..c0d5fb040 --- /dev/null +++ b/ansible/roles/collectd-connectivity-test/README.md @@ -0,0 +1,12 @@ +Check connectivity with a remote host/port using netcat. + +Define a `connectivity-tests` tag on the AWS instance, in the format "hostname1:port1 hostname2:port2 ...". +A collectd_connectivity_test.sh script is spawned to periodically check each hostname:port using netcat. + +The hostname:port will be used as a dimension in cloudwatch: + +Metric type type_instance +collectd_connectivity_test_value exitcode hostname1:port1 +collectd_connectivity_test_value exitcode hostname2:port2 + +The metric value is the netcat exitcode, i.e. 0 if connected, non-zero if not. 
diff --git a/ansible/roles/collectd-connectivity-test/defaults/main.yml b/ansible/roles/collectd-connectivity-test/defaults/main.yml new file mode 100644 index 000000000..79d4b726d --- /dev/null +++ b/ansible/roles/collectd-connectivity-test/defaults/main.yml @@ -0,0 +1,6 @@ +--- +collectd_script_path: /usr/local/bin +collectd_script_name: collectd_connectivity_test +collectd_script_user: ec2-user +collectd_script_interval: 60 +connectivity_test_connect_timeout: 2 diff --git a/ansible/roles/collectd-connectivity-test/handlers/main.yml b/ansible/roles/collectd-connectivity-test/handlers/main.yml new file mode 100644 index 000000000..a9c35d30b --- /dev/null +++ b/ansible/roles/collectd-connectivity-test/handlers/main.yml @@ -0,0 +1,10 @@ +--- +- name: restart collectd + ansible.builtin.service: + name: collectd + state: restarted + +- name: restart plugin script + ansible.builtin.shell: | + pkill -u {{ collectd_script_user }} -f {{ collectd_script_path }}/{{ collectd_script_name }}.sh + failed_when: false diff --git a/ansible/roles/collectd-connectivity-test/meta/main.yml b/ansible/roles/collectd-connectivity-test/meta/main.yml new file mode 100644 index 000000000..fb5e24a73 --- /dev/null +++ b/ansible/roles/collectd-connectivity-test/meta/main.yml @@ -0,0 +1,4 @@ +--- +dependencies: + - role: get-ec2-facts + - role: amazon-cloudwatch-agent-collectd diff --git a/ansible/roles/collectd-connectivity-test/tasks/configure_collectd.yml b/ansible/roles/collectd-connectivity-test/tasks/configure_collectd.yml new file mode 100644 index 000000000..27be4676d --- /dev/null +++ b/ansible/roles/collectd-connectivity-test/tasks/configure_collectd.yml @@ -0,0 +1,18 @@ +--- +- name: copy collectd config + ansible.builtin.template: + src: "{{ collectd_script_name }}.conf.j2" + dest: "/etc/collectd.d/{{ collectd_script_name }}.conf" + owner: root + mode: 0644 + notify: + - restart collectd + +- name: copy collectd plugin script + ansible.builtin.template: + src: "{{ 
collectd_script_name }}.sh.j2" + dest: "{{ collectd_script_path }}/{{ collectd_script_name }}.sh" + owner: root + mode: 0755 + notify: + - restart plugin script diff --git a/ansible/roles/collectd-connectivity-test/tasks/main.yml b/ansible/roles/collectd-connectivity-test/tasks/main.yml new file mode 100644 index 000000000..2dfdaf45d --- /dev/null +++ b/ansible/roles/collectd-connectivity-test/tasks/main.yml @@ -0,0 +1,8 @@ +--- +- import_tasks: configure_collectd.yml + tags: + - ec2provision + - ec2patch + when: + - ansible_distribution in ['RedHat', 'OracleLinux'] + - ec2.tags['connectivity-tests'] is defined diff --git a/ansible/roles/collectd-connectivity-test/templates/collectd_connectivity_test.conf.j2 b/ansible/roles/collectd-connectivity-test/templates/collectd_connectivity_test.conf.j2 new file mode 100644 index 000000000..f259faeb8 --- /dev/null +++ b/ansible/roles/collectd-connectivity-test/templates/collectd_connectivity_test.conf.j2 @@ -0,0 +1,4 @@ +LoadPlugin exec +<Plugin exec> + Exec "{{ collectd_script_user }}" "{{ collectd_script_path }}/{{ collectd_script_name }}.sh" +</Plugin> diff --git a/ansible/roles/collectd-connectivity-test/templates/collectd_connectivity_test.sh.j2 b/ansible/roles/collectd-connectivity-test/templates/collectd_connectivity_test.sh.j2 new file mode 100644 index 000000000..9920073e7 --- /dev/null +++ b/ansible/roles/collectd-connectivity-test/templates/collectd_connectivity_test.sh.j2 @@ -0,0 +1,28 @@ +#!/bin/bash +# Managed by collectd-connectivity-test ansible role +# If manually editing, just kill script and collectd will respawn +# e.g. 
pkill -u {{ collectd_script_user }} -f {{ collectd_script_path }}/{{ collectd_script_name }}.sh +# HOSTNAME and INTERVAL label every PUTVAL line; CONNECTIVITY_TIMEOUT is the ncat connect wait +HOSTNAME="${HOSTNAME:-localhost}" +INTERVAL="${INTERVAL:-{{ collectd_script_interval }}}" +CONNECTIVITY_TIMEOUT={{ connectivity_test_connect_timeout }} +# Print this instance's connectivity-tests tag value; the main loop treats "None" as no targets +get_targets() { + aws ec2 describe-tags --filters "Name=resource-id,Values={{ ansible_ec2_instance_id }}" "Name=key,Values=connectivity-tests" --query 'Tags[0].Value' --output=text +} +# Probe a single host:port target passed as $1; the return status is ncat's exit status +connectivity_test() { + local ip port + IFS=: read -r ip port _ <<<"$1" + ncat -vzw "$CONNECTIVITY_TIMEOUT" "$ip" "$port" +} +# Every interval emit one PUTVAL per target; the value is the probe exit code (0 = connected) +while sleep "$INTERVAL"; do + targets=$(get_targets) + if [[ -n "$targets" && "$targets" != "None" ]]; then + for target in $targets; do + connectivity_test "$target" >/dev/null 2>&1 + echo "PUTVAL $HOSTNAME/connectivity_test/exitcode-$target interval=$INTERVAL N:$?" + done + fi +done diff --git a/ansible/roles/collectd-oracle-db-connected/templates/collectd_oracle_db_connected.sh.j2 b/ansible/roles/collectd-oracle-db-connected/templates/collectd_oracle_db_connected.sh.j2 index d051c945d..6e37daf81 100644 --- a/ansible/roles/collectd-oracle-db-connected/templates/collectd_oracle_db_connected.sh.j2 +++ b/ansible/roles/collectd-oracle-db-connected/templates/collectd_oracle_db_connected.sh.j2 @@ -57,7 +57,7 @@ ORAENV_ASK="NO" while sleep "$INTERVAL"; do SIDS=$(get_sids) if [[ "$SIDS" != "None" ]]; then - for SID in $(get_sids); do + for SID in $SIDS; do db_connected $SID >/dev/null 2>&1 echo "PUTVAL $HOSTNAME/oracle_db_connected/exitcode-$SID interval=$INTERVAL N:$?" 
done diff --git a/ansible/roles/collectd-textfile-monitoring/templates/collectd_textfile_monitoring.sh.j2 b/ansible/roles/collectd-textfile-monitoring/templates/collectd_textfile_monitoring.sh.j2 index 5a98310e9..35e48e919 100644 --- a/ansible/roles/collectd-textfile-monitoring/templates/collectd_textfile_monitoring.sh.j2 +++ b/ansible/roles/collectd-textfile-monitoring/templates/collectd_textfile_monitoring.sh.j2 @@ -10,18 +10,20 @@ while sleep "$INTERVAL"; do now=$(date +%s) for file in {{ collectd_textfile_monitoring_paths }}; do {% raw %} - IFS=$'\n' - metrics=($(grep -E "^[[:alnum:]_]+[[:space:]]+[[:digit:]]+" $file)) - unset IFS - file_last_modified=$(date -r $file +%s) - secs_since_last_modified=$((now - file_last_modified)) + if [[ -e "$file" ]]; then + IFS=$'\n' + metrics=($(grep -E "^[[:alnum:]_]+[[:space:]]+[[:digit:]]+" "$file")) + unset IFS + file_last_modified=$(date -r "$file" +%s) + secs_since_last_modified=$((now - file_last_modified)) - num_metrics=${#metrics[@]} - for ((i=0; i - Exec "ec2-user" "/opt/collectd/fixngo_connected_metrics.sh" - diff --git a/ansible/roles/fixngo-connected-metric/handlers/main.yml b/ansible/roles/fixngo-connected-metric/handlers/main.yml deleted file mode 100644 index 09351aa76..000000000 --- a/ansible/roles/fixngo-connected-metric/handlers/main.yml +++ /dev/null @@ -1,6 +0,0 @@ ---- -- name: restart collectd - ansible.builtin.service: - name: collectd - state: restarted - enabled: yes diff --git a/ansible/roles/fixngo-connected-metric/meta/main.yml b/ansible/roles/fixngo-connected-metric/meta/main.yml deleted file mode 100644 index 4ff987c1c..000000000 --- a/ansible/roles/fixngo-connected-metric/meta/main.yml +++ /dev/null @@ -1,3 +0,0 @@ ---- -dependencies: - - role: get-ec2-facts diff --git a/ansible/roles/fixngo-connected-metric/tasks/fixngo-connected-metric.yml b/ansible/roles/fixngo-connected-metric/tasks/fixngo-connected-metric.yml deleted file mode 100644 index b3a45a73e..000000000 --- 
a/ansible/roles/fixngo-connected-metric/tasks/fixngo-connected-metric.yml +++ /dev/null @@ -1,46 +0,0 @@ ---- -- name: create folder for exec script to collect metrics - ansible.builtin.file: - path: "/opt/collectd" - state: directory - owner: root - group: root - mode: 0755 - -# format must be "x.x.x.x port x.x.x.x port x.x.x.x port ..." -- name: get fact for fixngo connection targets - set_fact: - fixngo_connection_targets: "{{ ec2.tags['fixngo-connection-targets'] }}" - -- name: check if fixngo connection targets contains more than two items - set_fact: - multiple_pairs: "{{ fixngo_connection_targets.split(' ') | length > 2 }}" - -- name: convert string to list of pairs (multiple targets) - set_fact: - fixngo_connection_targets_list: "{{ fixngo_connection_targets.split(' ') | slice(2) | map('list') | list }}" - when: multiple_pairs - -- name: convert string to list of pairs (single target) - set_fact: - fixngo_connection_targets_list: "[{{ fixngo_connection_targets.split(' ') }}]" - when: not multiple_pairs - -- name: create dictionary of lists - set_fact: - fixngo_connection_targets_dict: "{{ fixngo_connection_targets_dict | default([]) + [{'ip': item[0], 'port': item[1] }] }}" - loop: "{{ fixngo_connection_targets_list }}" - -- name: add fixngo conf for collectd to get connection state as a metric - ansible.builtin.copy: - src: fixngo.conf - dest: "/etc/collectd.d/fixngo.conf" - owner: root - mode: 0755 - -- name: Add fixngo connected check script - ansible.builtin.template: - src: fixngo_connected_metrics.sh.j2 - dest: /opt/collectd/fixngo_connected_metrics.sh - mode: 0755 - notify: restart collectd diff --git a/ansible/roles/fixngo-connected-metric/tasks/main.yml b/ansible/roles/fixngo-connected-metric/tasks/main.yml deleted file mode 100644 index 2a6ca232c..000000000 --- a/ansible/roles/fixngo-connected-metric/tasks/main.yml +++ /dev/null @@ -1,6 +0,0 @@ ---- -- import_tasks: "fixngo-connected-metric.yml" - tags: - - ec2provision - - ec2patch - when: 
ec2.tags['fixngo-connection-targets'] is defined and ansible_facts['distribution'] == "RedHat" diff --git a/ansible/roles/fixngo-connected-metric/templates/fixngo_connected_metrics.sh.j2 b/ansible/roles/fixngo-connected-metric/templates/fixngo_connected_metrics.sh.j2 deleted file mode 100644 index c10e7f165..000000000 --- a/ansible/roles/fixngo-connected-metric/templates/fixngo_connected_metrics.sh.j2 +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env bash - -# Templated in from ansible -HOSTNAME="${HOSTNAME:-localhost}" -INTERVAL={{ collectd_script_interval }} -timeout=1 -targets="{{ fixngo_connection_targets_dict }}" # just to be able to see the values being used - -{% for target in fixngo_connection_targets_dict -%} - - function{{ loop.index }}() { - connection=$(ncat -vzw "$timeout" {{ target.ip }} {{ target.port }} 2>&1) - if [[ "${connection}" == *"Connected"* ]] - then - return 0 - else - return 1 # Connection failed - fi - } - -{% endfor %} - -while sleep "$INTERVAL" -do - - {% for target in fixngo_connection_targets_dict %} - function{{ loop.index }} - connection{{ loop.index }}=$? 
- {% endfor %} - - all_connection_checks_failed=true - - connections=({% for target in fixngo_connection_targets_dict %} connection{{ loop.index }} {% endfor %}) - - for connection in "${connections[@]}"; do - # if any connection check succeeds, then we're connected, so break and set all_connection_checks_failed to false - if [[ $connection -ne 1 ]] - then - all_connection_checks_failed=false - break - fi - done - - if [[ "$all_connection_checks_failed" = true ]] ; then - echo "PUTVAL $HOSTNAME/exec-fixngo_connected/bool-fixngo_connected interval=$INTERVAL N:1" - else - echo "PUTVAL $HOSTNAME/exec-fixngo_connected/bool-fixngo_connected interval=$INTERVAL N:0" - fi -done diff --git a/ansible/roles/fixngo-connected-metric/vars/main.yml b/ansible/roles/fixngo-connected-metric/vars/main.yml deleted file mode 100644 index 84ab8c8b0..000000000 --- a/ansible/roles/fixngo-connected-metric/vars/main.yml +++ /dev/null @@ -1,2 +0,0 @@ ---- -collectd_script_interval: 60