diff --git a/ansible/roles/audit/README.md b/ansible/roles/audit/README.md new file mode 100644 index 000000000..d2776667a --- /dev/null +++ b/ansible/roles/audit/README.md @@ -0,0 +1 @@ +Role for enabling audit daemon diff --git a/ansible/roles/audit/tasks/audit.yml b/ansible/roles/audit/tasks/audit.yml new file mode 100644 index 000000000..a47ad0544 --- /dev/null +++ b/ansible/roles/audit/tasks/audit.yml @@ -0,0 +1,13 @@ +- name: Install audit package + yum: + name: audit + state: present + lock_timeout: 60 + retries: 3 + delay: 10 + +- name: Start auditd + service: + name: auditd + state: started + enabled: yes diff --git a/ansible/roles/audit/tasks/main.yml b/ansible/roles/audit/tasks/main.yml new file mode 100644 index 000000000..80a000bb6 --- /dev/null +++ b/ansible/roles/audit/tasks/main.yml @@ -0,0 +1,6 @@ +- import_tasks: audit.yml + tags: + - amibuild + - ec2provision + - ec2patch + when: ansible_distribution in ['RedHat', 'OracleLinux'] diff --git a/ansible/roles/collectd-oracle-db-connected/defaults/main.yml b/ansible/roles/collectd-oracle-db-connected/defaults/main.yml new file mode 100644 index 000000000..6a73fc5c2 --- /dev/null +++ b/ansible/roles/collectd-oracle-db-connected/defaults/main.yml @@ -0,0 +1,5 @@ +--- +collectd_script_path: /usr/local/bin +collectd_script_name: collectd_oracle_db_connected +collectd_script_user: oracle +collectd_script_interval: 60 diff --git a/ansible/roles/collectd-oracle-db-connected/handlers/main.yml b/ansible/roles/collectd-oracle-db-connected/handlers/main.yml new file mode 100644 index 000000000..a9c35d30b --- /dev/null +++ b/ansible/roles/collectd-oracle-db-connected/handlers/main.yml @@ -0,0 +1,10 @@ +--- +- name: restart collectd + ansible.builtin.service: + name: collectd + state: restarted + +- name: restart plugin script + ansible.builtin.shell: | + pkill -u {{ collectd_script_user }} -f {{ collectd_script_path }}/{{ collectd_script_name }}.sh + failed_when: false diff --git a/ansible/roles/collectd-oracle-db-connected/meta/main.yml b/ansible/roles/collectd-oracle-db-connected/meta/main.yml new file mode 100644 index 000000000..fb5e24a73 --- /dev/null +++ b/ansible/roles/collectd-oracle-db-connected/meta/main.yml @@ -0,0 +1,4 @@ +--- +dependencies: + - role: get-ec2-facts + - role: amazon-cloudwatch-agent-collectd diff --git a/ansible/roles/collectd-oracle-db-connected/tasks/configure_collectd.yml b/ansible/roles/collectd-oracle-db-connected/tasks/configure_collectd.yml new file mode 100644 index 000000000..27be4676d --- /dev/null +++ b/ansible/roles/collectd-oracle-db-connected/tasks/configure_collectd.yml @@ -0,0 +1,18 @@ +--- +- name: copy collectd config + ansible.builtin.template: + src: "{{ collectd_script_name }}.conf.j2" + dest: "/etc/collectd.d/{{ collectd_script_name }}.conf" + owner: root + mode: 0644 + notify: + - restart collectd + +- name: copy collectd plugin script + ansible.builtin.template: + src: "{{ collectd_script_name }}.sh.j2" + dest: "{{ collectd_script_path }}/{{ collectd_script_name }}.sh" + owner: root + mode: 0755 + notify: + - restart plugin script diff --git a/ansible/roles/collectd-oracle-db-connected/tasks/main.yml b/ansible/roles/collectd-oracle-db-connected/tasks/main.yml new file mode 100644 index 000000000..e701061c0 --- /dev/null +++ b/ansible/roles/collectd-oracle-db-connected/tasks/main.yml @@ -0,0 +1,6 @@ +--- +- import_tasks: configure_collectd.yml + tags: + - ec2provision + - ec2patch + when: ansible_distribution in ['RedHat', 'OracleLinux'] diff --git a/ansible/roles/collectd-service-metrics/templates/collectd.conf.j2 b/ansible/roles/collectd-oracle-db-connected/templates/collectd_oracle_db_connected.conf.j2 similarity index 100% rename from ansible/roles/collectd-service-metrics/templates/collectd.conf.j2 rename to ansible/roles/collectd-oracle-db-connected/templates/collectd_oracle_db_connected.conf.j2 diff --git a/ansible/roles/collectd-oracle-db-connected/templates/collectd_oracle_db_connected.sh.j2 b/ansible/roles/collectd-oracle-db-connected/templates/collectd_oracle_db_connected.sh.j2 new file mode 100644 index 000000000..99a228135 --- /dev/null +++ b/ansible/roles/collectd-oracle-db-connected/templates/collectd_oracle_db_connected.sh.j2 @@ -0,0 +1,65 @@ +#!/bin/bash +# Managed by collectd-oracle-db-connected ansible role +# If manually editing, just kill script and collectd will respawn +# e.g. pkill -u {{ collectd_script_user }} -f {{ collectd_script_path }}/{{ collectd_script_name }}.sh + +HOSTNAME="${HOSTNAME:-localhost}" +INTERVAL="${INTERVAL:-{{ collectd_script_interval }}}" + +if [[ "$(whoami)" != "oracle" ]] +then + echo "This script is expected to be run as the Oracle user" 1>&2 + exit 1 +fi + +# We need to make sure this is in the path +export PATH=${PATH}:/usr/local/bin + +get_sids() { + aws ec2 describe-tags --filters "Name=resource-id,Values={{ ansible_ec2_instance_id }}" "Name=key,Values=oracle-sids" --query Tags[0].Value --output=text +} + +db_connected() { + # DB resources names are usually 'ora.${DB}.db' but some have a suffix after ${DB} + DB="$(crsctl status resource | grep -m1 -i ora\.${SID}.*\.db | cut -f2 -d=)" + + # Check added to alert on not having a database resource BEFORE trying to get it's status + if [[ -z "$DB" ]] + then + echo "Failed to find a database resource for ${SID}" 1>&2 + return 1 + fi + + # Worth noting here that crsctl exits with code 0 even if you try and find details of a database that doesn't exist + STATUS=$(timeout $INTERVAL crsctl status resource ${DB} -v | grep STATE_DETAILS | cut -f2 -d= | cut -f1 -d,) + + case ${STATUS} in + "Open") + return 0 + ;; + "Open,Readonly") + return 0 + ;; + "Mounted (Closed)") + return 0 + ;; + *) + # If this check returns a non-zero value then the database is not connected + return 1 + ;; + esac +} + +ORACLE_SID="+ASM" +ORAENV_ASK="NO" +. oraenv > /dev/null + +while sleep "$INTERVAL"; do + SIDS=$(get_sids) + if [[ "$SIDS" != "None" ]]; then + for SID in $(get_sids); do + db_connected $SID >/dev/null 2>&1 + echo "PUTVAL $HOSTNAME/exec-db_connected/bool-$SID interval=$INTERVAL N:$?" + done + fi +done diff --git a/ansible/roles/collectd-service-metrics/tasks/configure_collectd.yml b/ansible/roles/collectd-service-metrics/tasks/configure_collectd.yml index 6a688b548..27be4676d 100644 --- a/ansible/roles/collectd-service-metrics/tasks/configure_collectd.yml +++ b/ansible/roles/collectd-service-metrics/tasks/configure_collectd.yml @@ -1,7 +1,7 @@ --- - name: copy collectd config ansible.builtin.template: - src: "collectd.conf.j2" + src: "{{ collectd_script_name }}.conf.j2" dest: "/etc/collectd.d/{{ collectd_script_name }}.conf" owner: root mode: 0644 @@ -11,7 +11,7 @@ - name: copy collectd plugin script ansible.builtin.template: src: "{{ collectd_script_name }}.sh.j2" - dest: "/usr/local/bin/{{ collectd_script_name }}.sh" + dest: "{{ collectd_script_path }}/{{ collectd_script_name }}.sh" owner: root mode: 0755 notify: diff --git a/ansible/roles/collectd-service-metrics/templates/collectd_service_metrics.conf.j2 b/ansible/roles/collectd-service-metrics/templates/collectd_service_metrics.conf.j2 new file mode 100644 index 000000000..f259faeb8 --- /dev/null +++ b/ansible/roles/collectd-service-metrics/templates/collectd_service_metrics.conf.j2 @@ -0,0 +1,4 @@ +LoadPlugin exec + + Exec "{{ collectd_script_user }}" "{{ collectd_script_path }}/{{ collectd_script_name }}.sh" + diff --git a/ansible/roles/collectd/README.md b/ansible/roles/collectd/README.md index 71f7ae32e..21a937719 100644 --- a/ansible/roles/collectd/README.md +++ b/ansible/roles/collectd/README.md @@ -4,20 +4,20 @@ Installs collectd and configures it based on the values in group_vars `collectd_ Collectd is able to run scripts and perform other tasks based on plugins. The scripts are run by the exec plugin and the results are made available to the Cloudwatch agent on the same host via the network plugin. The Cloudwatch agent then sends the metrics to Cloudwatch. -The common plugins are defined in collectd.conf.j2 (network plugin being the most important) with additional plugins pulled in by the statement -`Include "/etc/collectd.d` in the main collectd.conf file. +The common plugins are defined in collectd.conf.j2 (network plugin being the most important) with additional plugins pulled in by the statement +`Include "/etc/collectd.d` in the main collectd.conf file. The collectd_configure task does the following: 1. reads values of `collectd_metric_configs` from group_vars, for example: -``` +``` collectd_metric_configs: - nomis-db ``` 2. loops through values of files/[collectd_metric_configs] and templates/[collectd_metric_configs] deploys them to the host if the relevant files exist - + 3. files/linux.conf and templates/linux.sh.j2 are deployed to the host by default if additional collectd_metric_configs are not defined @@ -35,15 +35,15 @@ Further collectd Troubleshooting [here](https://collectd.org/wiki/index.php/Trou 1. *.conf files must have an empty line at the end to load, otherwise collectd won't start... -2. formatting for the exec message (sent to localhost udp port 25826) is very important. It MUST be in the format "PUTVAL $HOSTNAME/exec-/guage-$signifier. Values after exec- and guage- (or other value type) cannot use additional '-' characters or spaces otherwise the exec plugin will deliver a mal-formed message. +2. formatting for the exec message (sent to localhost udp port 25826) is very important. It MUST be in the format "PUTVAL $HOSTNAME/exec-/guage-$signifier. Values after exec- and guage- (or other value type) cannot use additional '-' characters or spaces otherwise the exec plugin will deliver a mal-formed message. ## Collectd and Selinux -There is an additional task specifically to create a selinux policy for collectd. This is because collectd runs scripts via the exec plugin and selinux will block this by default. +There is an additional task specifically to create a selinux policy for collectd. This is because collectd runs scripts via the exec plugin and selinux will block this by default. Having logging for collectd is NOT enabled. Most of the useful information goes to /var/log/messages anyway or with selinux to /var/log/audit/audit.log where you can see what's being blocked in relation to collectd -There are selinux exceptions for collectd when it comes to Rhel 7 & 8. It _seems_ this isn't needed for Rhel 6 but there is an existing task to automatically scan the audit.log for issues and then create a policy file. +There are selinux exceptions for collectd when it comes to Rhel 7 & 8. It _seems_ this isn't needed for Rhel 6 but there is an existing task to automatically scan the audit.log for issues and then create a policy file. ### Some useful selinux commands for troubleshooting @@ -63,14 +63,7 @@ Once you have found an AVC denial message in /var/log/audit/audit.log you can us If/when there are additional instances of this please add the settings back to the relevant collectd_selinux_policy_rhel_(version).te file and re-run the ansible task to create the policy file. -At some point we may simply decide to place the whole collectd_t domain into permissive mode. - -``` -- name: change the collectd_t domain to permissive - community.general.selinux_permissive: - type: collectd_t - permissive: true -``` +Although we create a specific collectd policy, it is unlikely to cover everything. Especially when scripts are triggered from collectd. For this reason, we set collectd domain to permissive mode by default. You can also grab AVC rules like this: diff --git a/ansible/roles/collectd/defaults/main.yml b/ansible/roles/collectd/defaults/main.yml new file mode 100644 index 000000000..ead4a4654 --- /dev/null +++ b/ansible/roles/collectd/defaults/main.yml @@ -0,0 +1,2 @@ +--- +collectd_selinux_permissive: true diff --git a/ansible/roles/collectd/tasks/collectd_selinux_policy.yml b/ansible/roles/collectd/tasks/collectd_selinux_policy.yml index 14ec14091..606a0aa99 100644 --- a/ansible/roles/collectd/tasks/collectd_selinux_policy.yml +++ b/ansible/roles/collectd/tasks/collectd_selinux_policy.yml @@ -32,3 +32,22 @@ # block when: collectd_selinux_mode.stdout|lower == "enforcing" or collectd_selinux_mode.stdout|lower == "permissive" + +- name: Check if permissive state applied already + ansible.builtin.stat: + path: /root/.ansible-collectd-selinux + register: ansible_collectd_selinux_installed + +- name: Enable permissive mode for collectd + ansible.builtin.shell: | + set -eo pipefail + main() { + if [[ ! -e /root/.ansible-collectd-selinux ]]; then + semanage permissive -a collectd_t > /root/.ansible-collectd-selinux + fi + } + main 2>&1 | logger -p local3.info -t ansible-collectd + when: + - collectd_selinux_mode.stdout|lower == "enforcing" or collectd_selinux_mode.stdout|lower == "permissive" + - not ansible_collectd_selinux_installed.stat.exists + - collectd_selinux_permissive|bool