diff --git a/ansible/roles/audit/README.md b/ansible/roles/audit/README.md
new file mode 100644
index 000000000..d2776667a
--- /dev/null
+++ b/ansible/roles/audit/README.md
@@ -0,0 +1 @@
+Ansible role that installs the `audit` package and then starts and enables the `auditd` service on RedHat and OracleLinux hosts.
diff --git a/ansible/roles/audit/tasks/audit.yml b/ansible/roles/audit/tasks/audit.yml
new file mode 100644
index 000000000..a47ad0544
--- /dev/null
+++ b/ansible/roles/audit/tasks/audit.yml
@@ -0,0 +1,18 @@
+# Installs the audit package and ensures auditd is running and enabled at boot.
+- name: Install audit package
+  yum:
+    name: audit
+    state: present
+    lock_timeout: 60
+  # Older Ansible releases ignore retries/delay unless an until condition is set,
+  # so register the result and retry until the install succeeds.
+  register: audit_package_install
+  until: audit_package_install is succeeded
+  retries: 3
+  delay: 10
+
+- name: Start auditd
+  service:
+    name: auditd
+    state: started
+    enabled: yes
diff --git a/ansible/roles/audit/tasks/main.yml b/ansible/roles/audit/tasks/main.yml
new file mode 100644
index 000000000..80a000bb6
--- /dev/null
+++ b/ansible/roles/audit/tasks/main.yml
@@ -0,0 +1,6 @@
+# Statically import the audit tasks.
+# They are limited to RedHat and OracleLinux hosts and run under the
+# amibuild, ec2provision and ec2patch tags.
+- import_tasks: audit.yml
+  when: ansible_distribution in ['RedHat', 'OracleLinux']
+  tags: [amibuild, ec2provision, ec2patch]
diff --git a/ansible/roles/collectd-oracle-db-connected/defaults/main.yml b/ansible/roles/collectd-oracle-db-connected/defaults/main.yml
new file mode 100644
index 000000000..6a73fc5c2
--- /dev/null
+++ b/ansible/roles/collectd-oracle-db-connected/defaults/main.yml
@@ -0,0 +1,5 @@
+---
+collectd_script_path: /usr/local/bin  # directory the plugin script is templated into
+collectd_script_name: collectd_oracle_db_connected  # base name of the .conf.j2/.sh.j2 templates and of the deployed files
+collectd_script_user: oracle  # user whose plugin script process the restart handler kills
+collectd_script_interval: 60  # fallback for the script's INTERVAL, which it passes to sleep and timeout
diff --git a/ansible/roles/collectd-oracle-db-connected/handlers/main.yml b/ansible/roles/collectd-oracle-db-connected/handlers/main.yml
new file mode 100644
index 000000000..a9c35d30b
--- /dev/null
+++ b/ansible/roles/collectd-oracle-db-connected/handlers/main.yml
@@ -0,0 +1,10 @@
+---
+- name: restart collectd
+  ansible.builtin.service:
+    state: restarted
+    name: collectd
+
+- name: restart plugin script
+  # Best effort: kill any running copy of the script and never fail the play on pkill's exit status.
+  ansible.builtin.shell: "pkill -u {{ collectd_script_user }} -f {{ collectd_script_path }}/{{ collectd_script_name }}.sh"
+  failed_when: false
diff --git a/ansible/roles/collectd-oracle-db-connected/meta/main.yml b/ansible/roles/collectd-oracle-db-connected/meta/main.yml
new file mode 100644
index 000000000..fb5e24a73
--- /dev/null
+++ b/ansible/roles/collectd-oracle-db-connected/meta/main.yml
@@ -0,0 +1,4 @@
+---
+dependencies:
+ - role: get-ec2-facts  # presumably supplies ansible_ec2_instance_id used by the plugin script template - TODO confirm
+ - role: amazon-cloudwatch-agent-collectd  # NOTE(review): presumably sets up collectd and the CloudWatch agent - confirm
diff --git a/ansible/roles/collectd-oracle-db-connected/tasks/configure_collectd.yml b/ansible/roles/collectd-oracle-db-connected/tasks/configure_collectd.yml
new file mode 100644
index 000000000..27be4676d
--- /dev/null
+++ b/ansible/roles/collectd-oracle-db-connected/tasks/configure_collectd.yml
@@ -0,0 +1,18 @@
+---
+# Render the collectd config for this plugin; collectd is restarted when it changes.
+- name: copy collectd config
+  ansible.builtin.template:
+    src: "{{ collectd_script_name }}.conf.j2"
+    dest: "/etc/collectd.d/{{ collectd_script_name }}.conf"
+    owner: root
+    mode: "0644"
+  notify: restart collectd
+
+# Render the plugin script; the running copy is killed when it changes.
+- name: copy collectd plugin script
+  ansible.builtin.template:
+    src: "{{ collectd_script_name }}.sh.j2"
+    dest: "{{ collectd_script_path }}/{{ collectd_script_name }}.sh"
+    owner: root
+    mode: "0755"
+  notify: restart plugin script
diff --git a/ansible/roles/collectd-oracle-db-connected/tasks/main.yml b/ansible/roles/collectd-oracle-db-connected/tasks/main.yml
new file mode 100644
index 000000000..e701061c0
--- /dev/null
+++ b/ansible/roles/collectd-oracle-db-connected/tasks/main.yml
@@ -0,0 +1,6 @@
+---
+# Statically import the collectd configuration tasks; they apply only to
+# RedHat and OracleLinux hosts, under the ec2provision and ec2patch tags.
+- import_tasks: configure_collectd.yml
+  when: ansible_distribution in ['RedHat', 'OracleLinux']
+  tags: [ec2provision, ec2patch]
diff --git a/ansible/roles/collectd-service-metrics/templates/collectd.conf.j2 b/ansible/roles/collectd-oracle-db-connected/templates/collectd_oracle_db_connected.conf.j2
similarity index 100%
rename from ansible/roles/collectd-service-metrics/templates/collectd.conf.j2
rename to ansible/roles/collectd-oracle-db-connected/templates/collectd_oracle_db_connected.conf.j2
diff --git a/ansible/roles/collectd-oracle-db-connected/templates/collectd_oracle_db_connected.sh.j2 b/ansible/roles/collectd-oracle-db-connected/templates/collectd_oracle_db_connected.sh.j2
new file mode 100644
index 000000000..99a228135
--- /dev/null
+++ b/ansible/roles/collectd-oracle-db-connected/templates/collectd_oracle_db_connected.sh.j2
@@ -0,0 +1,65 @@
+#!/bin/bash
+# Managed by collectd-oracle-db-connected ansible role
+# If manually editing, just kill script and collectd will respawn
+# e.g. pkill -u {{ collectd_script_user }} -f {{ collectd_script_path }}/{{ collectd_script_name }}.sh
+
+HOSTNAME="${HOSTNAME:-localhost}"
+INTERVAL="${INTERVAL:-{{ collectd_script_interval }}}"
+
+if [[ "$(whoami)" != "oracle" ]]
+then
+ echo "This script is expected to be run as the Oracle user" 1>&2
+ exit 1
+fi
+
+# We need to make sure this is in the path
+export PATH=${PATH}:/usr/local/bin
+
+get_sids() {  # Prints the value of this instance's "oracle-sids" EC2 tag; the query is quoted so the shell cannot glob it
+  aws ec2 describe-tags --filters "Name=resource-id,Values={{ ansible_ec2_instance_id }}" "Name=key,Values=oracle-sids" --query 'Tags[0].Value' --output=text
+}
+
+db_connected() {
+ # DB resources names are usually 'ora.${DB}.db' but some have a suffix after ${DB}
+ DB="$(crsctl status resource | grep -m1 -i ora\.${SID}.*\.db | cut -f2 -d=)"
+
+ # Check added to alert on not having a database resource BEFORE trying to get it's status
+ if [[ -z "$DB" ]]
+ then
+ echo "Failed to find a database resource for ${SID}" 1>&2
+ return 1
+ fi
+
+ # Worth noting here that crsctl exits with code 0 even if you try and find details of a database that doesn't exist
+ STATUS=$(timeout $INTERVAL crsctl status resource ${DB} -v | grep STATE_DETAILS | cut -f2 -d= | cut -f1 -d,)
+
+ case ${STATUS} in
+ "Open")
+ return 0
+ ;;
+ "Open,Readonly")
+ return 0
+ ;;
+ "Mounted (Closed)")
+ return 0
+ ;;
+ *)
+ # If this check returns a non-zero value then the database is not connected
+ return 1
+ ;;
+ esac
+}
+
+ORACLE_SID="+ASM"
+ORAENV_ASK="NO"
+. oraenv > /dev/null
+
+while sleep "$INTERVAL"; do
+ SIDS=$(get_sids)
+ if [[ "$SIDS" != "None" ]]; then
+ for SID in $(get_sids); do
+ db_connected $SID >/dev/null 2>&1
+ echo "PUTVAL $HOSTNAME/exec-db_connected/bool-$SID interval=$INTERVAL N:$?"
+ done
+ fi
+done
diff --git a/ansible/roles/collectd-service-metrics/tasks/configure_collectd.yml b/ansible/roles/collectd-service-metrics/tasks/configure_collectd.yml
index 6a688b548..27be4676d 100644
--- a/ansible/roles/collectd-service-metrics/tasks/configure_collectd.yml
+++ b/ansible/roles/collectd-service-metrics/tasks/configure_collectd.yml
@@ -1,7 +1,7 @@
---
- name: copy collectd config
ansible.builtin.template:
- src: "collectd.conf.j2"
+ src: "{{ collectd_script_name }}.conf.j2"
dest: "/etc/collectd.d/{{ collectd_script_name }}.conf"
owner: root
mode: 0644
@@ -11,7 +11,7 @@
- name: copy collectd plugin script
ansible.builtin.template:
src: "{{ collectd_script_name }}.sh.j2"
- dest: "/usr/local/bin/{{ collectd_script_name }}.sh"
+ dest: "{{ collectd_script_path }}/{{ collectd_script_name }}.sh"
owner: root
mode: 0755
notify:
diff --git a/ansible/roles/collectd-service-metrics/templates/collectd_service_metrics.conf.j2 b/ansible/roles/collectd-service-metrics/templates/collectd_service_metrics.conf.j2
new file mode 100644
index 000000000..f259faeb8
--- /dev/null
+++ b/ansible/roles/collectd-service-metrics/templates/collectd_service_metrics.conf.j2
@@ -0,0 +1,6 @@
+# Run the plugin script through collectd's exec plugin as the configured user.
+LoadPlugin exec
+<Plugin exec>
+  Exec "{{ collectd_script_user }}" "{{ collectd_script_path }}/{{ collectd_script_name }}.sh"
+</Plugin>
+
diff --git a/ansible/roles/collectd/README.md b/ansible/roles/collectd/README.md
index 71f7ae32e..21a937719 100644
--- a/ansible/roles/collectd/README.md
+++ b/ansible/roles/collectd/README.md
@@ -4,20 +4,20 @@ Installs collectd and configures it based on the values in group_vars `collectd_
Collectd is able to run scripts and perform other tasks based on plugins. The scripts are run by the exec plugin and the results are made available to the Cloudwatch agent on the same host via the network plugin. The Cloudwatch agent then sends the metrics to Cloudwatch.
-The common plugins are defined in collectd.conf.j2 (network plugin being the most important) with additional plugins pulled in by the statement
-`Include "/etc/collectd.d` in the main collectd.conf file.
+The common plugins are defined in collectd.conf.j2 (network plugin being the most important) with additional plugins pulled in by the statement
+`Include "/etc/collectd.d"` in the main collectd.conf file.
The collectd_configure task does the following:
1. reads values of `collectd_metric_configs` from group_vars, for example:
-```
+```
collectd_metric_configs:
- nomis-db
```
2. loops through values of files/[collectd_metric_configs] and templates/[collectd_metric_configs] deploys them to the host if the relevant files exist
-
+
3. files/linux.conf and templates/linux.sh.j2 are deployed to the host by default if additional collectd_metric_configs are not defined
@@ -35,15 +35,15 @@ Further collectd Troubleshooting [here](https://collectd.org/wiki/index.php/Trou
1. *.conf files must have an empty line at the end to load, otherwise collectd won't start...
-2. formatting for the exec message (sent to localhost udp port 25826) is very important. It MUST be in the format "PUTVAL $HOSTNAME/exec-/guage-$signifier. Values after exec- and guage- (or other value type) cannot use additional '-' characters or spaces otherwise the exec plugin will deliver a mal-formed message.
+2. formatting for the exec message (sent to localhost udp port 25826) is very important. It MUST be in the format "PUTVAL $HOSTNAME/exec-/gauge-$signifier". Values after exec- and gauge- (or other value type) cannot use additional '-' characters or spaces, otherwise the exec plugin will deliver a malformed message.
## Collectd and Selinux
-There is an additional task specifically to create a selinux policy for collectd. This is because collectd runs scripts via the exec plugin and selinux will block this by default.
+There is an additional task specifically to create a selinux policy for collectd. This is because collectd runs scripts via the exec plugin and selinux will block this by default.
Having logging for collectd is NOT enabled. Most of the useful information goes to /var/log/messages anyway or with selinux to /var/log/audit/audit.log where you can see what's being blocked in relation to collectd
-There are selinux exceptions for collectd when it comes to Rhel 7 & 8. It _seems_ this isn't needed for Rhel 6 but there is an existing task to automatically scan the audit.log for issues and then create a policy file.
+There are selinux exceptions for collectd when it comes to Rhel 7 & 8. It _seems_ this isn't needed for Rhel 6 but there is an existing task to automatically scan the audit.log for issues and then create a policy file.
### Some useful selinux commands for troubleshooting
@@ -63,14 +63,7 @@ Once you have found an AVC denial message in /var/log/audit/audit.log you can us
If/when there are additional instances of this please add the settings back to the relevant collectd_selinux_policy_rhel_(version).te file and re-run the ansible task to create the policy file.
-At some point we may simply decide to place the whole collectd_t domain into permissive mode.
-
-```
-- name: change the collectd_t domain to permissive
- community.general.selinux_permissive:
- type: collectd_t
- permissive: true
-```
+Although we create a specific collectd policy, it is unlikely to cover everything, especially when scripts are triggered from collectd. For this reason the `collectd_t` domain is set to permissive mode by default; this is controlled by the `collectd_selinux_permissive` variable.
You can also grab AVC rules like this:
diff --git a/ansible/roles/collectd/defaults/main.yml b/ansible/roles/collectd/defaults/main.yml
new file mode 100644
index 000000000..ead4a4654
--- /dev/null
+++ b/ansible/roles/collectd/defaults/main.yml
@@ -0,0 +1,2 @@
+---
+collectd_selinux_permissive: true  # when true, the role adds the collectd_t SELinux domain to the permissive list
diff --git a/ansible/roles/collectd/tasks/collectd_selinux_policy.yml b/ansible/roles/collectd/tasks/collectd_selinux_policy.yml
index 14ec14091..606a0aa99 100644
--- a/ansible/roles/collectd/tasks/collectd_selinux_policy.yml
+++ b/ansible/roles/collectd/tasks/collectd_selinux_policy.yml
@@ -32,3 +32,22 @@
# block
when: collectd_selinux_mode.stdout|lower == "enforcing" or collectd_selinux_mode.stdout|lower == "permissive"
+
+- name: Check if permissive state applied already  # looks for the marker file left by a previous successful run
+  ansible.builtin.stat:
+    path: /root/.ansible-collectd-selinux
+  register: ansible_collectd_selinux_installed
+
+- name: Enable permissive mode for collectd  # output goes to syslog via logger; pipefail surfaces a semanage failure
+  ansible.builtin.shell: |
+    set -eo pipefail
+    main() {
+      if [[ ! -e /root/.ansible-collectd-selinux ]]; then
+        semanage permissive -a collectd_t && touch /root/.ansible-collectd-selinux  # marker only written on success
+      fi
+    }
+    main 2>&1 | logger -p local3.info -t ansible-collectd
+  when:
+    - collectd_selinux_mode.stdout|lower == "enforcing" or collectd_selinux_mode.stdout|lower == "permissive"
+    - not ansible_collectd_selinux_installed.stat.exists
+    - collectd_selinux_permissive|bool