From 88d682c945a436b5d0d0b029918af921a8e614ac Mon Sep 17 00:00:00 2001 From: Dominic Robinson <65237317+drobinson-moj@users.noreply.github.com> Date: Mon, 16 Oct 2023 16:50:58 +0100 Subject: [PATCH] DSOS-2093: cloudwatch-agent plus some general improvements (#364) * add OL8.5 group vars * simplify cloudwatch agent installation * Fix for RHEL6 * Update README * readme * don't install cloudwatch agent in startup * update amazon-cloudwatch-agent * fix --- ansible/README.md | 31 ++++++++++++ ansible/group_vars/server_type_base_ol85.yml | 12 +++++ .../group_vars/server_type_base_rhel610.yml | 1 - .../group_vars/server_type_base_rhel79.yml | 1 - .../group_vars/server_type_base_rhel85.yml | 1 - ansible/requirements.rhel6.yml | 27 +++++++++++ .../roles/amazon-cloudwatch-agent/README.md | 22 +-------- .../amazon-cloudwatch-agent/defaults/main.yml | 6 +++ .../amazon-cloudwatch-agent/handlers/main.yml | 5 ++ .../tasks/configure.yml | 30 +++--------- .../amazon-cloudwatch-agent/tasks/install.yml | 30 +++--------- .../amazon-cloudwatch-agent/tasks/main.yml | 9 ++++ .../amazon-cloudwatch-agent/tasks/start.yml | 19 ++------ .../templates/linux.json.j2 | 48 +++++++++++++------ .../amazon-cloudwatch-agent/vars/main.yml | 5 -- ansible/roles/ansible-script/files/ansible.sh | 6 ++- 16 files changed, 148 insertions(+), 105 deletions(-) create mode 100644 ansible/group_vars/server_type_base_ol85.yml create mode 100644 ansible/requirements.rhel6.yml create mode 100644 ansible/roles/amazon-cloudwatch-agent/defaults/main.yml create mode 100644 ansible/roles/amazon-cloudwatch-agent/handlers/main.yml delete mode 100644 ansible/roles/amazon-cloudwatch-agent/vars/main.yml diff --git a/ansible/README.md b/ansible/README.md index 5ada7577a..0ba8f5b9a 100644 --- a/ansible/README.md +++ b/ansible/README.md @@ -13,6 +13,16 @@ Use `user_data` to provide a cloud init or shell script which runs ansible. 
See nomis ansible template scripts in [modernisation-platform-environments](https://github.com/ministryofjustice/modernisation-platform-environments/tree/main/terraform/environments/nomis/templates/) for an example. This relies on tags to identify which roles to run. +## Running ansible locally on a linux EC2 instance + +The `ansible-script` role installs a wrapper script ansible.sh in the /root/ directory. +Use this to run ansible within a virtual environment pulling in appropriate group_vars. +For example: + +``` +/root/ansible.sh site.yml --tags ec2patch +``` + ## Installing on Mac Ensure you have python3.6+ installed on your local mac. @@ -84,6 +94,8 @@ A generic [site.yml](/ansible/site.yml) is provided with dynamic inventories under [hosts/](/ansible/hosts/) folder. This creates groups based of the following tags: +- ami +- os-type - environment-name - server-type @@ -133,3 +145,22 @@ ansible-playbook site.yml -e "role=amazon-cloudwatch-agent" # Run locally (the comma after localhost is important) ansible-playbook site.yml --connection=local -i localhost, -e "target=localhost" -e "@group_vars/server_type_nomis_db.yml" --check ``` + +## Gotchas for RHEL6 + +The ansible.builtin.yum task misbehaves when running from local MacOS on a RHEL6 server. +Run ansible locally on the server instead. Example error message when running on MacOS: + +``` +TASK [amazon-cloudwatch-agent : Install amazon-cloudwatch-agent] ********************************************************************************************** +fatal: [xxx]: FAILED! => {"changed": false, "msg": "ansible-core requires a minimum of Python2 version 2.7 or Python3 version 3.5. Current version: 2.6.6 (r266:84292, May 31 2023, 09:01:24) [GCC 4.4.7 20120313 (Red Hat 4.4.7-23)]"} +``` + +Recent updates to `galaxy.ansible.com` have broken collection installation on RHEL6. +Use requirements.rhel6.yml instead. Example error: + +``` +# [WARNING]: Skipping Galaxy server https://galaxy.ansible.com/api/. 
Got an unexpected error when getting available versions of collection amazon.aws: +# '/api/v3/plugin/ansible/content/published/collections/index/amazon/aws/versions/' +# ERROR! Unexpected Exception, this is probably a bug: '/api/v3/plugin/ansible/content/published/collections/index/amazon/aws/versions/' +``` diff --git a/ansible/group_vars/server_type_base_ol85.yml b/ansible/group_vars/server_type_base_ol85.yml new file mode 100644 index 000000000..3bd3ecfb4 --- /dev/null +++ b/ansible/group_vars/server_type_base_ol85.yml @@ -0,0 +1,12 @@ +--- +ansible_python_interpreter: python3.9 + +server_type_roles_list: + - autoscale-group-hooks + - get-ec2-facts + - set-ec2-hostname + - domain-search + - ansible-script + - autoscale-group-hooks-state + +roles_list: "{{ (ami_roles_list | default([]) | difference(server_type_roles_list | default([]))) + (server_type_roles_list | default([])) }}" diff --git a/ansible/group_vars/server_type_base_rhel610.yml b/ansible/group_vars/server_type_base_rhel610.yml index ffa6d73cf..e355a4e77 100644 --- a/ansible/group_vars/server_type_base_rhel610.yml +++ b/ansible/group_vars/server_type_base_rhel610.yml @@ -5,7 +5,6 @@ server_type_roles_list: - autoscale-group-hooks - set-ec2-hostname - domain-search - - amazon-cloudwatch-agent - autoscale-group-hooks-state - ansible-script diff --git a/ansible/group_vars/server_type_base_rhel79.yml b/ansible/group_vars/server_type_base_rhel79.yml index 17a7a1e5e..665e0d2fc 100644 --- a/ansible/group_vars/server_type_base_rhel79.yml +++ b/ansible/group_vars/server_type_base_rhel79.yml @@ -5,7 +5,6 @@ server_type_roles_list: - autoscale-group-hooks - set-ec2-hostname - domain-search - - amazon-cloudwatch-agent - ansible-script - autoscale-group-hooks-state diff --git a/ansible/group_vars/server_type_base_rhel85.yml b/ansible/group_vars/server_type_base_rhel85.yml index da2abfbe4..3bd3ecfb4 100644 --- a/ansible/group_vars/server_type_base_rhel85.yml +++ b/ansible/group_vars/server_type_base_rhel85.yml @@ 
-6,7 +6,6 @@ server_type_roles_list: - get-ec2-facts - set-ec2-hostname - domain-search - - amazon-cloudwatch-agent - ansible-script - autoscale-group-hooks-state diff --git a/ansible/requirements.rhel6.yml b/ansible/requirements.rhel6.yml new file mode 100644 index 000000000..ab60027a7 --- /dev/null +++ b/ansible/requirements.rhel6.yml @@ -0,0 +1,27 @@ +# https://galaxy.ansible.com is broken on RHEL6 for some collections, failing with the following error: +# [WARNING]: Skipping Galaxy server https://galaxy.ansible.com/api/. Got an unexpected error when getting available versions of collection amazon.aws: +# '/api/v3/plugin/ansible/content/published/collections/index/amazon/aws/versions/' +# ERROR! Unexpected Exception, this is probably a bug: '/api/v3/plugin/ansible/content/published/collections/index/amazon/aws/versions/' + +# from Galaxy +roles: + +collections: + - name: community.general + source: https://old-galaxy.ansible.com + version: 6.3.0 + - name: amazon.aws + source: https://old-galaxy.ansible.com + version: 3.2.0 + - name: community.aws + source: https://old-galaxy.ansible.com + version: 3.2.1 + - name: ansible.posix + source: https://galaxy.ansible.com + version: 1.4.0 + - name: ansible.windows + source: https://galaxy.ansible.com + version: 1.13.0 + - name: community.windows + source: https://galaxy.ansible.com + version: 1.12.0 diff --git a/ansible/roles/amazon-cloudwatch-agent/README.md b/ansible/roles/amazon-cloudwatch-agent/README.md index 970e248bf..b6e4fce8c 100644 --- a/ansible/roles/amazon-cloudwatch-agent/README.md +++ b/ansible/roles/amazon-cloudwatch-agent/README.md @@ -1,28 +1,8 @@ -# FIXME: this page needs an extensive re-write - # Cloudwatch Agent Role This role installs the Cloudwatch Agent on a Linux host and configures it to send metrics to Cloudwatch. - -If the group_vars for a host has the variable `cloudwatch_agent_configs` defined then this will deploy additional cloudwatch agent config files to the host. See files in /templates for examples. 
- -Amazon Cloudwatch Agent config exection and start order is: - - 1. ansible_system == 'linux' (the default ansible_system i.e. linux) via `/templates/linux.json.j2` - 2. loops through values of `cloudwatch_agent_configs` in group_vars and deploys them to the host - -e.g. if you have a group_vars entry like this: - -``` -cloudwatch_agent_configs: - - nomis-db -``` - - then the file `templates/nomis-db.json.j2` will be deployed to the host. - -# Cloudwatch Agent - Metrics sent to Cloudwatch will all appear in the default CWAgent namespace + ## Debugging on Linux ssm onto the machine/instance and run the following command to find out the running status of the agent: diff --git a/ansible/roles/amazon-cloudwatch-agent/defaults/main.yml b/ansible/roles/amazon-cloudwatch-agent/defaults/main.yml new file mode 100644 index 000000000..25b837f0a --- /dev/null +++ b/ansible/roles/amazon-cloudwatch-agent/defaults/main.yml @@ -0,0 +1,6 @@ +--- +amazon_cloudwatch_agent_config_name: linux.json.j2 +amazon_cloudwatch_agent_gpg: https://amazoncloudwatch-agent-eu-west-2.s3.eu-west-2.amazonaws.com/assets/amazon-cloudwatch-agent.gpg +amazon_cloudwatch_agent_package: https://amazoncloudwatch-agent-eu-west-2.s3.eu-west-2.amazonaws.com/redhat/amd64/latest/amazon-cloudwatch-agent.rpm +amazon_cloudwatch_agent_config_file: amazon-cloudwatch-agent.json +amazon_cloudwatch_agent_config_path: /opt/aws/amazon-cloudwatch-agent/etc diff --git a/ansible/roles/amazon-cloudwatch-agent/handlers/main.yml b/ansible/roles/amazon-cloudwatch-agent/handlers/main.yml new file mode 100644 index 000000000..87a4e9f25 --- /dev/null +++ b/ansible/roles/amazon-cloudwatch-agent/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: restart amazon-cloudwatch-agent + ansible.builtin.service: + name: amazon-cloudwatch-agent + state: restarted diff --git a/ansible/roles/amazon-cloudwatch-agent/tasks/configure.yml b/ansible/roles/amazon-cloudwatch-agent/tasks/configure.yml index 43e983245..39cc65e75 100644 --- 
a/ansible/roles/amazon-cloudwatch-agent/tasks/configure.yml +++ b/ansible/roles/amazon-cloudwatch-agent/tasks/configure.yml @@ -7,32 +7,16 @@ group: root mode: 0755 -- name: Get tag values for use in config file - set_fact: - name: "{{ ec2.tags.Name }}" - server_type: "{{ ec2.tags['server-type'] }}" - instance_id: "{{ ansible_ec2_instance_id }}" +- name: Fail if tags not defined + fail: + msg: "Please ensure Name tag is defined" + when: ec2.tags['Name'] is not defined or ansible_ec2_instance_id is not defined -# default cloudwatch-agent config file based on OS type (linux or windows) -# NOTE: windows default config doesn't exist, this is being handled in the Windows instance user-data in terraform -- name: Create OS specific amazon-cloudwatch-agent config file +- name: Create amazon-cloudwatch-agent config file ansible.builtin.template: - src: "{{ ansible_system|lower }}.json.j2" + src: "{{ amazon_cloudwatch_agent_config_name }}" dest: "{{ amazon_cloudwatch_agent_config_path }}/{{ amazon_cloudwatch_agent_config_file }}" owner: root group: root mode: 0755 - -# additional settings for EC2 instances if they exist -- name: template config files - ansible.builtin.template: - src: "{{ item|replace('-', '_') }}.json.j2" - dest: "{{ amazon_cloudwatch_agent_config_path }}/{{ item|replace('-', '_') }}.json" - owner: root - group: root - mode: 0755 - loop: "{{ cloudwatch_agent_configs }}" - loop_control: - label: item - when: cloudwatch_agent_configs is defined # currently none defined - + notify: restart amazon-cloudwatch-agent diff --git a/ansible/roles/amazon-cloudwatch-agent/tasks/install.yml b/ansible/roles/amazon-cloudwatch-agent/tasks/install.yml index 4b9df0681..94a778255 100644 --- a/ansible/roles/amazon-cloudwatch-agent/tasks/install.yml +++ b/ansible/roles/amazon-cloudwatch-agent/tasks/install.yml @@ -1,29 +1,13 @@ --- -- name: check if amazon-cloudwatch-agent installed - ansible.builtin.shell: | - check_installed() { - check="$(amazon-cloudwatch-agent-ctl -m ec2 
-a status)" - if [[ $check ]] - then - return 0 - else - return 1 - fi - } - check_installed - ignore_errors: true - register: agent_installed +# Not bothering to install RPM key as RPM doesn't seem to be signed +# Plus it probably won't work if selinux enabled +# - name: Import amazon cloudwatch agent RPM key +# ansible.builtin.rpm_key: +# state: present +# key: "{{ amazon_cloudwatch_agent_gpg }}" - name: Install amazon-cloudwatch-agent ansible.builtin.yum: name: "{{ amazon_cloudwatch_agent_package }}" state: present - disable_gpg_check: true - when: agent_installed.rc == 1 and ansible_distribution_major_version != '6' - -- name: Install amazon-cloudwatch-agent on Rhel 6 - ansible.builtin.shell: | - wget https://s3.amazonaws.com/amazoncloudwatch-agent/redhat/amd64/latest/amazon-cloudwatch-agent.rpm - rpm -U ./amazon-cloudwatch-agent.rpm - become: true - when: agent_installed.rc == 1 and ansible_distribution_major_version == '6' + disable_gpg_check: true # RPM doesn't appear to be signed even though a GPG key is provided diff --git a/ansible/roles/amazon-cloudwatch-agent/tasks/main.yml b/ansible/roles/amazon-cloudwatch-agent/tasks/main.yml index adc978891..e9f8d022c 100644 --- a/ansible/roles/amazon-cloudwatch-agent/tasks/main.yml +++ b/ansible/roles/amazon-cloudwatch-agent/tasks/main.yml @@ -1,18 +1,27 @@ --- - import_tasks: "install.yml" tags: + - amazon-cloudwatch-agent-install - ec2provision - ec2patch when: ansible_distribution in ['RedHat', 'OracleLinux'] - import_tasks: "configure.yml" tags: + - amazon-cloudwatch-agent-configure - ec2provision - ec2patch when: ansible_distribution in ['RedHat', 'OracleLinux'] +# Ensure any restarts done prior to start +- name: Flush handlers + meta: flush_handlers + tags: + - always + - import_tasks: "start.yml" tags: + - amazon-cloudwatch-agent-start - ec2provision - ec2patch when: ansible_distribution in ['RedHat', 'OracleLinux'] diff --git a/ansible/roles/amazon-cloudwatch-agent/tasks/start.yml 
b/ansible/roles/amazon-cloudwatch-agent/tasks/start.yml index 9c2863b95..d1816a191 100644 --- a/ansible/roles/amazon-cloudwatch-agent/tasks/start.yml +++ b/ansible/roles/amazon-cloudwatch-agent/tasks/start.yml @@ -1,15 +1,6 @@ --- -# covers cloudwatch agent start for linux (common) -- name: Start amazon-cloudwatch-agent service - ansible.builtin.shell: | - /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -s -c file:"{{ amazon_cloudwatch_agent_config_path }}/{{ amazon_cloudwatch_agent_config_file }}" - -# additional settings for EC2 instances if they exist -- name: Append settings for amazon-cloudwatch-agent for other EC2 instances - ansible.builtin.shell: | - /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl -a append-config -m ec2 -s -c file:"{{ amazon_cloudwatch_agent_config_path }}/{{ item }}.json" - loop: "{{ cloudwatch_agent_configs|replace('-', '_') }}" - loop_control: - label: item - when: cloudwatch_agent_configs is defined # currently none defined - notify: restart amazon-cloudwatch-agent +- name: Start and enable amazon-cloudwatch-agent + ansible.builtin.service: + name: amazon-cloudwatch-agent + state: started + enabled: yes diff --git a/ansible/roles/amazon-cloudwatch-agent/templates/linux.json.j2 b/ansible/roles/amazon-cloudwatch-agent/templates/linux.json.j2 index e74c7b883..97567244f 100644 --- a/ansible/roles/amazon-cloudwatch-agent/templates/linux.json.j2 +++ b/ansible/roles/amazon-cloudwatch-agent/templates/linux.json.j2 @@ -24,8 +24,12 @@ "metrics_collection_interval": 60, "totalcpu": false, "append_dimensions": { - "name": "{{ name }}", - "server_type": "{{ server_type }}" +{% if ec2.tags['server-type'] is defined %} + "name": "{{ ec2.tags['Name'] }}", + "server_type": "{{ ec2.tags['server-type'] }}" +{% else %} + "name": "{{ ec2.tags['Name'] }}" +{% endif %} } }, "disk": { @@ -38,8 +42,12 @@ "*" ], "append_dimensions": { - "name": "{{ name }}", - "server_type": "{{ server_type }}" +{% if 
ec2.tags['server-type'] is defined %} + "name": "{{ ec2.tags['Name'] }}", + "server_type": "{{ ec2.tags['server-type'] }}" +{% else %} + "name": "{{ ec2.tags['Name'] }}" +{% endif %} } }, "diskio": { @@ -51,8 +59,12 @@ "*" ], "append_dimensions": { - "name": "{{ name }}", - "server_type": "{{ server_type }}" +{% if ec2.tags['server-type'] is defined %} + "name": "{{ ec2.tags['Name'] }}", + "server_type": "{{ ec2.tags['server-type'] }}" +{% else %} + "name": "{{ ec2.tags['Name'] }}" +{% endif %} } }, "mem": { @@ -64,8 +76,12 @@ "*" ], "append_dimensions": { - "name": "{{ name }}", - "server_type": "{{ server_type }}" +{% if ec2.tags['server-type'] is defined %} + "name": "{{ ec2.tags['Name'] }}", + "server_type": "{{ ec2.tags['server-type'] }}" +{% else %} + "name": "{{ ec2.tags['Name'] }}" +{% endif %} } }, "swap": { @@ -77,11 +93,15 @@ "*" ], "append_dimensions": { - "name": "{{ name }}", - "server_type": "{{ server_type }}" +{% if ec2.tags['server-type'] is defined %} + "name": "{{ ec2.tags['Name'] }}", + "server_type": "{{ ec2.tags['server-type'] }}" +{% else %} + "name": "{{ ec2.tags['Name'] }}" +{% endif %} } } - } + } }, "logs": { "logs_collected": { @@ -90,15 +110,15 @@ { "file_path": "/var/log/messages", "log_group_name": "cwagent-var-log-messages", - "log_stream_name": "{{ instance_id }}" + "log_stream_name": "{{ ansible_ec2_instance_id }}" }, { "file_path": "/var/log/secure", "log_group_name": "cwagent-var-log-secure", - "log_stream_name": "{{ instance_id }}" + "log_stream_name": "{{ ansible_ec2_instance_id }}" } ] } } } -} \ No newline at end of file +} diff --git a/ansible/roles/amazon-cloudwatch-agent/vars/main.yml b/ansible/roles/amazon-cloudwatch-agent/vars/main.yml deleted file mode 100644 index 6d384e00b..000000000 --- a/ansible/roles/amazon-cloudwatch-agent/vars/main.yml +++ /dev/null @@ -1,5 +0,0 @@ ---- -# common vars for setting up cloudwatch agent -amazon_cloudwatch_agent_package: 
https://s3.eu-west-2.amazonaws.com/amazoncloudwatch-agent-eu-west-2/redhat/amd64/latest/amazon-cloudwatch-agent.rpm -amazon_cloudwatch_agent_config_file: amazon-cloudwatch-agent.json -amazon_cloudwatch_agent_config_path: /opt/aws/amazon-cloudwatch-agent/etc diff --git a/ansible/roles/ansible-script/files/ansible.sh b/ansible/roles/ansible-script/files/ansible.sh index 3fdb96fb6..6dd25f20e 100755 --- a/ansible/roles/ansible-script/files/ansible.sh +++ b/ansible/roles/ansible-script/files/ansible.sh @@ -97,6 +97,7 @@ run_ansible() { cd $ansible_dir/python-venv $python -m venv ansible source ansible/bin/activate + requirements_yml=requirements.yml if [[ $update == 1 ]]; then $python -m pip install --upgrade pip if [[ "$python" =~ 3.6 ]]; then @@ -104,6 +105,7 @@ run_ansible() { $python -m pip install cryptography==2.3 export LC_ALL=en_US.UTF-8 $python -m pip install ansible-core==2.11.12 + requirements_yml=requirements.rhel6.yml else $python -m pip install ansible==6.0.0 fi @@ -112,8 +114,8 @@ run_ansible() { echo "# Installing ansible requirements" cd $ansible_dir/${ansible_repo}/${ansible_repo_basedir} $python -m pip install -r requirements.txt - ansible-galaxy role install -r requirements.yml - ansible-galaxy collection install -r requirements.yml + ansible-galaxy role install -r $requirements_yml + ansible-galaxy collection install -r $requirements_yml fi # run ansible (comma after localhost deliberate)