diff --git a/ansible/group_vars/environment_name_hmpps_oem_preproduction.yml b/ansible/group_vars/environment_name_hmpps_oem_preproduction.yml
index 5fcd61227..441003853 100644
--- a/ansible/group_vars/environment_name_hmpps_oem_preproduction.yml
+++ b/ansible/group_vars/environment_name_hmpps_oem_preproduction.yml
@@ -72,3 +72,31 @@ housekeeping_cron:
emcli: /u01/app/oracle/product/mw135/bin/emcli
emctl_oem: /u01/app/oracle/product/mw135/bin/emctl
emctl_agent: /u01/app/oracle/product/oem-agent/agent_inst/bin/emctl
+
+# Endpoints checked by the collectd-endpoint-monitoring role.
+#   metric_dimension: label attached to the emitted metrics for this endpoint
+#   url: address fetched by the plugin script on every polling interval
+collectd_endpoint_monitoring:
+  - metric_dimension: c.pp-nomis.az.justice.gov.uk
+    url: 'https://c.pp-nomis.az.justice.gov.uk/forms/frmservlet?config=tag'
+  - metric_dimension: c.lsast-nomis.az.justice.gov.uk
+    url: 'https://c.lsast-nomis.az.justice.gov.uk/forms/frmservlet?config=tag'
+  - metric_dimension: pp-oasys.az.justice.gov.uk
+    url: 'https://pp-oasys.az.justice.gov.uk/eor/f?p=100'
+  - metric_dimension: onr.pp-oasys.az.justice.gov.uk
+    url: 'https://onr.pp-oasys.az.justice.gov.uk/InfoViewApp'
+  - metric_dimension: r1.pp.csr.service.justice.gov.uk
+    url: 'http://r1.pp.csr.service.justice.gov.uk:7770/isps/index.html?2057'
+  - metric_dimension: r2.pp.csr.service.justice.gov.uk
+    url: 'http://r2.pp.csr.service.justice.gov.uk:7771/isps/index.html?2057'
+  - metric_dimension: r3.pp.csr.service.justice.gov.uk
+    url: 'http://r3.pp.csr.service.justice.gov.uk:7770/isps/index.html?2057'
+  - metric_dimension: r4.pp.csr.service.justice.gov.uk
+    url: 'http://r4.pp.csr.service.justice.gov.uk:7771/isps/index.html?2057'
+  - metric_dimension: r5.pp.csr.service.justice.gov.uk
+    url: 'http://r5.pp.csr.service.justice.gov.uk:7770/isps/index.html?2057'
+  - metric_dimension: r6.pp.csr.service.justice.gov.uk
+    url: 'http://r6.pp.csr.service.justice.gov.uk:7771/isps/index.html?2057'
+  - metric_dimension: traina.csr.service.justice.gov.uk
+    url: 'http://traina.csr.service.justice.gov.uk/isps/index.html?2057'
+  - metric_dimension: cafmwebx.pp.planetfm.service.justice.gov.uk
+    url: 'https://cafmwebx.pp.planetfm.service.justice.gov.uk/PlanetPortal'
+  - metric_dimension: hpa-preprod.service.hmpps.dsd.io
+    url: 'https://hpa-preprod.service.hmpps.dsd.io/'
diff --git a/ansible/group_vars/environment_name_hmpps_oem_production.yml b/ansible/group_vars/environment_name_hmpps_oem_production.yml
index 0fcc0c1c0..22694a142 100644
--- a/ansible/group_vars/environment_name_hmpps_oem_production.yml
+++ b/ansible/group_vars/environment_name_hmpps_oem_production.yml
@@ -90,3 +90,41 @@ endpoint_monitoring_targets:
emcli: /u01/app/oracle/product/mw135/bin/emcli
emctl_oem: /u01/app/oracle/product/mw135/bin/emctl
emctl_agent: /u01/app/oracle/product/oem-agent/agent_inst/bin/emctl
+
+# Endpoints checked by the collectd-endpoint-monitoring role.
+#   metric_dimension: label attached to the emitted metrics for this endpoint
+#   url: address fetched by the plugin script on every polling interval
+collectd_endpoint_monitoring:
+  - metric_dimension: c.nomis.az.justice.gov.uk
+    url: 'https://c.nomis.az.justice.gov.uk/forms/frmservlet?config=tag'
+  - metric_dimension: reporting.nomis.az.justice.gov.uk
+    url: 'https://reporting.nomis.az.justice.gov.uk/BOE/BI'
+  - metric_dimension: oasys.az.justice.gov.uk
+    url: 'https://oasys.az.justice.gov.uk/eor/f?p=100'
+  - metric_dimension: training.oasys.az.justice.gov.uk
+    url: 'https://training.oasys.az.justice.gov.uk/eor/f?p=100'
+  - metric_dimension: practice.oasys.az.justice.gov.uk
+    url: 'https://practice.oasys.az.justice.gov.uk/eor/f?p=100'
+  - metric_dimension: bridge-oasys.az.justice.gov.uk
+    url: 'https://bridge-oasys.az.justice.gov.uk/'
+  - metric_dimension: onr.oasys.az.justice.gov.uk
+    url: 'https://onr.oasys.az.justice.gov.uk/InfoViewApp'
+  - metric_dimension: r1.csr.service.justice.gov.uk
+    url: 'http://r1.csr.service.justice.gov.uk:7770/isps/index.html?2057'
+  - metric_dimension: r2.csr.service.justice.gov.uk
+    url: 'http://r2.csr.service.justice.gov.uk:7771/isps/index.html?2057'
+  - metric_dimension: r3.csr.service.justice.gov.uk
+    url: 'http://r3.csr.service.justice.gov.uk:7770/isps/index.html?2057'
+  - metric_dimension: r4.csr.service.justice.gov.uk
+    url: 'http://r4.csr.service.justice.gov.uk:7771/isps/index.html?2057'
+  - metric_dimension: r5.csr.service.justice.gov.uk
+    url: 'http://r5.csr.service.justice.gov.uk:7770/isps/index.html?2057'
+  - metric_dimension: r6.csr.service.justice.gov.uk
+    url: 'http://r6.csr.service.justice.gov.uk:7771/isps/index.html?2057'
+  - metric_dimension: cafmwebx2.az.justice.gov.uk
+    url: 'https://cafmwebx2.az.justice.gov.uk/PlanetPortal'
+  - metric_dimension: cafmtrainweb.az.justice.gov.uk
+    url: 'https://cafmtrainweb.az.justice.gov.uk/PlanetPortal'
+  - metric_dimension: www.offloc.service.justice.gov.uk
+    url: 'https://www.offloc.service.justice.gov.uk/health'
+  - metric_dimension: hpa.service.hmpps.dsd.io
+    url: 'https://hpa.service.hmpps.dsd.io/'
+  - metric_dimension: hmpps-az-gw1.justice.gov.uk
+    url: 'https://hmpps-az-gw1.justice.gov.uk/RDWeb'
diff --git a/ansible/group_vars/environment_name_hmpps_oem_test.yml b/ansible/group_vars/environment_name_hmpps_oem_test.yml
index c0389dba0..616f8528c 100644
--- a/ansible/group_vars/environment_name_hmpps_oem_test.yml
+++ b/ansible/group_vars/environment_name_hmpps_oem_test.yml
@@ -71,3 +71,19 @@ housekeeping_cron:
emcli: /u01/app/oracle/product/mw135/bin/emcli
emctl_oem: /u01/app/oracle/product/mw135/bin/emctl
emctl_agent: /u01/app/oracle/product/oem-agent/agent_inst/bin/emctl
+
+# Endpoints checked by the collectd-endpoint-monitoring role.
+#   metric_dimension: label attached to the emitted metrics for this endpoint
+#   url: address fetched by the plugin script on every polling interval
+# Keep each url on the same host as its metric_dimension, otherwise the metric
+# reports the health of a different environment.
+collectd_endpoint_monitoring:
+  - metric_dimension: c-t1.test.nomis.service.justice.gov.uk
+    url: 'https://c-t1.test.nomis.service.justice.gov.uk/forms/frmservlet?config=tag'
+  - metric_dimension: c-t2.test.nomis.service.justice.gov.uk
+    url: 'https://c-t2.test.nomis.service.justice.gov.uk/forms/frmservlet?config=tag'
+  - metric_dimension: c-t3.test.nomis.service.justice.gov.uk
+    url: 'https://c-t3.test.nomis.service.justice.gov.uk/forms/frmservlet?config=tag'
+  - metric_dimension: t1-int.oasys.service.justice.gov.uk
+    url: 'https://t1-int.oasys.service.justice.gov.uk/'
+  - metric_dimension: t2-int.oasys.service.justice.gov.uk
+    url: 'https://t2-int.oasys.service.justice.gov.uk/'
+  - metric_dimension: stage.offloc.service.justice.gov.uk
+    url: 'https://stage.offloc.service.justice.gov.uk/health'
+  - metric_dimension: hmppgw1.justice.gov.uk
+    url: 'https://hmppgw1.justice.gov.uk/RDWeb'
diff --git a/ansible/group_vars/server_type_hmpps_oem.yml b/ansible/group_vars/server_type_hmpps_oem.yml
index ffd42049d..4d2629d7e 100644
--- a/ansible/group_vars/server_type_hmpps_oem.yml
+++ b/ansible/group_vars/server_type_hmpps_oem.yml
@@ -65,8 +65,8 @@ server_type_roles_list:
- collectd-service-metrics
- collectd-oracle-db-connected
- collectd-textfile-monitoring
+ - collectd-endpoint-monitoring
- oracle-db-refresh
- - endpoint-monitoring
collectd_monitored_services_servertype:
- metric_name: service_status_os
diff --git a/ansible/roles/collectd-endpoint-monitoring/README.md b/ansible/roles/collectd-endpoint-monitoring/README.md
new file mode 100644
index 000000000..54c90eaa9
--- /dev/null
+++ b/ansible/roles/collectd-endpoint-monitoring/README.md
@@ -0,0 +1,32 @@
+# Role to configure endpoint monitoring via collectd
+
+Monitor the status of endpoints via collectd and cloudwatch.
+
+The role installs a collectd configuration file for using an exec plugin,
+and a script for checking the status of an endpoint.
+
+Use this if you cannot use an alternative solution such as pingdom due
+to IP allow listing restrictions, and you already have a linux EC2 that
+can be used for this kind of monitoring.
+
+Use `collectd-connectivity-tests` role if you just want to check
+connectivity to an IP/port.
+
+Why collectd? It is the Amazon-recommended approach for collecting metrics
+from an EC2 instance via the CloudWatch agent (CWAgent).
+
+## Finding metrics in Cloudwatch
+
+Metrics collected by the Cloudwatch agent will appear in the 'metrics' panel under the CWAgent namespace
+
+```
+metric: collectd_endpoint_monitoring_status (the metric_name)
+type: exitcode (fixed, 0 = ok, non-zero = error)
+type_instance: Friendly name of URL, e.g. c.nomis.service.justice.gov.uk (the metric_dimension)
+
+metric: collectd_endpoint_monitoring_cert_days_to_expiry (the metric_name)
+type: gauge (number of days until cert expires)
+type_instance: Friendly name of URL, e.g. c.nomis.service.justice.gov.uk (the metric_dimension)
+```
+
+Cloudwatch metrics are easily filtered by `instance_id` so you can see all the metrics for a particular instance.
diff --git a/ansible/roles/collectd-endpoint-monitoring/defaults/main.yml b/ansible/roles/collectd-endpoint-monitoring/defaults/main.yml
new file mode 100644
index 000000000..741a63487
--- /dev/null
+++ b/ansible/roles/collectd-endpoint-monitoring/defaults/main.yml
@@ -0,0 +1,15 @@
+---
+# Directory the plugin script is installed into
+collectd_script_path: /usr/local/bin
+# Base name shared by the collectd config file, the plugin script and their templates
+collectd_script_name: collectd_endpoint_monitoring
+# User the collectd exec plugin runs the script as
+collectd_script_user: ec2-user
+# Seconds between polling rounds (can be overridden at runtime with the INTERVAL env var)
+collectd_script_interval: 30
+
+# define in relevant group vars
+# Defaults to an empty list (not null) so the script template still renders
+# on hosts that have no endpoints configured.
+collectd_endpoint_monitoring: []
+# for example
+# collectd_endpoint_monitoring:
+#   - metric_dimension: oasys.az.justice.gov.uk
+#     url: https://oasys.az.justice.gov.uk
+#     follow_redirect: 0 # optionally include and set to 0 if you don't want to follow redirects
+#     timeout_sec: 5 # optionally include to change timeout from default 5s
+#     time_ranges: "1.0900-1.1700,2.0900-2.1700,3.0900-3.1700,4.0900-4.1700,5.0900-5.1700" # optionally include to limit monitoring to 9-5pm weekdays
diff --git a/ansible/roles/collectd-endpoint-monitoring/handlers/main.yml b/ansible/roles/collectd-endpoint-monitoring/handlers/main.yml
new file mode 100644
index 000000000..a9c35d30b
--- /dev/null
+++ b/ansible/roles/collectd-endpoint-monitoring/handlers/main.yml
@@ -0,0 +1,10 @@
+---
+# Restart collectd so it picks up a changed plugin configuration.
+- name: restart collectd
+  ansible.builtin.service:
+    name: collectd
+    state: restarted
+
+# Kill the running plugin script so the new version is used; the script header
+# notes that collectd respawns it after it is killed.
+# pkill exit codes: 0 = process signalled, 1 = nothing matched (fine, nothing
+# to restart), 2+ = real error, which is surfaced instead of being ignored.
+- name: restart plugin script
+  ansible.builtin.command:
+    argv:
+      - pkill
+      - "-u"
+      - "{{ collectd_script_user }}"
+      - "-f"
+      - "{{ collectd_script_path }}/{{ collectd_script_name }}.sh"
+  register: collectd_endpoint_monitoring_pkill
+  changed_when: collectd_endpoint_monitoring_pkill.rc == 0
+  failed_when: collectd_endpoint_monitoring_pkill.rc > 1
diff --git a/ansible/roles/collectd-endpoint-monitoring/meta/main.yml b/ansible/roles/collectd-endpoint-monitoring/meta/main.yml
new file mode 100644
index 000000000..8b78741b1
--- /dev/null
+++ b/ansible/roles/collectd-endpoint-monitoring/meta/main.yml
@@ -0,0 +1,3 @@
+---
+# Runs the amazon-cloudwatch-agent-collectd role before this one.
+# NOTE(review): presumably that role installs collectd and the CloudWatch agent
+# which this role's exec plugin config relies on -- confirm against that role.
+dependencies:
+  - role: amazon-cloudwatch-agent-collectd
diff --git a/ansible/roles/collectd-endpoint-monitoring/tasks/configure_collectd.yml b/ansible/roles/collectd-endpoint-monitoring/tasks/configure_collectd.yml
new file mode 100644
index 000000000..27be4676d
--- /dev/null
+++ b/ansible/roles/collectd-endpoint-monitoring/tasks/configure_collectd.yml
@@ -0,0 +1,18 @@
+---
+# Install the collectd exec plugin configuration; collectd is restarted when it changes.
+- name: copy collectd config
+  ansible.builtin.template:
+    src: "{{ collectd_script_name }}.conf.j2"
+    dest: "/etc/collectd.d/{{ collectd_script_name }}.conf"
+    owner: root
+    # quoted so the mode is passed as a string rather than parsed as a YAML number
+    mode: "0644"
+  notify:
+    - restart collectd
+
+# Install the endpoint checking script; the running copy is killed when it changes
+# so that the new version takes over.
+- name: copy collectd plugin script
+  ansible.builtin.template:
+    src: "{{ collectd_script_name }}.sh.j2"
+    dest: "{{ collectd_script_path }}/{{ collectd_script_name }}.sh"
+    owner: root
+    mode: "0755"
+  notify:
+    - restart plugin script
diff --git a/ansible/roles/collectd-endpoint-monitoring/tasks/main.yml b/ansible/roles/collectd-endpoint-monitoring/tasks/main.yml
new file mode 100644
index 000000000..e701061c0
--- /dev/null
+++ b/ansible/roles/collectd-endpoint-monitoring/tasks/main.yml
@@ -0,0 +1,6 @@
+---
+# Entry point for the role: the configuration tasks are tagged ec2provision and
+# ec2patch, and only apply to RedHat / OracleLinux hosts.
+- name: Configure collectd endpoint monitoring
+  ansible.builtin.import_tasks: configure_collectd.yml
+  tags:
+    - ec2provision
+    - ec2patch
+  when: ansible_distribution in ['RedHat', 'OracleLinux']
diff --git a/ansible/roles/collectd-endpoint-monitoring/templates/collectd_endpoint_monitoring.conf.j2 b/ansible/roles/collectd-endpoint-monitoring/templates/collectd_endpoint_monitoring.conf.j2
new file mode 100644
index 000000000..f259faeb8
--- /dev/null
+++ b/ansible/roles/collectd-endpoint-monitoring/templates/collectd_endpoint_monitoring.conf.j2
@@ -0,0 +1,4 @@
+# Managed by collectd-endpoint-monitoring ansible role
+# Runs the endpoint monitoring script as a long-lived exec plugin process.
+LoadPlugin exec
+<Plugin exec>
+  # Exec "<user>" "<script>": the Exec option is only valid inside the exec plugin block
+  Exec "{{ collectd_script_user }}" "{{ collectd_script_path }}/{{ collectd_script_name }}.sh"
+</Plugin>
diff --git a/ansible/roles/collectd-endpoint-monitoring/templates/collectd_endpoint_monitoring.sh.j2 b/ansible/roles/collectd-endpoint-monitoring/templates/collectd_endpoint_monitoring.sh.j2
new file mode 100644
index 000000000..26c578e61
--- /dev/null
+++ b/ansible/roles/collectd-endpoint-monitoring/templates/collectd_endpoint_monitoring.sh.j2
@@ -0,0 +1,150 @@
+#!/bin/bash
+# Managed by collectd-endpoint-monitoring ansible role
+# If manually editing, just kill script and collectd will respawn
+# e.g. pkill -u {{ collectd_script_user }} -f {{ collectd_script_path }}/{{ collectd_script_name }}.sh
+#
+# To debug, run INTERVAL=5 LOGGER_INTERVAL_FOR_ERRORS=0 {{ collectd_script_path }}/{{ collectd_script_name }}.sh
+
+# One entry per endpoint, as a space separated record:
+#   "follow_redirect timeout_secs url metric_dimension [time_ranges]"
+ENDPOINTS=()
+# Minimum seconds between cert expiry metrics for an endpoint whose value has not changed
+CERT_EXPIRY_METRIC_INTERVAL=3600
+LOGGER_INTERVAL_FOR_ERRORS="${LOGGER_INTERVAL_FOR_ERRORS:-3600}" # set to 0 to display to stdout
+DEFAULT_INTERVAL="{{ collectd_script_interval }}"
+HOSTNAME="${HOSTNAME:-localhost}"
+# Seconds between polling rounds; the INTERVAL env var overrides the templated default
+INTERVAL="${INTERVAL:-$DEFAULT_INTERVAL}"
+
+# The endpoint list falls back to empty when the variable is unset or null.
+# The timeout may be given as either timeout_sec or timeout; timeout_sec wins.
+{% for item in collectd_endpoint_monitoring | default([], true) %}
+ENDPOINTS+=("{{ item.follow_redirect|default(1) }} {{ item.timeout_sec|default(item.timeout|default(5)) }} {{ item.url }} {{ item.metric_dimension }} {{ item.time_ranges|default('') }}")
+{% endfor %}
+{% raw %}
+
+#Comment in below for testing
+#INTERVAL=5
+#LOGGER_INTERVAL_FOR_ERRORS=0
+#ENDPOINTS+=("1 5 https://www.google.com www.google.com 1.0900-1.1700,2.0900-2.1700,3.0900-3.1700,4.0900-4.1700,5.0900-5.1700")
+#ENDPOINTS+=("1 5 https://www.microsoft.com www.microsoft.com")
+#ENDPOINTS+=("1 5 https://www.amazon.com www.amazon.com 1.0900-5.1700")
+
+# Succeed (return 0) when the given time falls inside any of the given ranges.
+#   $1  current time, compared as a string against the range bounds
+#   $2  comma separated list of START-END ranges, e.g. 1.0900-1.1700,2.0900-2.1700
+# START is inclusive, END is exclusive. Returns 1 when no range matches.
+check_within_timeranges() {
+  local current
+  local range_list
+  local bounds
+  current=$1
+  range_list=$2
+  # deliberately unquoted: commas become spaces so word splitting yields one range per pass
+  for timerange in ${range_list//,/ }; do
+    # split START-END on the first hyphen into a two element array
+    bounds=(${timerange/-/ })
+    # skip ranges that have not started yet
+    if [[ $current != "${bounds[0]}" && ! $current > "${bounds[0]}" ]]; then
+      continue
+    fi
+    if [[ $current < "${bounds[1]}" ]]; then
+      return 0
+    fi
+  done
+  return 1
+}
+
+# Fetch a URL with curl and report whether it is healthy.
+#   $1  follow_redirect: 1 follows redirects (curl -L) and requires a final 200;
+#       any other value also accepts a 30x reply as healthy
+#   $2  timeout in seconds for the whole request (curl -m)
+#   $3  url to fetch
+# Returns 0 when healthy; for urls containing "https:" it also prints
+# "days_to_expiry=N" on stdout. Returns 1 with a description on stderr otherwise.
+check_endpoint() {
+  local follow_redirect
+  local timeout_secs
+  local url
+  local optional_curl_args
+  local output
+  local http_code
+  local expiry
+  local expiry_epoch_secs
+  local now_epoch_secs
+  local secs_to_expiry
+  local days_to_expiry
+
+  follow_redirect="$1"
+  timeout_secs="$2"
+  url="$3"
+
+  optional_curl_args=
+  if [[ $follow_redirect == 1 ]]; then
+    optional_curl_args="-L"
+  fi
+  # -v output (on stderr) carries the certificate details, so both streams are captured.
+  # The trailing \n keeps the http_code marker on a line of its own even when curl
+  # writes further stderr text straight after it.
+  # $optional_curl_args is deliberately unquoted so that an empty value adds no argument.
+  if ! output=$(curl -sSv -m "$timeout_secs" -o /dev/null -w "http_code=%{http_code}\n" $optional_curl_args "$url" 2>&1); then
+    # strip the verbose protocol trace, leaving just curl's error message
+    grep -v "^\*" <<<"$output" | grep -v ^http_code= | grep -v "^>" | grep -v "^<" | grep -v "^{" | grep -v "^}" >&2
+    return 1
+  fi
+  http_code=$(grep "^http_code=" <<< "$output" | cut -d= -f2)
+  if [[ -z $http_code ]]; then
+    echo "missing http_code in curl output" >&2
+    return 1
+  fi
+  if [[ $http_code != 200 ]]; then
+    # a redirect status (300-309) is only acceptable when redirects are not being followed
+    if [[ $follow_redirect == 1 || ! $http_code =~ ^30[0-9]$ ]]; then
+      echo "unexpected http_code $http_code" >&2
+      return 1
+    fi
+  fi
+  if [[ $url =~ https: ]]; then
+    # A redirect chain can open several TLS connections and so print several
+    # expiry lines; use the first, which belongs to the url being monitored.
+    expiry=$(grep -F "* expire date:" <<< "$output" | cut -d: -f2- | head -n 1)
+    if [[ -z $expiry ]]; then
+      echo "could not find expiry date in curl output" >&2
+      return 1
+    fi
+    if [[ "$(uname)" == "Darwin" ]]; then
+      # BSD date needs an explicit input format
+      expiry_epoch_secs=$(date -j -f " %b %d %T %Y %Z" "$expiry" +%s)
+    else
+      expiry_epoch_secs=$(date +%s -d "$expiry" 2>/dev/null)
+    fi
+    if [[ -z $expiry_epoch_secs ]]; then
+      echo "could not parse expiry date $expiry" >&2
+      return 1
+    fi
+    now_epoch_secs=$(date +%s)
+    secs_to_expiry=$(( expiry_epoch_secs - now_epoch_secs ))
+    days_to_expiry=$(( secs_to_expiry / 86400 ))
+    echo "days_to_expiry=$days_to_expiry"
+  fi
+}
+
+
+n=${#ENDPOINTS[@]}
+
+last_error_log_timestamp=()
+last_days_to_expiry=()
+last_expiry_metric_timestamp=()
+for ((i=0; i&1)
+ exitcode=$?
+ days_to_expiry=$(grep "^days_to_expiry=" <<< "$output" | cut -d= -f2)
+ echo "PUTVAL $HOSTNAME/endpoint_status/exitcode-${args[3]} interval=$INTERVAL N:$exitcode"
+ if [[ -n $days_to_expiry ]]; then
+ if [[ ${last_days_to_expiry[i]} != "$days_to_expiry" || $((now_epoch_secs - last_expiry_metric_timestamp[i])) -gt $CERT_EXPIRY_METRIC_INTERVAL ]]; then
+ echo "PUTVAL $HOSTNAME/endpoint_cert_expiry/gauge-${args[3]} interval=$INTERVAL N:$days_to_expiry"
+ last_expiry_metric_timestamp[i]="$now_epoch_secs"
+ last_days_to_expiry[i]="$days_to_expiry"
+ fi
+ fi
+ if [[ $exitcode -ne 0 ]]; then
+ if [[ $LOGGER_INTERVAL_FOR_ERRORS -eq 0 ]]; then
+ echo "${args[3]}: $output"
+ elif [[ $((now_epoch_secs - last_error_log_timestamp[i])) -gt $LOGGER_INTERVAL_FOR_ERRORS ]]; then
+ echo "${args[3]}: $output" | logger -p local3.info -t collectd_endpoint_monitoring
+ last_error_log_timestamp[i]="$now_epoch_secs"
+ fi
+ fi
+ done
+ new_epoch_secs=$(date +%s)
+ elapsed=$((new_epoch_secs - now_epoch_secs))
+ if (( elapsed >= 0 && elapsed < INTERVAL )); then
+ sleep $((INTERVAL - elapsed))
+ fi
+done
+
+{% endraw %}
diff --git a/ansible/roles/endpoint-monitoring/defaults/main.yml b/ansible/roles/endpoint-monitoring/defaults/main.yml
deleted file mode 100644
index 2a1904af8..000000000
--- a/ansible/roles/endpoint-monitoring/defaults/main.yml
+++ /dev/null
@@ -1,2 +0,0 @@
----
-script_path: "/opt/monitoring/check_endpoints.sh"
diff --git a/ansible/roles/endpoint-monitoring/meta/main.yml b/ansible/roles/endpoint-monitoring/meta/main.yml
deleted file mode 100644
index 4ff987c1c..000000000
--- a/ansible/roles/endpoint-monitoring/meta/main.yml
+++ /dev/null
@@ -1,3 +0,0 @@
----
-dependencies:
- - role: get-ec2-facts
diff --git a/ansible/roles/endpoint-monitoring/tasks/endpoint-monitoring.yml b/ansible/roles/endpoint-monitoring/tasks/endpoint-monitoring.yml
deleted file mode 100644
index 47d56c11e..000000000
--- a/ansible/roles/endpoint-monitoring/tasks/endpoint-monitoring.yml
+++ /dev/null
@@ -1,36 +0,0 @@
----
-- name: Ensure monitoring directory exists
- file:
- path: "/opt/monitoring"
- state: directory
- mode: "0755"
-
-- name: set fact for endpoint_monitoring variable in environments
- set_fact:
- endpoint_monitoring_targets: "{{ endpoint_monitoring_targets }}"
-
-- name: Template endpoint monitoring script
- template:
- src: "check_endpoints.sh.j2"
- dest: "{{ script_path }}"
- mode: "0755"
-
-- name: Install required packages
- package:
- name:
- - curl
- state: present
-
-- name: Set up cron job for endpoint monitoring
- cron:
- name: "Check endpoints and send metrics to CloudWatch"
- minute: "*/5"
- job: "{{ script_path }} 2>&1 | logger -p local3.info -t 'endpoint-monitoring'"
-
-- name: Ensure log file exists with correct permissions
- file:
- path: "{{ log_path }}"
- state: touch
- mode: "0644"
- owner: root
- group: root
diff --git a/ansible/roles/endpoint-monitoring/tasks/main.yml b/ansible/roles/endpoint-monitoring/tasks/main.yml
deleted file mode 100644
index 047fcdf2d..000000000
--- a/ansible/roles/endpoint-monitoring/tasks/main.yml
+++ /dev/null
@@ -1,6 +0,0 @@
----
-- import_tasks: endpoint-monitoring.yml
- tags:
- - amibuild
- - ec2provision
- when: endpoint_monitoring_targets is defined
diff --git a/ansible/roles/endpoint-monitoring/templates/check_endpoints.sh.j2 b/ansible/roles/endpoint-monitoring/templates/check_endpoints.sh.j2
deleted file mode 100644
index fa70701cc..000000000
--- a/ansible/roles/endpoint-monitoring/templates/check_endpoints.sh.j2
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/bin/bash
-
-# Add /usr/local/bin to PATH
-export PATH=/usr/local/bin:$PATH
-
-
-# Set AWS region
-AWS_REGION="eu-west-2"
-
-# Function to check endpoint and send metric
-check_endpoint() {
- endpoint="$1"
- expected_response_code="$2"
- check_domain_ssl_expiry="$3" # true of false
-
- # Check if the status code is as expected
- start_time=$(date +%s%N)
- http_code=$(curl -s -o /dev/null -w "%{http_code}" "$endpoint" -m 10)
- end_time=$(date +%s%N)
-
- duration=$(( (end_time - start_time) / 1000000 )) # Convert to milliseconds
-
- if [ "$http_code" -eq "$expected_response_code" ]; then
- status=0
- echo "Success: $endpoint returned $http_code (expected $expected_response_code)"
- else
- status=1
- echo "Failure: $endpoint returned $http_code (expected $expected_response_code)"
- fi
-
- # Send metrics to CloudWatch
- aws cloudwatch put-metric-data \
- --region "$AWS_REGION" \
- --namespace "CustomMetrics" \
- --metric-data \
- "[
- {
- \"MetricName\": \"EndpointStatus\",
- \"Dimensions\": [{\"Name\": \"Endpoint\",\"Value\": \"$endpoint\"}],
- \"Value\": $status,
- \"Unit\": \"Count\"
- },
- {
- \"MetricName\": \"ResponseTime\",
- \"Dimensions\": [{\"Name\": \"Endpoint\",\"Value\": \"$endpoint\"}],
- \"Value\": $duration,
- \"Unit\": \"Milliseconds\"
- },
- {
- \"MetricName\": \"ResponseCode\",
- \"Dimensions\": [{\"Name\": \"Endpoint\",\"Value\": \"$endpoint\"}],
- \"Value\": $http_code,
- \"Unit\": \"Count\"
- }
- ]"
-
- if [ $check_domain_ssl_expiry = "true" ]; then
- # remove https:// from the endpoint or http:// from the endpoint to get the domain name
- endpoint=$(echo $endpoint | sed -e 's/https:\/\///' -e 's/http:\/\///')
- domain_ssl_expiry=$(echo | openssl s_client -servername "$endpoint" -connect "$endpoint":443 2>/dev/null | openssl x509 -noout -dates | grep notAfter | cut -d= -f2)
- domain_ssl_expiry_in_seconds=$(date -d "$domain_ssl_expiry" +%s)
- current_time=$(date +%s)
- check_domain_ssl_expiry=$(( ($domain_ssl_expiry_in_seconds - $current_time) / 86400 ))
- echo "Cert: $endpoint SSL Expiry in days: $check_domain_ssl_expiry"
-
- # Send metrics to CloudWatch
- aws cloudwatch put-metric-data \
- --region "$AWS_REGION" \
- --namespace "CustomMetrics" \
- --metric-data \
- "[
- {
- \"MetricName\": \"DomainSSLExpiryInDays\",
- \"Dimensions\": [{\"Name\": \"Endpoint\",\"Value\": \"$endpoint\"}],
- \"Value\": $check_domain_ssl_expiry,
- \"Unit\": \"Count\"
- }
- ]"
- else
- check_domain_ssl_expiry=0
- echo "Cert SSL Expiry check is disabled for $endpoint"
- fi
-}
-
-{% for item in endpoint_monitoring_targets %}
-# List of endpoints to check
-check_endpoint "{{ item.target }}" "{{ item.expected_response_code }}" "{{ item.check_domain_ssl_expiry }}"
-{% endfor %}