Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TM-720: endpoint monitoring further tweaks #1132

Merged
merged 6 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 37 additions & 21 deletions ansible/group_vars/environment_name_hmpps_oem_preproduction.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,21 +74,7 @@ emctl_oem: /u01/app/oracle/product/mw135/bin/emctl
emctl_agent: /u01/app/oracle/product/oem-agent/agent_inst/bin/emctl

collectd_endpoint_monitoring:
- metric_dimension: c.pp-nomis.az.justice.gov.uk
url: https://c.pp-nomis.az.justice.gov.uk/forms/frmservlet?config=tag
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"
- metric_dimension: reporting.pp-nomis.az.justice.gov.uk
url: https://reporting.pp-nomis.az.justice.gov.uk/keepalive.htm
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"
- metric_dimension: c.lsast-nomis.az.justice.gov.uk
url: https://c.lsast-nomis.az.justice.gov.uk/forms/frmservlet?config=tag
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"
- metric_dimension: pp-oasys.az.justice.gov.uk
url: https://pp-oasys.az.justice.gov.uk/eor/f?p=100
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"
- metric_dimension: onr.pp-oasys.az.justice.gov.uk
url: https://onr.pp-oasys.az.justice.gov.uk/InfoViewApp
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"
# corporate-staff-rostering
- metric_dimension: r1.pp.csr.service.justice.gov.uk
url: http://r1.pp.csr.service.justice.gov.uk:7770/isps/index.html?2057
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"
Expand All @@ -110,16 +96,46 @@ collectd_endpoint_monitoring:
- metric_dimension: traina.csr.service.justice.gov.uk
url: http://traina.csr.service.justice.gov.uk/isps/index.html?2057
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"

# hmpps-domain-services
- metric_dimension: rdgateway1.preproduction.hmpps-domain.service.justice.gov.uk
url: https://rdgateway1.preproduction.hmpps-domain.service.justice.gov.uk/
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"

# nomis
- metric_dimension: c-lsast.preproduction.nomis.service.justice.gov.uk
url: https://c-lsast.preproduction.nomis.service.justice.gov.uk/forms/frmservlet?config=tag
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"
- metric_dimension: c.preproduction.nomis.service.justice.gov.uk
url: https://c.preproduction.nomis.service.justice.gov.uk/forms/frmservlet?config=tag
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"

# nomis-combined-reporting
- metric_dimension: reporting.pp-nomis.az.justice.gov.uk # AZURE
url: https://reporting.pp-nomis.az.justice.gov.uk/keepalive.htm
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"
- metric_dimension: preproduction.reporting.nomis.service.justice.gov.uk
url: https://preproduction.reporting.nomis.service.justice.gov.uk/keepalive.htm
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"

# oasys
- metric_dimension: pp.oasys.service.justice.gov.uk
url: https://pp.oasys.service.justice.gov.uk/eor/f?p=100
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"
- metric_dimension: pp-int.oasys.service.justice.gov.uk
url: https://pp-int.oasys.service.justice.gov.uk/eor/f?p=100
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"

# oasys-national-reporting
- metric_dimension: onr.pp-oasys.az.justice.gov.uk # AZURE
url: https://onr.pp-oasys.az.justice.gov.uk/InfoViewApp
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"

# planetfm
- metric_dimension: cafmtx.pp.planetfm.service.justice.gov.uk
url: https://cafmtx.pp.planetfm.service.justice.gov.uk/RDWeb
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"
- metric_dimension: cafmwebx.pp.planetfm.service.justice.gov.uk
url: https://cafmwebx.pp.planetfm.service.justice.gov.uk/PlanetPortal
follow_redirect: 0
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"
- metric_dimension: hpa-preprod.service.hmpps.dsd.io
url: https://hpa-preprod.service.hmpps.dsd.io/
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"
- metric_dimension: rdgateway1.preproduction.hmpps-domain.service.justice.gov.uk
url: https://rdgateway1.preproduction.hmpps-domain.service.justice.gov.uk/
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"
89 changes: 44 additions & 45 deletions ansible/group_vars/environment_name_hmpps_oem_production.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,44 +69,20 @@ housekeeping_cron:
hour: "08"
# job: command generated in

# Replaces Grafana/Prometheus black-box exporter monitoring
#
# Each entry names one URL to probe:
#   target                  - full URL that is requested
#   expected_response_code  - HTTP status code the probe expects back
#   check_domain_ssl_expiry - "true"/"false" (quoted strings in every live
#                             entry); set to "false" when another entry
#                             already covers the same domain
# NOTE(review): the consumer of this variable is not visible here - confirm it
# compares check_domain_ssl_expiry as a string before changing the quoting.
endpoint_monitoring_targets:
  # - target: https://google.com # Example target
  #   expected_response_code: 301
  #   check_domain_ssl_expiry: "true"
  - target: https://oasys.az.justice.gov.uk
    expected_response_code: 302
    check_domain_ssl_expiry: "true"
  - target: https://oasys.az.justice.gov.uk/eor/f?p=100
    expected_response_code: 302
    check_domain_ssl_expiry: "false" # don't re-check this as it's just a duplicate of above
  - target: https://bridge-oasys.az.justice.gov.uk
    expected_response_code: 200
    check_domain_ssl_expiry: "true"
  # Trailing comma removed from this URL: it was part of the plain scalar and
  # therefore part of the requested address.
  - target: https://bridge-oasys.az.justice.gov.uk/eor/f?p=100
    expected_response_code: 200
    check_domain_ssl_expiry: "false" # same domain as the entry above

emcli: /u01/app/oracle/product/mw135/bin/emcli
emctl_oem: /u01/app/oracle/product/mw135/bin/emctl
emctl_agent: /u01/app/oracle/product/oem-agent/agent_inst/bin/emctl

collectd_endpoint_monitoring:
- metric_dimension: c.nomis.az.justice.gov.uk
url: https://c.nomis.az.justice.gov.uk/forms/frmservlet?config=tag
- metric_dimension: reporting.nomis.az.justice.gov.uk
url: https://reporting.nomis.az.justice.gov.uk/keepalive.htm
time_ranges: "0.0000-1.0200,1.0210-3.0200,3.0210-5.0200,5.0210-7.0000" # web servers refreshed Mon/Wed/Fri at 2am
- metric_dimension: oasys.az.justice.gov.uk
url: https://oasys.az.justice.gov.uk/eor/f?p=100
- metric_dimension: training.oasys.az.justice.gov.uk
url: https://training.oasys.az.justice.gov.uk/eor/f?p=100
- metric_dimension: practice.oasys.az.justice.gov.uk
url: https://practice.oasys.az.justice.gov.uk/eor/f?p=100
- metric_dimension: bridge-oasys.az.justice.gov.uk
url: https://bridge-oasys.az.justice.gov.uk/
- metric_dimension: onr.oasys.az.justice.gov.uk
url: https://onr.oasys.az.justice.gov.uk/InfoViewApp
# az-noms-production-1
- metric_dimension: hmpps-az-gw1.justice.gov.uk
url: https://hmpps-az-gw1.justice.gov.uk/RDWeb
- metric_dimension: hpa.service.hmpps.dsd.io
url: https://hpa.service.hmpps.dsd.io/health
- metric_dimension: www.offloc.service.justice.gov.uk
url: https://www.offloc.service.justice.gov.uk/health

# corporate-staff-rostering
- metric_dimension: r1.csr.service.justice.gov.uk
url: http://r1.csr.service.justice.gov.uk:7770/isps/index.html?2057
- metric_dimension: r2.csr.service.justice.gov.uk
Expand All @@ -119,20 +95,43 @@ collectd_endpoint_monitoring:
url: http://r5.csr.service.justice.gov.uk:7770/isps/index.html?2057
- metric_dimension: r6.csr.service.justice.gov.uk
url: http://r6.csr.service.justice.gov.uk:7771/isps/index.html?2057

# hmpps-domain-services
- metric_dimension: rdgateway1.hmpps-domain.service.justice.gov.uk
url: https://rdgateway1.hmpps-domain.service.justice.gov.uk/

# nomis
- metric_dimension: c.nomis.az.justice.gov.uk
url: https://c.nomis.az.justice.gov.uk/forms/frmservlet?config=tag
- metric_dimension: c.nomis.service.justice.gov.uk
url: https://c.nomis.service.justice.gov.uk/forms/frmservlet?config=tag

# nomis-combined-reporting
- metric_dimension: reporting.nomis.az.justice.gov.uk # AZURE
url: https://reporting.nomis.az.justice.gov.uk/keepalive.htm
time_ranges: "0.0000-1.0200,1.0210-3.0200,3.0210-5.0200,5.0210-7.0000" # web servers refreshed Mon/Wed/Fri at 2am

# oasys
- metric_dimension: oasys.service.justice.gov.uk
url: https://oasys.service.justice.gov.uk/
- metric_dimension: int.oasys.service.justice.gov.uk
url: https://int.oasys.service.justice.gov.uk/eor/f?p=100
- metric_dimension: practice.int.oasys.service.justice.gov.uk
url: https://practice.int.oasys.service.justice.gov.uk/eor/f?p=100
- metric_dimension: training.int.oasys.service.justice.gov.uk
url: https://training.int.oasys.service.justice.gov.uk/eor/f?p=100

# oasys-national-reporting
- metric_dimension: onr.oasys.az.justice.gov.uk # Azure
url: https://onr.oasys.az.justice.gov.uk/InfoViewApp

# planetfm
- metric_dimension: cafmtx.planetfm.service.justice.gov.uk
url: https://cafmtx.planetfm.service.justice.gov.uk/RDWeb
- metric_dimension: cafmwebx2.az.justice.gov.uk
url: https://cafmwebx2.az.justice.gov.uk/PlanetPortal
- metric_dimension: cafmwebx2.planetfm.service.justice.gov.uk
url: https://cafmwebx2.planetfm.service.justice.gov.uk/PlanetPortal
follow_redirect: 0
- metric_dimension: cafmtrainweb.az.justice.gov.uk
url: https://cafmtrainweb.az.justice.gov.uk/PlanetPortal
- metric_dimension: cafmtrainweb.planetfm.service.justice.gov.uk
url: https://cafmtrainweb.planetfm.service.justice.gov.uk/PlanetPortal
follow_redirect: 0
time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"
- metric_dimension: www.offloc.service.justice.gov.uk
url: https://www.offloc.service.justice.gov.uk/health
- metric_dimension: hpa.service.hmpps.dsd.io
url: https://hpa.service.hmpps.dsd.io/
- metric_dimension: hmpps-az-gw1.justice.gov.uk
url: https://hmpps-az-gw1.justice.gov.uk/RDWeb
- metric_dimension: rdgateway1.hmpps-domain.service.justice.gov.uk
url: https://rdgateway1.hmpps-domain.service.justice.gov.uk/
23 changes: 15 additions & 8 deletions ansible/group_vars/environment_name_hmpps_oem_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,23 +73,30 @@ emctl_oem: /u01/app/oracle/product/mw135/bin/emctl
emctl_agent: /u01/app/oracle/product/oem-agent/agent_inst/bin/emctl

# Endpoints monitored for the test environment, grouped by owning application.
# Each entry has:
#   metric_dimension - label for the endpoint; in every entry below it is the
#                      hostname of the url
#   url              - address that is requested
#   time_ranges      - optional
# NOTE(review): time_ranges looks like comma-separated
# <weekday>.<HHMM>-<weekday>.<HHMM> windows (Mon-Fri 07:00-18:00 here), with
# entries that omit it being monitored at all times - confirm against the
# collectd endpoint monitoring role.
collectd_endpoint_monitoring:
  # az-noms-dev-test-environments
  - metric_dimension: stage.offloc.service.justice.gov.uk
    url: https://stage.offloc.service.justice.gov.uk/health
    time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"
  - metric_dimension: hmppgw1.justice.gov.uk
    url: https://hmppgw1.justice.gov.uk/RDWeb

  # hmpps-domain-services
  - metric_dimension: rdgateway1.test.hmpps-domain.service.justice.gov.uk
    url: https://rdgateway1.test.hmpps-domain.service.justice.gov.uk/
    time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"

  # nomis
  - metric_dimension: c-t1.test.nomis.service.justice.gov.uk
    url: https://c-t1.test.nomis.service.justice.gov.uk/forms/frmservlet?config=tag
  # url previously pointed at c-t1, so T2 was never probed and T1 was reported
  # under two dimensions.
  - metric_dimension: c-t2.test.nomis.service.justice.gov.uk
    url: https://c-t2.test.nomis.service.justice.gov.uk/forms/frmservlet?config=tag
  - metric_dimension: c-t3.test.nomis.service.justice.gov.uk
    url: https://c-t3.test.nomis.service.justice.gov.uk/forms/frmservlet?config=tag

  # oasys
  - metric_dimension: t1-int.oasys.service.justice.gov.uk
    url: https://t1-int.oasys.service.justice.gov.uk/
    time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"
  - metric_dimension: t2-int.oasys.service.justice.gov.uk
    url: https://t2-int.oasys.service.justice.gov.uk/
    time_ranges: "1.0700-1.1800,2.0700-2.1800,3.0700-3.1800,4.0700-4.1800,5.0700-5.1800"
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# To debug, run INTERVAL=5 LOGGER_INTERVAL_FOR_ERRORS=0 {{ collectd_script_path }}/{{ collectd_script_name }}.sh

ENDPOINTS=()
CERT_EXPIRY_METRIC_INTERVAL=3600
CERT_EXPIRY_METRIC_INTERVAL=1800
LOGGER_INTERVAL_FOR_ERRORS="${LOGGER_INTERVAL_FOR_ERRORS:-3600}" # set to 0 to display to stdout
DEFAULT_INTERVAL="{{ collectd_script_interval }}"
HOSTNAME="${HOSTNAME:-localhost}"
Expand Down
4 changes: 2 additions & 2 deletions ansible/roles/ncr-bip/tasks/setup_tomcat_restart.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
- name: Setup tomcat restart cron
ansible.builtin.cron:
name: "tomcat_restart"
minute: "{{ 60 | random(seed=inventory_hostname) }}"
minute: "0"
hour: "2"
weekday: "MON,WED,FRI"
job: "/home/bobj/tomcat_restart.sh"
Expand All @@ -31,7 +31,7 @@
- name: Setup log cleanup cron
ansible.builtin.cron:
name: "log_cleanup"
minute: "{{ 60 | random(seed=inventory_hostname) }}"
minute: "0"
hour: "3"
weekday: "MON,WED,FRI"
job: "find {{ sap_bip_archived_logs_directory }} -mtime +365 -prune -exec rm -rf {} \\;"
Expand Down