From 4eebe8d4beb18bf878d07ccf4df4f3978a7c8091 Mon Sep 17 00:00:00 2001 From: Ben Ramchandani Date: Fri, 15 Dec 2023 17:14:04 +0000 Subject: [PATCH] Add more detailed descriptions to CloudWatch alarms --- .../modules/active_directory/ldap_nlb.tf | 7 ++- .../alb_alarms.tf | 12 +++- .../cloudfront_alarms.tf | 28 ++++++--- terraform/modules/jaspersoft/monitoring.tf | 33 +++++++--- .../modules/marklogic/monitoring_alarms.tf | 63 +++++++++++++++---- .../modules/networking/firewall_monitoring.tf | 16 ++--- .../ses_identity/ses-cloudwatch-lambda.tf | 7 ++- terraform/modules/ses_monitoring/main.tf | 6 +- terraform/modules/waf/monitoring.tf | 11 +++- 9 files changed, 138 insertions(+), 45 deletions(-) diff --git a/terraform/modules/active_directory/ldap_nlb.tf b/terraform/modules/active_directory/ldap_nlb.tf index bcb4f81d..a480725a 100644 --- a/terraform/modules/active_directory/ldap_nlb.tf +++ b/terraform/modules/active_directory/ldap_nlb.tf @@ -138,7 +138,12 @@ resource "aws_cloudwatch_metric_alarm" "ldap_lb_healthy_count_low" { threshold = 0 evaluation_periods = 1 - alarm_description = "The Active Directory domain controller in use is unhealthy" + alarm_description = < ${var.cloudfront_p90_origin_latency_high_alarm_threshold_ms}ms). +This can be a false alarm if only a small number of users are using the service, so it can be ignored outside of business hours. + EOF alarm_actions = [var.alarms_sns_topic_global_arn] ok_actions = [var.alarms_sns_topic_global_arn] @@ -184,7 +194,7 @@ resource "aws_cloudwatch_metric_alarm" "ddos_attack" { period = "60" statistic = "Average" threshold = "0" - alarm_description = "Triggers when AWS Shield Advanced detects a DDoS attack" + alarm_description = "Triggers when AWS Shield Advanced detects a DDoS attack. Escalate immediately." treat_missing_data = "notBreaching" alarm_actions = [var.security_sns_topic_global_arn] ok_actions = [var.security_sns_topic_global_arn] diff --git a/terraform/modules/jaspersoft/monitoring.tf b/terraform/modules/jaspersoft/monitoring.tf index 8454faf3..b45f5f52 100644 --- a/terraform/modules/jaspersoft/monitoring.tf +++ b/terraform/modules/jaspersoft/monitoring.tf @@ -1,7 +1,3 @@ -locals { - alarm_description_template = "Average instance %v utilization %v last %d minutes" -} - resource "aws_cloudwatch_metric_alarm" "cpu_utilisation_high" { alarm_name = "jaspersoft-${var.environment}-cpu-high" comparison_operator = "GreaterThanThreshold" @@ -12,7 +8,11 @@ resource "aws_cloudwatch_metric_alarm" "cpu_utilisation_high" { statistic = "Average" threshold = 80 - alarm_description = format(local.alarm_description_template, "CPU", "High", 10) + alarm_description = <