From 1bb885668b9ce54382f224078b1484645ecf6bb3 Mon Sep 17 00:00:00 2001
From: Andrew Pearce
Date: Tue, 7 Nov 2023 09:56:47 +0000
Subject: [PATCH] MLPAB-1527 - create metric alarm for failed replications (#824)

---
 .../s3_object_replication.tf                  | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/terraform/environment/region/modules/uploads_s3_bucket/s3_object_replication.tf b/terraform/environment/region/modules/uploads_s3_bucket/s3_object_replication.tf
index 4f1b01ac97..0896ac6a9a 100644
--- a/terraform/environment/region/modules/uploads_s3_bucket/s3_object_replication.tf
+++ b/terraform/environment/region/modules/uploads_s3_bucket/s3_object_replication.tf
@@ -177,3 +177,33 @@ resource "aws_ssm_parameter" "s3_batch_configuration" {
   })
   provider = aws.region
 }
+
+# CloudWatch alarm on the AWS/S3 OperationsFailedReplication metric for this
+# bucket's "whenScannedOkAndReadyToReplicate" replication rule. Alarm actions
+# publish to the custom_cloudwatch_alarms SNS topic in the current account and
+# region, and are enabled only when var.s3_replication.enabled is true.
+resource "aws_cloudwatch_metric_alarm" "replication-failed" {
+  actions_enabled = var.s3_replication.enabled
+  alarm_actions   = ["arn:aws:sns:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:custom_cloudwatch_alarms"]
+  alarm_name      = "${data.aws_default_tags.current.tags.environment-name}-${data.aws_region.current.name}-replication-failed"
+  # Alarm on the first failed operation: Sum >= 1 within a single period.
+  comparison_operator = "GreaterThanOrEqualToThreshold"
+  datapoints_to_alarm = 1
+  dimensions = {
+    # S3 replication metrics are dimensioned by bucket NAME, not ARN, so strip
+    # the ARN prefix (a no-op if the value is already a bare bucket name).
+    DestinationBucket = trimprefix(var.s3_replication.destination_bucket_arn, "arn:aws:s3:::")
+    RuleId            = "whenScannedOkAndReadyToReplicate"
+    SourceBucket      = aws_s3_bucket.bucket.bucket
+  }
+  evaluation_periods        = 1
+  insufficient_data_actions = []
+  metric_name               = "OperationsFailedReplication"
+  namespace                 = "AWS/S3"
+  ok_actions                = []
+  period                    = 300
+  statistic                 = "Sum"
+  threshold                 = 1
+  treat_missing_data        = "missing"
+  provider                  = aws.region
+}