Skip to content

Commit

Permalink
Merge pull request #1774 from ministryofjustice/MLPAB-2841-build-infr…
Browse files Browse the repository at this point in the history
…astructure-to-query-load-balancer-logs

MLPAB-2841 - create athena resources for alb access logs
  • Loading branch information
andrewpearce-digital authored Feb 13, 2025
2 parents cdd3c41 + b1218cc commit 4342998
Show file tree
Hide file tree
Showing 5 changed files with 229 additions and 29 deletions.
45 changes: 16 additions & 29 deletions terraform/account/.terraform.lock.hcl

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

155 changes: 155 additions & 0 deletions terraform/account/region/load_balancer_access_logs_athena.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
# S3 bucket used as the Athena results location for load balancer log queries.
# The name is "<application>-<account-name>-lb-logs-athena-<region>", built from
# the provider's default tags and the current region.
# force_destroy = true allows the bucket to be destroyed while it still
# contains objects.
#tfsec:ignore:aws-s3-enable-versioning:exp:2025-02-28
resource "aws_s3_bucket" "athena_results" {
  provider = aws.region

  bucket = join("-", [
    data.aws_default_tags.current.tags.application,
    data.aws_default_tags.current.tags.account-name,
    "lb-logs-athena",
    data.aws_region.current.name,
  ])
  force_destroy = true
}

# Applies the "BucketOwnerEnforced" object-ownership setting to the Athena
# results bucket.
resource "aws_s3_bucket_ownership_controls" "athena_results" {
  provider = aws.region
  bucket   = aws_s3_bucket.athena_results.id

  rule {
    object_ownership = "BucketOwnerEnforced"
  }
}

# Lifecycle policy for the Athena results bucket: every object expires 28 days
# after creation.
resource "aws_s3_bucket_lifecycle_configuration" "athena_results" {
  provider = aws.region
  bucket   = aws_s3_bucket.athena_results.id

  rule {
    id     = "ExpireObjectsAfter28Days"
    status = "Enabled"

    # An empty filter scopes the rule to all objects in the bucket. Declaring
    # it explicitly avoids the AWS provider's "one of filter or prefix is
    # required" warning, which is slated to become an error.
    filter {}

    expiration {
      days = 28
    }
  }
}

# Default server-side encryption for the Athena results bucket uses the
# "aws:kms" algorithm. No kms_master_key_id is set here, hence the tfsec
# customer-key ignore directly above the resource.
#tfsec:ignore:aws-s3-encryption-customer-key:exp:2025-02-28
resource "aws_s3_bucket_server_side_encryption_configuration" "athena_results" {
  provider = aws.region
  bucket   = aws_s3_bucket.athena_results.id

  rule {
    apply_server_side_encryption_by_default {
      sse_algorithm = "aws:kms"
    }
  }
}

# Enables all four public-access-block settings on the Athena results bucket.
resource "aws_s3_bucket_public_access_block" "athena_results" {
  provider = aws.region
  bucket   = aws_s3_bucket.athena_results.id

  # ACL-related settings
  block_public_acls  = true
  ignore_public_acls = true

  # Bucket-policy-related settings
  block_public_policy     = true
  restrict_public_buckets = true
}

# Attaches the policy rendered by data.aws_iam_policy_document.athena_results
# to the Athena results bucket. The explicit depends_on orders this resource
# after the bucket's public access block.
resource "aws_s3_bucket_policy" "athena_results" {
  provider   = aws.region
  depends_on = [aws_s3_bucket_public_access_block.athena_results]

  bucket = aws_s3_bucket.athena_results.id
  policy = data.aws_iam_policy_document.athena_results.json
}

# Server access logging for the Athena results bucket. Logs are delivered to
# aws_s3_bucket.access_log (declared outside this section) under the prefix
# "log/<athena results bucket name>/".
resource "aws_s3_bucket_logging" "athena_results" {
  provider = aws.region
  bucket   = aws_s3_bucket.athena_results.id

  target_bucket = aws_s3_bucket.access_log.id
  target_prefix = "log/${aws_s3_bucket.athena_results.id}/"
}

# Bucket policy document for the Athena results bucket. Two statements, in
# this order:
#   1. DenyNoneSSLRequests - denies every S3 action, for any AWS principal,
#      when aws:SecureTransport is false.
#   2. AllowOperatorAccess - allows every S3 action on the bucket and its
#      objects for this account's "operator" IAM role.
data "aws_iam_policy_document" "athena_results" {
  provider  = aws.region
  policy_id = "PutObjPolicy"

  statement {
    sid     = "DenyNoneSSLRequests"
    effect  = "Deny"
    actions = ["s3:*"]

    principals {
      type        = "AWS"
      identifiers = ["*"]
    }

    # Both the bucket itself and every object inside it.
    resources = [
      aws_s3_bucket.athena_results.arn,
      "${aws_s3_bucket.athena_results.arn}/*",
    ]

    condition {
      test     = "Bool"
      variable = "aws:SecureTransport"
      values   = ["false"]
    }
  }

  statement {
    sid     = "AllowOperatorAccess"
    effect  = "Allow"
    actions = ["s3:*"]

    principals {
      type        = "AWS"
      identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/operator"]
    }

    # Both the bucket itself and every object inside it.
    resources = [
      aws_s3_bucket.athena_results.arn,
      "${aws_s3_bucket.athena_results.arn}/*",
    ]
  }
}

# Athena workgroup for querying load balancer logs, named
# "<account-name>-<region>". Query results are written under the "workspace/"
# prefix of the Athena results bucket.
resource "aws_athena_workgroup" "alb_logs" {
  provider = aws.region

  name = join("-", [
    data.aws_default_tags.current.tags.account-name,
    data.aws_region.current.name,
  ])
  description   = "Workgroup for the interrogation of Load Balancer Logs in ${data.aws_default_tags.current.tags.account-name} ${data.aws_region.current.name}"
  force_destroy = true

  configuration {
    publish_cloudwatch_metrics_enabled = true
    enforce_workgroup_configuration    = true

    result_configuration {
      output_location = "s3://${aws_s3_bucket.athena_results.bucket}/workspace/"

      # NOTE(review): results are written with SSE_S3, while the bucket's
      # default encryption configuration in this file is "aws:kms" - confirm
      # the difference is intended.
      encryption_configuration {
        encryption_option = "SSE_S3"
      }
    }
  }
}

# Athena database "<account-name>_load_balancer_logs", backed by the Athena
# results bucket.
# NOTE(review): Athena database names are restricted to lowercase letters,
# digits and underscores - verify the account-name tag never contains a hyphen.
resource "aws_athena_database" "access_logs" {
  provider = aws.region

  name          = format("%s_load_balancer_logs", data.aws_default_tags.current.tags.account-name)
  bucket        = aws_s3_bucket.athena_results.id
  force_destroy = true

  encryption_configuration {
    encryption_option = "SSE_S3"
  }
}

# Saved Athena query that creates the external table over the ALB access logs.
# The SQL is rendered from load_balancer_logs_create_table.tpl using
# local.template_vars. The template contains the literal placeholder
# "{PICK YOUR ENVIRONMENT NAME}" (table name and S3 prefix), which Terraform
# does not substitute; it is left in the saved query text.
resource "aws_athena_named_query" "create_alb_log_table" {
  provider = aws.region

  name        = "create-alb-log-table"
  description = "Query to create the ALB Logging Table for an Environment"

  workgroup = aws_athena_workgroup.alb_logs.id
  database  = aws_athena_database.access_logs.name

  query = templatefile(
    "${path.module}/load_balancer_logs_create_table.tpl",
    local.template_vars,
  )
}

locals {
  # Variables handed to templatefile() when rendering the ALB log table query.
  template_vars = {
    account_id = data.aws_caller_identity.current.account_id
    region     = data.aws_region.current.name
    bucket     = aws_s3_bucket.access_log.id
    workspace  = data.aws_default_tags.current.tags.account-name
  }
}
57 changes: 57 additions & 0 deletions terraform/account/region/load_balancer_logs_create_table.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
CREATE EXTERNAL TABLE IF NOT EXISTS {PICK YOUR ENVIRONMENT NAME} (
    type                     string,
    time                     string,
    elb                      string,
    client_ip                string,
    client_port              int,
    target_ip                string,
    target_port              int,
    request_processing_time  double,
    target_processing_time   double,
    response_processing_time double,
    elb_status_code          int,
    target_status_code       string,
    received_bytes           bigint,
    sent_bytes               bigint,
    request_verb             string,
    request_url              string,
    request_proto            string,
    user_agent               string,
    ssl_cipher               string,
    ssl_protocol             string,
    target_group_arn         string,
    trace_id                 string,
    domain_name              string,
    chosen_cert_arn          string,
    matched_rule_priority    string,
    request_creation_time    string,
    actions_executed         string,
    redirect_url             string,
    lambda_error_reason      string,
    target_port_list         string,
    target_status_code_list  string,
    classification           string,
    classification_reason    string,
    conn_trace_id            string
)
PARTITIONED BY (
    day string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    'serialization.format' = '1',
    'input.regex' =
        '([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) (.*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-_]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^ ]*)\" \"([^\\s]+?)\" \"([^\\s]+)\" \"([^ ]*)\" \"([^ ]*)\" ?([^ ]*)?'
)
LOCATION 's3://${bucket}/{PICK YOUR ENVIRONMENT NAME}/AWSLogs/${account_id}/elasticloadbalancing/${region}/'
TBLPROPERTIES (
    "projection.enabled"           = "true",
    "projection.day.type"          = "date",
    "projection.day.range"         = "2022/01/01,NOW",
    "projection.day.format"        = "yyyy/MM/dd",
    "projection.day.interval"      = "1",
    "projection.day.interval.unit" = "DAYS",
    "storage.location.template"    = "s3://${bucket}/{PICK YOUR ENVIRONMENT NAME}/AWSLogs/${account_id}/elasticloadbalancing/${region}/$${day}"
)
1 change: 1 addition & 0 deletions terraform/account/region/network.tf
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ resource "aws_security_group" "lambda_egress" {
provider = aws.region
}

#tfsec:ignore:aws-ec2-no-public-egress-sgr egress is managed by AWS Network Firewall
resource "aws_security_group_rule" "lambda_egress" {
type = "egress"
protocol = "-1"
Expand Down

0 comments on commit 4342998

Please sign in to comment.