From 525a3bf847f33dfef18ec9f860b22a937da4a40f Mon Sep 17 00:00:00 2001 From: Sam Ebstein Date: Wed, 5 Jun 2024 08:46:37 -0700 Subject: [PATCH] feat(promtail): Adding S3 log parser support for AWS GuardDuty (#13148) Co-authored-by: James Callahan --- tools/lambda-promtail/lambda-promtail/s3.go | 16 +++++++++- .../lambda-promtail/s3_test.go | 32 +++++++++++++++++++ tools/lambda-promtail/main.tf | 2 +- tools/lambda-promtail/variables.tf | 6 ++++ 4 files changed, 54 insertions(+), 2 deletions(-) diff --git a/tools/lambda-promtail/lambda-promtail/s3.go b/tools/lambda-promtail/lambda-promtail/s3.go index b919915533e29..2d2b7fe090fe9 100644 --- a/tools/lambda-promtail/lambda-promtail/s3.go +++ b/tools/lambda-promtail/lambda-promtail/s3.go @@ -50,6 +50,7 @@ const ( LB_NLB_TYPE string = "net" LB_ALB_TYPE string = "app" WAF_LOG_TYPE string = "WAFLogs" + GUARDDUTY_LOG_TYPE string = "GuardDuty" ) var ( @@ -75,6 +76,10 @@ var ( // source: https://docs.aws.amazon.com/waf/latest/developerguide/logging-s3.html // format: aws-waf-logs-suffix[/prefix]/AWSLogs/aws-account-id/WAFLogs/region/webacl-name/year/month/day/hour/minute/aws-account-id_waflogs_region_webacl-name_timestamp_hash.log.gz // example: aws-waf-logs-test/AWSLogs/11111111111/WAFLogs/us-east-1/TEST-WEBACL/2021/10/28/19/50/11111111111_waflogs_us-east-1_TEST-WEBACL_20211028T1950Z_e0ca43b5.log.gz + // AWS GuardDuty + // source: https://docs.aws.amazon.com/guardduty/latest/ug/guardduty_exportfindings.html + // format: my-bucket/AWSLogs/aws-account-id/GuardDuty/region/year/month/day/random-string.jsonl.gz + // example: my-bucket/AWSLogs/123456789012/GuardDuty/us-east-1/2024/05/30/07a3f2ce-1485-3031-b842-e1f324c4a48d.jsonl.gz defaultFilenameRegex = regexp.MustCompile(`AWSLogs\/(?P<account_id>\d+)\/(?P<type>[a-zA-Z0-9_\-]+)\/(?P<region>[\w-]+)\/(?P<year>\d+)\/(?P<month>\d+)\/(?P<day>\d+)\/\d+\_(?:elasticloadbalancing|vpcflowlogs)_(?:\w+-\w+-(?:\w+-)?\d)_(?:(?P<lb_type>app|net)\.*?)?(?P<src>[a-zA-Z0-9\-]+)`) defaultTimestampRegex = 
regexp.MustCompile(`(?P<timestamp>\d+-\d+-\d+T\d+:\d+:\d+(?:\.\d+Z)?)`) cloudtrailFilenameRegex = regexp.MustCompile(`AWSLogs\/(?P<organization_id>o-[a-z0-9]{10,32})?\/?(?P<account_id>\d+)\/(?P<type>[a-zA-Z0-9_\-]+)\/(?P<region>[\w-]+)\/(?P<year>\d+)\/(?P<month>\d+)\/(?P<day>\d+)\/\d+\_(?:CloudTrail|CloudTrail-Digest)_(?:\w+-\w+-(?:\w+-)?\d)_(?:(?:app|nlb|net)\.*?)?.+_(?P<src>[a-zA-Z0-9\-]+)`) @@ -82,6 +87,7 @@ var ( cloudfrontTimestampRegex = regexp.MustCompile(`(?P<timestamp>\d+-\d+-\d+\s\d+:\d+:\d+)`) wafFilenameRegex = regexp.MustCompile(`AWSLogs\/(?P<account_id>\d+)\/(?P<type>WAFLogs)\/(?P<region>[\w-]+)\/(?P<src>[\w-]+)\/(?P<year>\d+)\/(?P<month>\d+)\/(?P<day>\d+)\/(?P<hour>\d+)\/(?P<minute>\d+)\/\d+\_waflogs\_[\w-]+_[\w-]+_\d+T\d+Z_\w+`) wafTimestampRegex = regexp.MustCompile(`"timestamp":\s*(?P<timestamp>\d+),`) + guarddutyFilenameRegex = regexp.MustCompile(`AWSLogs\/(?P<account_id>\d+)\/(?P<type>GuardDuty)\/(?P<region>[\w-]+)\/(?P<year>\d+)\/(?P<month>\d+)\/(?P<day>\d+)\/.+`) parsers = map[string]parserConfig{ FLOW_LOG_TYPE: { logTypeLabel: "s3_vpc_flow", @@ -122,6 +128,14 @@ var ( timestampRegex: wafTimestampRegex, timestampType: "unix", }, + GUARDDUTY_LOG_TYPE: { + logTypeLabel: "s3_guardduty", + filenameRegex: guarddutyFilenameRegex, + ownerLabelKey: "account_id", + timestampFormat: time.RFC3339, + timestampRegex: defaultTimestampRegex, + timestampType: "string", + }, } ) @@ -165,7 +179,7 @@ func parseS3Log(ctx context.Context, b *batch, labels map[string]string, obj io. 
ls = applyLabels(ls) // extract the timestamp of the nested event and sends the rest as raw json - if labels["type"] == CLOUDTRAIL_LOG_TYPE { + if labels["type"] == CLOUDTRAIL_LOG_TYPE || labels["type"] == GUARDDUTY_LOG_TYPE { records := make(chan Record) jsonStream := NewJSONStream(records) go jsonStream.Start(gzreader, parser.skipHeaderCount) diff --git a/tools/lambda-promtail/lambda-promtail/s3_test.go b/tools/lambda-promtail/lambda-promtail/s3_test.go index 644ad12f17276..4cbcacc6e7577 100644 --- a/tools/lambda-promtail/lambda-promtail/s3_test.go +++ b/tools/lambda-promtail/lambda-promtail/s3_test.go @@ -93,6 +93,38 @@ func Test_getLabels(t *testing.T) { }, wantErr: false, }, + { + name: "s3_guardduty", + args: args{ + record: events.S3EventRecord{ + AWSRegion: "us-east-1", + S3: events.S3Entity{ + Bucket: events.S3Bucket{ + Name: "s3_guardduty_test", + OwnerIdentity: events.S3UserIdentity{ + PrincipalID: "test", + }, + }, + Object: events.S3Object{ + Key: "AWSLogs/123456789012/GuardDuty/us-east-1/2024/05/30/07a3f2ce-1485-3031-b842-e1f324c4a48d.jsonl.gz", + }, + }, + }, + }, + want: map[string]string{ + "account_id": "123456789012", + "bucket": "s3_guardduty_test", + "bucket_owner": "test", + "bucket_region": "us-east-1", + "day": "30", + "key": "AWSLogs/123456789012/GuardDuty/us-east-1/2024/05/30/07a3f2ce-1485-3031-b842-e1f324c4a48d.jsonl.gz", + "month": "05", + "region": "us-east-1", + "type": GUARDDUTY_LOG_TYPE, + "year": "2024", + }, + wantErr: false, + }, { name: "s3_flow_logs", args: args{ diff --git a/tools/lambda-promtail/main.tf b/tools/lambda-promtail/main.tf index 37f7e9ede7d0e..3c7e74fdc7e5b 100644 --- a/tools/lambda-promtail/main.tf +++ b/tools/lambda-promtail/main.tf @@ -251,7 +251,7 @@ resource "aws_s3_bucket_notification" "this" { lambda_function_arn = aws_lambda_function.this.arn events = ["s3:ObjectCreated:*"] filter_prefix = "AWSLogs/" - filter_suffix = ".log.gz" + filter_suffix = var.filter_suffix } depends_on = [ diff --git 
a/tools/lambda-promtail/variables.tf b/tools/lambda-promtail/variables.tf index bda956bc855b3..74dceb1a4a199 100644 --- a/tools/lambda-promtail/variables.tf +++ b/tools/lambda-promtail/variables.tf @@ -16,6 +16,12 @@ variable "bucket_names" { default = [] } +variable "filter_suffix" { + type = string + description = "Suffix for S3 bucket notification filter" + default = ".gz" +} + variable "log_group_names" { type = set(string) description = "List of CloudWatch Log Group names to create Subscription Filters for."