diff --git a/AGENTS.md b/AGENTS.md index 06a9cff..3a84da3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -109,6 +109,8 @@ AWS Resource Exporter is a Prometheus exporter for AWS resources, built in Go. I - `aws_resources_exporter_vpc_routesperroutetable_usage` - Usage of routes per route table - `aws_resources_exporter_vpc_ipv4blockspervpc_quota` - Quota for IPv4 blocks per VPC - `aws_resources_exporter_vpc_ipv4blockspervpc_usage` - Usage of IPv4 blocks per VPC +- `aws_resources_exporter_vpc_ipv4addressespersubnet_capacity` - Amount of usable IPv4 addresses per subnet (based on CIDR block) +- `aws_resources_exporter_vpc_ipv4addressespersubnet_usage` - Used IPv4 addresses per subnet ### EC2 Metrics - `aws_resources_exporter_ec2_transitgatewaysperregion_quota` - Quota for transit gateways per region diff --git a/README.md b/README.md index 00d6008..32465e4 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ This was made as a complement to [CloudWatch Exporter](https://github.com/promet | VPC | routetablespervpc | Quota and usage of routetables per VPC | | VPC | routesperroutetable | Quota and usage of the routes per routetable | | VPC | ipv4blockspervpc | Quota and usage of ipv4 blocks per VPC | +| VPC | ipv4addressespersubnet | Capacity and usage of IPv4 addresses per subnet | | EC2 | transitgatewaysperregion | Quota and usage of transitgateways per region | | Route53 | recordsperhostedzone | Quota and usage of resource records per Hosted Zone | diff --git a/dashboards/aws-resource-exporter.yaml b/dashboards/aws-resource-exporter.yaml index 31cf09d..d715349 100644 --- a/dashboards/aws-resource-exporter.yaml +++ b/dashboards/aws-resource-exporter.yaml @@ -735,12 +735,258 @@ data: "type": "alertlist" }, { + "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 32 }, + "id": 17, + "panels": [], + "title": "VPC", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Shows IPv4 address utilization per 
subnet", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 33 + }, + "id": 18, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "aws_resources_exporter_vpc_ipv4addressespersubnet_capacity", + "legendFormat": "{{subnetid}} capacity", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "aws_resources_exporter_vpc_ipv4addressespersubnet_usage", + "legendFormat": "{{subnetid}} used", + "range": true, + "refId": "B" + } + ], + "title": "IPv4 Address Usage per Subnet", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "description": "Shows percentage utilization of IPv4 addresses per subnet", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": 
{ + "type": "color-background" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 70 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 19, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Utilization %" + } + ] + }, + "pluginVersion": "10.4.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "(aws_resources_exporter_vpc_ipv4addressespersubnet_usage / aws_resources_exporter_vpc_ipv4addressespersubnet_capacity) * 100", + "format": "table", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Subnet IPv4 Utilization", + "transformations": [ + { + "id": "groupBy", + "options": { + "fields": { + "Value": { + "aggregations": [ + "lastNotNull" + ], + "operation": "aggregate" + }, + "aws_region": { + "aggregations": [], + "operation": "groupby" + }, + "subnetid": { + "aggregations": [], + "operation": "groupby" + }, + "vpcid": { + "aggregations": [], + "operation": "groupby" + } + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "Value (lastNotNull)": "Utilization %", + "aws_region": "Region", + "subnetid": "Subnet ID", + "vpcid": "VPC ID" + } + } + } + ], + "type": "table" + }, + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 41 + }, "id": 4, "title": "Route53", "type": "row" @@ -806,7 +1052,7 @@ data: "h": 8, "w": 24, "x": 0, - "y": 33 + "y": 42 }, "id": 2, "options": { diff --git a/pkg/util.go b/pkg/util.go 
// CalculateTotalIPsFromCIDR returns the total number of addresses contained in
// an IPv4 CIDR block, for example 256 for "10.0.1.0/24".
//
// The block is parsed with net.ParseCIDR. Only IPv4 blocks whose prefix length
// is between /16 and /28 (the range AWS accepts for subnets) are valid; any
// other input yields 0 and a non-nil error. The count is the raw size of the
// block: addresses reserved by AWS are not subtracted here.
func CalculateTotalIPsFromCIDR(cidrBlock string) (int64, error) {
	_, ipNet, err := net.ParseCIDR(cidrBlock)
	if err != nil {
		return 0, err
	}

	// Size reports both the prefix length and the address width. The width
	// must be checked: an IPv6 block such as 2001:db8::/24 has a prefix that
	// falls inside the 16..28 window and would otherwise be sized as IPv4.
	prefixLength, addressBits := ipNet.Mask.Size()
	if addressBits != 32 {
		return 0, errors.New("invalid subnet CIDR for AWS (must be an IPv4 block)")
	}

	// Validate reasonable prefix length for IPv4 subnets (AWS supports /16 to /28).
	if prefixLength < 16 || prefixLength > 28 {
		return 0, errors.New("invalid subnet prefix length for AWS (must be /16 to /28)")
	}

	// 2^(host bits); at most 2^16 because of the range check above.
	hostBits := addressBits - prefixLength
	return int64(1) << hostBits, nil
}
true, + }, + { + name: "Invalid prefix length - too small for AWS", + cidrBlock: "10.0.0.0/15", + expectedIPs: 0, + expectError: true, + }, + { + name: "Invalid prefix length - too large for AWS", + cidrBlock: "10.0.1.0/29", + expectedIPs: 0, + expectError: true, + }, + { + name: "Edge case - /16 (largest AWS subnet)", + cidrBlock: "172.16.0.0/16", + expectedIPs: 65536, + expectError: false, + }, + { + name: "Edge case - /28 (smallest AWS subnet)", + cidrBlock: "192.168.1.0/28", + expectedIPs: 16, + expectError: false, + }, + { + name: "Invalid prefix length - negative", + cidrBlock: "10.0.1.0/-1", + expectedIPs: 0, + expectError: true, + }, + { + name: "Invalid prefix length - too large", + cidrBlock: "10.0.1.0/33", + expectedIPs: 0, + expectError: true, + }, + { + name: "Invalid IP address", + cidrBlock: "999.999.999.999/24", + expectedIPs: 0, + expectError: true, + }, + { + name: "IPv6 CIDR (should fail AWS validation)", + cidrBlock: "2001:db8::/32", + expectedIPs: 0, + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := CalculateTotalIPsFromCIDR(tt.cidrBlock) + + if tt.expectError { + if err == nil { + t.Errorf("CalculateTotalIPsFromCIDR() expected error but got none") + } + if result != 0 { + t.Errorf("CalculateTotalIPsFromCIDR() expected 0 IPs when error, got %d", result) + } + } else { + if err != nil { + t.Errorf("CalculateTotalIPsFromCIDR() unexpected error: %v", err) + } + if result != tt.expectedIPs { + t.Errorf("CalculateTotalIPsFromCIDR() = %d, want %d", result, tt.expectedIPs) + } + } + }) + } +} diff --git a/pkg/vpc.go b/pkg/vpc.go index 13aee43..7d311ec 100644 --- a/pkg/vpc.go +++ b/pkg/vpc.go @@ -23,6 +23,7 @@ const ( QUOTA_ROUTE_TABLES_PER_VPC string = "L-589F43AA" QUOTA_IPV4_BLOCKS_PER_VPC string = "L-83CA0A9D" SERVICE_CODE_VPC string = "vpc" + AWS_RESERVED_IPS_PER_SUBNET int64 = 5 ) type VPCExporter struct { @@ -40,6 +41,8 @@ type VPCExporter struct { RouteTablesPerVpcUsage 
*prometheus.Desc IPv4BlocksPerVpcQuota *prometheus.Desc IPv4BlocksPerVpcUsage *prometheus.Desc + IPv4AddressesPerSubnetCapacity *prometheus.Desc + IPv4AddressesPerSubnetUsage *prometheus.Desc logger *slog.Logger timeout time.Duration @@ -73,6 +76,8 @@ func NewVPCExporter(configs []aws.Config, logger *slog.Logger, config VPCConfig, RouteTablesPerVpcUsage: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "vpc_routetablespervpc_usage"), "The usage of route tables per vpc", []string{"aws_region", "vpcid"}, WithKeyValue(constLabels, QUOTA_CODE_KEY, QUOTA_ROUTE_TABLES_PER_VPC)), IPv4BlocksPerVpcQuota: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "vpc_ipv4blockspervpc_quota"), "The quota of ipv4 blocks per vpc", []string{"aws_region"}, WithKeyValue(constLabels, QUOTA_CODE_KEY, QUOTA_IPV4_BLOCKS_PER_VPC)), IPv4BlocksPerVpcUsage: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "vpc_ipv4blockspervpc_usage"), "The usage of ipv4 blocks per vpc", []string{"aws_region", "vpcid"}, WithKeyValue(constLabels, QUOTA_CODE_KEY, QUOTA_IPV4_BLOCKS_PER_VPC)), + IPv4AddressesPerSubnetCapacity: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "vpc_ipv4addressespersubnet_capacity"), "The amount of usable IPv4 addresses per subnet (based on CIDR)", []string{"aws_region", "vpcid", "subnetid"}, constLabels), + IPv4AddressesPerSubnetUsage: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "vpc_ipv4addressespersubnet_usage"), "The usage of IPv4 addresses per subnet", []string{"aws_region", "vpcid", "subnetid"}, constLabels), logger: logger, timeout: *config.Timeout, cache: *NewMetricsCache(*config.CacheTTL), @@ -103,6 +108,7 @@ func (e *VPCExporter) CollectInRegion(cfg aws.Config, region string, wg *sync.Wa e.collectInterfaceVpcEndpointsPerVpcUsage(allVpcs.Vpcs[i], ec2Svc, region) e.collectRoutesTablesPerVpcUsage(allVpcs.Vpcs[i], ec2Svc, region) e.collectIPv4BlocksPerVpcUsage(allVpcs.Vpcs[i], ec2Svc, region) + 
e.collectIPv4AddressesPerSubnetUsage(allVpcs.Vpcs[i], ec2Svc, region) } } e.collectRoutesPerRouteTableQuota(quotaSvc, region) @@ -322,6 +328,77 @@ func (e *VPCExporter) collectIPv4BlocksPerVpcUsage(vpc ec2_types.Vpc, ec2Svc *ec e.cache.AddMetric(prometheus.MustNewConstMetric(e.IPv4BlocksPerVpcUsage, prometheus.GaugeValue, float64(quota), region, *vpc.VpcId)) } +func (e *VPCExporter) collectIPv4AddressesPerSubnetUsage(vpc ec2_types.Vpc, ec2Svc *ec2.Client, region string) { + ctx, cancelFunc := context.WithTimeout(context.Background(), e.timeout) + defer cancelFunc() + + input := &ec2.DescribeSubnetsInput{ + Filters: []ec2_types.Filter{{ + Name: aws.String("vpc-id"), + Values: []string{*vpc.VpcId}, + }}, + } + + var subnets []ec2_types.Subnet + paginator := ec2.NewDescribeSubnetsPaginator(ec2Svc, input) + + for paginator.HasMorePages() { + awsclient.AwsExporterMetrics.IncrementRequests() + result, err := paginator.NextPage(ctx) + if err != nil { + e.logger.Error("Call to DescribeSubnets failed", "region", region, "err", err) + awsclient.AwsExporterMetrics.IncrementErrors() + return + } + subnets = append(subnets, result.Subnets...) 
+ } + + for _, subnet := range subnets { + // Validate required fields + if subnet.SubnetId == nil { + e.logger.Error("Subnet has nil SubnetId", "region", region, "vpcId", *vpc.VpcId) + awsclient.AwsExporterMetrics.IncrementErrors() + continue + } + if subnet.CidrBlock == nil { + e.logger.Error("Subnet has nil CidrBlock", "region", region, "subnetId", *subnet.SubnetId) + awsclient.AwsExporterMetrics.IncrementErrors() + continue + } + if subnet.AvailableIpAddressCount == nil { + e.logger.Error("Subnet has nil AvailableIpAddressCount", "region", region, "subnetId", *subnet.SubnetId) + awsclient.AwsExporterMetrics.IncrementErrors() + continue + } + + // Calculate total IPs from CIDR block + cidrBlock := *subnet.CidrBlock + totalIPs, err := CalculateTotalIPsFromCIDR(cidrBlock) + if err != nil { + e.logger.Error("Could not calculate total IPs from CIDR", "region", region, "subnetId", *subnet.SubnetId, "cidr", cidrBlock, "err", err) + awsclient.AwsExporterMetrics.IncrementErrors() + continue + } + + // AWS reserves 5 IPs per subnet, so usable IPs = total - 5 + // https://docs.aws.amazon.com/vpc/latest/userguide/subnet-sizing.html + usableIPs := totalIPs - AWS_RESERVED_IPS_PER_SUBNET + availableIPs := int64(*subnet.AvailableIpAddressCount) + usedIPs := usableIPs - availableIPs + + // Validate that used IPs is not negative (sanity check) + if usedIPs < 0 { + e.logger.Error("Calculated negative used IPs", "region", region, "subnetId", *subnet.SubnetId, "usableIPs", usableIPs, "availableIPs", availableIPs) + awsclient.AwsExporterMetrics.IncrementErrors() + continue + } + + // Add both quota and usage metrics + e.cache.AddMetric(prometheus.MustNewConstMetric(e.IPv4AddressesPerSubnetCapacity, prometheus.GaugeValue, float64(usableIPs), region, *vpc.VpcId, *subnet.SubnetId)) + e.cache.AddMetric(prometheus.MustNewConstMetric(e.IPv4AddressesPerSubnetUsage, prometheus.GaugeValue, float64(usedIPs), region, *vpc.VpcId, *subnet.SubnetId)) + } +} + func (e *VPCExporter) Describe(ch 
chan<- *prometheus.Desc) { ch <- e.VpcsPerRegionQuota ch <- e.VpcsPerRegionUsage @@ -335,4 +412,6 @@ func (e *VPCExporter) Describe(ch chan<- *prometheus.Desc) { ch <- e.InterfaceVpcEndpointsPerVpcUsage ch <- e.RouteTablesPerVpcQuota ch <- e.RoutesPerRouteTableUsage + ch <- e.IPv4AddressesPerSubnetCapacity + ch <- e.IPv4AddressesPerSubnetUsage }