Skip to content

Commit

Permalink
fix(metrics): Avoid 0 value when instance is not available
Browse files (browse the repository at this point in the history)
  • Loading branch information
vmercierfr committed Oct 18, 2023
1 parent 59551d4 commit ed61550
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 53 deletions.
60 changes: 30 additions & 30 deletions internal/app/cloudwatch/rds.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -27,55 +27,55 @@ type CloudWatchMetrics struct {
}

// RdsMetrics groups the per-metric values that Update fills in by field name.
//
// NOTE(review): this listing is a rendered diff whose +/- markers were lost.
// The float64 fields look like the removed lines and the *float64 fields like
// the added lines (the file header reports 30 additions & 30 deletions) —
// confirm against the actual commit before treating this as a declaration.
type RdsMetrics struct {
// Plain-value declarations (presumably the removed side of the diff).
CPUUtilization float64
DBLoad float64
DBLoadCPU float64
DBLoadNonCPU float64
DatabaseConnections float64
FreeStorageSpace float64
FreeableMemory float64
MaximumUsedTransactionIDs float64
ReadIOPS float64
ReadThroughput float64
ReplicaLag float64
ReplicationSlotDiskUsage float64
SwapUsage float64
WriteIOPS float64
WriteThroughput float64
// Pointer declarations (presumably the added side of the diff). A field
// stays nil until it is assigned, so a missing metric is distinguishable
// from a reported 0; per the commit title this is the point of the change.
CPUUtilization *float64
DBLoad *float64
DBLoadCPU *float64
DBLoadNonCPU *float64
DatabaseConnections *float64
FreeStorageSpace *float64
FreeableMemory *float64
MaximumUsedTransactionIDs *float64
ReadIOPS *float64
ReadThroughput *float64
ReplicaLag *float64
ReplicationSlotDiskUsage *float64
SwapUsage *float64
WriteIOPS *float64
WriteThroughput *float64
}

func (m *RdsMetrics) Update(field string, value float64) error {
switch field {
case "DBLoad":
m.DBLoad = value
m.DBLoad = &value
case "DBLoadCPU":
m.DBLoadCPU = value
m.DBLoadCPU = &value
case "DBLoadNonCPU":
m.DBLoadNonCPU = value
m.DBLoadNonCPU = &value
case "CPUUtilization":
m.CPUUtilization = value
m.CPUUtilization = &value
case "DatabaseConnections":
m.DatabaseConnections = value
m.DatabaseConnections = &value
case "FreeStorageSpace":
m.FreeStorageSpace = value
m.FreeStorageSpace = &value
case "FreeableMemory":
m.FreeableMemory = value
m.FreeableMemory = &value
case "SwapUsage":
m.SwapUsage = value
m.SwapUsage = &value
case "WriteIOPS":
m.WriteIOPS = value
m.WriteIOPS = &value
case "ReadIOPS":
m.ReadIOPS = value
m.ReadIOPS = &value
case "ReplicaLag":
m.ReplicaLag = value
m.ReplicaLag = &value
case "ReplicationSlotDiskUsage":
m.ReplicationSlotDiskUsage = value
m.ReplicationSlotDiskUsage = &value
case "MaximumUsedTransactionIDs":
m.MaximumUsedTransactionIDs = value
m.MaximumUsedTransactionIDs = &value
case "ReadThroughput":
m.ReadThroughput = value
m.ReadThroughput = &value
case "WriteThroughput":
m.WriteThroughput = value
m.WriteThroughput = &value
default:
return fmt.Errorf("can't process '%s' metrics: %w", field, errUnknownMetric)
}
Expand Down
79 changes: 63 additions & 16 deletions internal/app/exporter/exporter.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -389,9 +389,12 @@ func (c *rdsCollector) Collect(ch chan<- prometheus.Metric) {
// RDS metrics
ch <- prometheus.MustNewConstMetric(c.apiCall, prometheus.CounterValue, c.counters.rdsAPIcalls, c.awsAccountID, c.awsRegion, "rds")
for dbidentifier, instance := range c.metrics.rds.Instances {
if instance.LogFilesSize != nil {
ch <- prometheus.MustNewConstMetric(c.logFilesSize, prometheus.GaugeValue, float64(*instance.LogFilesSize), c.awsAccountID, c.awsRegion, dbidentifier)
}

ch <- prometheus.MustNewConstMetric(c.allocatedStorage, prometheus.GaugeValue, float64(instance.AllocatedStorage), c.awsAccountID, c.awsRegion, dbidentifier)
ch <- prometheus.MustNewConstMetric(c.information, prometheus.GaugeValue, 1, c.awsAccountID, c.awsRegion, dbidentifier, instance.DbiResourceID, instance.DBInstanceClass, instance.Engine, instance.EngineVersion, instance.StorageType, strconv.FormatBool(instance.MultiAZ), strconv.FormatBool(instance.DeletionProtection), instance.Role, instance.SourceDBInstanceIdentifier, strconv.FormatBool(instance.PendingModifiedValues), instance.PendingMaintenanceAction, strconv.FormatBool(instance.PerformanceInsightsEnabled))
ch <- prometheus.MustNewConstMetric(c.logFilesSize, prometheus.GaugeValue, float64(instance.LogFilesSize), c.awsAccountID, c.awsRegion, dbidentifier)
ch <- prometheus.MustNewConstMetric(c.maxAllocatedStorage, prometheus.GaugeValue, float64(instance.MaxAllocatedStorage), c.awsAccountID, c.awsRegion, dbidentifier)
ch <- prometheus.MustNewConstMetric(c.maxIops, prometheus.GaugeValue, float64(instance.MaxIops), c.awsAccountID, c.awsRegion, dbidentifier)
ch <- prometheus.MustNewConstMetric(c.status, prometheus.GaugeValue, float64(instance.Status), c.awsAccountID, c.awsRegion, dbidentifier)
Expand All @@ -402,21 +405,65 @@ func (c *rdsCollector) Collect(ch chan<- prometheus.Metric) {
// Cloudwatch metrics
ch <- prometheus.MustNewConstMetric(c.apiCall, prometheus.CounterValue, c.counters.cloudwatchAPICalls, c.awsAccountID, c.awsRegion, "cloudwatch")
for dbidentifier, instance := range c.metrics.cloudwatchInstances.Instances {
ch <- prometheus.MustNewConstMetric(c.databaseConnections, prometheus.GaugeValue, instance.DatabaseConnections, c.awsAccountID, c.awsRegion, dbidentifier)
ch <- prometheus.MustNewConstMetric(c.freeStorageSpace, prometheus.GaugeValue, instance.FreeStorageSpace, c.awsAccountID, c.awsRegion, dbidentifier)
ch <- prometheus.MustNewConstMetric(c.freeableMemory, prometheus.GaugeValue, instance.FreeableMemory, c.awsAccountID, c.awsRegion, dbidentifier)
ch <- prometheus.MustNewConstMetric(c.maximumUsedTransactionIDs, prometheus.GaugeValue, instance.MaximumUsedTransactionIDs, c.awsAccountID, c.awsRegion, dbidentifier)
ch <- prometheus.MustNewConstMetric(c.readThroughput, prometheus.GaugeValue, instance.ReadThroughput, c.awsAccountID, c.awsRegion, dbidentifier)
ch <- prometheus.MustNewConstMetric(c.replicaLag, prometheus.GaugeValue, instance.ReplicaLag, c.awsAccountID, c.awsRegion, dbidentifier)
ch <- prometheus.MustNewConstMetric(c.replicationSlotDiskUsage, prometheus.GaugeValue, instance.ReplicationSlotDiskUsage, c.awsAccountID, c.awsRegion, dbidentifier)
ch <- prometheus.MustNewConstMetric(c.swapUsage, prometheus.GaugeValue, instance.SwapUsage, c.awsAccountID, c.awsRegion, dbidentifier)
ch <- prometheus.MustNewConstMetric(c.readIOPS, prometheus.GaugeValue, instance.ReadIOPS, c.awsAccountID, c.awsRegion, dbidentifier)
ch <- prometheus.MustNewConstMetric(c.writeIOPS, prometheus.GaugeValue, instance.WriteIOPS, c.awsAccountID, c.awsRegion, dbidentifier)
ch <- prometheus.MustNewConstMetric(c.writeThroughput, prometheus.GaugeValue, instance.WriteThroughput, c.awsAccountID, c.awsRegion, dbidentifier)
ch <- prometheus.MustNewConstMetric(c.DBLoad, prometheus.GaugeValue, instance.DBLoad, c.awsAccountID, c.awsRegion, dbidentifier)
ch <- prometheus.MustNewConstMetric(c.cpuUtilisation, prometheus.GaugeValue, instance.CPUUtilization, c.awsAccountID, c.awsRegion, dbidentifier)
ch <- prometheus.MustNewConstMetric(c.dBLoadCPU, prometheus.GaugeValue, instance.DBLoadCPU, c.awsAccountID, c.awsRegion, dbidentifier)
ch <- prometheus.MustNewConstMetric(c.dBLoadNonCPU, prometheus.GaugeValue, instance.DBLoadNonCPU, c.awsAccountID, c.awsRegion, dbidentifier)
if instance.DatabaseConnections != nil {
ch <- prometheus.MustNewConstMetric(c.databaseConnections, prometheus.GaugeValue, *instance.DatabaseConnections, c.awsAccountID, c.awsRegion, dbidentifier)
}

if instance.FreeStorageSpace != nil {
ch <- prometheus.MustNewConstMetric(c.freeStorageSpace, prometheus.GaugeValue, *instance.FreeStorageSpace, c.awsAccountID, c.awsRegion, dbidentifier)
}

if instance.FreeableMemory != nil {
ch <- prometheus.MustNewConstMetric(c.freeableMemory, prometheus.GaugeValue, *instance.FreeableMemory, c.awsAccountID, c.awsRegion, dbidentifier)
}

if instance.MaximumUsedTransactionIDs != nil {
ch <- prometheus.MustNewConstMetric(c.maximumUsedTransactionIDs, prometheus.GaugeValue, *instance.MaximumUsedTransactionIDs, c.awsAccountID, c.awsRegion, dbidentifier)
}

if instance.ReadThroughput != nil {
ch <- prometheus.MustNewConstMetric(c.readThroughput, prometheus.GaugeValue, *instance.ReadThroughput, c.awsAccountID, c.awsRegion, dbidentifier)
}

if instance.ReplicaLag != nil {
ch <- prometheus.MustNewConstMetric(c.replicaLag, prometheus.GaugeValue, *instance.ReplicaLag, c.awsAccountID, c.awsRegion, dbidentifier)
}

if instance.ReplicationSlotDiskUsage != nil {
ch <- prometheus.MustNewConstMetric(c.replicationSlotDiskUsage, prometheus.GaugeValue, *instance.ReplicationSlotDiskUsage, c.awsAccountID, c.awsRegion, dbidentifier)
}

if instance.SwapUsage != nil {
ch <- prometheus.MustNewConstMetric(c.swapUsage, prometheus.GaugeValue, *instance.SwapUsage, c.awsAccountID, c.awsRegion, dbidentifier)
}

if instance.ReadIOPS != nil {
ch <- prometheus.MustNewConstMetric(c.readIOPS, prometheus.GaugeValue, *instance.ReadIOPS, c.awsAccountID, c.awsRegion, dbidentifier)
}

if instance.WriteIOPS != nil {
ch <- prometheus.MustNewConstMetric(c.writeIOPS, prometheus.GaugeValue, *instance.WriteIOPS, c.awsAccountID, c.awsRegion, dbidentifier)
}

if instance.WriteThroughput != nil {
ch <- prometheus.MustNewConstMetric(c.writeThroughput, prometheus.GaugeValue, *instance.WriteThroughput, c.awsAccountID, c.awsRegion, dbidentifier)
}

if instance.DBLoad != nil {
ch <- prometheus.MustNewConstMetric(c.DBLoad, prometheus.GaugeValue, *instance.DBLoad, c.awsAccountID, c.awsRegion, dbidentifier)
}

if instance.CPUUtilization != nil {
ch <- prometheus.MustNewConstMetric(c.cpuUtilisation, prometheus.GaugeValue, *instance.CPUUtilization, c.awsAccountID, c.awsRegion, dbidentifier)
}

if instance.DBLoadCPU != nil {
ch <- prometheus.MustNewConstMetric(c.dBLoadCPU, prometheus.GaugeValue, *instance.DBLoadCPU, c.awsAccountID, c.awsRegion, dbidentifier)
}

if instance.DBLoadNonCPU != nil {
ch <- prometheus.MustNewConstMetric(c.dBLoadNonCPU, prometheus.GaugeValue, *instance.DBLoadNonCPU, c.awsAccountID, c.awsRegion, dbidentifier)
}
}

// usage metrics
Expand Down
17 changes: 10 additions & 7 deletions internal/app/rds/rds.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -31,7 +31,7 @@ type RdsInstanceMetrics struct {
StorageThroughput int64
MaxAllocatedStorage int64
MaxIops int64
LogFilesSize int64
LogFilesSize *int64
PendingMaintenanceAction string
PendingModifiedValues bool
BackupRetentionPeriod int32
Expand Down Expand Up @@ -243,8 +243,8 @@ func (r *RDSFetcher) computeInstanceMetrics(dbInstance aws_rds_types.DBInstance,
}

// getLogFilesSize returns the size of all logs on the specified instance
func (r *RDSFetcher) getLogFilesSize(dbidentifier string) (int64, error) {
var filesSize int64
func (r *RDSFetcher) getLogFilesSize(dbidentifier string) (*int64, error) {
var filesSize *int64

input := &aws_rds.DescribeDBLogFilesInput{DBInstanceIdentifier: &dbidentifier}

Expand All @@ -254,15 +254,18 @@ func (r *RDSFetcher) getLogFilesSize(dbidentifier string) (int64, error) {
if err != nil {
var notFoundError *aws_rds_types.DBInstanceNotFoundFault
if errors.As(err, &notFoundError) { // Replica in "creating" status may return notFoundError exception
return 0, nil
return filesSize, nil
}

return 0, fmt.Errorf("can't describe db logs files for %s: %w", dbidentifier, err)
return filesSize, fmt.Errorf("can't describe db logs files for %s: %w", dbidentifier, err)
}

if result != nil {
if result != nil && len(result.DescribeDBLogFiles) > 0 {
if filesSize == nil {
filesSize = new(int64)
}
for _, file := range result.DescribeDBLogFiles {
filesSize += file.Size
*filesSize += file.Size
}
}

Expand Down

0 comments on commit ed61550

Please sign in to comment.