Skip to content

Commit

Permalink
update with main
Browse files Browse the repository at this point in the history
  • Loading branch information
wildum committed Sep 17, 2024
2 parents b8e80ee + 34d850e commit e6451e4
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 34 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ v1.4.0-rc.0

- `prometheus.exporter.cloudwatch` can now collect metrics from custom namespaces via the `custom_namespace` block. (@ptodev)

- Add the label `cluster_name` to the metric `alloy_config_hash` when the flag `cluster.name` is set, to help differentiate between
configs from the same Alloy cluster or different Alloy clusters. (@wildum)

### Bugfixes

- Fix a bug where the scrape timeout for a Probe resource was not applied, overwriting the scrape interval instead. (@morremeyer, @stefanandres)
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ require (
github.com/grafana/alloy/syntax v0.1.0
github.com/grafana/beyla v1.8.2
github.com/grafana/catchpoint-prometheus-exporter v0.0.0-20240606062944-e55f3668661d
github.com/grafana/ckit v0.0.0-20240624165704-36f3407a8eaa
github.com/grafana/ckit v0.0.0-20240913130805-0ee98bafad88
github.com/grafana/cloudflare-go v0.0.0-20230110200409-c627cf6792f2
github.com/grafana/dskit v0.0.0-20240104111617-ea101a3b86eb
github.com/grafana/go-gelf/v2 v2.0.1
Expand Down Expand Up @@ -154,7 +154,7 @@ require (
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.66.0
github.com/prometheus-operator/prometheus-operator/pkg/client v0.66.0
github.com/prometheus/blackbox_exporter v0.24.1-0.20230623125439-bd22efa1c900
github.com/prometheus/client_golang v1.20.2
github.com/prometheus/client_golang v1.20.3
github.com/prometheus/client_model v0.6.1
github.com/prometheus/common v0.55.0
github.com/prometheus/common/sigv4 v0.1.0
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1196,8 +1196,8 @@ github.com/grafana/cadvisor v0.0.0-20240729082359-1f04a91701e2 h1:ju6EcY2aEobeBg
github.com/grafana/cadvisor v0.0.0-20240729082359-1f04a91701e2/go.mod h1:8sLW/G7rcFe1CKMaA4pYT4mX3P1xQVGqM6luzEzx/2g=
github.com/grafana/catchpoint-prometheus-exporter v0.0.0-20240606062944-e55f3668661d h1:6sNPBwOokfCxAyateu7iLdtyWDUzaLLShPs7F4eTLfw=
github.com/grafana/catchpoint-prometheus-exporter v0.0.0-20240606062944-e55f3668661d/go.mod h1:aGPSALDAkw18nn8M7gumhM/MbJG+zgOA3jNWTwPYtLg=
github.com/grafana/ckit v0.0.0-20240624165704-36f3407a8eaa h1:3rdc/z801roM6ky8cT8wz4tahQWkTxJ4VAmzANZe8qQ=
github.com/grafana/ckit v0.0.0-20240624165704-36f3407a8eaa/go.mod h1:k21VjCNs7gj1pAV80wb1577fVRePk51Hek5QUMEvKE0=
github.com/grafana/ckit v0.0.0-20240913130805-0ee98bafad88 h1:GgbYRGz2+/Vgz8/lk19Ht8TQDsAudl51Qenuw+COs5k=
github.com/grafana/ckit v0.0.0-20240913130805-0ee98bafad88/go.mod h1:dDqep1rKTbq2ppMYEgIM88GaPXHp4i6Cp3qantiloA0=
github.com/grafana/cloudflare-go v0.0.0-20230110200409-c627cf6792f2 h1:qhugDMdQ4Vp68H0tp/0iN17DM2ehRo1rLEdOFe/gB8I=
github.com/grafana/cloudflare-go v0.0.0-20230110200409-c627cf6792f2/go.mod h1:w/aiO1POVIeXUQyl0VQSZjl5OAGDTL5aX+4v0RA1tcw=
github.com/grafana/dskit v0.0.0-20240104111617-ea101a3b86eb h1:AWE6+kvtE18HP+lRWNUCyvymyrFSXs6TcS2vXIXGIuw=
Expand Down Expand Up @@ -2149,8 +2149,8 @@ github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP
github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0=
github.com/prometheus/client_golang v1.11.1/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0=
github.com/prometheus/client_golang v1.12.1/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY=
github.com/prometheus/client_golang v1.20.2 h1:5ctymQzZlyOON1666svgwn3s6IKWgfbjsejTMiXIyjg=
github.com/prometheus/client_golang v1.20.2/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
github.com/prometheus/client_golang v1.20.3 h1:oPksm4K8B+Vt35tUhw6GbSNSgVlVSBH0qELP/7u83l4=
github.com/prometheus/client_golang v1.20.3/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
github.com/prometheus/client_model v0.0.0-20171117100541-99fa1f4be8e5/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
Expand Down
5 changes: 1 addition & 4 deletions internal/alloycli/cmd_run.go
Original file line number Diff line number Diff line change
Expand Up @@ -321,8 +321,7 @@ func (fr *alloyRun) Run(configPath string) error {
ready = f.Ready
reload = func() (*alloy_runtime.Source, error) {
alloySource, err := loadAlloySource(configPath, fr.configFormat, fr.configBypassConversionErrors, fr.configExtraArgs)
defer instrumentation.InstrumentSHA256(alloySource.SHA256())
defer instrumentation.InstrumentLoad(err == nil)
defer instrumentation.InstrumentConfig(err == nil, alloySource.SHA256(), fr.clusterName)

if err != nil {
return nil, fmt.Errorf("reading config path %q: %w", configPath, err)
Expand Down Expand Up @@ -476,8 +475,6 @@ func loadAlloySource(path string, converterSourceFormat string, converterBypassE
}
}

instrumentation.InstrumentConfig(bb)

return alloy_runtime.ParseSource(path, bb)
}

Expand Down
31 changes: 13 additions & 18 deletions internal/static/config/instrumentation/config_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,20 @@ type configMetrics struct {
var confMetrics *configMetrics
var configMetricsInitializer sync.Once

func initializeConfigMetrics() {
confMetrics = newConfigMetrics()
func initializeConfigMetrics(clusterName string) {
confMetrics = newConfigMetrics(clusterName)
}

func newConfigMetrics() *configMetrics {
func newConfigMetrics(clusterName string) *configMetrics {
var m configMetrics

m.configHash = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Name: "alloy_config_hash",
Help: "Hash of the currently active config file.",
ConstLabels: prometheus.Labels{
"cluster_name": clusterName,
},
},
[]string{"sha256"},
)
Expand All @@ -49,27 +52,19 @@ func newConfigMetrics() *configMetrics {
return &m
}

// Create a sha256 hash of the config before expansion and expose it via
// the alloy_config_hash metric.
func InstrumentConfig(buf []byte) {
InstrumentSHA256(sha256.Sum256(buf))
}

// InstrumentSHA256 stores the provided hash to the alloy_config_hash metric.
func InstrumentSHA256(hash [sha256.Size]byte) {
configMetricsInitializer.Do(initializeConfigMetrics)
confMetrics.configHash.Reset()
confMetrics.configHash.WithLabelValues(fmt.Sprintf("%x", hash)).Set(1)
}
func InstrumentConfig(success bool, hash [sha256.Size]byte, clusterName string) {
configMetricsInitializer.Do(func() {
initializeConfigMetrics(clusterName)
})

// Expose metrics for load success / failures.
func InstrumentLoad(success bool) {
configMetricsInitializer.Do(initializeConfigMetrics)
if success {
confMetrics.configLoadSuccessSeconds.SetToCurrentTime()
confMetrics.configLoadSuccess.Set(1)
} else {
confMetrics.configLoadSuccess.Set(0)
confMetrics.configLoadFailures.Inc()
}

confMetrics.configHash.Reset()
confMetrics.configHash.WithLabelValues(fmt.Sprintf("%x", hash)).Set(1)
}
12 changes: 6 additions & 6 deletions operations/alloy-mixin/alerts/clustering.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ local alert = import './utils/alert.jsonnet';
alert.newRule(
'ClusterNotConverging',
if enableK8sCluster then
'stddev by (cluster, namespace, job) (sum without (state) (cluster_node_peers)) != 0'
'stddev by (cluster, namespace, job, cluster_name) (sum without (state) (cluster_node_peers)) != 0'
else
'stddev by (job) (sum without (state) (cluster_node_peers)) != 0',
'Cluster is not converging.',
Expand All @@ -25,8 +25,8 @@ local alert = import './utils/alert.jsonnet';
// metrics.
if enableK8sCluster then |||
sum without (state) (cluster_node_peers) !=
on (cluster, namespace, job) group_left
count by (cluster, namespace, job) (cluster_node_info)
on (cluster, namespace, job, cluster_name) group_left
count by (cluster, namespace, job, cluster_name) (cluster_node_info)
||| else |||
sum without (state) (cluster_node_peers) !=
on (job) group_left
Expand All @@ -53,7 +53,7 @@ local alert = import './utils/alert.jsonnet';
alert.newRule(
'ClusterNodeNameConflict',
if enableK8sCluster then
'sum by (cluster, namespace, job) (rate(cluster_node_gossip_received_events_total{event="node_conflict"}[2m])) > 0'
'sum by (cluster, namespace, job, cluster_name) (rate(cluster_node_gossip_received_events_total{event="node_conflict"}[2m])) > 0'
else
'sum by (job) (rate(cluster_node_gossip_received_events_total{event="node_conflict"}[2m])) > 0'
,
Expand All @@ -66,7 +66,7 @@ local alert = import './utils/alert.jsonnet';
alert.newRule(
'ClusterNodeStuckTerminating',
if enableK8sCluster then
'sum by (cluster, namespace, job, instance) (cluster_node_peers{state="terminating"}) > 0'
'sum by (cluster, namespace, job, instance, cluster_name) (cluster_node_peers{state="terminating"}) > 0'
else
'sum by (job, instance) (cluster_node_peers{state="terminating"}) > 0'
,
Expand All @@ -80,7 +80,7 @@ local alert = import './utils/alert.jsonnet';
'ClusterConfigurationDrift',
if enableK8sCluster then |||
count without (sha256) (
max by (cluster, namespace, sha256, job) (alloy_config_hash and on(cluster, namespace, job) cluster_node_info)
max by (cluster, namespace, sha256, job, cluster_name) (alloy_config_hash and on(cluster, namespace, job, cluster_name) cluster_node_info)
) > 1
||| else |||
count without (sha256) (
Expand Down

0 comments on commit e6451e4

Please sign in to comment.