From 818ea10d054ae747b93069358537570dd95fb90a Mon Sep 17 00:00:00 2001 From: Tim Ross Date: Fri, 6 Dec 2024 10:39:51 -0500 Subject: [PATCH] Prevent races counting missed heartbeats Increments the `heartbeatsMissedByAuth` gauge directly instead of keeping a local count and setting the gauge once. This fixes a race caught in https://github.com/gravitational/teleport/actions/runs/11828850190/job/32959672995?pr=48949. ``` ================== WARNING: DATA RACE Write at 0x00c003857da8 by goroutine 48258: github.com/gravitational/teleport/lib/auth.(*Server).runPeriodicOperations.func3.1() /__w/teleport/teleport/lib/auth/auth.go:1471 +0x84 github.com/gravitational/teleport/lib/services.(*UnifiedResourceCache).getRange.func1.1() /__w/teleport/teleport/lib/services/unified_resource.go:239 +0x14d github.com/google/btree.(*node[go.shape.*uint8]).iterate() /go/pkg/mod/github.com/google/btree@v1.1.3/btree_generic.go:522 +0x62c github.com/google/btree.(*BTreeG[go.shape.*uint8]).AscendRange() /go/pkg/mod/github.com/google/btree@v1.1.3/btree_generic.go:752 +0x104 github.com/google/btree.(*BTreeG[*github.com/gravitational/teleport/lib/services.item]).AscendRange-fm() :1 +0x58 github.com/gravitational/teleport/lib/services.(*UnifiedResourceCache).getRange.func1() /__w/teleport/teleport/lib/services/unified_resource.go:230 +0x7a8 github.com/gravitational/teleport/lib/services.(*UnifiedResourceCache).read() /__w/teleport/teleport/lib/services/unified_resource.go:591 +0x2ad github.com/gravitational/teleport/lib/services.(*UnifiedResourceCache).getRange() /__w/teleport/teleport/lib/services/unified_resource.go:215 +0x2d9 github.com/gravitational/teleport/lib/services.(*UnifiedResourceCache).IterateUnifiedResources() /__w/teleport/teleport/lib/services/unified_resource.go:288 +0xe6 github.com/gravitational/teleport/lib/auth.(*Server).runPeriodicOperations.func3() /__w/teleport/teleport/lib/auth/auth.go:1464 +0x237 Previous read at 0x00c003857da8 by goroutine 48219: github.com/gravitational/teleport/lib/auth.(*Server).runPeriodicOperations.func3() /__w/teleport/teleport/lib/auth/auth.go:1489 +0x2d2 Goroutine 48258 (running) created at: github.com/gravitational/teleport/lib/auth.(*Server).runPeriodicOperations() /__w/teleport/teleport/lib/auth/auth.go:1460 +0xf9e github.com/gravitational/teleport/lib/auth.initCluster.gowrap1() /__w/teleport/teleport/lib/auth/init.go:594 +0x33 Goroutine 48219 (finished) created at: github.com/gravitational/teleport/lib/auth.(*Server).runPeriodicOperations() /__w/teleport/teleport/lib/auth/auth.go:1460 +0xf9e github.com/gravitational/teleport/lib/auth.initCluster.gowrap1() /__w/teleport/teleport/lib/auth/init.go:594 +0x33 ================== ``` --- lib/auth/auth.go | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/lib/auth/auth.go b/lib/auth/auth.go index 76cf9729b6b63..a1816d6ca8f67 100644 --- a/lib/auth/auth.go +++ b/lib/auth/auth.go @@ -1266,8 +1266,6 @@ func (a *Server) runPeriodicOperations() { defer ticker.Stop() - missedKeepAliveCount := 0 - // Prevent some periodic operations from running for dashboard tenants. if !services.IsDashboard(*modules.GetModules().Features().ToProto()) { ticker.Push(interval.SubInterval[periodicIntervalKey]{ @@ -1369,7 +1367,7 @@ func (a *Server) runPeriodicOperations() { return false, nil } if services.NodeHasMissedKeepAlives(srv) { - missedKeepAliveCount++ + heartbeatsMissedByAuth.Inc() } return false, nil }, @@ -1385,9 +1383,6 @@ func (a *Server) runPeriodicOperations() { break } } - - // Update prometheus gauge - heartbeatsMissedByAuth.Set(float64(missedKeepAliveCount)) }() case metricsKey: go a.updateAgentMetrics()