Skip to content

Commit

Permalink
Prevent races counting missed heartbeats (#49910)
Browse files Browse the repository at this point in the history
Increments the `heartbeatsMissedByAuth` gauge directly instead of
keeping a local count and setting the gauge once. This fixes a
race caught in https://github.com/gravitational/teleport/actions/runs/11828850190/job/32959672995?pr=48949.

```
==================
WARNING: DATA RACE
Write at 0x00c003857da8 by goroutine 48258:
  github.com/gravitational/teleport/lib/auth.(*Server).runPeriodicOperations.func3.1()
      /__w/teleport/teleport/lib/auth/auth.go:1471 +0x84
  github.com/gravitational/teleport/lib/services.(*UnifiedResourceCache).getRange.func1.1()
      /__w/teleport/teleport/lib/services/unified_resource.go:239 +0x14d
  github.com/google/btree.(*node[go.shape.*uint8]).iterate()
      /go/pkg/mod/github.com/google/btree@v1.1.3/btree_generic.go:522 +0x62c
  github.com/google/btree.(*BTreeG[go.shape.*uint8]).AscendRange()
      /go/pkg/mod/github.com/google/btree@v1.1.3/btree_generic.go:752 +0x104
  github.com/google/btree.(*BTreeG[*github.com/gravitational/teleport/lib/services.item]).AscendRange-fm()
      <autogenerated>:1 +0x58
  github.com/gravitational/teleport/lib/services.(*UnifiedResourceCache).getRange.func1()
      /__w/teleport/teleport/lib/services/unified_resource.go:230 +0x7a8
  github.com/gravitational/teleport/lib/services.(*UnifiedResourceCache).read()
      /__w/teleport/teleport/lib/services/unified_resource.go:591 +0x2ad
  github.com/gravitational/teleport/lib/services.(*UnifiedResourceCache).getRange()
      /__w/teleport/teleport/lib/services/unified_resource.go:215 +0x2d9
  github.com/gravitational/teleport/lib/services.(*UnifiedResourceCache).IterateUnifiedResources()
      /__w/teleport/teleport/lib/services/unified_resource.go:288 +0xe6
  github.com/gravitational/teleport/lib/auth.(*Server).runPeriodicOperations.func3()
      /__w/teleport/teleport/lib/auth/auth.go:1464 +0x237

Previous read at 0x00c003857da8 by goroutine 48219:
  github.com/gravitational/teleport/lib/auth.(*Server).runPeriodicOperations.func3()
      /__w/teleport/teleport/lib/auth/auth.go:1489 +0x2d2

Goroutine 48258 (running) created at:
  github.com/gravitational/teleport/lib/auth.(*Server).runPeriodicOperations()
      /__w/teleport/teleport/lib/auth/auth.go:1460 +0xf9e
  github.com/gravitational/teleport/lib/auth.initCluster.gowrap1()
      /__w/teleport/teleport/lib/auth/init.go:594 +0x33

Goroutine 48219 (finished) created at:
  github.com/gravitational/teleport/lib/auth.(*Server).runPeriodicOperations()
      /__w/teleport/teleport/lib/auth/auth.go:1460 +0xf9e
  github.com/gravitational/teleport/lib/auth.initCluster.gowrap1()
      /__w/teleport/teleport/lib/auth/init.go:594 +0x33
==================
```
  • Loading branch information
rosstimothy authored Dec 6, 2024
1 parent f7ebb21 commit 48a0562
Showing 1 changed file with 1 addition and 6 deletions.
7 changes: 1 addition & 6 deletions lib/auth/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -1266,8 +1266,6 @@ func (a *Server) runPeriodicOperations() {

defer ticker.Stop()

- missedKeepAliveCount := 0

// Prevent some periodic operations from running for dashboard tenants.
if !services.IsDashboard(*modules.GetModules().Features().ToProto()) {
ticker.Push(interval.SubInterval[periodicIntervalKey]{
Expand Down Expand Up @@ -1369,7 +1367,7 @@ func (a *Server) runPeriodicOperations() {
return false, nil
}
if services.NodeHasMissedKeepAlives(srv) {
- missedKeepAliveCount++
+ heartbeatsMissedByAuth.Inc()
}
return false, nil
},
Expand All @@ -1385,9 +1383,6 @@ func (a *Server) runPeriodicOperations() {
break
}
}

- // Update prometheus gauge
- heartbeatsMissedByAuth.Set(float64(missedKeepAliveCount))
}()
case metricsKey:
go a.updateAgentMetrics()
Expand Down

0 comments on commit 48a0562

Please sign in to comment.