From 2f2ef4a38b42ab65a1c45fae7bcd1a264f6cac27 Mon Sep 17 00:00:00 2001 From: istae <14264581+istae@users.noreply.github.com> Date: Thu, 21 Nov 2024 22:39:04 +0300 Subject: [PATCH] fix(kademlia): always connect to bootnodes on startup to identify them (#4910) --- pkg/topology/kademlia/internal/metrics/metrics.go | 12 +++++++++--- .../kademlia/internal/metrics/metrics_test.go | 13 +++++++++++++ pkg/topology/kademlia/kademlia.go | 6 +++++- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/pkg/topology/kademlia/internal/metrics/metrics.go b/pkg/topology/kademlia/internal/metrics/metrics.go index 89fa1b1f51..cc5f954cd8 100644 --- a/pkg/topology/kademlia/internal/metrics/metrics.go +++ b/pkg/topology/kademlia/internal/metrics/metrics.go @@ -38,7 +38,7 @@ func IsBootnode(b bool) RecordOp { return func(cs *Counters) { cs.Lock() defer cs.Unlock() - cs.isBootnode = b + cs.IsBootnode = b } } @@ -147,6 +147,7 @@ type Snapshot struct { LatencyEWMA time.Duration Reachability p2p.ReachabilityStatus Healthy bool + IsBootnode bool } // persistentCounters is a helper struct used for persisting selected counters. @@ -154,6 +155,7 @@ type persistentCounters struct { PeerAddress swarm.Address `json:"peerAddress"` LastSeenTimestamp int64 `json:"lastSeenTimestamp"` ConnTotalDuration time.Duration `json:"connTotalDuration"` + IsBootnode bool `json:"isBootnode"` } // Counters represents a collection of peer metrics @@ -164,7 +166,7 @@ type Counters struct { // Bookkeeping. isLoggedIn bool peerAddress swarm.Address - isBootnode bool + IsBootnode bool // Counters. lastSeenTimestamp int64 @@ -187,6 +189,7 @@ func (cs *Counters) UnmarshalJSON(b []byte) (err error) { cs.peerAddress = val.PeerAddress cs.lastSeenTimestamp = val.LastSeenTimestamp cs.connTotalDuration = val.ConnTotalDuration + cs.IsBootnode = val.IsBootnode cs.Unlock() return nil } @@ -198,6 +201,7 @@ func (cs *Counters) MarshalJSON() ([]byte, error) { PeerAddress: cs.peerAddress, LastSeenTimestamp: cs.lastSeenTimestamp, ConnTotalDuration: cs.connTotalDuration, + IsBootnode: cs.IsBootnode, } cs.Unlock() return json.Marshal(val) @@ -224,6 +228,7 @@ func (cs *Counters) snapshot(t time.Time) *Snapshot { LatencyEWMA: cs.latencyEWMA, Reachability: cs.ReachabilityStatus, Healthy: cs.Healthy, + IsBootnode: cs.IsBootnode, } } @@ -249,6 +254,7 @@ func NewCollector(db *shed.DB) (*Collector, error) { peerAddress: val.PeerAddress, lastSeenTimestamp: val.LastSeenTimestamp, connTotalDuration: val.ConnTotalDuration, + IsBootnode: val.IsBootnode, }) } @@ -321,7 +327,7 @@ type ExcludeOp func(*Counters) bool // IsBootnode is used to filter bootnode peers. func Bootnode() ExcludeOp { return func(cs *Counters) bool { - return cs.isBootnode + return cs.IsBootnode } } diff --git a/pkg/topology/kademlia/internal/metrics/metrics_test.go b/pkg/topology/kademlia/internal/metrics/metrics_test.go index 5c806b1d73..4fb7e33023 100644 --- a/pkg/topology/kademlia/internal/metrics/metrics_test.go +++ b/pkg/topology/kademlia/internal/metrics/metrics_test.go @@ -116,6 +116,18 @@ func TestPeerMetricsCollector(t *testing.T) { t.Fatalf("Snapshot(%q, ...): session connection duration counter mismatch: have %q; want %q", addr, have, want) } + // Bootnode. + mc.Record(addr, metrics.IsBootnode(false)) + ss = snapshot(t, mc, t2, addr) + if have, want := ss.IsBootnode, false; have != want { + t.Fatalf("Snapshot(%q, ...): latency mismatch: have %v; want %v", addr, have, want) + } + mc.Record(addr, metrics.IsBootnode(true)) + ss = snapshot(t, mc, t2, addr) + if have, want := ss.IsBootnode, true; have != want { + t.Fatalf("Snapshot(%q, ...): is bootnode mismatch: have %v; want %v", addr, have, want) + } + // Latency. mc.Record(addr, metrics.PeerLatency(t4)) ss = snapshot(t, mc, t2, addr) @@ -188,6 +200,7 @@ func TestPeerMetricsCollector(t *testing.T) { want = &metrics.Snapshot{ LastSeenTimestamp: ss.LastSeenTimestamp, ConnectionTotalDuration: 2 * ss.ConnectionTotalDuration, // 2x because we've already logout with t3 and login with t1 again. + IsBootnode: true, } if diff := cmp.Diff(have, want); diff != "" { t.Fatalf("unexpected snapshot difference:\n%s", diff) diff --git a/pkg/topology/kademlia/kademlia.go b/pkg/topology/kademlia/kademlia.go index 5f23b4ec2f..a310e47b6c 100644 --- a/pkg/topology/kademlia/kademlia.go +++ b/pkg/topology/kademlia/kademlia.go @@ -744,7 +744,11 @@ func (k *Kad) balancedSlotPeers(pseudoAddr swarm.Address, peers []swarm.Address, return ret } -func (k *Kad) Start(_ context.Context) error { +func (k *Kad) Start(ctx context.Context) error { + + // always discover bootnodes on startup to exclude them from protocol requests + k.connectBootNodes(ctx) + k.wg.Add(1) go k.manage()