Skip to content

Commit

Permalink
fix: increment and refactor number of hops for routed messages (#12188)
Browse files Browse the repository at this point in the history
Co-authored-by: Stefan Neamtu <[email protected]>
  • Loading branch information
stedfn and Stefan Neamtu authored Dec 20, 2024
1 parent 2e8dcea commit 7e0b24d
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 25 deletions.
2 changes: 1 addition & 1 deletion chain/network/src/network_protocol/borsh_conv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ impl TryFrom<&net::PeerMessage> for mem::PeerMessage {
net::PeerMessage::Routed(r) => mem::PeerMessage::Routed(Box::new(RoutedMessageV2 {
msg: *r,
created_at: None,
num_hops: Some(0),
num_hops: 0,
})),
net::PeerMessage::Disconnect => mem::PeerMessage::Disconnect(mem::Disconnect {
// This flag is used by the disconnecting peer to advise the other peer that there
Expand Down
4 changes: 2 additions & 2 deletions chain/network/src/network_protocol/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -705,7 +705,7 @@ pub struct RoutedMessageV2 {
pub created_at: Option<time::Utc>,
/// Number of peers this routed message travelled through.
/// Doesn't include the peers that are the source and the destination of the message.
pub num_hops: Option<i32>,
pub num_hops: u32,
}

impl std::ops::Deref for RoutedMessageV2 {
Expand Down Expand Up @@ -946,7 +946,7 @@ impl RawRoutedMessage {
body: self.body,
},
created_at: now,
num_hops: Some(0),
num_hops: 0,
}
}
}
25 changes: 14 additions & 11 deletions chain/network/src/network_protocol/network.proto
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import "google/protobuf/timestamp.proto";
message OwnedAccount {
PublicKey account_key = 1; // required
// PeerId of the node owning the account_key.
PublicKey peer_id = 2; // required
PublicKey peer_id = 2; // required
// Timestamp indicates the date of signing - we do not assume the
// nodes' clocks to be synchronized, but for security if the timestamp
// deviation is too large, the handshake will be rejected.
Expand Down Expand Up @@ -158,7 +158,7 @@ message Handshake {
uint32 sender_listen_port = 5;
// Basic info about the NEAR chain that the sender belongs to.
// Sender expects receiver to belong to the same chain.
// In case of mismatch, receiver sends back HandshakeFailure with
// In case of mismatch, receiver sends back HandshakeFailure with
// reason GenesisMismatch.
PeerChainInfo sender_chain_info = 6;
// Edge (sender,receiver) signed by sender, which once signed by
Expand Down Expand Up @@ -227,7 +227,7 @@ message AccountData {
// to a specific peer_id. Then this field won't be necessary.
// Unless we use it instead of AnnounceAccount.
PublicKey peer_id = 5; // required.

PublicKey account_key = 6; // required.

// List of nodes which
Expand All @@ -244,7 +244,7 @@ message AccountData {
uint64 version = 7;
// Time of creation of this AccountData.
// TODO(gprusak): consider expiring the AccountData based on this field.
google.protobuf.Timestamp timestamp = 4;
google.protobuf.Timestamp timestamp = 4;
}

// Message sent whenever the sender learns about new connections
Expand All @@ -261,7 +261,7 @@ message AccountData {
message RoutingTableUpdate {
reserved 3,4;
repeated Edge edges = 1;
// list of known NEAR validator accounts
// list of known NEAR validator accounts
repeated AnnounceAccount accounts = 2;
}

Expand Down Expand Up @@ -373,11 +373,14 @@ message SignedTransaction {
// Wrapper of borsh-encoded RoutedMessage
// https://github.com/near/nearcore/blob/1a4edefd0116f7d1e222bc96569367a02fe64199/chain/network-primitives/src/network_protocol/mod.rs#L295
message RoutedMessage {
// Deprecated
reserved 3;

bytes borsh = 1;
// Timestamp of creating the Routed message by its original author.
google.protobuf.Timestamp created_at = 2;
// Number of peers this routed message travelled through. Doesn't include the peer that created the message.
optional int32 num_hops = 3;
uint32 num_hops = 4;
}

// Disconnect is send by a node before closing a TCP connection.
Expand Down Expand Up @@ -430,7 +433,7 @@ message StateResponse {
}

message SnapshotHostInfo {
PublicKey peer_id = 1;
PublicKey peer_id = 1;
CryptoHash sync_hash = 2;
uint64 epoch_height = 3;
repeated uint64 shards = 4;
Expand Down Expand Up @@ -478,21 +481,21 @@ message PeerMessage {
LastEdge last_edge = 6;
RoutingTableUpdate sync_routing_table = 7;
DistanceVector distance_vector = 28;

UpdateNonceRequest update_nonce_request = 8;
UpdateNonceResponse update_nonce_response = 9;

SyncAccountsData sync_accounts_data = 25;

PeersRequest peers_request = 10;
PeersResponse peers_response = 11;

BlockHeadersRequest block_headers_request = 12;
BlockHeadersResponse block_headers_response = 13;

BlockRequest block_request = 14;
BlockResponse block_response = 15;

SignedTransaction transaction = 16;
RoutedMessage routed = 17;
Disconnect disconnect = 18;
Expand Down
1 change: 1 addition & 0 deletions chain/network/src/peer/peer_actor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1456,6 +1456,7 @@ impl PeerActor {
}
} else {
if msg.decrease_ttl() {
msg.num_hops += 1;
self.network_state.send_message_to_peer(&self.clock, conn.tier, msg);
} else {
#[cfg(test)]
Expand Down
5 changes: 3 additions & 2 deletions chain/network/src/peer_manager/tests/routing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -879,9 +879,9 @@ async fn max_num_peers_limit() {
drop(pm3);
}

/// Test that TTL is handled properly.
/// Test that TTL and number of hops are handled properly.
#[tokio::test]
async fn ttl() {
async fn ttl_and_num_hops() {
abort_on_panic();
let mut rng = make_rng(921853233);
let rng = &mut rng;
Expand Down Expand Up @@ -931,6 +931,7 @@ async fn ttl() {
.await;
assert_eq!(msg.body, got.body);
assert_eq!(msg.ttl - 1, got.ttl);
assert_eq!(msg.num_hops + 1, got.num_hops);
}
}
}
Expand Down
14 changes: 5 additions & 9 deletions chain/network/src/stats/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -444,17 +444,13 @@ fn record_routed_msg_latency(
// The routed message reached its destination. If the number of hops is known, then update the
// corresponding metric.
fn record_routed_msg_hops(msg: &RoutedMessageV2) {
const MAX_NUM_HOPS: i32 = 20;
const MAX_NUM_HOPS: u32 = 20;
// We assume that the number of hops is small.
// As long as the number of hops is below 10, this metric will not consume too much memory.
if let Some(num_hops) = msg.num_hops {
if num_hops >= 0 {
let num_hops = if num_hops > MAX_NUM_HOPS { MAX_NUM_HOPS } else { num_hops };
NETWORK_ROUTED_MSG_NUM_HOPS
.with_label_values(&[msg.body_variant(), &num_hops.to_string()])
.inc();
}
}
let num_hops = std::cmp::min(MAX_NUM_HOPS, msg.num_hops);
NETWORK_ROUTED_MSG_NUM_HOPS
.with_label_values(&[msg.body_variant(), &num_hops.to_string()])
.inc();
}

#[derive(Clone, Copy, strum::AsRefStr)]
Expand Down

0 comments on commit 7e0b24d

Please sign in to comment.