From 8c4bc5b3f15ce0d137a8b4e0298f9d1304291d96 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Wed, 26 Jul 2023 12:21:19 +0300 Subject: [PATCH 1/2] Fix sending msg queue queries --- validator/collator-node.cpp | 52 +++++++++++++------------- validator/impl/out-msg-queue-proof.cpp | 28 +++++++------- validator/manager.cpp | 3 +- 3 files changed, 43 insertions(+), 40 deletions(-) diff --git a/validator/collator-node.cpp b/validator/collator-node.cpp index 2cc06b340..f5900e0a9 100644 --- a/validator/collator-node.cpp +++ b/validator/collator-node.cpp @@ -74,33 +74,35 @@ void CollatorNode::new_masterchain_block_notification(td::Ref last_masterchain_block_ = state->get_block_id(); last_top_blocks_.clear(); last_top_blocks_[ShardIdFull{masterchainId, shardIdAll}] = last_masterchain_block_; - std::vector next_shards; - if (can_collate_shard(ShardIdFull(masterchainId))) { - next_shards.push_back(ShardIdFull(masterchainId)); - } - for (const auto& desc : state->get_shards()) { - last_top_blocks_[desc->shard()] = desc->top_block_id(); - ShardIdFull shard = desc->shard(); - if (desc->before_split()) { - if (can_collate_shard(shard_child(shard, true))) { - next_shards.push_back(shard_child(shard, true)); - } - if (can_collate_shard(shard_child(shard, false))) { - next_shards.push_back(shard_child(shard, false)); - } - } else if (desc->before_merge()) { - if (is_left_child(shard) && can_collate_shard(shard_parent(shard))) { - next_shards.push_back(shard_parent(shard)); + if (state->get_unix_time() > (td::uint32)td::Clocks::system() - 20) { + std::vector next_shards; + if (can_collate_shard(ShardIdFull(masterchainId))) { + next_shards.push_back(ShardIdFull(masterchainId)); + } + for (const auto& desc : state->get_shards()) { + last_top_blocks_[desc->shard()] = desc->top_block_id(); + ShardIdFull shard = desc->shard(); + if (desc->before_split()) { + if (can_collate_shard(shard_child(shard, true))) { + next_shards.push_back(shard_child(shard, true)); + } + if (can_collate_shard(shard_child(shard, false))) { + next_shards.push_back(shard_child(shard, false)); + } + } else if (desc->before_merge()) { + if (is_left_child(shard) && can_collate_shard(shard_parent(shard))) { + next_shards.push_back(shard_parent(shard)); + } + } else if (can_collate_shard(shard)) { + next_shards.push_back(shard); } - } else if (can_collate_shard(shard)) { - next_shards.push_back(shard); } - } - for (const ShardIdFull& shard : next_shards) { - for (const auto& neighbor : last_top_blocks_) { - if (neighbor.first != shard && block::ShardConfig::is_neighbor(shard, neighbor.first)) { - td::actor::send_closure(manager_, &ValidatorManager::wait_out_msg_queue_proof, neighbor.second, shard, 0, - td::Timestamp::in(10.0), [](td::Ref) {}); + for (const ShardIdFull& shard : next_shards) { + for (const auto& neighbor : last_top_blocks_) { + if (neighbor.first != shard && block::ShardConfig::is_neighbor(shard, neighbor.first)) { + td::actor::send_closure(manager_, &ValidatorManager::wait_out_msg_queue_proof, neighbor.second, shard, 0, + td::Timestamp::in(10.0), [](td::Ref) {}); + } } } } diff --git a/validator/impl/out-msg-queue-proof.cpp b/validator/impl/out-msg-queue-proof.cpp index cc8c7e179..52c59afa0 100644 --- a/validator/impl/out-msg-queue-proof.cpp +++ b/validator/impl/out-msg-queue-proof.cpp @@ -280,20 +280,20 @@ void WaitOutMsgQueueProof::run_local_cont() { } void WaitOutMsgQueueProof::run_net() { - auto P = - td::PromiseCreator::lambda([SelfId = actor_id(this), block_id = block_id_](td::Result> R) { - if (R.is_error()) { - if (R.error().code() == ErrorCode::notready) { - LOG(DEBUG) << "failed to get msg queue for " << block_id.to_str() << " from net: " << R.move_as_error(); - } else { - LOG(WARNING) << "failed to get msg queue for " << block_id.to_str() << " from net: " << R.move_as_error(); - } - delay_action([SelfId]() mutable { td::actor::send_closure(SelfId, &WaitOutMsgQueueProof::run_net); }, - td::Timestamp::in(0.1)); - } else { - td::actor::send_closure(SelfId, &WaitOutMsgQueueProof::finish_query, R.move_as_ok()); - } - }); + auto P = td::PromiseCreator::lambda([SelfId = actor_id(this), block_id = block_id_, + retry_after = td::Timestamp::in(0.5)](td::Result> R) { + if (R.is_error()) { + if (R.error().code() == ErrorCode::notready) { + LOG(DEBUG) << "failed to get msg queue for " << block_id.to_str() << " from net: " << R.move_as_error(); + } else { + LOG(WARNING) << "failed to get msg queue for " << block_id.to_str() << " from net: " << R.move_as_error(); + } + delay_action([SelfId]() mutable { td::actor::send_closure(SelfId, &WaitOutMsgQueueProof::run_net); }, + retry_after); + } else { + td::actor::send_closure(SelfId, &WaitOutMsgQueueProof::finish_query, R.move_as_ok()); + } + }); td::actor::send_closure(manager_, &ValidatorManager::send_get_out_msg_queue_proof_request, block_id_, dst_shard_, limits_, priority_, std::move(P)); diff --git a/validator/manager.cpp b/validator/manager.cpp index 638b741f9..d419f80ee 100644 --- a/validator/manager.cpp +++ b/validator/manager.cpp @@ -1839,7 +1839,8 @@ void ValidatorManagerImpl::new_masterchain_block() { for (auto &c : collator_nodes_) { td::actor::send_closure(c.second.actor, &CollatorNode::new_masterchain_block_notification, last_masterchain_state_); } - if (opts_->validator_mode() == ValidatorManagerOptions::validator_lite_shards && validating_masterchain()) { + if (opts_->validator_mode() == ValidatorManagerOptions::validator_lite_shards && validating_masterchain() && + last_masterchain_state_->get_unix_time() > (td::uint32)td::Clocks::system() - 20) { // Prepare neighboours' queues for collating masterchain for (const auto &desc : last_masterchain_state_->get_shards()) { wait_out_msg_queue_proof(desc->top_block_id(), ShardIdFull(masterchainId), 0, td::Timestamp::in(10.0), From da137fecf55b670d79df25390096396264da380c Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Wed, 26 Jul 2023 13:05:16 +0300 Subject: [PATCH 2/2] Extra shard overlay stats --- overlay/overlay.cpp | 7 +++- overlay/overlays.h | 3 ++ tl/generate/scheme/ton_api.tl | 7 +++- tl/generate/scheme/ton_api.tlo | Bin 90912 -> 91588 bytes .../validator-engine-console-query.cpp | 14 ++++++-- validator/full-node-shard.cpp | 34 ++++++++++++++++++ validator/full-node-shard.hpp | 1 + 7 files changed, 62 insertions(+), 4 deletions(-) diff --git a/overlay/overlay.cpp b/overlay/overlay.cpp index c4cf8428b..88518eb7e 100644 --- a/overlay/overlay.cpp +++ b/overlay/overlay.cpp @@ -661,7 +661,12 @@ void OverlayImpl::get_stats(td::Promisestats_.push_back( create_tl_object("neighbours_cnt", PSTRING() << neighbours_.size())); - promise.set_value(std::move(res)); + callback_->get_stats_extra([promise = std::move(promise), res = std::move(res)](td::Result R) mutable { + if (R.is_ok()) { + res->extra_ = R.move_as_ok(); + } + promise.set_value(std::move(res)); + }); } } // namespace overlay diff --git a/overlay/overlays.h b/overlay/overlays.h index cf153c3a5..ee225c3b8 100644 --- a/overlay/overlays.h +++ b/overlay/overlays.h @@ -170,6 +170,9 @@ class Overlays : public td::actor::Actor { td::Promise promise) { promise.set_value(td::Unit()); } + virtual void get_stats_extra(td::Promise promise) { + promise.set_result(""); + } virtual ~Callback() = default; }; diff --git a/tl/generate/scheme/ton_api.tl b/tl/generate/scheme/ton_api.tl index 5d636e9c3..0a4134e5f 100644 --- a/tl/generate/scheme/ton_api.tl +++ b/tl/generate/scheme/ton_api.tl @@ -656,9 +656,14 @@ engine.validator.dhtServersStatus servers:(vector engine.validator.dhtServerStat engine.validator.overlayStatsNode adnl_id:int256 ip_addr:string bdcst_errors:int fec_bdcst_errors:int last_in_query:int last_out_query:int t_out_bytes:int t_in_bytes:int t_out_pckts:int t_in_pckts:int = engine.validator.OverlayStatsNode; -engine.validator.overlayStats overlay_id:int256 overlay_id_full:PublicKey adnl_id:int256 scope:string nodes:(vector engine.validator.overlayStatsNode) stats:(vector engine.validator.oneStat) = engine.validator.OverlayStats; +engine.validator.overlayStats overlay_id:int256 overlay_id_full:PublicKey adnl_id:int256 scope:string nodes:(vector engine.validator.overlayStatsNode) stats:(vector engine.validator.oneStat) extra:string = engine.validator.OverlayStats; engine.validator.overlaysStats overlays:(vector engine.validator.overlayStats) = engine.validator.OverlaysStats; +engine.validator.shardOverlayStats.neighbour id:string proto_verison:int capabilities:long + roundtrip:double unreliability:double has_state:string = engine.validator.shardOverlayStats.Neighbour; +engine.validator.shardOverlayStats shard:string mode:string + neighbours:(vector engine.validator.shardOverlayStats.neighbour) = engine.validator.ShardOverlayStats; + engine.validator.onePerfTimerStat time:int min:double avg:double max:double = engine.validator.OnePerfTimerStat; engine.validator.perfTimerStatsByName name:string stats:(vector engine.validator.OnePerfTimerStat) = engine.validator.PerfTimerStatsByName; engine.validator.perfTimerStats stats:(vector engine.validator.PerfTimerStatsByName) = engine.validator.PerfTimerStats; diff --git a/tl/generate/scheme/ton_api.tlo b/tl/generate/scheme/ton_api.tlo index 60e942922e5b98a51f11eb69ef9e47aaf47f22ce..974e5ad6d42377302c711e0636525175d496910f 100644 GIT binary patch delta 407 zcmZ2*jP=NAR^CUm^{p77fOjMBWpP$M59vn zu`;+Mv80%Raq>YX@y&N66Bt?7T0Zg9nY?$IxG0hWJ-^h<^o*qZ(xS;1`K348NT*0L zPTIWCBsqr#Wc*}~g$kS?#?50wKJ1eX7k=lFes*7zHMOFoD3O6-@`PD3V0D{K7HKd+ zSP4sxFhSh1XPE@DRSX~pg3Mu_Tv#GLdBq|D7M9%nl*t#)%W!k$!JJV%Io@AZ0_0Xk zsAr3u*>pH;+YW-XZM&T46;w)MgaAJ^iTRM0kVXbx1cD$BtIUgG_yE=vSFF@ zNLXKqhaNS;evw0NxLf5&!@I delta 124 zcmX?dnsvc3R^CUm^{p77fO{kFWpUP@zZTq=ovbJ!yO~Mi7UO0asRYK&C!|v(87FMk zHBHWe%70uW%gw+5QhW25kPq8r#{JrpUo1SsQ{>F1!(rQY5G1qt!6FSN7BGuv=@F*Q OQ&zArZ$7i;#|{9nj5Zqp diff --git a/validator-engine-console/validator-engine-console-query.cpp b/validator-engine-console/validator-engine-console-query.cpp index d0f6ab290..98a4d324c 100644 --- a/validator-engine-console/validator-engine-console-query.cpp +++ b/validator-engine-console/validator-engine-console-query.cpp @@ -934,8 +934,18 @@ td::Status GetOverlaysStatsJsonQuery::receive(td::BufferSlice data) { sb << " \"" << t->key_ << "\": \"" << t->value_ << "\""; } - sb << "\n }\n"; - sb << "}\n"; + sb << "\n }"; + if (!s->extra_.empty()) { + sb << ",\n \"extra\": "; + for (char c : s->extra_) { + if (c == '\n') { + sb << "\n "; + } else { + sb << c; + } + } + } + sb << "\n}\n"; } sb << "]\n"; sb << std::flush; diff --git a/validator/full-node-shard.cpp b/validator/full-node-shard.cpp index 3c7b80638..a99197546 100644 --- a/validator/full-node-shard.cpp +++ b/validator/full-node-shard.cpp @@ -39,6 +39,9 @@ #include "td/utils/Random.h" #include "common/delay.h" +#include "td/utils/JsonBuilder.h" +#include "tl/tl_json.h" +#include "auto/tl/ton_api_json.h" namespace ton { @@ -99,6 +102,9 @@ void FullNodeShardImpl::create_overlay() { td::Promise promise) override { td::actor::send_closure(node_, &FullNodeShardImpl::check_broadcast, src, std::move(data), std::move(promise)); } + void get_stats_extra(td::Promise promise) override { + td::actor::send_closure(node_, &FullNodeShardImpl::get_stats_extra, std::move(promise)); + } Callback(td::actor::ActorId node) : node_(node) { } @@ -1290,6 +1296,34 @@ void FullNodeShardImpl::ping_neighbours() { } } +void FullNodeShardImpl::get_stats_extra(td::Promise promise) { + auto res = create_tl_object(); + res->shard_ = shard_.to_str(); + switch (mode_) { + case active: + res->mode_ = "active"; + break; + case active_temp: + res->mode_ = "active_temp"; + break; + case inactive: + res->mode_ = "inactive"; + break; + } + for (const auto &p : neighbours_) { + const auto &n = p.second; + auto f = create_tl_object(); + f->id_ = n.adnl_id.bits256_value().to_hex(); + f->proto_verison_ = n.proto_version; + f->capabilities_ = n.capabilities; + f->roundtrip_ = n.roundtrip; + f->unreliability_ = n.unreliability; + f->has_state_ = (n.has_state_known ? (n.has_state ? "true" : "false") : "undefined"); + res->neighbours_.push_back(std::move(f)); + } + promise.set_result(td::json_encode(td::ToJson(*res), true)); +} + FullNodeShardImpl::FullNodeShardImpl(ShardIdFull shard, PublicKeyHash local_id, adnl::AdnlNodeIdShort adnl_id, FileHash zero_state_file_hash, FullNodeConfig config, td::actor::ActorId keyring, td::actor::ActorId adnl, diff --git a/validator/full-node-shard.hpp b/validator/full-node-shard.hpp index b2be91081..af16ccc91 100644 --- a/validator/full-node-shard.hpp +++ b/validator/full-node-shard.hpp @@ -164,6 +164,7 @@ class FullNodeShardImpl : public FullNodeShard { void process_broadcast(PublicKeyHash src, ton_api::tonNode_newShardBlockBroadcast &query); void receive_broadcast(PublicKeyHash src, td::BufferSlice query); void check_broadcast(PublicKeyHash src, td::BufferSlice query, td::Promise promise); + void get_stats_extra(td::Promise promise); void remove_neighbour(adnl::AdnlNodeIdShort id); void send_ihr_message(td::BufferSlice data) override;