diff --git a/create-hardfork/create-hardfork.cpp b/create-hardfork/create-hardfork.cpp index 42d2914a9..72bffae56 100644 --- a/create-hardfork/create-hardfork.cpp +++ b/create-hardfork/create-hardfork.cpp @@ -279,6 +279,9 @@ class HardforkCreator : public td::actor::Actor { void new_key_block(ton::validator::BlockHandle handle) override { } + void send_validator_telemetry(ton::PublicKeyHash key, + ton::tl_object_ptr telemetry) override { + } }; td::actor::send_closure(validator_manager_, &ton::validator::ValidatorManagerInterface::install_callback, diff --git a/crypto/block/block.tlb b/crypto/block/block.tlb index 5eddbde38..aa7c0d3f9 100644 --- a/crypto/block/block.tlb +++ b/crypto/block/block.tlb @@ -801,11 +801,6 @@ misbehaviour_punishment_config_v1#01 = MisbehaviourPunishmentConfig; _ MisbehaviourPunishmentConfig = ConfigParam 40; -// collator_nodes: each collator is (workchain:int32 shard:uint64 adnl_id:uint256) -collator_info#00 = CollatorInfo; -colator_config#a0 full_collated_data:Bool collator_nodes:(HashmapE 352 CollatorInfo) = CollatorConfig; -_ CollatorConfig = ConfigParam 41; - size_limits_config#01 max_msg_bits:uint32 max_msg_cells:uint32 max_library_cells:uint32 max_vm_data_depth:uint16 max_ext_msg_size:uint32 max_ext_msg_depth:uint16 = SizeLimitsConfig; size_limits_config_v2#02 max_msg_bits:uint32 max_msg_cells:uint32 max_library_cells:uint32 max_vm_data_depth:uint16 diff --git a/crypto/block/mc-config.cpp b/crypto/block/mc-config.cpp index 0a8c0c7b1..56ee85ae3 100644 --- a/crypto/block/mc-config.cpp +++ b/crypto/block/mc-config.cpp @@ -2339,28 +2339,4 @@ td::optional PrecompiledContractsConfig::g return c; } -CollatorConfig Config::get_collator_config(bool need_collator_nodes) const { - CollatorConfig collator_config; - gen::CollatorConfig::Record rec; - auto cell = get_config_param(41, -41); - if (cell.is_null() || !tlb::unpack_cell(std::move(cell), rec)) { - return collator_config; - } - collator_config.full_collated_data = rec.full_collated_data; 
- if (need_collator_nodes) { - vm::Dictionary dict{rec.collator_nodes->prefetch_ref(), 32 + 64 + 256}; - dict.check_for_each([&](Ref value, td::ConstBitPtr key, int n) { - CHECK(n == 32 + 64 + 256); - auto workchain = (td::int32)key.get_int(32); - key.advance(32); - td::uint64 shard = key.get_uint(64); - key.advance(64); - td::Bits256 adnl_id(key); - collator_config.collator_nodes.push_back({ton::ShardIdFull(workchain, shard), adnl_id}); - return true; - }); - } - return collator_config; -} - } // namespace block diff --git a/crypto/block/mc-config.h b/crypto/block/mc-config.h index 94de329bf..3c4421da5 100644 --- a/crypto/block/mc-config.h +++ b/crypto/block/mc-config.h @@ -543,11 +543,6 @@ struct CollatorNodeDescr { ton::NodeIdShort adnl_id; }; -struct CollatorConfig { - bool full_collated_data = false; - std::vector collator_nodes; -}; - class Config { enum { default_mc_catchain_lifetime = 200, @@ -664,7 +659,6 @@ class Config { std::vector compute_validator_set(ton::ShardIdFull shard, ton::UnixTime time, ton::CatchainSeqno cc_seqno) const; std::vector compute_total_validator_set(int next) const; - CollatorConfig get_collator_config(bool need_collator_nodes) const; td::Result get_size_limits_config() const; static td::Result do_get_size_limits_config(td::Ref cs); std::unique_ptr get_suspended_addresses(ton::UnixTime now) const; diff --git a/overlay/overlay-manager.cpp b/overlay/overlay-manager.cpp index ff6ad2c99..f24c6cbc2 100644 --- a/overlay/overlay-manager.cpp +++ b/overlay/overlay-manager.cpp @@ -68,6 +68,9 @@ void OverlayManager::register_overlay(adnl::AdnlNodeIdShort local_id, OverlayIdS } overlays_[local_id][overlay_id] = OverlayDescription{std::move(overlay), std::move(cert)}; + if (!with_db_) { + return; + } auto P = td::PromiseCreator::lambda([id = overlays_[local_id][overlay_id].overlay.get()](td::Result R) { R.ensure(); @@ -417,13 +420,19 @@ OverlayManager::OverlayManager(std::string db_root, td::actor::ActorId kv = - 
std::make_shared(td::RocksDb::open(PSTRING() << db_root_ << "/overlays").move_as_ok()); - db_ = DbType{std::move(kv)}; + if (!db_root_.empty()) { + with_db_ = true; + std::shared_ptr kv = + std::make_shared(td::RocksDb::open(PSTRING() << db_root_ << "/overlays").move_as_ok()); + db_ = DbType{std::move(kv)}; + } } void OverlayManager::save_to_db(adnl::AdnlNodeIdShort local_id, OverlayIdShort overlay_id, std::vector nodes) { + if (!with_db_) { + return; + } std::vector> nodes_vec; for (auto &n : nodes) { nodes_vec.push_back(n.tl()); diff --git a/overlay/overlay-manager.h b/overlay/overlay-manager.h index 12206e048..68b033a3b 100644 --- a/overlay/overlay-manager.h +++ b/overlay/overlay-manager.h @@ -131,6 +131,7 @@ class OverlayManager : public Overlays { td::actor::ActorId dht_node_; using DbType = td::KeyValueAsync; + bool with_db_ = false; DbType db_; class AdnlCallback : public adnl::Adnl::Callback { diff --git a/overlay/overlay-peers.cpp b/overlay/overlay-peers.cpp index 00b508fd4..7def4a2d3 100644 --- a/overlay/overlay-peers.cpp +++ b/overlay/overlay-peers.cpp @@ -213,7 +213,7 @@ void OverlayImpl::add_peer(OverlayNode node) { peer_list_.peers_.insert(id, OverlayPeer(std::move(node))); del_some_peers(); auto X = peer_list_.peers_.get(id); - if (X != nullptr && peer_list_.neighbours_.size() < max_neighbours() && + if (X != nullptr && !X->is_neighbour() && peer_list_.neighbours_.size() < max_neighbours() && !(X->get_node()->flags() & OverlayMemberFlags::DoNotReceiveBroadcasts) && X->get_id() != local_id_) { peer_list_.neighbours_.push_back(X->get_id()); X->set_neighbour(true); @@ -440,7 +440,7 @@ void OverlayImpl::update_neighbours(td::uint32 nodes_to_change) { VLOG(OVERLAY_INFO) << this << ": adding new neighbour " << X->get_id(); peer_list_.neighbours_.push_back(X->get_id()); X->set_neighbour(true); - } else { + } else if (X->is_alive()) { CHECK(nodes_to_change > 0); auto i = td::Random::fast(0, static_cast(peer_list_.neighbours_.size()) - 1); auto Y = 
peer_list_.peers_.get(peer_list_.neighbours_[i]); diff --git a/overlay/overlay.cpp b/overlay/overlay.cpp index c9da6f5c3..429c6a9c8 100644 --- a/overlay/overlay.cpp +++ b/overlay/overlay.cpp @@ -347,7 +347,12 @@ void OverlayImpl::alarm() { update_db_at_ = td::Timestamp::in(60.0); } - update_neighbours(0); + if (update_neighbours_at_.is_in_past()) { + update_neighbours(2); + update_neighbours_at_ = td::Timestamp::in(td::Random::fast(30.0, 120.0)); + } else { + update_neighbours(0); + } alarm_timestamp() = td::Timestamp::in(1.0); } else { update_neighbours(0); diff --git a/overlay/overlay.hpp b/overlay/overlay.hpp index ba0fae46d..41a04dec2 100644 --- a/overlay/overlay.hpp +++ b/overlay/overlay.hpp @@ -391,6 +391,7 @@ class OverlayImpl : public Overlay { td::Timestamp next_dht_store_query_ = td::Timestamp::in(1.0); td::Timestamp update_db_at_; td::Timestamp update_throughput_at_; + td::Timestamp update_neighbours_at_; td::Timestamp last_throughput_update_; std::unique_ptr callback_; diff --git a/tdutils/td/utils/port/Stat.cpp b/tdutils/td/utils/port/Stat.cpp index 816d622e4..73b006084 100644 --- a/tdutils/td/utils/port/Stat.cpp +++ b/tdutils/td/utils/port/Stat.cpp @@ -472,4 +472,45 @@ Result get_total_mem_stat() { #endif } +Result get_cpu_cores() { +#if TD_LINUX + uint32 result = 0; + TRY_RESULT(fd, FileFd::open("/proc/cpuinfo", FileFd::Read)); + SCOPE_EXIT { + fd.close(); + }; + std::string data; + char buf[10000]; + while (true) { + TRY_RESULT(size, fd.read(MutableSlice{buf, sizeof(buf) - 1})); + if (size == 0) { + break; + } + buf[size] = '\0'; + data += buf; + } + size_t i = 0; + while (i < data.size()) { + const char *line_begin = data.data() + i; + while (i < data.size() && data[i] != '\n') { + ++i; + } + auto line_end = data.data() + i; + ++i; + Slice line{line_begin, line_end}; + size_t j = 0; + while (j < line.size() && line[j] != ' ' && line[j] != '\t' && line[j] != ':') { + ++j; + } + Slice name = line.substr(0, j); + if (name == "processor") { + ++result; 
+ } + } + return result; +#else + return Status::Error("Not supported"); +#endif +} + } // namespace td diff --git a/tdutils/td/utils/port/Stat.h b/tdutils/td/utils/port/Stat.h index ab97be0f1..82e1832a0 100644 --- a/tdutils/td/utils/port/Stat.h +++ b/tdutils/td/utils/port/Stat.h @@ -70,4 +70,6 @@ struct TotalMemStat { }; Result get_total_mem_stat() TD_WARN_UNUSED_RESULT; +Result get_cpu_cores() TD_WARN_UNUSED_RESULT; + } // namespace td diff --git a/test/test-ton-collator.cpp b/test/test-ton-collator.cpp index 9e30836fe..0fde1e68d 100644 --- a/test/test-ton-collator.cpp +++ b/test/test-ton-collator.cpp @@ -380,6 +380,9 @@ class TestNode : public td::actor::Actor { void new_key_block(ton::validator::BlockHandle handle) override { } + void send_validator_telemetry(ton::PublicKeyHash key, + ton::tl_object_ptr telemetry) override { + } }; td::actor::send_closure(validator_manager_, &ton::validator::ValidatorManagerInterface::install_callback, diff --git a/tl/generate/scheme/ton_api.tl b/tl/generate/scheme/ton_api.tl index 05ea249ce..178b70a70 100644 --- a/tl/generate/scheme/ton_api.tl +++ b/tl/generate/scheme/ton_api.tl @@ -595,6 +595,10 @@ validator.group workchain:int shard:long catchain_seqno:int config_hash:int256 m validator.groupEx workchain:int shard:long vertical_seqno:int catchain_seqno:int config_hash:int256 members:(vector validator.groupMember) = validator.Group; validator.groupNew workchain:int shard:long vertical_seqno:int last_key_block_seqno:int catchain_seqno:int config_hash:int256 members:(vector validator.groupMember) = validator.Group; +validator.telemetry flags:# timestamp:double adnl_id:int256 + node_version:string os_version:string node_started_at:int + ram_size:long cpu_cores:int node_threads:int = validator.Telemetry; + ---functions--- @@ -646,8 +650,9 @@ engine.validator.fullNodeMaster port:int adnl:int256 = engine.validator.FullNode engine.validator.fullNodeSlave ip:int port:int adnl:PublicKey = engine.validator.FullNodeSlave; 
engine.validator.fullNodeConfig ext_messages_broadcast_disabled:Bool = engine.validator.FullNodeConfig; engine.validator.fastSyncMemberCertificate adnl_id:int256 certificate:overlay.MemberCertificate = engine.validator.FastSyncMemberCertificate; +engine.validator.collatorNodeWhitelist enabled:Bool adnl_ids:(vector int256) = engine.validator.CollatorNodeWhitelist; engine.validator.extraConfig state_serializer_enabled:Bool fast_sync_member_certificates:(vector engine.validator.fastSyncMemberCertificate) - = engine.validator.ExtraConfig; + collator_node_whitelist:engine.validator.collatorNodeWhitelist = engine.validator.ExtraConfig; engine.validator.config out_port:int addrs:(vector engine.Addr) adnl:(vector engine.adnl) dht:(vector engine.dht) validators:(vector engine.validator) collators:(vector engine.collator) @@ -668,13 +673,13 @@ engine.validator.collatorOptions deferring_enabled:Bool defer_messages_after:int defer_out_queue_size_limit:long dispatch_phase_2_max_total:int dispatch_phase_3_max_total:int dispatch_phase_2_max_per_initiator:int dispatch_phase_3_max_per_initiator:int - whitelist:(vector string) prioritylist:(vector string) = engine.validator.CollatorOptions; + whitelist:(vector string) prioritylist:(vector string) + force_full_collated_data:Bool ignore_collated_data_limits:Bool = engine.validator.CollatorOptions; -engine.validator.collatorsList.collator adnl_id:int256 trusted:Bool = engine.validator.collatorsList.Collator; +engine.validator.collatorsList.collator adnl_id:int256 = engine.validator.collatorsList.Collator; engine.validator.collatorsList.shard shard_id:tonNode.shardId collators:(vector engine.validator.collatorsList.collator) - = engine.validator.collatorsList.Shard; -engine.validator.collatorsList self_collate:Bool use_config_41:Bool shards:(vector engine.validator.collatorsList.shard) - = engine.validator.CollatorsList; + self_collate:Bool select_mode:string = engine.validator.collatorsList.Shard; +engine.validator.collatorsList 
shards:(vector engine.validator.collatorsList.shard) = engine.validator.CollatorsList; ---functions--- ---types--- @@ -740,6 +745,11 @@ engine.validator.perfTimerStats stats:(vector engine.validator.PerfTimerStatsByN engine.validator.shardOutQueueSize size:long = engine.validator.ShardOutQueueSize; +engine.validator.collationManagerStats.shard shard_id:tonNode.shardId self_collate:Bool select_mode:string active:Bool collators:(vector int256) = engine.validator.collationManagerStats.Shard; +engine.validator.collationManagerStats.collator adnl_id:int256 active:Bool alive:Bool ping_in:double = engine.validator.collationManagerStats.Collator; +engine.validator.collationManagerStats.localId adnl_id:int256 shards:(vector engine.validator.collationManagerStats.shard) + collators:(vector engine.validator.collationManagerStats.collator) = engine.validator.collationManagerStats.LocalId; +engine.validator.collationManagerStats local_ids:(vector engine.validator.collationManagerStats.localId) = engine.validator.CollationManagerStats; ---functions--- @@ -811,8 +821,14 @@ engine.validator.addShard shard:tonNode.shardId = engine.validator.Success; engine.validator.delCollator adnl_id:int256 shard:tonNode.shardId = engine.validator.Success; engine.validator.delShard shard:tonNode.shardId = engine.validator.Success; +engine.validator.collatorNodeSetWhitelistedValidator adnl_id:int256 add:Bool = engine.validator.Success; +engine.validator.collatorNodeSetWhitelistEnabled enabled:Bool = engine.validator.Success; +engine.validator.showCollatorNodeWhitelist = engine.validator.CollatorNodeWhitelist; + engine.validator.setCollatorsList list:engine.validator.collatorsList = engine.validator.Success; +engine.validator.clearCollatorsList = engine.validator.Success; engine.validator.showCollatorsList = engine.validator.CollatorsList; +engine.validator.getCollationManagerStats = engine.validator.CollationManagerStats; engine.validator.signOverlayMemberCertificate sign_by:int256 adnl_id:int256 
slot:int expire_at:int = overlay.MemberCertificate; engine.validator.importFastSyncMemberCertificate adnl_id:int256 certificate:overlay.MemberCertificate = engine.validator.Success; @@ -863,8 +879,12 @@ http.server.config dhs:(vector http.server.dnsEntry) local_hosts:(vector http.se ---types--- -validatorSession.collationStats bytes:int gas:int lt_delta:int cat_bytes:int cat_gas:int cat_lt_delta:int - limits_log:string ext_msgs_total:int ext_msgs_filtered:int ext_msgs_accepted:int ext_msgs_rejected:int = validadorSession.CollationStats; +validatorSession.collationStats actual_bytes:int actual_collated_data_bytes:int + bytes:int gas:int lt_delta:int collated_data_bytes:int + cat_bytes:int cat_gas:int cat_lt_delta:int cat_collated_data_bytes:int + limits_log:string ext_msgs_total:int ext_msgs_filtered:int ext_msgs_accepted:int ext_msgs_rejected:int + work_time:double cpu_work_time:double + serialized_size:int serialized_size_no_collated_data:int = validadorSession.CollationStats; validatorSession.statsProducer id:int256 candidate_id:int256 block_status:int root_hash:int256 file_hash:int256 comment:string block_timestamp:double is_accepted:Bool is_ours:Bool got_submit_at:double @@ -885,10 +905,12 @@ validatorSession.stats success:Bool id:tonNode.blockIdExt timestamp:double self: signatures:int signatures_weight:long approve_signatures:int approve_signatures_weight:long first_round:int rounds:(vector validatorSession.statsRound) = validatorSession.Stats; +validatorSession.statsCollatedBlock timestamp:double id:tonNode.blockIdExt stats:validatorSession.collationStats = validatorSession.StatsCollatedBlock; + collatorNode.candidate source:PublicKey id:tonNode.blockIdExt data:bytes collated_data:bytes = collatorNode.Candidate; collatorNode.compressedCandidate flags:# source:PublicKey id:tonNode.blockIdExt decompressed_size:int data:bytes = collatorNode.Candidate; -collatorNode.generateBlockSuccess candidate:collatorNode.Candidate = collatorNode.GenerateBlockResult; 
-collatorNode.generateBlockError code:int message:string = collatorNode.GenerateBlockResult; +collatorNode.pong flags:# = collatorNode.Pong; +collatorNode.error code:int message:string = collatorNode.Error; validatorSession.newValidatorGroupStats.node id:int256 weight:long = validatorSession.newValidatorGroupStats.Node; validatorSession.newValidatorGroupStats session_id:int256 workchain:int shard:long cc_seqno:int @@ -901,7 +923,8 @@ validatorSession.endValidatorGroupStats session_id:int256 timestamp:double ---functions--- collatorNode.generateBlock shard:tonNode.shardId cc_seqno:int prev_blocks:(vector tonNode.blockIdExt) - creator:int256 = collatorNode.GenerateBlockResult; + creator:int256 = collatorNode.Candidate; +collatorNode.ping flags:# = collatorNode.Pong; ---types--- diff --git a/tl/generate/scheme/ton_api.tlo b/tl/generate/scheme/ton_api.tlo index eaa9c4161..c984fba0b 100644 Binary files a/tl/generate/scheme/ton_api.tlo and b/tl/generate/scheme/ton_api.tlo differ diff --git a/ton/ton-types.h b/ton/ton-types.h index cd9700814..11741c5ec 100644 --- a/ton/ton-types.h +++ b/ton/ton-types.h @@ -62,7 +62,8 @@ enum GlobalCapabilities { capShortDequeue = 32, capStoreOutMsgQueueSize = 64, capMsgMetadata = 128, - capDeferMessages = 256 + capDeferMessages = 256, + capFullCollatedData = 512 }; inline int shard_pfx_len(ShardId shard) { diff --git a/validator-engine-console/validator-engine-console-query.cpp b/validator-engine-console/validator-engine-console-query.cpp index 958692d51..cdc831d66 100644 --- a/validator-engine-console/validator-engine-console-query.cpp +++ b/validator-engine-console/validator-engine-console-query.cpp @@ -1582,6 +1582,92 @@ td::Status DelShardQuery::receive(td::BufferSlice data) { return td::Status::OK(); } +td::Status CollatorNodeAddWhitelistedValidatorQuery::run() { + TRY_RESULT_ASSIGN(adnl_id_, tokenizer_.get_token()); + TRY_STATUS(tokenizer_.check_endl()); + return td::Status::OK(); +} + +td::Status 
CollatorNodeAddWhitelistedValidatorQuery::send() { + auto b = ton::create_serialize_tl_object( + adnl_id_.bits256_value(), true); + td::actor::send_closure(console_, &ValidatorEngineConsole::envelope_send_query, std::move(b), create_promise()); + return td::Status::OK(); +} + +td::Status CollatorNodeAddWhitelistedValidatorQuery::receive(td::BufferSlice data) { + TRY_RESULT_PREFIX(f, ton::fetch_tl_object(data.as_slice(), true), + "received incorrect answer: "); + td::TerminalIO::out() << "success\n"; + return td::Status::OK(); +} + +td::Status CollatorNodeDelWhitelistedValidatorQuery::run() { + TRY_RESULT_ASSIGN(adnl_id_, tokenizer_.get_token()); + TRY_STATUS(tokenizer_.check_endl()); + return td::Status::OK(); +} + +td::Status CollatorNodeDelWhitelistedValidatorQuery::send() { + auto b = ton::create_serialize_tl_object( + adnl_id_.bits256_value(), false); + td::actor::send_closure(console_, &ValidatorEngineConsole::envelope_send_query, std::move(b), create_promise()); + return td::Status::OK(); +} + +td::Status CollatorNodeDelWhitelistedValidatorQuery::receive(td::BufferSlice data) { + TRY_RESULT_PREFIX(f, ton::fetch_tl_object(data.as_slice(), true), + "received incorrect answer: "); + td::TerminalIO::out() << "success\n"; + return td::Status::OK(); +} + +td::Status CollatorNodeEnableWhitelistQuery::run() { + TRY_RESULT(value, tokenizer_.get_token()); + if (value != 0 && value != 1) { + return td::Status::Error("expected 0 or 1"); + } + TRY_STATUS(tokenizer_.check_endl()); + enabled_ = value; + return td::Status::OK(); +} + +td::Status CollatorNodeEnableWhitelistQuery::send() { + auto b = ton::create_serialize_tl_object(enabled_); + td::actor::send_closure(console_, &ValidatorEngineConsole::envelope_send_query, std::move(b), create_promise()); + return td::Status::OK(); +} + +td::Status CollatorNodeEnableWhitelistQuery::receive(td::BufferSlice data) { + TRY_RESULT_PREFIX(f, ton::fetch_tl_object(data.as_slice(), true), + "received incorrect answer: "); + 
td::TerminalIO::out() << "success\n"; + return td::Status::OK(); +} + +td::Status CollatorNodeShowWhitelistQuery::run() { + TRY_STATUS(tokenizer_.check_endl()); + return td::Status::OK(); +} + +td::Status CollatorNodeShowWhitelistQuery::send() { + auto b = ton::create_serialize_tl_object(); + td::actor::send_closure(console_, &ValidatorEngineConsole::envelope_send_query, std::move(b), create_promise()); + return td::Status::OK(); +} + +td::Status CollatorNodeShowWhitelistQuery::receive(td::BufferSlice data) { + TRY_RESULT_PREFIX(f, + ton::fetch_tl_object(data.as_slice(), true), + "received incorrect answer: "); + td::TerminalIO::out() << "Collator node whitelist: " << (f->enabled_ ? "ENABLED" : "DISABLED") << "\n"; + td::TerminalIO::out() << f->adnl_ids_.size() << " validator adnl ids\n"; + for (const auto &id : f->adnl_ids_) { + td::TerminalIO::out() << id.to_hex() << "\n"; + } + return td::Status::OK(); +} + td::Status SetCollatorsListQuery::run() { TRY_RESULT_ASSIGN(file_name_, tokenizer_.get_token()); TRY_STATUS(tokenizer_.check_endl()); @@ -1611,9 +1697,7 @@ td::Status ClearCollatorsListQuery::run() { } td::Status ClearCollatorsListQuery::send() { - auto list = ton::create_tl_object(); - list->self_collate_ = true; - auto b = ton::create_serialize_tl_object(std::move(list)); + auto b = ton::create_serialize_tl_object(); td::actor::send_closure(console_, &ValidatorEngineConsole::envelope_send_query, std::move(b), create_promise()); return td::Status::OK(); } @@ -1640,20 +1724,60 @@ td::Status ShowCollatorsListQuery::receive(td::BufferSlice data) { TRY_RESULT_PREFIX(list, ton::fetch_tl_object(data.as_slice(), true), "received incorrect answer: "); td::TerminalIO::out() << "Collators list:\n"; - if (list->self_collate_) { - td::TerminalIO::out() << "self_collate = true\n"; - } - if (list->use_config_41_) { - td::TerminalIO::out() << "use_config_41 = true\n"; - } if (list->shards_.empty()) { td::TerminalIO::out() << "Shard list is empty\n"; return 
td::Status::OK(); } for (const auto &shard : list->shards_) { td::TerminalIO::out() << "Shard " << create_shard_id(shard->shard_id_).to_str() << "\n"; + td::TerminalIO::out() << " Self collate = " << shard->self_collate_ << "\n"; + td::TerminalIO::out() << " Select mode = " << shard->select_mode_ << "\n"; for (const auto &collator : shard->collators_) { - td::TerminalIO::out() << " Collator " << collator->adnl_id_ << (collator->trusted_ ? " (trusted)" : "") << "\n"; + td::TerminalIO::out() << " Collator " << collator->adnl_id_ << "\n"; + } + } + return td::Status::OK(); +} + +td::Status GetCollationManagerStatsQuery::run() { + TRY_STATUS(tokenizer_.check_endl()); + return td::Status::OK(); +} + +td::Status GetCollationManagerStatsQuery::send() { + auto b = ton::create_serialize_tl_object(); + td::actor::send_closure(console_, &ValidatorEngineConsole::envelope_send_query, std::move(b), create_promise()); + return td::Status::OK(); +} + +td::Status GetCollationManagerStatsQuery::receive(td::BufferSlice data) { + TRY_RESULT_PREFIX(list, + ton::fetch_tl_object(data.as_slice(), true), + "received incorrect answer: "); + if (list->local_ids_.empty()) { + td::TerminalIO::out() << "No stats\n"; + return td::Status::OK();; + } + for (auto &stats : list->local_ids_) { + td::TerminalIO::out() << "VALIDATOR ADNL ID = " << stats->adnl_id_ << "\n"; + std::map collators; + for (auto &collator: stats->collators_) { + collators[collator->adnl_id_] = collator.get(); + } + for (auto &shard : stats->shards_) { + td::TerminalIO::out() << " Shard " << create_shard_id(shard->shard_id_).to_str() << "\n"; + td::TerminalIO::out() << " Self collate = " << shard->self_collate_ << "\n"; + td::TerminalIO::out() << " Select mode = " << shard->select_mode_ << "\n"; + td::TerminalIO::out() << " Active = " << shard->active_ << "\n"; + td::TerminalIO::out() << " Collators: " << shard->collators_.size() << "\n"; + for (auto &id : shard->collators_) { + auto collator = collators[id]; + if (collator == 
nullptr) { + return td::Status::Error("collator not found"); + } + td::TerminalIO::out() << " " << id << " alive=" << (int)collator->alive_ + << " ping_in=" << collator->ping_in_ << "\n"; + } } } return td::Status::OK(); diff --git a/validator-engine-console/validator-engine-console-query.h b/validator-engine-console/validator-engine-console-query.h index 5708fecda..2d59c29b7 100644 --- a/validator-engine-console/validator-engine-console-query.h +++ b/validator-engine-console/validator-engine-console-query.h @@ -1457,6 +1457,91 @@ class DelShardQuery : public Query { td::int64 shard_; }; +class CollatorNodeAddWhitelistedValidatorQuery : public Query { + public: + CollatorNodeAddWhitelistedValidatorQuery(td::actor::ActorId console, Tokenizer tokenizer) + : Query(console, std::move(tokenizer)) { + } + td::Status run() override; + td::Status send() override; + td::Status receive(td::BufferSlice data) override; + static std::string get_name() { + return "collatorwhitelistadd"; + } + static std::string get_help() { + return "collatorwhitelistadd \tadd validator adnl id to collator node whitelist"; + } + std::string name() const override { + return get_name(); + } + + private: + ton::PublicKeyHash adnl_id_; +}; + +class CollatorNodeDelWhitelistedValidatorQuery : public Query { + public: + CollatorNodeDelWhitelistedValidatorQuery(td::actor::ActorId console, Tokenizer tokenizer) + : Query(console, std::move(tokenizer)) { + } + td::Status run() override; + td::Status send() override; + td::Status receive(td::BufferSlice data) override; + static std::string get_name() { + return "collatorwhitelistdel"; + } + static std::string get_help() { + return "collatorwhitelistdel \tremove validator adnl id from collator node whitelist"; + } + std::string name() const override { + return get_name(); + } + + private: + ton::PublicKeyHash adnl_id_; +}; + +class CollatorNodeEnableWhitelistQuery : public Query { + public: + CollatorNodeEnableWhitelistQuery(td::actor::ActorId console, 
Tokenizer tokenizer) + : Query(console, std::move(tokenizer)) { + } + td::Status run() override; + td::Status send() override; + td::Status receive(td::BufferSlice data) override; + static std::string get_name() { + return "collatorwhitelistenable"; + } + static std::string get_help() { + return "collatorwhitelistenable \tenable or disable collator node whiltelist (value is 0 or 1)"; + } + std::string name() const override { + return get_name(); + } + + private: + bool enabled_; +}; + +class CollatorNodeShowWhitelistQuery : public Query { + public: + CollatorNodeShowWhitelistQuery(td::actor::ActorId console, Tokenizer tokenizer) + : Query(console, std::move(tokenizer)) { + } + td::Status run() override; + td::Status send() override; + td::Status receive(td::BufferSlice data) override; + static std::string get_name() { + return "collatorwhitelistshow"; + } + static std::string get_help() { + return "collatorwhitelistshow\tshow collator node whitelist"; + } + std::string name() const override { + return get_name(); + } +}; + class SetCollatorsListQuery : public Query { public: SetCollatorsListQuery(td::actor::ActorId console, Tokenizer tokenizer) @@ -1517,6 +1602,25 @@ class ShowCollatorsListQuery : public Query { } }; +class GetCollationManagerStatsQuery : public Query { + public: + GetCollationManagerStatsQuery(td::actor::ActorId console, Tokenizer tokenizer) + : Query(console, std::move(tokenizer)) { + } + td::Status run() override; + td::Status send() override; + td::Status receive(td::BufferSlice data) override; + static std::string get_name() { + return "collationmanagerstats"; + } + static std::string get_help() { + return "collationmanagerstats\tshow stats of collation manager"; + } + std::string name() const override { + return get_name(); + } +}; + class SignOverlayMemberCertificateQuery : public Query { public: SignOverlayMemberCertificateQuery(td::actor::ActorId console, Tokenizer tokenizer) diff --git 
a/validator-engine-console/validator-engine-console.cpp b/validator-engine-console/validator-engine-console.cpp index 4c4922494..67e932115 100644 --- a/validator-engine-console/validator-engine-console.cpp +++ b/validator-engine-console/validator-engine-console.cpp @@ -157,9 +157,14 @@ void ValidatorEngineConsole::run() { add_query_runner(std::make_unique>()); add_query_runner(std::make_unique>()); add_query_runner(std::make_unique>()); + add_query_runner(std::make_unique>()); + add_query_runner(std::make_unique>()); + add_query_runner(std::make_unique>()); + add_query_runner(std::make_unique>()); add_query_runner(std::make_unique>()); add_query_runner(std::make_unique>()); add_query_runner(std::make_unique>()); + add_query_runner(std::make_unique>()); add_query_runner(std::make_unique>()); add_query_runner(std::make_unique>()); } diff --git a/validator-engine/validator-engine.cpp b/validator-engine/validator-engine.cpp index 3722dcd28..16aabde5f 100644 --- a/validator-engine/validator-engine.cpp +++ b/validator-engine/validator-engine.cpp @@ -75,6 +75,7 @@ #include "common/delay.h" #include "block/precompiled-smc/PrecompiledSmartContract.h" #include "interfaces/validator-manager.h" +#include "tl-utils/lite-utils.hpp" #if TON_USE_JEMALLOC #include @@ -147,9 +148,9 @@ Config::Config(const ton::ton_api::engine_validator_config &config) { } } for (auto &col : config.collators_) { - auto key = ton::PublicKeyHash{col->adnl_id_}; + auto id = ton::adnl::AdnlNodeIdShort{col->adnl_id_}; ton::ShardIdFull shard = ton::create_shard_id(col->shard_); - config_add_collator(key, shard).ensure(); + config_add_collator(id, shard).ensure(); } config_add_full_node_adnl_id(ton::PublicKeyHash{config.fullnode_}).ensure(); @@ -175,6 +176,12 @@ Config::Config(const ton::ton_api::engine_validator_config &config) { fast_sync_member_certificates.emplace_back(adnl_id, std::move(certificate)); } } + if (config.extraconfig_->collator_node_whitelist_) { + collator_node_whiltelist_enabled = 
config.extraconfig_->collator_node_whitelist_->enabled_; + for (const auto& id : config.extraconfig_->collator_node_whitelist_->adnl_ids_) { + collator_node_whitelist.emplace(id); + } + } } else { state_serializer_enabled = true; } @@ -242,9 +249,11 @@ ton::tl_object_ptr Config::tl() const { val.first.tl(), std::move(temp_vec), std::move(adnl_val_vec), val.second.election_date, val.second.expire_at)); } std::vector> col_vec; - for (auto &col : collators) { - col_vec.push_back( - ton::create_tl_object(col.adnl_id.tl(), ton::create_tl_shard_id(col.shard))); + for (auto &[col, shards] : collators) { + for (auto &shard : shards) { + col_vec.push_back( + ton::create_tl_object(col.bits256_value(), ton::create_tl_shard_id(shard))); + } } std::vector> full_node_slaves_vec; @@ -263,8 +272,17 @@ ton::tl_object_ptr Config::tl() const { full_node_config_obj = full_node_config.tl(); } + ton::tl_object_ptr collator_node_whitelist_obj = {}; + if (collator_node_whiltelist_enabled || !collator_node_whitelist.empty()) { + collator_node_whitelist_obj = ton::create_tl_object(); + collator_node_whitelist_obj->enabled_ = collator_node_whiltelist_enabled; + for (const auto& id : collator_node_whitelist) { + collator_node_whitelist_obj->adnl_ids_.push_back(id.bits256_value()); + } + } + ton::tl_object_ptr extra_config_obj = {}; - if (!state_serializer_enabled || !fast_sync_member_certificates.empty()) { + if (!state_serializer_enabled || !fast_sync_member_certificates.empty() || collator_node_whitelist_obj) { // Non-default values extra_config_obj = ton::create_tl_object(); extra_config_obj->state_serializer_enabled_ = state_serializer_enabled; @@ -273,6 +291,7 @@ ton::tl_object_ptr Config::tl() const { ton::create_tl_object(adnl_id.bits256_value(), certificate.tl())); } + extra_config_obj->collator_node_whitelist_ = std::move(collator_node_whitelist_obj); } std::vector> liteserver_vec; @@ -448,28 +467,28 @@ td::Result Config::config_add_validator_adnl_id(ton::PublicKeyHash perm_ke } } 
-td::Result Config::config_add_collator(ton::PublicKeyHash addr, ton::ShardIdFull shard) { +td::Result Config::config_add_collator(ton::adnl::AdnlNodeIdShort addr, ton::ShardIdFull shard) { if (!shard.is_valid_ext()) { return td::Status::Error(PSTRING() << "invalid shard: " << shard.to_str()); } - Collator c{addr, shard}; - if (std::find(collators.begin(), collators.end(), c) != collators.end()) { + auto& shards = collators[addr]; + if (std::ranges::find(shards, shard) != collators[addr].end()) { return false; } - collators.push_back(c); + shards.push_back(shard); return true; } -td::Result Config::config_del_collator(ton::PublicKeyHash addr, ton::ShardIdFull shard) { +td::Result Config::config_del_collator(ton::adnl::AdnlNodeIdShort addr, ton::ShardIdFull shard) { if (!shard.is_valid_ext()) { return td::Status::Error(PSTRING() << "invalid shard: " << shard.to_str()); } - Collator c{addr, shard}; - auto it = std::find(collators.begin(), collators.end(), c); - if (it == collators.end()) { + auto& shards = collators[addr]; + auto it = std::ranges::find(shards, shard); + if (it == shards.end()) { return false; } - collators.erase(it); + shards.erase(it); return true; } @@ -1564,6 +1583,11 @@ td::Status ValidatorEngine::load_global_config() { } validator_options_.write().set_fast_state_serializer_enabled(fast_state_serializer_enabled_); + for (auto& id : config_.collator_node_whitelist) { + validator_options_.write().set_collator_node_whitelisted_validator(id, true); + } + validator_options_.write().set_collator_node_whitelist_enabled(config_.collator_node_whiltelist_enabled); + return td::Status::OK(); } @@ -1572,14 +1596,16 @@ void ValidatorEngine::set_shard_check_function() { validator_options_.write().set_shard_check_function([](ton::ShardIdFull shard) -> bool { return true; }); } else { std::vector shards = {ton::ShardIdFull(ton::masterchainId)}; - for (const auto& c : config_.collators) { - shards.push_back(c.shard); + for (const auto& [_, collator_shards] : 
config_.collators) { + for (const auto& shard : collator_shards) { + shards.push_back(shard); + } } for (const auto& s : config_.shards_to_monitor) { shards.push_back(s); } std::sort(shards.begin(), shards.end()); - shards.erase(std::unique(shards.begin(), shards.end()), shards.end()); + shards.erase(std::ranges::unique(shards).begin(), shards.end()); validator_options_.write().set_shard_check_function( [shards = std::move(shards)](ton::ShardIdFull shard) -> bool { for (auto s : shards) { @@ -1613,8 +1639,13 @@ void ValidatorEngine::load_collators_list() { return; } td::Ref list{true}; - list.write().unpack(*collators_list_); - validator_options_.write().set_collators_list(std::move(list)); + S = list.write().unpack(*collators_list_); + if (S.is_ok()) { + validator_options_.write().set_collators_list(std::move(list)); + } else { + LOG(ERROR) << "Invalid collators list: " << S.move_as_error(); + collators_list_ = {}; + } } void ValidatorEngine::load_empty_local_config(td::Promise promise) { @@ -2100,14 +2131,17 @@ void ValidatorEngine::start_full_node() { td::actor::send_closure(full_node_, &ton::validator::fullnode::FullNode::add_permanent_key, v.first, [](td::Unit) {}); } - for (auto &c : config_.collators) { - td::actor::send_closure(full_node_, &ton::validator::fullnode::FullNode::add_collator_adnl_id, - ton::adnl::AdnlNodeIdShort(c.adnl_id)); + for (auto &[c, _] : config_.collators) { + td::actor::send_closure(full_node_, &ton::validator::fullnode::FullNode::add_collator_adnl_id, c); } for (auto &x : config_.fast_sync_member_certificates) { td::actor::send_closure(full_node_, &ton::validator::fullnode::FullNode::import_fast_sync_member_certificate, x.first, x.second); } + if (!validator_telemetry_filename_.empty()) { + td::actor::send_closure(full_node_, &ton::validator::fullnode::FullNode::set_validator_telemetry_filename, + validator_telemetry_filename_); + } load_custom_overlays_config(); } else { started_full_node(); @@ -2139,9 +2173,10 @@ void 
ValidatorEngine::started_lite_server() { } void ValidatorEngine::start_collator() { - for (auto &c : config_.collators) { - td::actor::send_closure(validator_manager_, &ton::validator::ValidatorManagerInterface::add_collator, - ton::adnl::AdnlNodeIdShort(c.adnl_id), c.shard); + for (auto& [id, shards] : config_.collators) { + for (auto& shard : shards) { + td::actor::send_closure(validator_manager_, &ton::validator::ValidatorManagerInterface::add_collator, id, shard); + } } started_collator(); @@ -2676,6 +2711,8 @@ static td::Result> parse_collator_optio f.dispatch_phase_2_max_per_initiator_ = opts.dispatch_phase_2_max_per_initiator; f.dispatch_phase_3_max_per_initiator_ = opts.dispatch_phase_3_max_per_initiator ? opts.dispatch_phase_3_max_per_initiator.value() : -1; + f.force_full_collated_data_ = false; + f.ignore_collated_data_limits_ = false; TRY_RESULT_PREFIX(json, td::json_decode(json_str), "failed to parse json: "); TRY_STATUS_PREFIX(ton::ton_api::from_json(f, json.get_object()), "json does not fit TL scheme: "); @@ -2715,6 +2752,8 @@ static td::Result> parse_collator_optio TRY_RESULT(addr, block::StdAddress::parse(s)); opts.prioritylist.emplace(addr.workchain, addr.addr); } + opts.force_full_collated_data = f.force_full_collated_data_; + opts.ignore_collated_data_limits = f.ignore_collated_data_limits_; return ref; } @@ -4054,6 +4093,92 @@ void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_setCollat promise.set_value(ton::create_serialize_tl_object()); } +void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_collatorNodeSetWhitelistedValidator &query, + td::BufferSlice data, ton::PublicKeyHash src, td::uint32 perm, + td::Promise promise) { + if (!(perm & ValidatorEnginePermissions::vep_modify)) { + promise.set_value(create_control_query_error(td::Status::Error(ton::ErrorCode::error, "not authorized"))); + return; + } + if (!started_) { + 
promise.set_value(create_control_query_error(td::Status::Error(ton::ErrorCode::notready, "not started"))); + return; + } + ton::adnl::AdnlNodeIdShort adnl_id{query.adnl_id_}; + if (query.add_) { + if (!config_.collator_node_whitelist.insert(adnl_id).second) { + promise.set_value(ton::create_serialize_tl_object()); + return; + } + } else { + if (config_.collator_node_whitelist.erase(adnl_id) == 0) { + promise.set_value(ton::create_serialize_tl_object()); + return; + } + } + + validator_options_.write().set_collator_node_whitelisted_validator(adnl_id, query.add_); + td::actor::send_closure(validator_manager_, &ton::validator::ValidatorManagerInterface::update_options, + validator_options_); + write_config([promise = std::move(promise)](td::Result R) mutable { + if (R.is_error()) { + promise.set_value(create_control_query_error(R.move_as_error())); + } else { + promise.set_value( + ton::serialize_tl_object(ton::create_tl_object(), true)); + } + }); +} + +void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_collatorNodeSetWhitelistEnabled &query, + td::BufferSlice data, ton::PublicKeyHash src, td::uint32 perm, + td::Promise promise) { + if (!(perm & ValidatorEnginePermissions::vep_modify)) { + promise.set_value(create_control_query_error(td::Status::Error(ton::ErrorCode::error, "not authorized"))); + return; + } + if (!started_) { + promise.set_value(create_control_query_error(td::Status::Error(ton::ErrorCode::notready, "not started"))); + return; + } + if (config_.collator_node_whiltelist_enabled == query.enabled_) { + promise.set_value(ton::create_serialize_tl_object()); + return; + } + config_.collator_node_whiltelist_enabled = query.enabled_; + validator_options_.write().set_collator_node_whitelist_enabled(query.enabled_); + td::actor::send_closure(validator_manager_, &ton::validator::ValidatorManagerInterface::update_options, + validator_options_); + write_config([promise = std::move(promise)](td::Result R) mutable { + if (R.is_error()) { + 
promise.set_value(create_control_query_error(R.move_as_error())); + } else { + promise.set_value( + ton::serialize_tl_object(ton::create_tl_object(), true)); + } + }); +} + +void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_showCollatorNodeWhitelist &query, + td::BufferSlice data, ton::PublicKeyHash src, td::uint32 perm, + td::Promise promise) { + if (!(perm & ValidatorEnginePermissions::vep_default)) { + promise.set_value(create_control_query_error(td::Status::Error(ton::ErrorCode::error, "not authorized"))); + return; + } + if (!started_) { + promise.set_value(create_control_query_error(td::Status::Error(ton::ErrorCode::notready, "not started"))); + return; + } + ton::tl_object_ptr result = {}; + result = ton::create_tl_object(); + result->enabled_ = config_.collator_node_whiltelist_enabled; + for (const auto &id : config_.collator_node_whitelist) { + result->adnl_ids_.push_back(id.bits256_value()); + } + promise.set_value(ton::serialize_tl_object(result, true)); +} + void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_getCollatorOptionsJson &query, td::BufferSlice data, ton::PublicKeyHash src, td::uint32 perm, td::Promise promise) { @@ -4107,16 +4232,44 @@ void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_setCollat return; } + td::Ref list{true}; + auto S = list.write().unpack(*query.list_); + if (S.is_error()) { + promise.set_value(create_control_query_error(S.move_as_error_prefix("Invalid collators list: "))); + return; + } auto s = td::json_encode(td::ToJson(*query.list_), true); - auto S = td::write_file(collators_list_file(), s); + S = td::write_file(collators_list_file(), s); if (S.is_error()) { promise.set_value(create_control_query_error(std::move(S))); return; } - collators_list_ = std::move(query.list_); - td::Ref list{true}; - list.write().unpack(*collators_list_); + validator_options_.write().set_collators_list(std::move(list)); + td::actor::send_closure(validator_manager_, 
&ton::validator::ValidatorManagerInterface::update_options, + validator_options_); + promise.set_value(ton::serialize_tl_object(ton::create_tl_object(), true)); +} + +void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_clearCollatorsList &query, td::BufferSlice data, + ton::PublicKeyHash src, td::uint32 perm, td::Promise promise) { + if (!(perm & ValidatorEnginePermissions::vep_modify)) { + promise.set_value(create_control_query_error(td::Status::Error(ton::ErrorCode::error, "not authorized"))); + return; + } + if (!started_) { + promise.set_value(create_control_query_error(td::Status::Error(ton::ErrorCode::notready, "not started"))); + return; + } + + auto S = td::unlink(collators_list_file()); + if (S.is_error()) { + promise.set_value(create_control_query_error(std::move(S))); + return; + } + + td::Ref list{true, ton::validator::CollatorsList::default_list()}; + collators_list_ = {}; validator_options_.write().set_collators_list(std::move(list)); td::actor::send_closure(validator_manager_, &ton::validator::ValidatorManagerInterface::update_options, validator_options_); @@ -4136,10 +4289,31 @@ void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_showColla if (collators_list_) { promise.set_value(ton::serialize_tl_object(collators_list_, true)); } else { - auto list = ton::create_tl_object(); - list->self_collate_ = true; - promise.set_value(ton::serialize_tl_object(list, true)); + promise.set_value(create_control_query_error(td::Status::Error("collators list is empty"))); + } +} + +void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_getCollationManagerStats &query, + td::BufferSlice data, ton::PublicKeyHash src, td::uint32 perm, + td::Promise promise) { + if (!(perm & ValidatorEnginePermissions::vep_default)) { + promise.set_value(create_control_query_error(td::Status::Error(ton::ErrorCode::error, "not authorized"))); + return; } + if (!started_) { + 
promise.set_value(create_control_query_error(td::Status::Error(ton::ErrorCode::notready, "not started"))); + return; + } + td::actor::send_closure( + validator_manager_, &ton::validator::ValidatorManagerInterface::get_collation_manager_stats, + [promise = std::move(promise)]( + td::Result> R) mutable { + if (R.is_ok()) { + promise.set_value(ton::serialize_tl_object(R.move_as_ok(), true)); + } else { + promise.set_value(create_control_query_error(R.move_as_error())); + } + }); } void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_addCollator &query, @@ -4154,7 +4328,7 @@ void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_addCollat return; } - auto id = ton::PublicKeyHash{query.adnl_id_}; + auto id = ton::adnl::AdnlNodeIdShort{query.adnl_id_}; auto shard = ton::create_shard_id(query.shard_); auto R = config_.config_add_collator(id, shard); if (R.is_error()) { @@ -4169,12 +4343,10 @@ void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_addCollat if (!validator_manager_.empty()) { td::actor::send_closure(validator_manager_, &ton::validator::ValidatorManagerInterface::update_options, validator_options_); - td::actor::send_closure(validator_manager_, &ton::validator::ValidatorManagerInterface::add_collator, - ton::adnl::AdnlNodeIdShort(id), shard); + td::actor::send_closure(validator_manager_, &ton::validator::ValidatorManagerInterface::add_collator, id, shard); } if (!full_node_.empty()) { - td::actor::send_closure(full_node_, &ton::validator::fullnode::FullNode::add_collator_adnl_id, - ton::adnl::AdnlNodeIdShort(id)); + td::actor::send_closure(full_node_, &ton::validator::fullnode::FullNode::add_collator_adnl_id, id); } write_config([promise = std::move(promise)](td::Result R) mutable { if (R.is_error()) { @@ -4229,7 +4401,7 @@ void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_delCollat return; } - auto id = ton::PublicKeyHash{query.adnl_id_}; + auto id = 
ton::adnl::AdnlNodeIdShort{query.adnl_id_}; auto shard = ton::create_shard_id(query.shard_); auto R = config_.config_del_collator(id, shard); if (R.is_error()) { @@ -4248,12 +4420,10 @@ void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_delCollat if (!validator_manager_.empty()) { td::actor::send_closure(validator_manager_, &ton::validator::ValidatorManagerInterface::update_options, validator_options_); - td::actor::send_closure(validator_manager_, &ton::validator::ValidatorManagerInterface::del_collator, - ton::adnl::AdnlNodeIdShort(id), shard); + td::actor::send_closure(validator_manager_, &ton::validator::ValidatorManagerInterface::del_collator, id, shard); } if (!full_node_.empty()) { - td::actor::send_closure(full_node_, &ton::validator::fullnode::FullNode::del_collator_adnl_id, - ton::adnl::AdnlNodeIdShort(id)); + td::actor::send_closure(full_node_, &ton::validator::fullnode::FullNode::del_collator_adnl_id, id); } write_config([promise = std::move(promise)](td::Result R) mutable { if (R.is_error()) { @@ -4318,7 +4488,7 @@ void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_signOverl int expire_at = query.expire_at_; td::actor::send_closure( keyring_, &ton::keyring::Keyring::get_public_key, public_key_hash, - [=, promise = std::move(promise)](td::Result R) mutable { + [=, keyring = keyring_.get(), promise = std::move(promise)](td::Result R) mutable { if (R.is_error()) { promise.set_value(create_control_query_error(R.move_as_error())); return; @@ -4330,7 +4500,7 @@ void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_signOverl return; } td::BufferSlice to_sign = certificate.to_sign_data(adnl_id); - td::actor::send_closure(keyring_, &ton::keyring::Keyring::sign_message, public_key_hash, std::move(to_sign), + td::actor::send_closure(keyring, &ton::keyring::Keyring::sign_message, public_key_hash, std::move(to_sign), [certificate = std::move(certificate), promise = std::move(promise)](td::Result R) mutable { 
if (R.is_error()) { @@ -4813,6 +4983,15 @@ int main(int argc, char *argv[]) { acts.push_back( [&x]() { td::actor::send_closure(x, &ValidatorEngine::set_fast_state_serializer_enabled, true); }); }); + p.add_option( + '\0', "collect-validator-telemetry", + "store validator telemetry from fast sync overlay to a given file (json format)", + [&](td::Slice s) { + acts.push_back( + [&x, s = s.str()]() { + td::actor::send_closure(x, &ValidatorEngine::set_validator_telemetry_filename, s); + }); + }); auto S = p.run(argc, argv); if (S.is_error()) { LOG(ERROR) << "failed to parse options: " << S.move_as_error(); diff --git a/validator-engine/validator-engine.hpp b/validator-engine/validator-engine.hpp index 4a8e9bdb4..827ba4b3f 100644 --- a/validator-engine/validator-engine.hpp +++ b/validator-engine/validator-engine.hpp @@ -67,14 +67,6 @@ struct Config { ton::UnixTime election_date; ton::UnixTime expire_at; }; - struct Collator { - ton::PublicKeyHash adnl_id; - ton::ShardIdFull shard; - - bool operator==(const Collator& b) const { - return adnl_id == b.adnl_id && shard == b.shard; - } - }; struct Control { ton::PublicKeyHash key; std::map clients; @@ -90,7 +82,9 @@ struct Config { std::map adnl_ids; std::set dht_ids; std::map validators; - std::vector collators; + std::map> collators; + bool collator_node_whiltelist_enabled = false; + std::set collator_node_whitelist; ton::PublicKeyHash full_node = ton::PublicKeyHash::zero(); std::vector full_node_slaves; std::map full_node_masters; @@ -120,8 +114,8 @@ struct Config { ton::UnixTime expire_at); td::Result config_add_validator_adnl_id(ton::PublicKeyHash perm_key, ton::PublicKeyHash adnl_id, ton::UnixTime expire_at); - td::Result config_add_collator(ton::PublicKeyHash addr, ton::ShardIdFull shard); - td::Result config_del_collator(ton::PublicKeyHash addr, ton::ShardIdFull shard); + td::Result config_add_collator(ton::adnl::AdnlNodeIdShort addr, ton::ShardIdFull shard); + td::Result config_del_collator(ton::adnl::AdnlNodeIdShort 
addr, ton::ShardIdFull shard); td::Result config_add_full_node_adnl_id(ton::PublicKeyHash id); td::Result config_add_full_node_slave(td::IPAddress addr, ton::PublicKey id); td::Result config_add_full_node_master(td::int32 port, ton::PublicKeyHash id); @@ -239,6 +233,7 @@ class ValidatorEngine : public td::actor::Actor { ton::BlockSeqno truncate_seqno_{0}; std::string session_logs_file_; bool fast_state_serializer_enabled_ = false; + std::string validator_telemetry_filename_; bool not_all_shards_ = false; std::vector add_shard_cmds_; @@ -329,6 +324,9 @@ class ValidatorEngine : public td::actor::Actor { void set_fast_state_serializer_enabled(bool value) { fast_state_serializer_enabled_ = value; } + void set_validator_telemetry_filename(std::string value) { + validator_telemetry_filename_ = std::move(value); + } void set_not_all_shards() { not_all_shards_ = true; } @@ -537,10 +535,21 @@ class ValidatorEngine : public td::actor::Actor { ton::PublicKeyHash src, td::uint32 perm, td::Promise promise); void run_control_query(ton::ton_api::engine_validator_setCollatorOptionsJson &query, td::BufferSlice data, ton::PublicKeyHash src, td::uint32 perm, td::Promise promise); + void run_control_query(ton::ton_api::engine_validator_collatorNodeSetWhitelistedValidator &query, + td::BufferSlice data, ton::PublicKeyHash src, td::uint32 perm, + td::Promise promise); + void run_control_query(ton::ton_api::engine_validator_collatorNodeSetWhitelistEnabled &query, td::BufferSlice data, + ton::PublicKeyHash src, td::uint32 perm, td::Promise promise); + void run_control_query(ton::ton_api::engine_validator_showCollatorNodeWhitelist &query, td::BufferSlice data, + ton::PublicKeyHash src, td::uint32 perm, td::Promise promise); void run_control_query(ton::ton_api::engine_validator_setCollatorsList &query, td::BufferSlice data, ton::PublicKeyHash src, td::uint32 perm, td::Promise promise); + void run_control_query(ton::ton_api::engine_validator_clearCollatorsList &query, td::BufferSlice data, + 
ton::PublicKeyHash src, td::uint32 perm, td::Promise promise); void run_control_query(ton::ton_api::engine_validator_showCollatorsList &query, td::BufferSlice data, ton::PublicKeyHash src, td::uint32 perm, td::Promise promise); + void run_control_query(ton::ton_api::engine_validator_getCollationManagerStats &query, td::BufferSlice data, + ton::PublicKeyHash src, td::uint32 perm, td::Promise promise); void run_control_query(ton::ton_api::engine_validator_signOverlayMemberCertificate &query, td::BufferSlice data, ton::PublicKeyHash src, td::uint32 perm, td::Promise promise); void run_control_query(ton::ton_api::engine_validator_importFastSyncMemberCertificate &query, td::BufferSlice data, diff --git a/validator/CMakeLists.txt b/validator/CMakeLists.txt index 60f66af37..ca403f5d9 100644 --- a/validator/CMakeLists.txt +++ b/validator/CMakeLists.txt @@ -57,7 +57,9 @@ set(VALIDATOR_HEADERS import-db-slice.hpp queue-size-counter.hpp + validator-telemetry.hpp + collation-manager.hpp collator-node.hpp manager-disk.h manager-disk.hpp @@ -74,6 +76,7 @@ set(VALIDATOR_HEADERS set(VALIDATOR_SOURCE apply-block.cpp block-handle.cpp + collation-manager.cpp collator-node.cpp get-next-key-blocks.cpp import-db-slice.cpp @@ -85,6 +88,7 @@ set(VALIDATOR_SOURCE validator-group.cpp validator-options.cpp queue-size-counter.cpp + validator-telemetry.cpp downloaders/wait-block-data.cpp downloaders/wait-block-state.cpp diff --git a/validator/collation-manager.cpp b/validator/collation-manager.cpp new file mode 100644 index 000000000..2ca3a5f10 --- /dev/null +++ b/validator/collation-manager.cpp @@ -0,0 +1,373 @@ +/* + This file is part of TON Blockchain Library. + + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. 
+ + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#include "collation-manager.hpp" + +#include "collator-node.hpp" +#include "fabric.h" +#include "td/utils/Random.h" + +#include +#include +#include + +namespace ton::validator { + +void CollationManager::start_up() { + td::actor::send_closure(rldp_, &rldp::Rldp::add_id, local_id_); + update_collators_list(*opts_->get_collators_list()); +} + +void CollationManager::collate_block(ShardIdFull shard, BlockIdExt min_masterchain_block_id, + std::vector prev, Ed25519_PublicKey creator, + td::Ref validator_set, td::uint64 max_answer_size, + td::CancellationToken cancellation_token, td::Promise promise) { + if (shard.is_masterchain()) { + run_collate_query(shard, min_masterchain_block_id, std::move(prev), creator, std::move(validator_set), + opts_->get_collator_options(), manager_, td::Timestamp::in(10.0), std::move(promise), + std::move(cancellation_token), 0); + return; + } + collate_shard_block(shard, min_masterchain_block_id, std::move(prev), creator, std::move(validator_set), + max_answer_size, std::move(cancellation_token), std::move(promise), td::Timestamp::in(10.0)); +} + +void CollationManager::collate_shard_block(ShardIdFull shard, BlockIdExt min_masterchain_block_id, + std::vector prev, Ed25519_PublicKey creator, + td::Ref validator_set, td::uint64 max_answer_size, + td::CancellationToken cancellation_token, + td::Promise promise, td::Timestamp timeout) { + TRY_STATUS_PROMISE(promise, cancellation_token.check()); + ShardInfo* s = select_shard_info(shard); + if (s == nullptr) { + promise.set_error( + td::Status::Error(PSTRING() << "shard " << shard.to_str() << " is not 
configured in collators list")); + return; + } + + adnl::AdnlNodeIdShort selected_collator = adnl::AdnlNodeIdShort::zero(); + size_t selected_idx = 0; + switch (s->select_mode) { + case CollatorsList::mode_random: { + int cnt = 0; + for (size_t i = 0; i < s->collators.size(); ++i) { + adnl::AdnlNodeIdShort collator = s->collators[i]; + if (collators_[collator].alive) { + ++cnt; + if (td::Random::fast(1, cnt) == 1) { + selected_collator = collator; + selected_idx = i; + } + } + } + break; + } + case CollatorsList::mode_ordered: { + for (size_t i = 0; i < s->collators.size(); ++i) { + adnl::AdnlNodeIdShort collator = s->collators[i]; + if (collators_[collator].alive) { + selected_collator = collator; + selected_idx = i; + break; + } + } + break; + } + case CollatorsList::mode_round_robin: { + size_t iters = 0; + for (size_t i = s->cur_idx; iters < s->collators.size(); (++i) %= s->collators.size()) { + adnl::AdnlNodeIdShort& collator = s->collators[i]; + if (collators_[collator].alive) { + selected_collator = collator; + selected_idx = i; + s->cur_idx = (i + 1) % s->collators.size(); + break; + } + } + break; + } + } + + if (selected_collator.is_zero() && s->self_collate) { + run_collate_query(shard, min_masterchain_block_id, std::move(prev), creator, std::move(validator_set), + opts_->get_collator_options(), manager_, td::Timestamp::in(10.0), std::move(promise), + std::move(cancellation_token), 0); + return; + } + + std::vector> prev_blocks; + BlockId next_block_id{shard, 0}; + for (const BlockIdExt& p : prev) { + prev_blocks.push_back(create_tl_block_id(p)); + next_block_id.seqno = std::max(next_block_id.seqno, p.seqno() + 1); + } + + promise = [=, SelfId = actor_id(this), promise = std::move(promise), + retry_at = td::Timestamp::in(0.5)](td::Result R) mutable { + if (R.is_ok()) { + promise.set_value(R.move_as_ok()); + return; + } + if (!selected_collator.is_zero()) { + td::actor::send_closure(SelfId, &CollationManager::on_collate_query_error, selected_collator); + 
} + LOG(INFO) << "ERROR: collate query for " << next_block_id.to_str() << " to #" << selected_idx << " (" + << selected_collator << "): " << R.error(); + if (timeout < retry_at) { + promise.set_error(R.move_as_error()); + return; + } + delay_action( + [=, promise = std::move(promise)]() mutable { + td::actor::send_closure(SelfId, &CollationManager::collate_shard_block, shard, min_masterchain_block_id, prev, + creator, validator_set, max_answer_size, cancellation_token, std::move(promise), + timeout); + }, + retry_at); + }; + + if (selected_collator.is_zero()) { + promise.set_error(td::Status::Error(PSTRING() << "shard " << shard.to_str() << " has no alive collator node")); + return; + } + + td::BufferSlice query = create_serialize_tl_object( + create_tl_shard_id(shard), validator_set->get_catchain_seqno(), std::move(prev_blocks), creator.as_bits256()); + LOG(INFO) << "sending collate query for " << next_block_id.to_str() << ": send to #" << selected_idx << "(" + << selected_collator << ")"; + + td::Promise P = [=, SelfId = actor_id(this), promise = std::move(promise), + timer = td::Timer()](td::Result R) mutable { + TRY_RESULT_PROMISE_PREFIX(promise, data, std::move(R), "rldp query failed: "); + auto r_error = fetch_tl_object(data, true); + if (r_error.is_ok()) { + auto error = r_error.move_as_ok(); + promise.set_error(td::Status::Error(error->code_, error->message_)); + return; + } + TRY_RESULT_PROMISE(promise, f, fetch_tl_object(data, true)); + TRY_RESULT_PROMISE(promise, candidate, + CollatorNode::deserialize_candidate(std::move(f), td::narrow_cast(max_answer_size))); + if (candidate.pubkey.as_bits256() != creator.as_bits256()) { + promise.set_error(td::Status::Error("collate query: block candidate source mismatch")); + return; + } + if (candidate.id.id != next_block_id) { + promise.set_error(td::Status::Error("collate query: block id mismatch")); + return; + } + LOG(INFO) << "got collated block " << next_block_id.to_str() << " from #" << selected_idx << " (" + 
<< selected_collator << ") in " << timer.elapsed() << "s"; + promise.set_result(std::move(candidate)); + }; + td::actor::send_closure(rldp_, &rldp::Rldp::send_query_ex, local_id_, selected_collator, "collatequery", std::move(P), + timeout, std::move(query), max_answer_size); +} + +void CollationManager::update_options(td::Ref opts) { + auto old_list = opts_->get_collators_list(); + opts_ = std::move(opts); + auto list = opts_->get_collators_list(); + if (old_list != list) { + update_collators_list(*list); + } +} + +void CollationManager::validator_group_started(ShardIdFull shard) { + if (active_validator_groups_[shard]++ != 0) { + return; + } + ShardInfo* s = select_shard_info(shard); + if (s == nullptr) { + return; + } + if (s->active_cnt++ != 0) { + return; + } + for (adnl::AdnlNodeIdShort id : s->collators) { + CollatorInfo& collator = collators_[id]; + collator.active_cnt++; + } + alarm(); +} + +void CollationManager::validator_group_finished(ShardIdFull shard) { + if (--active_validator_groups_[shard] != 0) { + return; + } + active_validator_groups_.erase(shard); + ShardInfo* s = select_shard_info(shard); + if (s == nullptr) { + return; + } + if (--s->active_cnt != 0) { + return; + } + for (adnl::AdnlNodeIdShort id : s->collators) { + CollatorInfo& collator = collators_[id]; + --collator.active_cnt; + } + alarm(); +} + +void CollationManager::get_stats( + td::Promise> promise) { + auto stats = create_tl_object(); + stats->adnl_id_ = local_id_.bits256_value(); + for (ShardInfo& s : shards_) { + auto obj = create_tl_object(); + obj->shard_id_ = create_tl_shard_id(s.shard_id); + obj->active_ = s.active_cnt; + obj->self_collate_ = s.self_collate; + switch (s.select_mode) { + case CollatorsList::mode_random: + obj->select_mode_ = "random"; + break; + case CollatorsList::mode_ordered: + obj->select_mode_ = "ordered"; + break; + case CollatorsList::mode_round_robin: + obj->select_mode_ = "round_robin"; + break; + } + for (adnl::AdnlNodeIdShort& id : s.collators) { + 
obj->collators_.push_back(id.bits256_value()); + } + stats->shards_.push_back(std::move(obj)); + } + for (auto& [id, collator] : collators_) { + auto obj = create_tl_object(); + obj->adnl_id_ = id.bits256_value(); + obj->active_ = collator.active_cnt; + obj->alive_ = collator.alive; + if (collator.active_cnt && !collator.sent_ping) { + obj->ping_in_ = collator.ping_at.in(); + } else { + obj->ping_in_ = -1.0; + } + stats->collators_.push_back(std::move(obj)); + } + promise.set_value(std::move(stats)); +} + +void CollationManager::update_collators_list(const CollatorsList& collators_list) { + shards_.clear(); + for (auto& [_, collator] : collators_) { + collator.active_cnt = 0; + } + auto old_collators = std::move(collators_); + collators_.clear(); + for (const auto& shard : collators_list.shards) { + shards_.push_back({.shard_id = shard.shard_id, .select_mode = shard.select_mode, .collators = shard.collators}); + for (auto id : shard.collators) { + auto it = old_collators.find(id); + if (it == old_collators.end()) { + collators_[id]; + } else { + collators_[id] = std::move(it->second); + old_collators.erase(it); + } + } + } + for (auto& [shard, _] : active_validator_groups_) { + ShardInfo* s = select_shard_info(shard); + if (s == nullptr) { + continue; + } + if (s->active_cnt++ != 0) { + continue; + } + for (adnl::AdnlNodeIdShort id : s->collators) { + CollatorInfo& collator = collators_[id]; + collator.active_cnt++; + } + } + alarm(); +} + +CollationManager::ShardInfo* CollationManager::select_shard_info(ShardIdFull shard) { + for (auto& s : shards_) { + if (shard_intersects(shard, s.shard_id)) { + return &s; + } + } + return nullptr; +} + +void CollationManager::alarm() { + alarm_timestamp() = td::Timestamp::never(); + for (auto& [id, collator] : collators_) { + if (collator.active_cnt == 0 || collator.sent_ping) { + continue; + } + if (collator.ping_at.is_in_past()) { + collator.sent_ping = true; + td::BufferSlice query = create_serialize_tl_object(0); + 
td::Promise P = [=, SelfId = actor_id(this)](td::Result R) mutable { + td::actor::send_closure(SelfId, &CollationManager::got_pong, id, std::move(R)); + }; + LOG(DEBUG) << "sending ping to " << id; + td::actor::send_closure(rldp_, &rldp::Rldp::send_query, local_id_, id, "collatorping", std::move(P), + td::Timestamp::in(2.0), std::move(query)); + } else { + alarm_timestamp().relax(collator.ping_at); + } + } +} + +void CollationManager::got_pong(adnl::AdnlNodeIdShort id, td::Result R) { + auto it = collators_.find(id); + if (it == collators_.end()) { + return; + } + CollatorInfo& collator = it->second; + collator.sent_ping = false; + + auto r_pong = [&]() -> td::Result> { + TRY_RESULT_PREFIX(data, std::move(R), "rldp query error: "); + auto r_error = fetch_tl_object(data, true); + if (r_error.is_ok()) { + auto error = r_error.move_as_ok(); + return td::Status::Error(error->code_, error->message_); + } + return fetch_tl_object(data, true); + }(); + if (r_pong.is_error()) { + LOG(DEBUG) << "pong from " << id << " : " << r_pong.move_as_error(); + collator.alive = false; + } else { + LOG(DEBUG) << "pong from " << id << " : OK"; + collator.alive = true; + } + collator.ping_at = td::Timestamp::in(td::Random::fast(10.0, 20.0)); + if (collator.active_cnt && !collator.sent_ping) { + alarm_timestamp().relax(collator.ping_at); + } +} + +void CollationManager::on_collate_query_error(adnl::AdnlNodeIdShort id) { + auto it = collators_.find(id); + if (it == collators_.end()) { + return; + } + CollatorInfo& collator = it->second; + collator.ping_at = td::Timestamp::now(); + if (collator.active_cnt && !collator.sent_ping) { + alarm_timestamp().relax(collator.ping_at); + } +} + +} // namespace ton::validator diff --git a/validator/collation-manager.hpp b/validator/collation-manager.hpp new file mode 100644 index 000000000..7ceea1e6b --- /dev/null +++ b/validator/collation-manager.hpp @@ -0,0 +1,87 @@ +/* + This file is part of TON Blockchain Library. 
+ + TON Blockchain Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + TON Blockchain Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with TON Blockchain Library. If not, see . +*/ +#pragma once + +#include "interfaces/validator-manager.h" +#include "rldp/rldp.h" +#include + +namespace ton::validator { + +class ValidatorManager; + +class CollationManager : public td::actor::Actor { + public: + CollationManager(adnl::AdnlNodeIdShort local_id, td::Ref opts, + td::actor::ActorId manager, td::actor::ActorId rldp) + : local_id_(local_id), opts_(opts), manager_(manager), rldp_(rldp) { + } + + void start_up() override; + void alarm() override; + + void collate_block(ShardIdFull shard, BlockIdExt min_masterchain_block_id, std::vector prev, + Ed25519_PublicKey creator, td::Ref validator_set, td::uint64 max_answer_size, + td::CancellationToken cancellation_token, td::Promise promise); + + void update_options(td::Ref opts); + + void validator_group_started(ShardIdFull shard); + void validator_group_finished(ShardIdFull shard); + + void get_stats(td::Promise> promise); + + private: + adnl::AdnlNodeIdShort local_id_; + td::Ref opts_; + td::actor::ActorId manager_; + td::actor::ActorId rldp_; + + void collate_shard_block(ShardIdFull shard, BlockIdExt min_masterchain_block_id, std::vector prev, + Ed25519_PublicKey creator, td::Ref validator_set, td::uint64 max_answer_size, + td::CancellationToken cancellation_token, td::Promise promise, + td::Timestamp timeout); + + void update_collators_list(const 
CollatorsList& collators_list); + + struct CollatorInfo { + bool alive = false; + td::Timestamp ping_at = td::Timestamp::now(); + bool sent_ping = false; + size_t active_cnt = 0; + }; + std::map collators_; + + struct ShardInfo { + ShardIdFull shard_id; + CollatorsList::SelectMode select_mode; + std::vector collators; + bool self_collate = false; + size_t cur_idx = 0; + + size_t active_cnt = 0; + }; + std::vector shards_; + + std::map active_validator_groups_; + + ShardInfo* select_shard_info(ShardIdFull shard); + void got_pong(adnl::AdnlNodeIdShort id, td::Result R); + void on_collate_query_error(adnl::AdnlNodeIdShort id); +}; + +} // namespace ton::validator diff --git a/validator/collator-node.cpp b/validator/collator-node.cpp index 0b4204d86..cf2770d93 100644 --- a/validator/collator-node.cpp +++ b/validator/collator-node.cpp @@ -54,17 +54,22 @@ void CollatorNode::start_up() { td::actor::send_closure(adnl_, &adnl::Adnl::subscribe, local_id_, adnl::Adnl::int_to_bytestring(ton_api::collatorNode_generateBlock::ID), std::make_unique(actor_id(this))); + td::actor::send_closure(adnl_, &adnl::Adnl::subscribe, local_id_, + adnl::Adnl::int_to_bytestring(ton_api::collatorNode_ping::ID), + std::make_unique(actor_id(this))); td::actor::send_closure(rldp_, &rldp::Rldp::add_id, adnl::AdnlNodeIdShort(local_id_)); } void CollatorNode::tear_down() { td::actor::send_closure(adnl_, &adnl::Adnl::unsubscribe, local_id_, adnl::Adnl::int_to_bytestring(ton_api::collatorNode_generateBlock::ID)); + td::actor::send_closure(adnl_, &adnl::Adnl::unsubscribe, local_id_, + adnl::Adnl::int_to_bytestring(ton_api::collatorNode_ping::ID)); } void CollatorNode::add_shard(ShardIdFull shard) { CHECK(shard.is_valid_ext() && !shard.is_masterchain()); - if (std::find(collating_shards_.begin(), collating_shards_.end(), shard) != collating_shards_.end()) { + if (std::ranges::find(collating_shards_, shard) != collating_shards_.end()) { return; } LOG(INFO) << "Collator node: local_id=" << local_id_ << " , 
shard=" << shard.to_str(); @@ -72,7 +77,7 @@ void CollatorNode::add_shard(ShardIdFull shard) { } void CollatorNode::del_shard(ShardIdFull shard) { - auto it = std::find(collating_shards_.begin(), collating_shards_.end(), shard); + auto it = std::ranges::find(collating_shards_, shard); if (it != collating_shards_.end()) { collating_shards_.erase(it); } @@ -127,7 +132,7 @@ void CollatorNode::new_masterchain_block_notification(td::Ref } } for (auto it = validator_groups_.begin(); it != validator_groups_.end();) { - if (new_shards.count(it->first)) { + if (new_shards.contains(it->first)) { ++it; } else { it->second.cleanup(); @@ -253,7 +258,7 @@ void CollatorNode::CacheEntry::cancel(td::Status reason) { } static td::BufferSlice serialize_error(td::Status error) { - return create_serialize_tl_object(error.code(), error.message().c_str()); + return create_serialize_tl_object(error.code(), error.message().c_str()); } static BlockCandidate change_creator(BlockCandidate block, Ed25519_PublicKey creator, CatchainSeqno& cc_seqno, @@ -285,25 +290,32 @@ static BlockCandidate change_creator(BlockCandidate block, Ed25519_PublicKey cre void CollatorNode::receive_query(adnl::AdnlNodeIdShort src, td::BufferSlice data, td::Promise promise) { - td::Promise new_promise = [promise = std::move(promise), src](td::Result R) mutable { + promise = [promise = std::move(promise)](td::Result R) mutable { if (R.is_error()) { - LOG(INFO) << "adnl query from " << src << ", error: " << R.error(); if (R.error().code() == ErrorCode::timeout) { promise.set_error(R.move_as_error()); } else { promise.set_result(serialize_error(R.move_as_error())); } } else { - LOG(INFO) << "adnl query from " << src << ", success"; - promise.set_result(create_serialize_tl_object( - serialize_candidate(R.move_as_ok(), true))); + promise.set_result(R.move_as_ok()); } }; - if (!validator_adnl_ids_.count(src)) { - new_promise.set_error(td::Status::Error("src is not a validator")); + if 
(!opts_->check_collator_node_whitelist(src)) { + promise.set_error(td::Status::Error("not authorized")); + return; + } + if (!validator_adnl_ids_.contains(src)) { + promise.set_error(td::Status::Error("src is not a validator")); + return; + } + auto r_ping = fetch_tl_object(data, true); + if (r_ping.is_ok()) { + process_ping(src, *r_ping.ok_ref(), std::move(promise)); return; } - TRY_RESULT_PROMISE(new_promise, f, fetch_tl_object(data, true)); + + TRY_RESULT_PROMISE(promise, f, fetch_tl_object(data, true)); ShardIdFull shard = create_shard_id(f->shard_); CatchainSeqno cc_seqno = f->cc_seqno_; std::vector prev_blocks; @@ -311,6 +323,16 @@ void CollatorNode::receive_query(adnl::AdnlNodeIdShort src, td::BufferSlice data prev_blocks.push_back(create_block_id(b)); } Ed25519_PublicKey creator(f->creator_); + td::Promise new_promise = [promise = std::move(promise), src, + shard](td::Result R) mutable { + if (R.is_error()) { + LOG(INFO) << "collate query from " << src << ", shard=" << shard.to_str() << ": error: " << R.error(); + promise.set_error(R.move_as_error()); + } else { + LOG(INFO) << "collate query from " << src << ", shard=" << shard.to_str() << ": success"; + promise.set_result(serialize_tl_object(serialize_candidate(R.move_as_ok(), true), true)); + } + }; new_promise = [new_promise = std::move(new_promise), creator, manager = manager_](td::Result R) mutable { TRY_RESULT_PROMISE(new_promise, block, std::move(R)); @@ -409,7 +431,8 @@ void CollatorNode::generate_block(ShardIdFull shard, CatchainSeqno cc_seqno, std << ", time=" << timer.elapsed() << ": " << (R.is_ok() ? 
"OK" : R.error().to_string()); td::actor::send_closure(SelfId, &CollatorNode::process_result, cache_entry, std::move(R)); }, - cache_entry->cancellation_token_source.get_cancellation_token(), CollateMode::skip_store_candidate); + cache_entry->cancellation_token_source.get_cancellation_token(), + CollateMode::skip_store_candidate | CollateMode::from_collator_node); } void CollatorNode::process_result(std::shared_ptr cache_entry, td::Result R) { @@ -427,9 +450,15 @@ void CollatorNode::process_result(std::shared_ptr cache_entry, td::R cache_entry->promises.clear(); } +void CollatorNode::process_ping(adnl::AdnlNodeIdShort src, ton_api::collatorNode_ping& ping, + td::Promise promise) { + LOG(DEBUG) << "got ping from " << src; + promise.set_result(create_serialize_tl_object(0)); +} + bool CollatorNode::can_collate_shard(ShardIdFull shard) const { - return std::any_of(collating_shards_.begin(), collating_shards_.end(), - [&](const ShardIdFull& our_shard) { return shard_intersects(shard, our_shard); }); + return std::ranges::any_of(collating_shards_, + [&](const ShardIdFull& our_shard) { return shard_intersects(shard, our_shard); }); } tl_object_ptr CollatorNode::serialize_candidate(const BlockCandidate& block, diff --git a/validator/collator-node.hpp b/validator/collator-node.hpp index 1a71f08a4..a361ceb30 100644 --- a/validator/collator-node.hpp +++ b/validator/collator-node.hpp @@ -43,6 +43,7 @@ class CollatorNode : public td::actor::Actor { private: void receive_query(adnl::AdnlNodeIdShort src, td::BufferSlice data, td::Promise promise); + void process_ping(adnl::AdnlNodeIdShort src, ton_api::collatorNode_ping& ping, td::Promise promise); bool can_collate_shard(ShardIdFull shard) const; diff --git a/validator/fabric.h b/validator/fabric.h index 629065176..20358822e 100644 --- a/validator/fabric.h +++ b/validator/fabric.h @@ -27,7 +27,7 @@ namespace ton { namespace validator { enum ValidateMode { fake = 1 }; -enum CollateMode { skip_store_candidate = 1 }; +enum 
CollateMode { skip_store_candidate = 1, from_collator_node = 2 }; td::actor::ActorOwn create_db_actor(td::actor::ActorId manager, std::string db_root_, td::Ref opts); diff --git a/validator/full-node-fast-sync-overlays.cpp b/validator/full-node-fast-sync-overlays.cpp index a5789771b..270c53f07 100644 --- a/validator/full-node-fast-sync-overlays.cpp +++ b/validator/full-node-fast-sync-overlays.cpp @@ -87,9 +87,42 @@ void FullNodeFastSyncOverlay::process_block_candidate_broadcast(PublicKeyHash sr validator_set_hash, std::move(data)); } +void FullNodeFastSyncOverlay::process_telemetry_broadcast( + adnl::AdnlNodeIdShort src, const tl_object_ptr &telemetry) { + if (telemetry->adnl_id_ != src.bits256_value()) { + VLOG(FULL_NODE_WARNING) << "Invalid telemetry broadcast from " << src << ": adnl_id mismatch"; + return; + } + auto now = (td::int32)td::Clocks::system(); + if (telemetry->timestamp_ < now - 60) { + VLOG(FULL_NODE_WARNING) << "Invalid telemetry broadcast from " << src << ": too old (" + << now - telemetry->timestamp_ << "s ago)"; + return; + } + if (telemetry->timestamp_ > now + 60) { + VLOG(FULL_NODE_WARNING) << "Invalid telemetry broadcast from " << src << ": too new (" + << telemetry->timestamp_ - now << "s in the future)"; + return; + } + VLOG(FULL_NODE_DEBUG) << "Got telemetry broadcast from " << src; + auto s = td::json_encode(td::ToJson(*telemetry), false); + std::erase_if(s, [](char c) { return c == '\n' || c == '\r'; }); + telemetry_file_ << s << "\n"; + telemetry_file_.flush(); + if (telemetry_file_.fail()) { + VLOG(FULL_NODE_WARNING) << "Failed to write telemetry to file"; + } +} + void FullNodeFastSyncOverlay::receive_broadcast(PublicKeyHash src, td::BufferSlice broadcast) { auto B = fetch_tl_object(std::move(broadcast), true); if (B.is_error()) { + if (collect_telemetry_ && src != local_id_.pubkey_hash()) { + auto R = fetch_tl_prefix(broadcast, true); + if (R.is_ok()) { + process_telemetry_broadcast(adnl::AdnlNodeIdShort{src}, R.ok()); + } + } 
return; } @@ -143,6 +176,30 @@ void FullNodeFastSyncOverlay::send_block_candidate(BlockIdExt block_id, Catchain local_id_.pubkey_hash(), overlay::Overlays::BroadcastFlagAnySender(), B.move_as_ok()); } +void FullNodeFastSyncOverlay::send_validator_telemetry(tl_object_ptr telemetry) { + process_telemetry_broadcast(local_id_, telemetry); + auto data = serialize_tl_object(telemetry, true); + if (data.size() <= overlay::Overlays::max_simple_broadcast_size()) { + td::actor::send_closure(overlays_, &overlay::Overlays::send_broadcast_ex, local_id_, overlay_id_, + local_id_.pubkey_hash(), 0, std::move(data)); + } else { + td::actor::send_closure(overlays_, &overlay::Overlays::send_broadcast_fec_ex, local_id_, overlay_id_, + local_id_.pubkey_hash(), 0, std::move(data)); + } +} + +void FullNodeFastSyncOverlay::collect_validator_telemetry(std::string filename) { + if (collect_telemetry_) { + telemetry_file_.close(); + } + collect_telemetry_ = true; + LOG(FULL_NODE_WARNING) << "Collecting validator telemetry to " << filename << " (local id: " << local_id_ << ")"; + telemetry_file_.open(filename, std::ios_base::app); + if (!telemetry_file_.is_open()) { + LOG(WARNING) << "Cannot open file " << filename << " for validator telemetry"; + } +} + void FullNodeFastSyncOverlay::start_up() { auto X = create_hash_tl_object(zero_state_file_hash_, create_tl_shard_id(shard_)); td::BufferSlice b{32}; @@ -261,14 +318,15 @@ void FullNodeFastSyncOverlay::get_stats_extra(td::Promise promise) promise.set_result(td::json_encode(td::ToJson(*res), true)); } -td::actor::ActorId FullNodeFastSyncOverlays::choose_overlay(ShardIdFull shard) { +std::pair, adnl::AdnlNodeIdShort> FullNodeFastSyncOverlays::choose_overlay( + ShardIdFull shard) { for (auto &p : id_to_overlays_) { auto &overlays = p.second.overlays_; ShardIdFull cur_shard = shard; while (true) { auto it = overlays.find(cur_shard); if (it != overlays.end()) { - return it->second.get(); + return {it->second.get(), p.first}; } if 
(cur_shard.pfx_len() == 0) { break; @@ -276,7 +334,20 @@ td::actor::ActorId FullNodeFastSyncOverlays::choose_ove cur_shard = shard_parent(cur_shard); } } - return {}; + return {td::actor::ActorId{}, adnl::AdnlNodeIdShort::zero()}; +} + +td::actor::ActorId FullNodeFastSyncOverlays::get_masterchain_overlay_for( + adnl::AdnlNodeIdShort adnl_id) { + auto it = id_to_overlays_.find(adnl_id); + if (it == id_to_overlays_.end()) { + return {}; + } + auto it2 = it->second.overlays_.find(ShardIdFull{masterchainId}); + if (it2 == it->second.overlays_.end()) { + return {}; + } + return it2->second.get(); } void FullNodeFastSyncOverlays::update_overlays(td::Ref state, @@ -291,7 +362,7 @@ void FullNodeFastSyncOverlays::update_overlays(td::Ref state, monitoring_shards.insert(ShardIdFull{masterchainId}); std::set all_shards; all_shards.insert(ShardIdFull{masterchainId}); - for (const auto& desc : state->get_shards()) { + for (const auto &desc : state->get_shards()) { ShardIdFull shard = desc->shard(); td::uint32 monitor_min_split = state->monitor_min_split_depth(shard.workchain); if (shard.pfx_len() > monitor_min_split) { diff --git a/validator/full-node-fast-sync-overlays.hpp b/validator/full-node-fast-sync-overlays.hpp index b89f88024..05d83071d 100644 --- a/validator/full-node-fast-sync-overlays.hpp +++ b/validator/full-node-fast-sync-overlays.hpp @@ -17,6 +17,7 @@ #pragma once #include "full-node.h" +#include namespace ton::validator::fullnode { @@ -32,6 +33,9 @@ class FullNodeFastSyncOverlay : public td::actor::Actor { void process_broadcast(PublicKeyHash src, ton_api::tonNode_newBlockCandidateBroadcastCompressed& query); void process_block_candidate_broadcast(PublicKeyHash src, ton_api::tonNode_Broadcast& query); + void process_telemetry_broadcast(adnl::AdnlNodeIdShort src, + const tl_object_ptr& telemetry); + template void process_broadcast(PublicKeyHash, T&) { VLOG(FULL_NODE_WARNING) << "dropping unknown broadcast"; @@ -42,6 +46,9 @@ class FullNodeFastSyncOverlay : public 
td::actor::Actor { void send_broadcast(BlockBroadcast broadcast); void send_block_candidate(BlockIdExt block_id, CatchainSeqno cc_seqno, td::uint32 validator_set_hash, td::BufferSlice data); + void send_validator_telemetry(tl_object_ptr telemetry); + + void collect_validator_telemetry(std::string filename); void start_up() override; void tear_down() override; @@ -96,11 +103,15 @@ class FullNodeFastSyncOverlay : public td::actor::Actor { void try_init(); void init(); void get_stats_extra(td::Promise promise); + + bool collect_telemetry_ = false; + std::ofstream telemetry_file_; }; class FullNodeFastSyncOverlays { public: - td::actor::ActorId choose_overlay(ShardIdFull shard); + std::pair, adnl::AdnlNodeIdShort> choose_overlay(ShardIdFull shard); + td::actor::ActorId get_masterchain_overlay_for(adnl::AdnlNodeIdShort adnl_id); void update_overlays(td::Ref state, std::set my_adnl_ids, std::set monitoring_shards, const FileHash& zero_state_file_hash, const td::actor::ActorId& keyring, const td::actor::ActorId& adnl, diff --git a/validator/full-node-private-overlay.cpp b/validator/full-node-private-overlay.cpp index e5ea1f0be..1acfbd4ed 100644 --- a/validator/full-node-private-overlay.cpp +++ b/validator/full-node-private-overlay.cpp @@ -19,6 +19,9 @@ #include "common/delay.h" #include "common/checksum.h" #include "full-node-serializer.hpp" +#include "auto/tl/ton_api_json.h" +#include "td/utils/JsonBuilder.h" +#include "tl/tl_json.h" namespace ton::validator::fullnode { @@ -85,15 +88,52 @@ void FullNodePrivateBlockOverlay::process_block_candidate_broadcast(PublicKeyHas validator_set_hash, std::move(data)); } +void FullNodePrivateBlockOverlay::process_telemetry_broadcast( + PublicKeyHash src, const tl_object_ptr& telemetry) { + if (telemetry->adnl_id_ != src.bits256_value()) { + VLOG(FULL_NODE_WARNING) << "Invalid telemetry broadcast from " << src << ": adnl_id mismatch"; + return; + } + auto now = (td::int32)td::Clocks::system(); + if (telemetry->timestamp_ < now - 60) { 
+ VLOG(FULL_NODE_WARNING) << "Invalid telemetry broadcast from " << src << ": too old (" + << now - telemetry->timestamp_ << "s ago)"; + return; + } + if (telemetry->timestamp_ > now + 60) { + VLOG(FULL_NODE_WARNING) << "Invalid telemetry broadcast from " << src << ": too new (" + << telemetry->timestamp_ - now << "s in the future)"; + return; + } + VLOG(FULL_NODE_DEBUG) << "Got telemetry broadcast from " << src; + auto s = td::json_encode(td::ToJson(*telemetry), false); + std::erase_if(s, [](char c) { + return c == '\n' || c == '\r'; + }); + telemetry_file_ << s << "\n"; + telemetry_file_.flush(); + if (telemetry_file_.fail()) { + VLOG(FULL_NODE_WARNING) << "Failed to write telemetry to file"; + } +} + void FullNodePrivateBlockOverlay::receive_broadcast(PublicKeyHash src, td::BufferSlice broadcast) { if (adnl::AdnlNodeIdShort{src} == local_id_) { return; } auto B = fetch_tl_object(std::move(broadcast), true); if (B.is_error()) { + if (collect_telemetry_ && src != local_id_.pubkey_hash()) { + auto R = fetch_tl_prefix(broadcast, true); + if (R.is_ok()) { + process_telemetry_broadcast(src, R.ok()); + } + } return; } - ton_api::downcast_call(*B.move_as_ok(), [src, Self = this](auto &obj) { Self->process_broadcast(src, obj); }); + ton_api::downcast_call(*B.move_as_ok(), [src, Self = this](auto& obj) { + Self->process_broadcast(src, obj); + }); } void FullNodePrivateBlockOverlay::send_shard_block_info(BlockIdExt block_id, CatchainSeqno cc_seqno, @@ -144,6 +184,30 @@ void FullNodePrivateBlockOverlay::send_broadcast(BlockBroadcast broadcast) { local_id_.pubkey_hash(), overlay::Overlays::BroadcastFlagAnySender(), B.move_as_ok()); } +void FullNodePrivateBlockOverlay::send_validator_telemetry(tl_object_ptr telemetry) { + process_telemetry_broadcast(local_id_.pubkey_hash(), telemetry); + auto data = serialize_tl_object(telemetry, true); + if (data.size() <= overlay::Overlays::max_simple_broadcast_size()) { + td::actor::send_closure(overlays_, 
&overlay::Overlays::send_broadcast_ex, local_id_, overlay_id_, + local_id_.pubkey_hash(), 0, std::move(data)); + } else { + td::actor::send_closure(overlays_, &overlay::Overlays::send_broadcast_fec_ex, local_id_, overlay_id_, + local_id_.pubkey_hash(), 0, std::move(data)); + } +} + +void FullNodePrivateBlockOverlay::collect_validator_telemetry(std::string filename) { + if (collect_telemetry_) { + telemetry_file_.close(); + } + collect_telemetry_ = true; + LOG(FULL_NODE_WARNING) << "Collecting validator telemetry to " << filename << " (local id: " << local_id_ << ")"; + telemetry_file_.open(filename, std::ios_base::app); + if (!telemetry_file_.is_open()) { + LOG(WARNING) << "Cannot open file " << filename << " for validator telemetry"; + } +} + void FullNodePrivateBlockOverlay::start_up() { std::sort(nodes_.begin(), nodes_.end()); nodes_.erase(std::unique(nodes_.begin(), nodes_.end()), nodes_.end()); diff --git a/validator/full-node-private-overlay.hpp b/validator/full-node-private-overlay.hpp index a0022fa03..1e29f8c93 100644 --- a/validator/full-node-private-overlay.hpp +++ b/validator/full-node-private-overlay.hpp @@ -17,6 +17,7 @@ #pragma once #include "full-node.h" +#include namespace ton::validator::fullnode { @@ -32,6 +33,8 @@ class FullNodePrivateBlockOverlay : public td::actor::Actor { void process_broadcast(PublicKeyHash src, ton_api::tonNode_newBlockCandidateBroadcastCompressed &query); void process_block_candidate_broadcast(PublicKeyHash src, ton_api::tonNode_Broadcast &query); + void process_telemetry_broadcast(PublicKeyHash src, const tl_object_ptr& telemetry); + template void process_broadcast(PublicKeyHash, T &) { VLOG(FULL_NODE_WARNING) << "dropping unknown broadcast"; @@ -42,6 +45,9 @@ class FullNodePrivateBlockOverlay : public td::actor::Actor { void send_block_candidate(BlockIdExt block_id, CatchainSeqno cc_seqno, td::uint32 validator_set_hash, td::BufferSlice data); void send_broadcast(BlockBroadcast broadcast); + void 
send_validator_telemetry(tl_object_ptr telemetry); + + void collect_validator_telemetry(std::string filename); void set_config(FullNodeConfig config) { config_ = std::move(config); @@ -91,6 +97,9 @@ class FullNodePrivateBlockOverlay : public td::actor::Actor { void try_init(); void init(); + + bool collect_telemetry_ = false; + std::ofstream telemetry_file_; }; class FullNodeCustomOverlay : public td::actor::Actor { diff --git a/validator/full-node.cpp b/validator/full-node.cpp index bb806e281..d927eef2a 100644 --- a/validator/full-node.cpp +++ b/validator/full-node.cpp @@ -69,6 +69,7 @@ void FullNodeImpl::del_permanent_key(PublicKeyHash key, td::Promise pr } local_keys_.erase(key); private_block_overlays_.erase(key); + update_validator_telemetry_collector(); for (auto &p : custom_overlays_) { update_custom_overlay(p.second); } @@ -278,6 +279,7 @@ void FullNodeImpl::on_new_masterchain_block(td::Ref state, std fast_sync_overlays_.update_overlays(state, std::move(my_adnl_ids), std::move(monitoring_shards), zero_state_file_hash_, keyring_, adnl_, overlays_, validator_manager_, actor_id(this)); + update_validator_telemetry_collector(); } } @@ -337,7 +339,7 @@ void FullNodeImpl::send_shard_block_info(BlockIdExt block_id, CatchainSeqno cc_s td::actor::send_closure(private_block_overlays_.begin()->second, &FullNodePrivateBlockOverlay::send_shard_block_info, block_id, cc_seqno, data.clone()); } - auto fast_sync_overlay = fast_sync_overlays_.choose_overlay(ShardIdFull(masterchainId)); + auto fast_sync_overlay = fast_sync_overlays_.choose_overlay(ShardIdFull(masterchainId)).first; if (!fast_sync_overlay.empty()) { td::actor::send_closure(fast_sync_overlay, &FullNodeFastSyncOverlay::send_shard_block_info, block_id, cc_seqno, data.clone()); @@ -357,7 +359,7 @@ void FullNodeImpl::send_block_candidate(BlockIdExt block_id, CatchainSeqno cc_se td::actor::send_closure(private_block_overlays_.begin()->second, &FullNodePrivateBlockOverlay::send_block_candidate, block_id, cc_seqno, 
validator_set_hash, data.clone()); } - auto fast_sync_overlay = fast_sync_overlays_.choose_overlay(block_id.shard_full()); + auto fast_sync_overlay = fast_sync_overlays_.choose_overlay(block_id.shard_full()).first; if (!fast_sync_overlay.empty()) { td::actor::send_closure(fast_sync_overlay, &FullNodeFastSyncOverlay::send_block_candidate, block_id, cc_seqno, validator_set_hash, data.clone()); @@ -382,7 +384,7 @@ void FullNodeImpl::send_broadcast(BlockBroadcast broadcast, int mode) { td::actor::send_closure(private_block_overlays_.begin()->second, &FullNodePrivateBlockOverlay::send_broadcast, broadcast.clone()); } - auto fast_sync_overlay = fast_sync_overlays_.choose_overlay(broadcast.block_id.shard_full()); + auto fast_sync_overlay = fast_sync_overlays_.choose_overlay(broadcast.block_id.shard_full()).first; if (!fast_sync_overlay.empty()) { td::actor::send_closure(fast_sync_overlay, &FullNodeFastSyncOverlay::send_broadcast, broadcast.clone()); } @@ -502,10 +504,16 @@ td::actor::ActorId FullNodeImpl::get_shard(ShardIdFull shard) { if (pfx_len > wc_monitor_min_split_) { shard = shard_prefix(shard, wc_monitor_min_split_); } - auto it = shards_.find(shard); - if (it != shards_.end()) { - update_shard_actor(shard, it->second.active); - return it->second.actor.get(); + while (true) { + auto it = shards_.find(shard); + if (it != shards_.end()) { + update_shard_actor(shard, it->second.active); + return it->second.actor.get(); + } + if (shard.pfx_len() == 0) { + break; + } + shard = shard_parent(shard); } // Special case if shards_ was not yet initialized. 
@@ -587,6 +595,24 @@ void FullNodeImpl::new_key_block(BlockHandle handle) { } } +void FullNodeImpl::send_validator_telemetry(PublicKeyHash key, tl_object_ptr telemetry) { + if (use_old_private_overlays_) { + auto it = private_block_overlays_.find(key); + if (it == private_block_overlays_.end()) { + VLOG(FULL_NODE_INFO) << "Cannot send validator telemetry for " << key << " : no private block overlay"; + return; + } + td::actor::send_closure(it->second, &FullNodePrivateBlockOverlay::send_validator_telemetry, std::move(telemetry)); + } else { + auto overlay = fast_sync_overlays_.get_masterchain_overlay_for(adnl::AdnlNodeIdShort{telemetry->adnl_id_}); + if (overlay.empty()) { + VLOG(FULL_NODE_INFO) << "Cannot send validator telemetry for adnl id " << key << " : no fast sync overlay"; + return; + } + td::actor::send_closure(overlay, &FullNodeFastSyncOverlay::send_validator_telemetry, std::move(telemetry)); + } +} + void FullNodeImpl::process_block_broadcast(BlockBroadcast broadcast) { send_block_broadcast_to_custom_overlays(broadcast); td::actor::send_closure(validator_manager_, &ValidatorManagerInterface::prevalidate_block, std::move(broadcast), @@ -609,7 +635,49 @@ void FullNodeImpl::process_block_candidate_broadcast(BlockIdExt block_id, Catcha std::move(data)); } +void FullNodeImpl::set_validator_telemetry_filename(std::string value) { + validator_telemetry_filename_ = std::move(value); + update_validator_telemetry_collector(); +} + +void FullNodeImpl::update_validator_telemetry_collector() { + if (use_old_private_overlays_) { + if (validator_telemetry_filename_.empty() || private_block_overlays_.empty()) { + validator_telemetry_collector_key_ = PublicKeyHash::zero(); + return; + } + if (!private_block_overlays_.contains(validator_telemetry_collector_key_)) { + auto it = private_block_overlays_.begin(); + validator_telemetry_collector_key_ = it->first; + td::actor::send_closure(it->second, &FullNodePrivateBlockOverlay::collect_validator_telemetry, + 
validator_telemetry_filename_); + } + } else { + if (validator_telemetry_filename_.empty()) { + validator_telemetry_collector_key_ = PublicKeyHash::zero(); + return; + } + if (fast_sync_overlays_.get_masterchain_overlay_for(adnl::AdnlNodeIdShort{validator_telemetry_collector_key_}) + .empty()) { + auto [actor, adnl_id] = fast_sync_overlays_.choose_overlay(ShardIdFull{masterchainId}); + validator_telemetry_collector_key_ = adnl_id.pubkey_hash(); + if (!actor.empty()) { + td::actor::send_closure(actor, &FullNodeFastSyncOverlay::collect_validator_telemetry, + validator_telemetry_filename_); + } + } + } +} + void FullNodeImpl::start_up() { + // TODO: enable fast sync overlays by other means (e.g. some config param) + // TODO: in the future - remove the old private overlay entirely + // This env var is for testing + auto fast_sync_env = getenv("TON_FAST_SYNC_OVERLAYS"); + if (fast_sync_env && !strcmp(fast_sync_env, "1")) { + use_old_private_overlays_ = false; + } + update_shard_actor(ShardIdFull{masterchainId}, true); if (local_id_.is_zero()) { if (adnl_id_.is_zero()) { @@ -689,6 +757,9 @@ void FullNodeImpl::start_up() { void new_key_block(BlockHandle handle) override { td::actor::send_closure(id_, &FullNodeImpl::new_key_block, std::move(handle)); } + void send_validator_telemetry(PublicKeyHash key, tl_object_ptr telemetry) override { + td::actor::send_closure(id_, &FullNodeImpl::send_validator_telemetry, key, std::move(telemetry)); + } explicit Callback(td::actor::ActorId id) : id_(id) { } @@ -707,6 +778,7 @@ void FullNodeImpl::update_private_overlays() { } private_block_overlays_.clear(); + update_validator_telemetry_collector(); if (local_keys_.empty()) { return; } @@ -728,6 +800,7 @@ void FullNodeImpl::create_private_block_overlay(PublicKeyHash key) { private_block_overlays_[key] = td::actor::create_actor( "BlocksPrivateOverlay", current_validators_[key], std::move(nodes), zero_state_file_hash_, config_, keyring_, adnl_, rldp_, rldp2_, overlays_, validator_manager_, 
actor_id(this)); + update_validator_telemetry_collector(); } } diff --git a/validator/full-node.h b/validator/full-node.h index 627c0e55b..553cbf8e2 100644 --- a/validator/full-node.h +++ b/validator/full-node.h @@ -94,6 +94,8 @@ class FullNode : public td::actor::Actor { virtual void process_block_candidate_broadcast(BlockIdExt block_id, CatchainSeqno cc_seqno, td::uint32 validator_set_hash, td::BufferSlice data) = 0; + virtual void set_validator_telemetry_filename(std::string value) = 0; + virtual void import_fast_sync_member_certificate(adnl::AdnlNodeIdShort local_id, overlay::OverlayMemberCertificate cert) = 0; diff --git a/validator/full-node.hpp b/validator/full-node.hpp index d2de18201..9e254d7d4 100644 --- a/validator/full-node.hpp +++ b/validator/full-node.hpp @@ -88,11 +88,14 @@ class FullNodeImpl : public FullNode { void got_key_block_config(td::Ref config); void new_key_block(BlockHandle handle); + void send_validator_telemetry(PublicKeyHash key, tl_object_ptr telemetry); void process_block_broadcast(BlockBroadcast broadcast) override; void process_block_candidate_broadcast(BlockIdExt block_id, CatchainSeqno cc_seqno, td::uint32 validator_set_hash, td::BufferSlice data) override; + void set_validator_telemetry_filename(std::string value) override; + void import_fast_sync_member_certificate(adnl::AdnlNodeIdShort local_id, overlay::OverlayMemberCertificate cert) override { fast_sync_overlays_.add_member_certificate(local_id, std::move(cert)); @@ -151,7 +154,7 @@ class FullNodeImpl : public FullNode { // Old overlays - one private overlay for all validators // New overlays (fast sync overlays) - semiprivate overlay per shard (monitor_min_split depth) // for validators and authorized nodes - bool use_old_private_overlays_ = false; // TODO: set from config or something + bool use_old_private_overlays_ = true; std::map> private_block_overlays_; bool broadcast_block_candidates_in_public_overlay_ = false; FullNodeFastSyncOverlays fast_sync_overlays_; @@ -170,6 
+173,11 @@ class FullNodeImpl : public FullNode { void send_block_broadcast_to_custom_overlays(const BlockBroadcast& broadcast); void send_block_candidate_broadcast_to_custom_overlays(const BlockIdExt& block_id, CatchainSeqno cc_seqno, td::uint32 validator_set_hash, const td::BufferSlice& data); + + std::string validator_telemetry_filename_; + PublicKeyHash validator_telemetry_collector_key_ = PublicKeyHash::zero(); + + void update_validator_telemetry_collector(); }; } // namespace fullnode diff --git a/validator/impl/collator-impl.h b/validator/impl/collator-impl.h index d91ee1bf6..72154f861 100644 --- a/validator/impl/collator-impl.h +++ b/validator/impl/collator-impl.h @@ -45,7 +45,7 @@ class Collator final : public td::actor::Actor { } static constexpr long long supported_capabilities() { return ton::capCreateStatsEnabled | ton::capBounceMsgBody | ton::capReportVersion | ton::capShortDequeue | - ton::capStoreOutMsgQueueSize | ton::capMsgMetadata | ton::capDeferMessages; + ton::capStoreOutMsgQueueSize | ton::capMsgMetadata | ton::capDeferMessages | ton::capFullCollatedData; } using LtCellRef = block::LtCellRef; using NewOutMsg = block::NewOutMsg; diff --git a/validator/impl/collator.cpp b/validator/impl/collator.cpp index 5b82d481a..0054ea98a 100644 --- a/validator/impl/collator.cpp +++ b/validator/impl/collator.cpp @@ -16,6 +16,7 @@ Copyright 2017-2020 Telegram Systems LLP */ +#include "candidate-serializer.h" #include "collator-impl.h" #include "vm/boc.h" #include "td/db/utils/BlobView.h" @@ -77,7 +78,7 @@ static inline bool dbg(int c) { * @param timeout The timeout for the collator. * @param promise The promise to return the result. * @param cancellation_token Token to cancel collation. - * @param mode +1 - skip storing candidate to disk. + * @param mode +1 - skip storing candidate to disk, +2 - called from CollatorNode. * @param attempt_idx The index of the attempt, starting from 0. On later attempts collator decreases block limits and skips some steps. 
*/ Collator::Collator(ShardIdFull shard, bool is_hardfork, BlockIdExt min_masterchain_block_id, @@ -730,6 +731,8 @@ bool Collator::unpack_last_mc_state() { store_out_msg_queue_size_ = config_->has_capability(ton::capStoreOutMsgQueueSize); msg_metadata_enabled_ = config_->has_capability(ton::capMsgMetadata); deferring_messages_enabled_ = config_->has_capability(ton::capDeferMessages); + full_collated_data_ = config_->has_capability(capFullCollatedData) || collator_opts_->force_full_collated_data; + LOG(DEBUG) << "full_collated_data is " << full_collated_data_; shard_conf_ = std::make_unique(*config_); prev_key_block_exists_ = config_->get_last_key_block(prev_key_block_, prev_key_block_lt_); if (prev_key_block_exists_) { @@ -749,15 +752,24 @@ bool Collator::unpack_last_mc_state() { LOG(INFO) << "Attempt #3: bytes, gas limits /= 2"; block_limits_->bytes.multiply_by(0.5); block_limits_->gas.multiply_by(0.5); + block_limits_->collated_data.multiply_by(0.5); } else if (attempt_idx_ == 4) { LOG(INFO) << "Attempt #4: bytes, gas limits /= 4"; block_limits_->bytes.multiply_by(0.25); block_limits_->gas.multiply_by(0.25); + block_limits_->collated_data.multiply_by(0.25); + } + if (collator_opts_->ignore_collated_data_limits) { + block_limits_->collated_data = block::ParamLimits{1 << 30, 1 << 30, 1 << 30}; } LOG(DEBUG) << "block limits: bytes [" << block_limits_->bytes.underload() << ", " << block_limits_->bytes.soft() << ", " << block_limits_->bytes.hard() << "]"; LOG(DEBUG) << "block limits: gas [" << block_limits_->gas.underload() << ", " << block_limits_->gas.soft() << ", " << block_limits_->gas.hard() << "]"; + LOG(DEBUG) << "block limits: lt_delta [" << block_limits_->lt_delta.underload() << ", " + << block_limits_->lt_delta.soft() << ", " << block_limits_->lt_delta.hard() << "]"; + LOG(DEBUG) << "block limits: collated_data_bytes [" << block_limits_->collated_data.underload() << ", " + << block_limits_->collated_data.soft() << ", " << block_limits_->collated_data.hard() 
<< "]"; if (config_->has_capabilities() && (config_->get_capabilities() & ~supported_capabilities())) { LOG(ERROR) << "block generation capabilities " << config_->get_capabilities() << " have been enabled in global configuration, but we support only " << supported_capabilities() @@ -768,8 +780,6 @@ bool Collator::unpack_last_mc_state() { << " have been enabled in global configuration, but we support only " << supported_version() << " (upgrade validator software?)"; } - full_collated_data_ = config_->get_collator_config(false).full_collated_data; - LOG(DEBUG) << "full_collated_data is " << full_collated_data_; // TODO: extract start_lt and end_lt from prev_mc_block as well // std::cerr << " block::gen::ShardState::print_ref(mc_state_root) = "; // block::gen::t_ShardState.print_ref(std::cerr, mc_state_root, 2); @@ -817,6 +827,9 @@ bool Collator::request_neighbor_msg_queues() { auto neighbor_list = shard_conf_->get_neighbor_shard_hash_ids(shard_); LOG(DEBUG) << "got a preliminary list of " << neighbor_list.size() << " neighbors for " << shard_.to_str(); for (ton::BlockId blk_id : neighbor_list) { + if (blk_id.seqno == 0 && blk_id.shard_full() != shard_) { + continue; + } auto shard_ptr = shard_conf_->get_shard_hash(ton::ShardIdFull(blk_id)); if (shard_ptr.is_null()) { return fatal_error(-667, "cannot obtain shard hash for neighbor "s + blk_id.to_str()); @@ -3675,6 +3688,10 @@ static std::string block_full_comment(const block::BlockLimitStatus& block_limit if (!block_limit_status.limits.lt_delta.fits(cls, lt_delta)) { return PSTRING() << "block_full lt_delta " << lt_delta; } + auto collated_data_bytes = block_limit_status.collated_data_stat.estimate_proof_size(); + if (!block_limit_status.limits.collated_data.fits(cls, collated_data_bytes)) { + return PSTRING() << "block_full collated_data " << collated_data_bytes; + } return ""; } @@ -5807,7 +5824,8 @@ bool Collator::create_block_candidate() { << ") exceeds the limit in consensus config (" << 
consensus_config.max_block_size << ")"); } - if (block_candidate->collated_data.size() > consensus_config.max_collated_data_size) { + if (block_candidate->collated_data.size() > consensus_config.max_collated_data_size && + !collator_opts_->ignore_collated_data_limits) { return fatal_error(PSTRING() << "collated data size (" << block_candidate->collated_data.size() << ") exceeds the limit in consensus config (" << consensus_config.max_collated_data_size << ")"); @@ -5835,14 +5853,31 @@ bool Collator::create_block_candidate() { double work_time = work_timer_.elapsed(); double cpu_work_time = cpu_work_timer_.elapsed(); LOG(WARNING) << "Collate query work time = " << work_time << "s, cpu time = " << cpu_work_time << "s"; - stats_.bytes = block_limit_status_->estimate_block_size(); + stats_.actual_bytes = block_candidate->data.size(); + stats_.actual_collated_data_bytes = block_candidate->collated_data.size(); + stats_.estimated_bytes = block_limit_status_->estimate_block_size(); stats_.gas = block_limit_status_->gas_used; stats_.lt_delta = block_limit_status_->cur_lt - block_limit_status_->limits.start_lt; - stats_.cat_bytes = block_limit_status_->limits.classify_size(stats_.bytes); + stats_.estimated_collated_data_bytes = block_limit_status_->collated_data_stat.estimate_proof_size(); + stats_.cat_bytes = block_limit_status_->limits.classify_size(stats_.estimated_bytes); stats_.cat_gas = block_limit_status_->limits.classify_gas(stats_.gas); stats_.cat_lt_delta = block_limit_status_->limits.classify_lt(block_limit_status_->cur_lt); - td::actor::send_closure(manager, &ValidatorManager::record_collate_query_stats, block_candidate->id, work_time, - cpu_work_time, std::move(stats_)); + stats_.cat_collated_data_bytes = + block_limit_status_->limits.classify_collated_data_size(stats_.estimated_collated_data_bytes); + stats_.work_time = work_time; + stats_.cpu_work_time = cpu_work_time; + + // TODO: remove this later (currently needed to collect stats) + if (mode_ & 
CollateMode::from_collator_node) { + size_t d; + stats_.serialized_size = + validatorsession::compress_candidate_data(block_candidate->data, block_candidate->collated_data, d).ok().size(); + stats_.serialized_size_no_collated_data = + validatorsession::compress_candidate_data(block_candidate->data, td::Slice{}, d).ok().size(); + } + + td::actor::send_closure(manager, &ValidatorManager::record_collate_query_stats, block_candidate->id, + std::move(stats_)); return true; } diff --git a/validator/impl/shard.hpp b/validator/impl/shard.hpp index c8c7aca20..b13be0219 100644 --- a/validator/impl/shard.hpp +++ b/validator/impl/shard.hpp @@ -170,9 +170,6 @@ class MasterchainStateQ : public MasterchainState, public ShardStateQ { block::WorkchainSet get_workchain_list() const override { return config_ ? config_->get_workchain_list() : block::WorkchainSet(); } - block::CollatorConfig get_collator_config(bool need_collator_nodes) const override { - return config_ ? config_->get_collator_config(need_collator_nodes) : block::CollatorConfig(); - } private: ZeroStateIdExt zerostate_id_; diff --git a/validator/impl/validate-query.cpp b/validator/impl/validate-query.cpp index ca00e00e9..34c164eee 100644 --- a/validator/impl/validate-query.cpp +++ b/validator/impl/validate-query.cpp @@ -65,7 +65,7 @@ std::string ErrorCtx::as_string() const { * @param manager The ActorId of the ValidatorManager. * @param timeout The timeout for the validation. * @param promise The Promise to return the ValidateCandidateResult to. - * @param is_fake A boolean indicating if the validation is fake (performed when creating a hardfork). 
+ * @param mode +1 - fake mode */ ValidateQuery::ValidateQuery(ShardIdFull shard, BlockIdExt min_masterchain_block_id, std::vector prev, BlockCandidate candidate, Ref validator_set, @@ -1536,6 +1536,9 @@ bool ValidateQuery::request_neighbor_queues() { auto neighbor_list = new_shard_conf_->get_neighbor_shard_hash_ids(shard_); LOG(DEBUG) << "got a preliminary list of " << neighbor_list.size() << " neighbors for " << shard_.to_str(); for (ton::BlockId blk_id : neighbor_list) { + if (blk_id.seqno == 0 && blk_id.shard_full() != shard_) { + continue; + } auto shard_ptr = new_shard_conf_->get_shard_hash(ton::ShardIdFull(blk_id)); if (shard_ptr.is_null()) { return reject_query("cannot obtain shard hash for neighbor "s + blk_id.to_str()); @@ -2305,6 +2308,12 @@ bool ValidateQuery::prepare_out_msg_queue_size() { have_out_msg_queue_size_in_state_ = true; return true; } + if (ps_.out_msg_queue_->is_empty()) { + old_out_msg_queue_size_ = 0; + out_msg_queue_size_known_ = true; + have_out_msg_queue_size_in_state_ = true; + return true; + } if (!store_out_msg_queue_size_) { // Don't need it return true; } diff --git a/validator/impl/validate-query.hpp b/validator/impl/validate-query.hpp index a21a56e90..585d553c5 100644 --- a/validator/impl/validate-query.hpp +++ b/validator/impl/validate-query.hpp @@ -113,7 +113,7 @@ class ValidateQuery : public td::actor::Actor { } static constexpr long long supported_capabilities() { return ton::capCreateStatsEnabled | ton::capBounceMsgBody | ton::capReportVersion | ton::capShortDequeue | - ton::capStoreOutMsgQueueSize | ton::capMsgMetadata | ton::capDeferMessages; + ton::capStoreOutMsgQueueSize | ton::capMsgMetadata | ton::capDeferMessages | ton::capFullCollatedData; } public: diff --git a/validator/impl/validator-set.cpp b/validator/impl/validator-set.cpp index 629337cf8..d4b0d647b 100644 --- a/validator/impl/validator-set.cpp +++ b/validator/impl/validator-set.cpp @@ -28,14 +28,14 @@ namespace ton { namespace validator { using td::Ref; 
-const ValidatorDescr *ValidatorSetQ::find_validator(const NodeIdShort &id) const { +const ValidatorDescr *ValidatorSetQ::get_validator(const NodeIdShort &id) const { auto it = std::lower_bound(ids_map_.begin(), ids_map_.end(), id, [](const auto &p, const auto &x) { return p.first < x; }); return it < ids_map_.end() && it->first == id ? &ids_[it->second] : nullptr; } bool ValidatorSetQ::is_validator(NodeIdShort id) const { - return find_validator(id); + return get_validator(id); } td::Result ValidatorSetQ::check_signatures(RootHash root_hash, FileHash file_hash, @@ -53,7 +53,7 @@ td::Result ValidatorSetQ::check_signatures(RootHash root_hash, } nodes.insert(sig.node); - auto vdescr = find_validator(sig.node); + auto vdescr = get_validator(sig.node); if (!vdescr) { return td::Status::Error(ErrorCode::protoviolation, "unknown node to sign"); } @@ -84,7 +84,7 @@ td::Result ValidatorSetQ::check_approve_signatures(RootHash roo } nodes.insert(sig.node); - auto vdescr = find_validator(sig.node); + auto vdescr = get_validator(sig.node); if (!vdescr) { return td::Status::Error(ErrorCode::protoviolation, "unknown node to sign"); } diff --git a/validator/impl/validator-set.hpp b/validator/impl/validator-set.hpp index 3141f36c5..951ca4b71 100644 --- a/validator/impl/validator-set.hpp +++ b/validator/impl/validator-set.hpp @@ -32,6 +32,7 @@ namespace validator { class ValidatorSetQ : public ValidatorSet { public: + const ValidatorDescr* get_validator(const NodeIdShort& id) const override; bool is_validator(NodeIdShort id) const override; CatchainSeqno get_catchain_seqno() const override { return cc_seqno_; @@ -62,8 +63,6 @@ class ValidatorSetQ : public ValidatorSet { ValidatorWeight total_weight_; std::vector ids_; std::vector> ids_map_; - - const ValidatorDescr* find_validator(const NodeIdShort& id) const; }; class ValidatorSetCompute { diff --git a/validator/interfaces/shard.h b/validator/interfaces/shard.h index 3383f6fc2..3546f0a39 100644 --- a/validator/interfaces/shard.h 
+++ b/validator/interfaces/shard.h @@ -85,7 +85,6 @@ class MasterchainState : virtual public ShardState { virtual bool check_old_mc_block_id(const ton::BlockIdExt& blkid, bool strict = false) const = 0; virtual td::Result> get_config_holder() const = 0; virtual block::WorkchainSet get_workchain_list() const = 0; - virtual block::CollatorConfig get_collator_config(bool need_collator_nodes) const = 0; virtual td::Status prepare() { return td::Status::OK(); } diff --git a/validator/interfaces/validator-manager.h b/validator/interfaces/validator-manager.h index a728c93b9..14d10eccc 100644 --- a/validator/interfaces/validator-manager.h +++ b/validator/interfaces/validator-manager.h @@ -54,13 +54,24 @@ struct AsyncSerializerState { }; struct CollationStats { - td::uint32 bytes, gas, lt_delta; - int cat_bytes, cat_gas, cat_lt_delta; + td::uint32 actual_bytes = 0, actual_collated_data_bytes = 0; + td::uint32 estimated_bytes = 0, gas = 0, lt_delta = 0, estimated_collated_data_bytes = 0; + int cat_bytes = 0, cat_gas = 0, cat_lt_delta = 0, cat_collated_data_bytes = 0; std::string limits_log; td::uint32 ext_msgs_total = 0; td::uint32 ext_msgs_filtered = 0; td::uint32 ext_msgs_accepted = 0; td::uint32 ext_msgs_rejected = 0; + double work_time = 0.0, cpu_work_time = 0.0; + td::uint32 serialized_size = 0, serialized_size_no_collated_data = 0; + + tl_object_ptr tl() const { + return create_tl_object( + actual_bytes, actual_collated_data_bytes, estimated_bytes, gas, lt_delta, estimated_collated_data_bytes, + cat_bytes, cat_gas, cat_lt_delta, cat_collated_data_bytes, limits_log, ext_msgs_total, ext_msgs_filtered, + ext_msgs_accepted, ext_msgs_rejected, work_time, cpu_work_time, serialized_size, + serialized_size_no_collated_data); + } }; using ValidateCandidateResult = td::Variant; @@ -146,6 +157,7 @@ class ValidatorManager : public ValidatorManagerInterface { virtual void send_ihr_message(td::Ref message) = 0; virtual void send_top_shard_block_description(td::Ref desc) = 0; virtual 
void send_block_broadcast(BlockBroadcast broadcast, int mode) = 0; + virtual void send_validator_telemetry(PublicKeyHash key, tl_object_ptr telemetry) = 0; virtual void send_get_out_msg_queue_proof_request(ShardIdFull dst_shard, std::vector blocks, block::ImportedMsgQueueLimits limits, td::Promise>> promise) = 0; @@ -208,8 +220,7 @@ class ValidatorManager : public ValidatorManagerInterface { virtual void add_lite_query_stats(int lite_query_id) { } - virtual void record_collate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, - CollationStats stats) { + virtual void record_collate_query_stats(BlockIdExt block_id, CollationStats stats) { } virtual void record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time) { } diff --git a/validator/interfaces/validator-set.h b/validator/interfaces/validator-set.h index b71c0bfea..ad7fb9b55 100644 --- a/validator/interfaces/validator-set.h +++ b/validator/interfaces/validator-set.h @@ -30,6 +30,7 @@ namespace validator { class ValidatorSet : public td::CntObject { public: virtual ~ValidatorSet() = default; + virtual const ValidatorDescr* get_validator(const NodeIdShort& id) const = 0; virtual bool is_validator(NodeIdShort id) const = 0; virtual CatchainSeqno get_catchain_seqno() const = 0; virtual td::uint32 get_validator_set_hash() const = 0; diff --git a/validator/manager-disk.hpp b/validator/manager-disk.hpp index c54ccde55..354b0e435 100644 --- a/validator/manager-disk.hpp +++ b/validator/manager-disk.hpp @@ -267,6 +267,8 @@ class ValidatorManagerImpl : public ValidatorManager { void send_top_shard_block_description(td::Ref desc) override; void send_block_broadcast(BlockBroadcast broadcast, int mode) override { } + void send_validator_telemetry(PublicKeyHash key, tl_object_ptr telemetry) override { + } void send_get_out_msg_queue_proof_request(ShardIdFull dst_shard, std::vector blocks, block::ImportedMsgQueueLimits limits, td::Promise>> promise) override { @@ -457,6 
+459,11 @@ class ValidatorManagerImpl : public ValidatorManager { UNREACHABLE(); } + void get_collation_manager_stats( + td::Promise> promise) override { + UNREACHABLE(); + } + void update_options(td::Ref opts) override { opts_ = std::move(opts); } diff --git a/validator/manager-hardfork.hpp b/validator/manager-hardfork.hpp index daf83a197..7f00119ad 100644 --- a/validator/manager-hardfork.hpp +++ b/validator/manager-hardfork.hpp @@ -337,6 +337,8 @@ class ValidatorManagerImpl : public ValidatorManager { } void send_block_broadcast(BlockBroadcast broadcast, int mode) override { } + void send_validator_telemetry(PublicKeyHash key, tl_object_ptr telemetry) override { + } void send_get_out_msg_queue_proof_request(ShardIdFull dst_shard, std::vector blocks, block::ImportedMsgQueueLimits limits, td::Promise>> promise) override { @@ -525,6 +527,11 @@ class ValidatorManagerImpl : public ValidatorManager { UNREACHABLE(); } + void get_collation_manager_stats( + td::Promise> promise) override { + UNREACHABLE(); + } + private: td::Ref opts_; diff --git a/validator/manager.cpp b/validator/manager.cpp index f97263570..4c8e6ecc8 100644 --- a/validator/manager.cpp +++ b/validator/manager.cpp @@ -799,7 +799,13 @@ void ValidatorManagerImpl::wait_neighbor_msg_queue_proofs( public: Worker(size_t pending, td::Promise>> promise) : pending_(pending), promise_(std::move(promise)) { - CHECK(pending_ > 0); + } + + void start_up() override { + if (pending_ == 0) { + promise_.set_result(std::move(result_)); + stop(); + } } void on_result(td::Ref res) { @@ -1769,6 +1775,11 @@ void ValidatorManagerImpl::send_block_broadcast(BlockBroadcast broadcast, int mo callback_->send_broadcast(std::move(broadcast), mode); } +void ValidatorManagerImpl::send_validator_telemetry(PublicKeyHash key, + tl_object_ptr telemetry) { + callback_->send_validator_telemetry(key, std::move(telemetry)); +} + void ValidatorManagerImpl::send_get_out_msg_queue_proof_request( ShardIdFull dst_shard, std::vector blocks, 
block::ImportedMsgQueueLimits limits, td::Promise>> promise) { @@ -1894,6 +1905,7 @@ void ValidatorManagerImpl::started(ValidatorManagerInitResult R) { if (opts_->nonfinal_ls_queries_enabled()) { candidates_buffer_ = td::actor::create_actor("candidates-buffer", actor_id(this)); } + init_validator_telemetry(); auto Q = td::PromiseCreator::lambda( [SelfId = actor_id(this)](td::Result>> R) { @@ -2085,6 +2097,7 @@ void ValidatorManagerImpl::new_masterchain_block() { td::actor::send_closure(serializer_, &AsyncStateSerializer::update_last_known_key_block_ts, last_key_block_handle_->unix_time()); } + init_validator_telemetry(); } update_shard_overlays(); @@ -2462,15 +2475,27 @@ td::actor::ActorOwn ValidatorManagerImpl::create_validator_group auto validator_id = get_validator(shard, validator_set); CHECK(!validator_id.is_zero()); + auto descr = validator_set->get_validator(validator_id.bits256_value()); + CHECK(descr); + auto adnl_id = adnl::AdnlNodeIdShort{ + descr->addr.is_zero() ? ValidatorFullId{descr->key}.compute_short_id().bits256_value() : descr->addr}; auto G = td::actor::create_actor( - PSTRING() << "valgroup" << shard.to_str(), shard, validator_id, session_id, validator_set, key_seqno, - last_masterchain_state_->get_collator_config(true), opts, keyring_, adnl_, rldp_, overlays_, db_root_, - actor_id(this), init_session, opts_->check_unsafe_resync_allowed(validator_set->get_catchain_seqno()), opts_, + PSTRING() << "valgroup" << shard.to_str(), shard, validator_id, session_id, validator_set, key_seqno, opts, + keyring_, adnl_, rldp_, overlays_, db_root_, actor_id(this), get_collation_manager(adnl_id), init_session, + opts_->check_unsafe_resync_allowed(validator_set->get_catchain_seqno()), opts_, opts_->need_monitor(shard, last_masterchain_state_)); return G; } } +td::actor::ActorId ValidatorManagerImpl::get_collation_manager(adnl::AdnlNodeIdShort adnl_id) { + auto &actor = collation_managers_[adnl_id]; + if (actor.empty()) { + actor = 
td::actor::create_actor("collation", adnl_id, opts_, actor_id(this), rldp_); + } + return actor.get(); +} + void ValidatorManagerImpl::add_handle_to_lru(BlockHandle handle) { auto it = handle_lru_map_.find(handle->id()); if (it != handle_lru_map_.end()) { @@ -3081,10 +3106,7 @@ void ValidatorManagerImpl::log_validator_session_stats(BlockIdExt block_id, tl_object_ptr collation_stats; if (it != recorded_block_stats_.end() && it->second.collator_stats_) { auto &stats = it->second.collator_stats_.value(); - collation_stats = create_tl_object( - stats.bytes, stats.gas, stats.lt_delta, stats.cat_bytes, stats.cat_gas, stats.cat_lt_delta, - stats.limits_log, stats.ext_msgs_total, stats.ext_msgs_filtered, stats.ext_msgs_accepted, - stats.ext_msgs_rejected); + collation_stats = stats.tl(); } std::string approvers, signers; for (bool x : producer.approvers) { @@ -3458,6 +3480,9 @@ void ValidatorManagerImpl::update_options(td::Ref opts) for (auto &collator : collator_nodes_) { td::actor::send_closure(collator.second.actor, &CollatorNode::update_options, opts); } + for (auto &[_, c] : collation_managers_) { + td::actor::send_closure(c, &CollationManager::update_options, opts); + } opts_ = std::move(opts); } @@ -3492,6 +3517,54 @@ void ValidatorManagerImpl::del_collator(adnl::AdnlNodeIdShort id, ShardIdFull sh } } +void ValidatorManagerImpl::get_collation_manager_stats( + td::Promise> promise) { + class Cb : public td::actor::Actor { + public: + explicit Cb(td::Promise> promise) + : promise_(std::move(promise)) { + } + + void got_stats(tl_object_ptr s) { + result_.push_back(std::move(s)); + dec_pending(); + } + + void inc_pending() { + ++pending_; + } + + void dec_pending() { + CHECK(pending_ > 0); + --pending_; + if (pending_ == 0) { + promise_.set_result(create_tl_object(std::move(result_))); + stop(); + } + } + + private: + td::Promise> promise_; + size_t pending_ = 1; + std::vector> result_; + }; + auto callback = td::actor::create_actor("stats", 
std::move(promise)).release(); + + for (auto &[_, actor] : collation_managers_) { + td::actor::send_closure(callback, &Cb::inc_pending); + td::actor::send_closure( + actor, &CollationManager::get_stats, + [callback](td::Result> R) { + if (R.is_error()) { + td::actor::send_closure(callback, &Cb::dec_pending); + } else { + td::actor::send_closure(callback, &Cb::got_stats, R.move_as_ok()); + } + }); + } + td::actor::send_closure(callback, &Cb::dec_pending); +} + void ValidatorManagerImpl::add_persistent_state_description(td::Ref desc) { auto now = (UnixTime)td::Clocks::system(); if (desc->end_time <= now) { @@ -3547,12 +3620,28 @@ td::actor::ActorOwn ValidatorManagerFactory::create( rldp, overlays); } -void ValidatorManagerImpl::record_collate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, - CollationStats stats) { +void ValidatorManagerImpl::record_collate_query_stats(BlockIdExt block_id, CollationStats stats) { auto &record = new_block_stats_record(block_id); - record.collator_work_time_ = work_time; - record.collator_cpu_work_time_ = cpu_work_time; + record.collator_work_time_ = stats.work_time; + record.collator_cpu_work_time_ = stats.cpu_work_time; record.collator_stats_ = std::move(stats); + + std::string fname = opts_->get_session_logs_file(); + if (fname.empty()) { + return; + } + + auto obj = create_tl_object(td::Clocks::system(), + create_tl_block_id(block_id), stats.tl()); + auto s = td::json_encode(td::ToJson(*obj.get()), false); + s.erase(std::remove_if(s.begin(), s.end(), [](char c) { return c == '\n' || c == '\r'; }), s.end()); + + std::ofstream file; + file.open(fname, std::ios_base::app); + file << s << "\n"; + file.close(); + + LOG(DEBUG) << "Writing collation stats stats for " << block_id.id.to_str(); } void ValidatorManagerImpl::record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time) { @@ -3595,6 +3684,41 @@ void ValidatorManagerImpl::CheckedExtMsgCounter::before_query() { } } +void 
ValidatorManagerImpl::init_validator_telemetry() { + if (last_masterchain_state_.is_null()) { + return; + } + td::Ref validator_set = last_masterchain_state_->get_total_validator_set(0); + if (validator_set.is_null()) { + validator_telemetry_.clear(); + return; + } + std::set processed; + for (auto& key : temp_keys_) { + if (const ValidatorDescr* desc = validator_set->get_validator(key.bits256_value())) { + processed.insert(key); + adnl::AdnlNodeIdShort adnl_id; + if (desc->addr.is_zero()) { + adnl_id = adnl::AdnlNodeIdShort{ValidatorFullId{desc->key}.compute_short_id()}; + } else { + adnl_id = adnl::AdnlNodeIdShort{desc->addr}; + } + auto& telemetry = validator_telemetry_[key]; + if (telemetry.empty()) { + telemetry = td::actor::create_actor( + "telemetry", key, adnl_id, opts_->zero_block_id().file_hash, actor_id(this)); + } + } + } + for (auto it = validator_telemetry_.begin(); it != validator_telemetry_.end();) { + if (processed.contains(it->first)) { + ++it; + } else { + it = validator_telemetry_.erase(it); + } + } +} + } // namespace validator } // namespace ton diff --git a/validator/manager.hpp b/validator/manager.hpp index b7e457e27..c0d74456f 100644 --- a/validator/manager.hpp +++ b/validator/manager.hpp @@ -34,6 +34,7 @@ #include "rldp/rldp.h" #include "token-manager.h" #include "queue-size-counter.hpp" +#include "validator-telemetry.hpp" #include "impl/candidates-buffer.hpp" #include "collator-node.hpp" @@ -283,12 +284,15 @@ class ValidatorManagerImpl : public ValidatorManager { td::Ref validator_set, BlockSeqno key_seqno, validatorsession::ValidatorSessionOptions opts, bool create_catchain); + td::actor::ActorId get_collation_manager(adnl::AdnlNodeIdShort adnl_id); + struct ValidatorGroupEntry { td::actor::ActorOwn actor; ShardIdFull shard; }; std::map validator_groups_; std::map next_validator_groups_; + std::map> collation_managers_; std::set check_gc_list_; std::vector gc_list_; @@ -356,6 +360,7 @@ class ValidatorManagerImpl : public ValidatorManager 
{ } void add_temp_key(PublicKeyHash key, td::Promise promise) override { temp_keys_.insert(key); + init_validator_telemetry(); promise.set_value(td::Unit()); } void del_permanent_key(PublicKeyHash key, td::Promise promise) override { @@ -364,6 +369,7 @@ class ValidatorManagerImpl : public ValidatorManager { } void del_temp_key(PublicKeyHash key, td::Promise promise) override { temp_keys_.erase(key); + init_validator_telemetry(); promise.set_value(td::Unit()); } @@ -521,6 +527,7 @@ class ValidatorManagerImpl : public ValidatorManager { void send_ihr_message(td::Ref message) override; void send_top_shard_block_description(td::Ref desc) override; void send_block_broadcast(BlockBroadcast broadcast, int mode) override; + void send_validator_telemetry(PublicKeyHash key, tl_object_ptr telemetry) override; void send_get_out_msg_queue_proof_request(ShardIdFull dst_shard, std::vector blocks, block::ImportedMsgQueueLimits limits, td::Promise>> promise) override; @@ -634,6 +641,9 @@ class ValidatorManagerImpl : public ValidatorManager { void add_collator(adnl::AdnlNodeIdShort id, ShardIdFull shard) override; void del_collator(adnl::AdnlNodeIdShort id, ShardIdFull shard) override; + void get_collation_manager_stats( + td::Promise> promise) override; + void get_out_msg_queue_size(BlockIdExt block_id, td::Promise promise) override { if (queue_size_counter_.empty()) { if (last_masterchain_state_.is_null()) { @@ -774,11 +784,14 @@ class ValidatorManagerImpl : public ValidatorManager { std::map recorded_block_stats_; std::queue recorded_block_stats_lru_; - void record_collate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, - CollationStats stats) override; + void record_collate_query_stats(BlockIdExt block_id, CollationStats stats) override; void record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time) override; RecordedBlockStats &new_block_stats_record(BlockIdExt block_id); + std::map> validator_telemetry_; + + void 
init_validator_telemetry(); + struct Collator { td::actor::ActorOwn actor; std::set shards; diff --git a/validator/validator-group.cpp b/validator/validator-group.cpp index 55798a482..f95266b33 100644 --- a/validator/validator-group.cpp +++ b/validator/validator-group.cpp @@ -65,7 +65,10 @@ void ValidatorGroup::generate_block_candidate( td::actor::send_closure(SelfId, &ValidatorGroup::generated_block_candidate, source_info, std::move(cache), std::move(R)); }; - collate_block(source_info, td::Timestamp::in(10.0), std::move(P)); + td::uint64 max_answer_size = config_.max_block_size + config_.max_collated_data_size + 1024; + td::actor::send_closure(collation_manager_, &CollationManager::collate_block, shard_, min_masterchain_block_id_, + prev_block_ids_, Ed25519_PublicKey{local_id_full_.ed25519_value().raw()}, validator_set_, + max_answer_size, cancellation_token_source_.get_cancellation_token(), std::move(P)); } void ValidatorGroup::generated_block_candidate(validatorsession::BlockSourceInfo source_info, @@ -218,9 +221,9 @@ void ValidatorGroup::accept_block_query(BlockIdExt block_id, td::Ref return; } LOG_CHECK(R.error().code() == ErrorCode::timeout || R.error().code() == ErrorCode::notready) << R.move_as_error(); - td::actor::send_closure(SelfId, &ValidatorGroup::accept_block_query, block_id, std::move(block), - std::move(prev), std::move(sig_set), std::move(approve_sig_set), send_broadcast_mode, - std::move(promise), true); + td::actor::send_closure(SelfId, &ValidatorGroup::accept_block_query, block_id, std::move(block), std::move(prev), + std::move(sig_set), std::move(approve_sig_set), send_broadcast_mode, std::move(promise), + true); } else { promise.set_value(R.move_as_ok()); } @@ -292,7 +295,8 @@ std::unique_ptr ValidatorGroup::ma td::actor::send_closure(id_, &ValidatorGroup::generate_block_candidate, std::move(source_info), std::move(promise)); } - void on_block_committed(validatorsession::BlockSourceInfo source_info, validatorsession::ValidatorSessionRootHash 
root_hash, + void on_block_committed(validatorsession::BlockSourceInfo source_info, + validatorsession::ValidatorSessionRootHash root_hash, validatorsession::ValidatorSessionFileHash file_hash, td::BufferSlice data, std::vector> signatures, std::vector> approve_signatures, @@ -409,7 +413,7 @@ void ValidatorGroup::start(std::vector prev, BlockIdExt min_masterch stats.last_key_block_seqno = last_key_block_seqno_; stats.timestamp = td::Clocks::system(); td::uint32 idx = 0; - for (const auto& node : validator_set_->export_vector()) { + for (const auto &node : validator_set_->export_vector()) { PublicKeyHash id = ValidatorFullId{node.key}.compute_short_id(); if (id == local_id_) { stats.self_idx = idx; @@ -489,7 +493,7 @@ void ValidatorGroup::get_validator_group_info_for_litequery_cont( auto result = create_tl_object(); result->next_block_id_ = create_tl_lite_block_id_simple(next_block_id); - for (const BlockIdExt& prev : prev_block_ids_) { + for (const BlockIdExt &prev : prev_block_ids_) { result->prev_.push_back(create_tl_lite_block_id(prev)); } result->cc_seqno_ = validator_set_->get_catchain_seqno(); @@ -497,146 +501,6 @@ void ValidatorGroup::get_validator_group_info_for_litequery_cont( promise.set_result(std::move(result)); } -void ValidatorGroup::collate_block(validatorsession::BlockSourceInfo source_info, td::Timestamp timeout, - td::Promise promise, unsigned max_retries) { - if (source_info.round < last_known_round_id_) { - promise.set_error(td::Status::Error("too old")); - return; - } - BlockId next_block_id = create_next_block_id_simple(); - adnl::AdnlNodeIdShort collator_adnl_id = adnl::AdnlNodeIdShort::zero(); - bool self_collate = false; - bool trusted_collator = false; - - if (shard_.is_masterchain()) { - self_collate = true; - } else { - for (const auto &s : opts_->get_collators_list()->shards) { - if (!shard_intersects(s.shard_id, shard_)) { - continue; - } - if (!s.collators.empty()) { - const CollatorsList::Collator &col = 
s.collators[td::Random::fast(0, s.collators.size() - 1)]; - collator_adnl_id = col.adnl_id; - trusted_collator = col.trusted; - break; - } - } - if (collator_adnl_id.is_zero()) { - if (opts_->get_collators_list()->self_collate) { - self_collate = true; - } else if (opts_->get_collators_list()->use_config_41) { - // TODO: some way to choose node (similar to "unreliability" in full-node) - int cnt = 0; - for (const block::CollatorNodeDescr &c : collator_config_.collator_nodes) { - if (shard_intersects(shard_, c.shard)) { - if (td::Random::fast(0, cnt) == 0) { - collator_adnl_id = adnl::AdnlNodeIdShort(c.adnl_id); - } - ++cnt; - } - } - } - } - } - if (self_collate) { - run_collate_query(shard_, min_masterchain_block_id_, prev_block_ids_, - Ed25519_PublicKey{local_id_full_.ed25519_value().raw()}, validator_set_, - opts_->get_collator_options(), manager_, td::Timestamp::in(10.0), std::move(promise), - cancellation_token_source_.get_cancellation_token(), 0); - return; - } - if (collator_adnl_id.is_zero()) { - promise.set_error(td::Status::Error(PSTRING() << "no collator for shard " << shard_.to_str())); - return; - } - - promise = td::PromiseCreator::lambda([=, SelfId = actor_id(this), promise = std::move(promise), - timer = td::Timer()](td::Result R) mutable { - if (R.is_ok()) { - LOG(INFO) << "collate query for " << next_block_id.to_str() << ": success, time=" << timer.elapsed() << "s"; - promise.set_result(R.move_as_ok()); - return; - } - bool retry = (!timeout || !timeout.is_in_past()) && max_retries > 0; - LOG(WARNING) << "collate query for " << next_block_id.to_str() << ": " << R.error() << ", time=" << timer.elapsed() - << "s, " << (retry ? 
"retrying" : "giving up"); - if (retry) { - td::actor::send_closure(SelfId, &ValidatorGroup::collate_block, source_info, timeout, std::move(promise), - max_retries - 1); - } else { - promise.set_result(td::Status::Error(ErrorCode::timeout, "timeout")); - } - }); - - std::vector> prev_blocks; - for (const BlockIdExt &p : prev_block_ids_) { - prev_blocks.push_back(create_tl_block_id(p)); - } - td::BufferSlice query = create_serialize_tl_object( - create_tl_shard_id(shard_), validator_set_->get_catchain_seqno(), std::move(prev_blocks), - local_id_full_.ed25519_value().raw()); - - auto P = td::PromiseCreator::lambda( - [=, SelfId = actor_id(this), promise = std::move(promise)](td::Result R) mutable { - if (R.is_error()) { - promise.set_error(R.move_as_error_prefix("rldp query failed: ")); - return; - } - td::actor::send_closure(SelfId, &ValidatorGroup::receive_collate_query_response, source_info, R.move_as_ok(), - trusted_collator, std::move(promise)); - }); - LOG(INFO) << "sending collate query for " << next_block_id.to_str() << ": send to " << collator_adnl_id; - size_t max_answer_size = config_.max_block_size + config_.max_collated_data_size + 1024; - td::Timestamp query_timeout = td::Timestamp::in(10.0); - query_timeout.relax(timeout); - td::actor::send_closure(rldp_, &rldp::Rldp::send_query_ex, local_adnl_id_, collator_adnl_id, "collatequery", - std::move(P), timeout, std::move(query), max_answer_size); -} - -void ValidatorGroup::receive_collate_query_response(validatorsession::BlockSourceInfo source_info, td::BufferSlice data, - bool trusted_collator, td::Promise promise) { - if (source_info.round < last_known_round_id_) { - promise.set_error(td::Status::Error("too old")); - return; - } - TRY_RESULT_PROMISE(promise, f, fetch_tl_object(data, true)); - td::Result res; - ton_api::downcast_call(*f, td::overloaded( - [&](ton_api::collatorNode_generateBlockError &r) { - td::Status error = td::Status::Error(r.code_, r.message_); - res = 
error.move_as_error_prefix("collate query: "); - }, - [&](ton_api::collatorNode_generateBlockSuccess &r) { - res = CollatorNode::deserialize_candidate( - std::move(r.candidate_), - config_.max_block_size + config_.max_collated_data_size + 1024); - })); - TRY_RESULT_PROMISE(promise, candidate, std::move(res)); - if (candidate.pubkey.as_bits256() != local_id_full_.ed25519_value().raw()) { - promise.set_error(td::Status::Error("collate query: block candidate source mismatch")); - return; - } - if (candidate.id.shard_full() != shard_) { - promise.set_error(td::Status::Error("collate query: shard mismatch")); - return; - } - - if (trusted_collator) { - promise.set_result(std::move(candidate)); - return; - } - auto P = td::PromiseCreator::lambda( - [candidate = candidate.clone(), promise = std::move(promise)](td::Result> R) mutable { - if (R.is_error()) { - promise.set_error(R.move_as_error_prefix("validate received block error: ")); - return; - } - promise.set_result(std::move(candidate)); - }); - validate_block_candidate(source_info, std::move(candidate), std::move(P)); -} - } // namespace validator } // namespace ton diff --git a/validator/validator-group.hpp b/validator/validator-group.hpp index 38fc9fb44..fb7a4dcfb 100644 --- a/validator/validator-group.hpp +++ b/validator/validator-group.hpp @@ -18,6 +18,7 @@ */ #pragma once +#include "collation-manager.hpp" #include "interfaces/validator-manager.h" #include "validator-session/validator-session.h" @@ -59,6 +60,10 @@ class ValidatorGroup : public td::actor::Actor { init_ = false; create_session(); } + td::actor::send_closure(collation_manager_, &CollationManager::validator_group_started, shard_); + } + void tear_down() override { + td::actor::send_closure(collation_manager_, &CollationManager::validator_group_finished, shard_); } void get_validator_group_info_for_litequery( @@ -71,17 +76,17 @@ class ValidatorGroup : public td::actor::Actor { ValidatorGroup(ShardIdFull shard, PublicKeyHash local_id, 
ValidatorSessionId session_id, td::Ref validator_set, BlockSeqno last_key_block_seqno, - block::CollatorConfig collator_config, validatorsession::ValidatorSessionOptions config, - td::actor::ActorId keyring, td::actor::ActorId adnl, - td::actor::ActorId rldp, td::actor::ActorId overlays, - std::string db_root, td::actor::ActorId validator_manager, bool create_session, + validatorsession::ValidatorSessionOptions config, td::actor::ActorId keyring, + td::actor::ActorId adnl, td::actor::ActorId rldp, + td::actor::ActorId overlays, std::string db_root, + td::actor::ActorId validator_manager, + td::actor::ActorId collation_manager, bool create_session, bool allow_unsafe_self_blocks_resync, td::Ref opts, bool monitoring_shard) : shard_(shard) , local_id_(std::move(local_id)) , session_id_(session_id) , validator_set_(std::move(validator_set)) , last_key_block_seqno_(last_key_block_seqno) - , collator_config_(std::move(collator_config)) , config_(std::move(config)) , keyring_(keyring) , adnl_(adnl) @@ -89,6 +94,7 @@ class ValidatorGroup : public td::actor::Actor { , overlays_(overlays) , db_root_(std::move(db_root)) , manager_(validator_manager) + , collation_manager_(collation_manager) , init_(create_session) , allow_unsafe_self_blocks_resync_(allow_unsafe_self_blocks_resync) , opts_(std::move(opts)) @@ -97,10 +103,6 @@ class ValidatorGroup : public td::actor::Actor { private: std::unique_ptr make_validator_session_callback(); - void collate_block(validatorsession::BlockSourceInfo source_info, td::Timestamp timeout, - td::Promise promise, unsigned max_retries = 4); - void receive_collate_query_response(validatorsession::BlockSourceInfo source_info, td::BufferSlice data, - bool trusted_collator, td::Promise promise); struct PostponedAccept { RootHash root_hash; @@ -124,7 +126,6 @@ class ValidatorGroup : public td::actor::Actor { td::Ref validator_set_; BlockSeqno last_key_block_seqno_; - block::CollatorConfig collator_config_; validatorsession::ValidatorSessionOptions 
config_; td::actor::ActorId keyring_; @@ -133,6 +134,7 @@ class ValidatorGroup : public td::actor::Actor { td::actor::ActorId overlays_; std::string db_root_; td::actor::ActorId manager_; + td::actor::ActorId collation_manager_; td::actor::ActorOwn session_; adnl::AdnlNodeIdShort local_adnl_id_; diff --git a/validator/validator-options.cpp b/validator/validator-options.cpp index 4f9b8c538..230a5df46 100644 --- a/validator/validator-options.cpp +++ b/validator/validator-options.cpp @@ -26,29 +26,58 @@ namespace ton { namespace validator { -void CollatorsList::unpack(const ton_api::engine_validator_collatorsList& obj) { +td::Status CollatorsList::unpack(const ton_api::engine_validator_collatorsList& obj) { shards.clear(); - self_collate = obj.self_collate_; - use_config_41 = obj.use_config_41_; + self_collate = false; for (const auto& shard_obj : obj.shards_) { + ShardIdFull shard_id = create_shard_id(shard_obj->shard_id_); + if (shard_id.is_masterchain()) { + return td::Status::Error("masterchain shard in collators list"); + } + if (!shard_id.is_valid_ext()) { + return td::Status::Error(PSTRING() << "invalid shard " << shard_id.to_str()); + } shards.emplace_back(); Shard& shard = shards.back(); - shard.shard_id = create_shard_id(shard_obj->shard_id_); + shard.shard_id = shard_id; + shard.self_collate = shard_obj->self_collate_; + if (shard.self_collate) { + self_collate = true; + } + if (shard_obj->select_mode_.empty() || shard_obj->select_mode_ == "random") { + shard.select_mode = mode_random; + } else if (shard_obj->select_mode_ == "ordered") { + shard.select_mode = mode_ordered; + } else if (shard_obj->select_mode_ == "round_robin") { + shard.select_mode = mode_round_robin; + } else { + return td::Status::Error(PSTRING() << "invalid select mode '" << shard_obj->select_mode_ + << "' (allowed: 'random', 'ordered', 'round_robin')"); + } for (const auto& collator : shard_obj->collators_) { - shard.collators.push_back({adnl::AdnlNodeIdShort{collator->adnl_id_}, 
collator->trusted_}); + shard.collators.push_back(adnl::AdnlNodeIdShort{collator->adnl_id_}); } } + return td::Status::OK(); +} + +CollatorsList CollatorsList::default_list() { + CollatorsList list; + list.shards.push_back( + {.shard_id = ShardIdFull{basechainId, shardIdAll}, .select_mode = mode_random, .self_collate = true}); + list.self_collate = true; + return list; } -td::Ref ValidatorManagerOptions::create( - BlockIdExt zero_block_id, BlockIdExt init_block_id, - std::function check_shard, bool allow_blockchain_init, - double sync_blocks_before, double block_ttl, double state_ttl, double max_mempool_num, - double archive_ttl, double key_proof_ttl, bool initial_sync_disabled) { +td::Ref ValidatorManagerOptions::create(BlockIdExt zero_block_id, BlockIdExt init_block_id, + std::function check_shard, + bool allow_blockchain_init, double sync_blocks_before, + double block_ttl, double state_ttl, + double max_mempool_num, double archive_ttl, + double key_proof_ttl, bool initial_sync_disabled) { return td::make_ref(zero_block_id, init_block_id, std::move(check_shard), allow_blockchain_init, sync_blocks_before, block_ttl, state_ttl, - max_mempool_num, - archive_ttl, key_proof_ttl, initial_sync_disabled); + max_mempool_num, archive_ttl, key_proof_ttl, initial_sync_disabled); } } // namespace validator diff --git a/validator/validator-options.hpp b/validator/validator-options.hpp index 8e876625d..05cc73d31 100644 --- a/validator/validator-options.hpp +++ b/validator/validator-options.hpp @@ -157,6 +157,9 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions { td::Ref get_collators_list() const override { return collators_list_; } + bool check_collator_node_whitelist(adnl::AdnlNodeIdShort id) const override { + return !collator_node_whitelist_enabled_ || collator_node_whitelist_.contains(id); + } void set_zero_block_id(BlockIdExt block_id) override { zero_block_id_ = block_id; @@ -255,6 +258,16 @@ struct ValidatorManagerOptionsImpl : public 
ValidatorManagerOptions { void set_collators_list(td::Ref list) override { collators_list_ = std::move(list); } + void set_collator_node_whitelisted_validator(adnl::AdnlNodeIdShort id, bool add) override { + if (add) { + collator_node_whitelist_.insert(id); + } else { + collator_node_whitelist_.erase(id); + } + } + void set_collator_node_whitelist_enabled(bool enabled) override { + collator_node_whitelist_enabled_ = enabled; + } ValidatorManagerOptionsImpl *make_copy() const override { return new ValidatorManagerOptionsImpl(*this); @@ -308,7 +321,9 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions { bool state_serializer_enabled_ = true; td::Ref collator_options_{true}; bool fast_state_serializer_enabled_ = false; - td::Ref collators_list_{true, CollatorsList{}}; + td::Ref collators_list_{true, CollatorsList::default_list()}; + std::set collator_node_whitelist_; + bool collator_node_whitelist_enabled_ = false; }; } // namespace validator diff --git a/validator/validator-telemetry.cpp b/validator/validator-telemetry.cpp new file mode 100644 index 000000000..403dd6f9f --- /dev/null +++ b/validator/validator-telemetry.cpp @@ -0,0 +1,87 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see <http://www.gnu.org/licenses/>. + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library.
+ You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version. If you delete this exception statement + from all source files in the program, then also delete it here. +*/ +#include "validator-telemetry.hpp" +#include "git.h" +#include "td/utils/Random.h" +#include "td/utils/port/uname.h" +#include "interfaces/validator-manager.h" + +namespace ton::validator { + +void ValidatorTelemetry::start_up() { + node_version_ = PSTRING() << "validator-engine, Commit: " << GitMetadata::CommitSHA1() + << ", Date: " << GitMetadata::CommitDate(); + + os_version_ = td::get_operating_system_version().str(); + + auto r_total_mem_stat = td::get_total_mem_stat(); + if (r_total_mem_stat.is_error()) { + LOG(WARNING) << "Cannot get RAM size: " << r_total_mem_stat.move_as_error(); + } else { + ram_size_ = r_total_mem_stat.ok().total_ram; + } + + auto r_cpu_cores = td::get_cpu_cores(); + if (r_cpu_cores.is_error()) { + LOG(WARNING) << "Cannot get CPU info: " << r_cpu_cores.move_as_error(); + } else { + cpu_cores_ = r_cpu_cores.move_as_ok(); + } + + LOG(DEBUG) << "Initializing validator telemetry, key = " << key_ << ", adnl_id = " << local_id_; + alarm_timestamp().relax(send_telemetry_at_ = td::Timestamp::in(td::Random::fast(30.0, 60.0))); +} + +void ValidatorTelemetry::alarm() { + if (send_telemetry_at_.is_in_past()) { + send_telemetry_at_ = td::Timestamp::never(); + send_telemetry(); + } + alarm_timestamp().relax(send_telemetry_at_); +} + +void ValidatorTelemetry::send_telemetry() { + send_telemetry_at_ = td::Timestamp::in(PERIOD); + + auto telemetry = create_tl_object(); + telemetry->flags_ = 0; + telemetry->timestamp_ = td::Clocks::system(); + telemetry->adnl_id_ = local_id_.bits256_value(); + 
telemetry->node_version_ = node_version_; + telemetry->os_version_ = os_version_; + telemetry->node_started_at_ = adnl::Adnl::adnl_start_time(); + telemetry->ram_size_ = ram_size_; + telemetry->cpu_cores_ = cpu_cores_; + telemetry->node_threads_ = (td::int32)td::actor::SchedulerContext::get() + ->scheduler_group() + ->schedulers.at(td::actor::SchedulerContext::get()->get_scheduler_id().value()) + .cpu_threads_count; + + LOG(DEBUG) << "Sending validator telemetry for adnl id " << local_id_; + td::actor::send_closure(manager_, &ValidatorManager::send_validator_telemetry, key_, std::move(telemetry)); +} + +} // namespace ton::validator diff --git a/validator/validator-telemetry.hpp b/validator/validator-telemetry.hpp new file mode 100644 index 000000000..73908bdd1 --- /dev/null +++ b/validator/validator-telemetry.hpp @@ -0,0 +1,66 @@ +/* + This file is part of TON Blockchain source code. + + TON Blockchain is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + TON Blockchain is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with TON Blockchain. If not, see <http://www.gnu.org/licenses/>. + + In addition, as a special exception, the copyright holders give permission + to link the code of portions of this program with the OpenSSL library. + You must obey the GNU General Public License in all respects for all + of the code used other than OpenSSL. If you modify file(s) with this + exception, you may extend this exception to your version of the file(s), + but you are not obligated to do so. If you do not wish to do so, delete this + exception statement from your version.
If you delete this exception statement + from all source files in the program, then also delete it here. +*/ +#pragma once +#include "overlay.h" +#include "td/actor/actor.h" +#include "adnl/adnl.h" +#include "interfaces/shard.h" + +namespace ton::validator { +class ValidatorManager; + +class ValidatorTelemetry : public td::actor::Actor { +public: + ValidatorTelemetry(PublicKeyHash key, adnl::AdnlNodeIdShort local_id, td::Bits256 zero_state_file_hash, + td::actor::ActorId manager) + : key_(key) + , local_id_(local_id) + , zero_state_file_hash_(zero_state_file_hash) + , manager_(std::move(manager)) { + } + + void start_up() override; + void alarm() override; + +private: + PublicKeyHash key_; + adnl::AdnlNodeIdShort local_id_; + td::Bits256 zero_state_file_hash_; + td::actor::ActorId manager_; + + std::string node_version_; + std::string os_version_; + td::uint32 cpu_cores_ = 0; + td::uint64 ram_size_ = 0; + + td::Timestamp send_telemetry_at_ = td::Timestamp::never(); + + void send_telemetry(); + + static constexpr double PERIOD = 600.0; + static constexpr td::uint32 MAX_SIZE = 8192; +}; +} // namespace ton::validator \ No newline at end of file diff --git a/validator/validator.h b/validator/validator.h index 458076768..2cd771021 100644 --- a/validator/validator.h +++ b/validator/validator.h @@ -70,22 +70,28 @@ struct CollatorOptions : public td::CntObject { std::set> whitelist; // Prioritize these accounts on each phase of process_dispatch_queue std::set> prioritylist; + + // Always enable full_collated_data + bool force_full_collated_data = false; + // Ignore collated data size limits from block limits and catchain config + bool ignore_collated_data_limits = false; }; struct CollatorsList : public td::CntObject { - struct Collator { - adnl::AdnlNodeIdShort adnl_id; - bool trusted; + enum SelectMode { + mode_random, mode_ordered, mode_round_robin }; struct Shard { ShardIdFull shard_id; - std::vector collators; + SelectMode select_mode = mode_random; + std::vector 
collators; + bool self_collate = false; }; - bool self_collate = true; - bool use_config_41 = false; std::vector shards; + bool self_collate = false; - void unpack(const ton_api::engine_validator_collatorsList& obj); + td::Status unpack(const ton_api::engine_validator_collatorsList& obj); + static CollatorsList default_list(); }; struct ValidatorManagerOptions : public td::CntObject { @@ -130,6 +136,7 @@ struct ValidatorManagerOptions : public td::CntObject { virtual td::Ref get_collator_options() const = 0; virtual bool get_fast_state_serializer_enabled() const = 0; virtual td::Ref get_collators_list() const = 0; + virtual bool check_collator_node_whitelist(adnl::AdnlNodeIdShort id) const = 0; virtual void set_zero_block_id(BlockIdExt block_id) = 0; virtual void set_init_block_id(BlockIdExt block_id) = 0; @@ -163,6 +170,8 @@ struct ValidatorManagerOptions : public td::CntObject { virtual void set_collator_options(td::Ref value) = 0; virtual void set_fast_state_serializer_enabled(bool value) = 0; virtual void set_collators_list(td::Ref list) = 0; + virtual void set_collator_node_whitelisted_validator(adnl::AdnlNodeIdShort id, bool add) = 0; + virtual void set_collator_node_whitelist_enabled(bool enabled) = 0; static td::Ref create( BlockIdExt zero_block_id, BlockIdExt init_block_id, @@ -208,6 +217,7 @@ class ValidatorManagerInterface : public td::actor::Actor { td::Promise>> promise) = 0; virtual void new_key_block(BlockHandle handle) = 0; + virtual void send_validator_telemetry(PublicKeyHash key, tl_object_ptr telemetry) = 0; }; virtual ~ValidatorManagerInterface() = default; @@ -314,6 +324,9 @@ class ValidatorManagerInterface : public td::actor::Actor { virtual void add_collator(adnl::AdnlNodeIdShort id, ShardIdFull shard) = 0; virtual void del_collator(adnl::AdnlNodeIdShort id, ShardIdFull shard) = 0; + + virtual void get_collation_manager_stats( + td::Promise> promise) = 0; }; } // namespace validator