From f7e189f272b36a4c66426dd29e0b6d20e4c20d8a Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Thu, 15 Aug 2024 12:53:05 +0300 Subject: [PATCH 01/19] Fix UB in blst aggregate verify (#1107) --- crypto/vm/bls.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/vm/bls.cpp b/crypto/vm/bls.cpp index f6ccc275c..ff5179c72 100644 --- a/crypto/vm/bls.cpp +++ b/crypto/vm/bls.cpp @@ -93,12 +93,13 @@ bool aggregate_verify(const std::vector> &pubs_ms return false; } std::unique_ptr pairing = std::make_unique(true, DST); + blst::P2_Affine p2_zero; for (const auto &p : pubs_msgs) { blst::P1_Affine p1(p.first.data(), P1_SIZE); if (!p1.in_group() || p1.is_inf()) { return false; } - pairing->aggregate(&p1, nullptr, (const td::uint8 *)p.second.data(), p.second.size()); + pairing->aggregate(&p1, &p2_zero, (const td::uint8 *)p.second.data(), p.second.size()); } pairing->commit(); blst::P2_Affine p2(sig.data(), P2_SIZE); From 6515136061b465f41a9a9f4c548d3da09ec0a154 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Thu, 15 Aug 2024 15:25:16 +0300 Subject: [PATCH 02/19] Improve creating channels in adnl (#1108) * Improve creating channels in adnl * Improve handling of cryptographic keys --- adnl/adnl-peer.cpp | 22 +++++++++++++++------ adnl/adnl-peer.hpp | 3 ++- keyring/keyring.cpp | 38 ++++++++++++++++++++---------------- keyring/keyring.hpp | 7 +++---- keys/encryptor.cpp | 22 --------------------- keys/encryptor.h | 22 --------------------- keys/encryptor.hpp | 14 ++++++++++---- keys/keys.cpp | 47 +++++++++++++++++++++++++++++++++++++++++---- keys/keys.hpp | 14 ++++++++++++++ 9 files changed, 109 insertions(+), 80 deletions(-) diff --git a/adnl/adnl-peer.cpp b/adnl/adnl-peer.cpp index febbdac6e..d82486fed 100644 --- a/adnl/adnl-peer.cpp +++ b/adnl/adnl-peer.cpp @@ -269,7 +269,11 @@ void AdnlPeerPairImpl::send_messages_in(std::vector message size_t ptr = 0; bool first = true; do { + respond_with_nop_after_ = td::Timestamp::in(td::Random::fast(1.0, 2.0)); bool try_reinit = try_reinit_at_ && try_reinit_at_.is_in_past(); + if (try_reinit) { + try_reinit_at_ = td::Timestamp::in(td::Random::fast(0.5, 1.5)); + } bool via_channel = channel_ready_ && !try_reinit; size_t s = (via_channel ? channel_packet_header_max_size() : packet_header_max_size()); if (first) { @@ -504,12 +508,6 @@ void AdnlPeerPairImpl::create_channel(pubkeys::Ed25519 pub, td::uint32 date) { void AdnlPeerPairImpl::process_message(const adnlmessage::AdnlMessageCreateChannel &message) { create_channel(message.key(), message.date()); - if (respond_to_channel_create_after_.is_in_past()) { - respond_to_channel_create_after_ = td::Timestamp::in(td::Random::fast(1.0, 2.0)); - std::vector messages; - messages.emplace_back(adnlmessage::AdnlMessageNop{}, 0); - send_messages(std::move(messages)); - } } void AdnlPeerPairImpl::process_message(const adnlmessage::AdnlMessageConfirmChannel &message) { @@ -526,6 +524,7 @@ void AdnlPeerPairImpl::process_message(const adnlmessage::AdnlMessageConfirmChan } void AdnlPeerPairImpl::process_message(const adnlmessage::AdnlMessageCustom &message) { + respond_with_nop(); td::actor::send_closure(local_actor_, &AdnlLocalId::deliver, peer_id_short_, message.data()); } @@ -538,6 +537,7 @@ void AdnlPeerPairImpl::process_message(const adnlmessage::AdnlMessageReinit &mes } void AdnlPeerPairImpl::process_message(const adnlmessage::AdnlMessageQuery &message) { + respond_with_nop(); auto P = td::PromiseCreator::lambda([SelfId = actor_id(this), query_id = message.query_id(), flags = static_cast(0)](td::Result R) { if (R.is_error()) { @@ -556,6 +556,7 @@ void AdnlPeerPairImpl::process_message(const adnlmessage::AdnlMessageQuery &mess } void AdnlPeerPairImpl::process_message(const adnlmessage::AdnlMessageAnswer &message) { + respond_with_nop(); auto Q = out_queries_.find(message.query_id()); if (Q == out_queries_.end()) { @@ -573,6 +574,7 @@ void AdnlPeerPairImpl::process_message(const adnlmessage::AdnlMessageAnswer &mes } void AdnlPeerPairImpl::process_message(const adnlmessage::AdnlMessagePart &message) { + respond_with_nop(); auto size = message.total_size(); if (size > huge_packet_max_size()) { VLOG(ADNL_WARNING) << this << ": dropping too big huge message: size=" << size; @@ -635,6 +637,14 @@ void AdnlPeerPairImpl::delete_query(AdnlQueryId id) { } } +void AdnlPeerPairImpl::respond_with_nop() { + if (respond_with_nop_after_.is_in_past()) { + std::vector messages; + messages.emplace_back(adnlmessage::AdnlMessageNop{}, 0); + send_messages(std::move(messages)); + } +} + void AdnlPeerPairImpl::reinit(td::int32 date) { if (reinit_date_ == 0) { reinit_date_ = date; diff --git a/adnl/adnl-peer.hpp b/adnl/adnl-peer.hpp index e9a5d428e..40c9eb088 100644 --- a/adnl/adnl-peer.hpp +++ b/adnl/adnl-peer.hpp @@ -122,6 +122,7 @@ class AdnlPeerPairImpl : public AdnlPeerPair { } private: + void respond_with_nop(); void reinit(td::int32 date); td::Result, bool>> get_conn(bool direct_only); void create_channel(pubkeys::Ed25519 pub, td::uint32 date); @@ -214,7 +215,7 @@ class AdnlPeerPairImpl : public AdnlPeerPair { pubkeys::Ed25519 channel_pub_; td::int32 channel_pk_date_; td::actor::ActorOwn channel_; - td::Timestamp respond_to_channel_create_after_; + td::Timestamp respond_with_nop_after_; td::uint64 in_seqno_ = 0; td::uint64 out_seqno_ = 0; diff --git a/keyring/keyring.cpp b/keyring/keyring.cpp index 529a6b8b0..0f45879de 100644 --- a/keyring/keyring.cpp +++ b/keyring/keyring.cpp @@ -27,6 +27,16 @@ namespace ton { namespace keyring { +KeyringImpl::PrivateKeyDescr::PrivateKeyDescr(PrivateKey private_key, bool is_temp) + : public_key(private_key.compute_public_key()), is_temp(is_temp) { + auto D = private_key.create_decryptor_async(); + D.ensure(); + decryptor_sign = D.move_as_ok(); + D = private_key.create_decryptor_async(); + D.ensure(); + decryptor_decrypt = D.move_as_ok(); +} + void KeyringImpl::start_up() { if (db_root_.size() > 0) { td::mkdir(db_root_).ensure(); @@ -45,23 +55,19 @@ td::Result KeyringImpl::load_key(PublicKeyHash k auto name = db_root_ + "/" + key_hash.bits256_value().to_hex(); - auto R = td::read_file(td::CSlice{name}); + auto R = td::read_file_secure(td::CSlice{name}); if (R.is_error()) { return R.move_as_error_prefix("key not in db: "); } auto data = R.move_as_ok(); - auto R2 = PrivateKey::import(td::SecureString(data)); + auto R2 = PrivateKey::import(data); R2.ensure(); auto key = R2.move_as_ok(); - auto pub = key.compute_public_key(); - auto short_id = pub.compute_short_id(); + auto desc = std::make_unique(key, false); + auto short_id = desc->public_key.compute_short_id(); CHECK(short_id == key_hash); - - auto D = key.create_decryptor_async(); - D.ensure(); - - return map_.emplace(short_id, std::make_unique(D.move_as_ok(), pub, false)).first->second.get(); + return map_.emplace(short_id, std::move(desc)).first->second.get(); } void KeyringImpl::add_key(PrivateKey key, bool is_temp, td::Promise promise) { @@ -76,10 +82,7 @@ void KeyringImpl::add_key(PrivateKey key, bool is_temp, td::Promise pr if (db_root_.size() == 0) { CHECK(is_temp); } - auto D = key.create_decryptor_async(); - D.ensure(); - - map_.emplace(short_id, std::make_unique(D.move_as_ok(), pub, is_temp)); + map_.emplace(short_id, std::make_unique(key, is_temp)); if (!is_temp && key.exportable()) { auto S = key.export_as_slice(); @@ -139,7 +142,7 @@ void KeyringImpl::sign_message(PublicKeyHash key_hash, td::BufferSlice data, td: if (S.is_error()) { promise.set_error(S.move_as_error()); } else { - td::actor::send_closure(S.move_as_ok()->decryptor, &DecryptorAsync::sign, std::move(data), std::move(promise)); + td::actor::send_closure(S.move_as_ok()->decryptor_sign, &DecryptorAsync::sign, std::move(data), std::move(promise)); } } @@ -161,7 +164,7 @@ void KeyringImpl::sign_add_get_public_key(PublicKeyHash key_hash, td::BufferSlic } promise.set_value(std::pair{R.move_as_ok(), id}); }); - td::actor::send_closure(D->decryptor, &DecryptorAsync::sign, std::move(data), std::move(P)); + td::actor::send_closure(D->decryptor_sign, &DecryptorAsync::sign, std::move(data), std::move(P)); } void KeyringImpl::sign_messages(PublicKeyHash key_hash, std::vector data, @@ -171,7 +174,7 @@ void KeyringImpl::sign_messages(PublicKeyHash key_hash, std::vectordecryptor, &DecryptorAsync::sign_batch, std::move(data), + td::actor::send_closure(S.move_as_ok()->decryptor_sign, &DecryptorAsync::sign_batch, std::move(data), std::move(promise)); } } @@ -182,7 +185,8 @@ void KeyringImpl::decrypt_message(PublicKeyHash key_hash, td::BufferSlice data, if (S.is_error()) { promise.set_error(S.move_as_error()); } else { - td::actor::send_closure(S.move_as_ok()->decryptor, &DecryptorAsync::decrypt, std::move(data), std::move(promise)); + td::actor::send_closure(S.move_as_ok()->decryptor_decrypt, &DecryptorAsync::decrypt, std::move(data), + std::move(promise)); } } diff --git a/keyring/keyring.hpp b/keyring/keyring.hpp index fc67bd0fe..ec658305a 100644 --- a/keyring/keyring.hpp +++ b/keyring/keyring.hpp @@ -30,12 +30,11 @@ namespace keyring { class KeyringImpl : public Keyring { private: struct PrivateKeyDescr { - td::actor::ActorOwn decryptor; + td::actor::ActorOwn decryptor_sign; + td::actor::ActorOwn decryptor_decrypt; PublicKey public_key; bool is_temp; - PrivateKeyDescr(td::actor::ActorOwn decryptor, PublicKey public_key, bool is_temp) - : decryptor(std::move(decryptor)), public_key(public_key), is_temp(is_temp) { - } + PrivateKeyDescr(PrivateKey private_key, bool is_temp); }; public: diff --git a/keys/encryptor.cpp b/keys/encryptor.cpp index 0b93d36fc..8fef9a095 100644 --- a/keys/encryptor.cpp +++ b/keys/encryptor.cpp @@ -29,28 +29,6 @@ namespace ton { -td::Result> Encryptor::create(const ton_api::PublicKey *id) { - td::Result> res; - ton_api::downcast_call( - *const_cast(id), - td::overloaded([&](const ton_api::pub_unenc &obj) { res = std::make_unique(); }, - [&](const ton_api::pub_ed25519 &obj) { res = std::make_unique(obj.key_); }, - [&](const ton_api::pub_overlay &obj) { res = std::make_unique(); }, - [&](const ton_api::pub_aes &obj) { res = std::make_unique(obj.key_); })); - return res; -} - -td::Result> Decryptor::create(const ton_api::PrivateKey *id) { - td::Result> res; - ton_api::downcast_call( - *const_cast(id), - td::overloaded([&](const ton_api::pk_unenc &obj) { res = std::make_unique(); }, - [&](const ton_api::pk_ed25519 &obj) { res = std::make_unique(obj.key_); }, - [&](const ton_api::pk_overlay &obj) { res = std::make_unique(); }, - [&](const ton_api::pk_aes &obj) { res = std::make_unique(obj.key_); })); - return res; -} - td::Result EncryptorEd25519::encrypt(td::Slice data) { TRY_RESULT_PREFIX(pk, td::Ed25519::generate_private_key(), "failed to generate private key: "); TRY_RESULT_PREFIX(pubkey, pk.get_public_key(), "failed to get public key from private: "); diff --git a/keys/encryptor.h b/keys/encryptor.h index 3035a0cec..818c97d63 100644 --- a/keys/encryptor.h +++ b/keys/encryptor.h @@ -31,7 +31,6 @@ class Encryptor { virtual td::Result encrypt(td::Slice data) = 0; virtual td::Status check_signature(td::Slice message, td::Slice signature) = 0; virtual ~Encryptor() = default; - static td::Result> create(const ton_api::PublicKey *id); }; class Decryptor { @@ -40,7 +39,6 @@ class Decryptor { virtual td::Result sign(td::Slice data) = 0; virtual std::vector> sign_batch(std::vector data); virtual ~Decryptor() = default; - static td::Result> create(const ton_api::PrivateKey *id); }; class EncryptorAsync : public td::actor::Actor { @@ -61,16 +59,6 @@ class EncryptorAsync : public td::actor::Actor { void encrypt(td::BufferSlice data, td::Promise promise) { promise.set_result(encryptor_->encrypt(data.as_slice())); } - template - static td::Result> create(T &id) { - TRY_RESULT(d, Encryptor::create(id)); - return td::actor::create_actor("encryptor", std::move(d)); - } - template - static td::Result> create(T *id) { - TRY_RESULT(d, Encryptor::create(id)); - return td::actor::create_actor("encryptor", std::move(d)); - } }; class DecryptorAsync : public td::actor::Actor { @@ -94,16 +82,6 @@ class DecryptorAsync : public td::actor::Actor { } return decryptor_->sign_batch(v); } - template - static td::Result> create(T &id) { - TRY_RESULT(d, Decryptor::create(id)); - return td::actor::create_actor("decryptor", std::move(d)); - } - template - static td::Result> create(T *id) { - TRY_RESULT(d, Decryptor::create(id)); - return td::actor::create_actor("decryptor", std::move(d)); - } }; } // namespace ton diff --git a/keys/encryptor.hpp b/keys/encryptor.hpp index dbe882398..bcc841dc8 100644 --- a/keys/encryptor.hpp +++ b/keys/encryptor.hpp @@ -83,7 +83,7 @@ class EncryptorEd25519 : public Encryptor { td::Result encrypt(td::Slice data) override; td::Status check_signature(td::Slice message, td::Slice signature) override; - EncryptorEd25519(td::Bits256 key) : pub_(td::SecureString(as_slice(key))) { + EncryptorEd25519(const td::Bits256& key) : pub_(td::SecureString(as_slice(key))) { } }; @@ -94,7 +94,7 @@ class DecryptorEd25519 : public Decryptor { public: td::Result decrypt(td::Slice data) override; td::Result sign(td::Slice data) override; - DecryptorEd25519(td::Bits256 key) : pk_(td::SecureString(as_slice(key))) { + DecryptorEd25519(const td::Bits256& key) : pk_(td::SecureString(as_slice(key))) { } }; @@ -129,12 +129,15 @@ class EncryptorAES : public Encryptor { td::Bits256 shared_secret_; public: + ~EncryptorAES() override { + shared_secret_.set_zero_s(); + } td::Result encrypt(td::Slice data) override; td::Status check_signature(td::Slice message, td::Slice signature) override { return td::Status::Error("can no sign channel messages"); } - EncryptorAES(td::Bits256 shared_secret) : shared_secret_(shared_secret) { + EncryptorAES(const td::Bits256& shared_secret) : shared_secret_(shared_secret) { } }; @@ -143,11 +146,14 @@ class DecryptorAES : public Decryptor { td::Bits256 shared_secret_; public: + ~DecryptorAES() override { + shared_secret_.set_zero_s(); + } td::Result decrypt(td::Slice data) override; td::Result sign(td::Slice data) override { return td::Status::Error("can no sign channel messages"); } - DecryptorAES(td::Bits256 shared_secret) : shared_secret_(shared_secret) { + DecryptorAES(const td::Bits256& shared_secret) : shared_secret_(shared_secret) { } }; diff --git a/keys/keys.cpp b/keys/keys.cpp index 7d6c0c2c4..01afb26d2 100644 --- a/keys/keys.cpp +++ b/keys/keys.cpp @@ -21,6 +21,7 @@ #include "td/utils/overloaded.h" #include "tl-utils/tl-utils.hpp" #include "encryptor.h" +#include "encryptor.hpp" #include "crypto/Ed25519.h" namespace ton { @@ -63,12 +64,31 @@ td::Result PublicKey::import(td::Slice s) { return PublicKey{x}; } +td::Result> pubkeys::Ed25519::create_encryptor() const { + return std::make_unique(data_); +} + +td::Result> pubkeys::AES::create_encryptor() const { + return std::make_unique(data_); +} + +td::Result> pubkeys::Unenc::create_encryptor() const { + return std::make_unique(); +} + +td::Result> pubkeys::Overlay::create_encryptor() const { + return std::make_unique(); +} + td::Result> PublicKey::create_encryptor() const { - return Encryptor::create(tl().get()); + td::Result> res; + pub_key_.visit([&](auto &obj) { res = obj.create_encryptor(); }); + return res; } td::Result> PublicKey::create_encryptor_async() const { - return EncryptorAsync::create(tl().get()); + TRY_RESULT(encryptor, create_encryptor()); + return td::actor::create_actor("encryptor", std::move(encryptor)); } bool PublicKey::empty() const { @@ -109,6 +129,22 @@ privkeys::Ed25519::Ed25519(td::Ed25519::PrivateKey key) { data_.as_slice().copy_from(td::Slice(s)); } +td::Result> privkeys::Ed25519::create_decryptor() const { + return std::make_unique(data_); +} + +td::Result> privkeys::AES::create_decryptor() const { + return std::make_unique(data_); +} + +td::Result> privkeys::Unenc::create_decryptor() const { + return std::make_unique(); +} + +td::Result> privkeys::Overlay::create_decryptor() const { + return std::make_unique(); +} + pubkeys::Ed25519::Ed25519(td::Ed25519::PublicKey key) { auto s = key.as_octet_string(); CHECK(s.length() == 32); @@ -188,11 +224,14 @@ tl_object_ptr PrivateKey::tl() const { } td::Result> PrivateKey::create_decryptor() const { - return Decryptor::create(tl().get()); + td::Result> res; + priv_key_.visit([&](auto &obj) { res = obj.create_decryptor(); }); + return res; } td::Result> PrivateKey::create_decryptor_async() const { - return DecryptorAsync::create(tl().get()); + TRY_RESULT(decryptor, create_decryptor()); + return td::actor::create_actor("decryptor", std::move(decryptor)); } } // namespace ton diff --git a/keys/keys.hpp b/keys/keys.hpp index cf883bbe6..72d0845ac 100644 --- a/keys/keys.hpp +++ b/keys/keys.hpp @@ -110,6 +110,7 @@ class Ed25519 { tl_object_ptr tl() const { return create_tl_object(data_); } + td::Result> create_encryptor() const; bool operator==(const Ed25519 &with) const { return data_ == with.data_; } @@ -141,6 +142,7 @@ class AES { tl_object_ptr tl() const { return create_tl_object(data_); } + td::Result> create_encryptor() const; bool operator==(const AES &with) const { return data_ == with.data_; } @@ -172,6 +174,7 @@ class Unenc { tl_object_ptr tl() const { return create_tl_object(data_.clone_as_buffer_slice()); } + td::Result> create_encryptor() const; bool operator==(const Unenc &with) const { return data_.as_slice() == with.data_.as_slice(); } @@ -203,6 +206,7 @@ class Overlay { tl_object_ptr tl() const { return create_tl_object(data_.clone_as_buffer_slice()); } + td::Result> create_encryptor() const; bool operator==(const Overlay &with) const { return data_.as_slice() == with.data_.as_slice(); } @@ -223,6 +227,9 @@ class PublicKey { td::uint32 serialized_size() const { UNREACHABLE(); } + td::Result> create_encryptor() const { + UNREACHABLE(); + } bool operator==(const Empty &with) const { return false; } @@ -316,6 +323,7 @@ class Ed25519 { } tl_object_ptr pub_tl() const; pubkeys::Ed25519 pub() const; + td::Result> create_decryptor() const; static Ed25519 random(); }; @@ -359,6 +367,7 @@ class AES { pubkeys::AES pub() const { return pubkeys::AES{data_}; } + td::Result> create_decryptor() const; }; class Unenc { @@ -393,6 +402,7 @@ class Unenc { pubkeys::Unenc pub() const { return pubkeys::Unenc{data_.clone()}; } + td::Result> create_decryptor() const; }; class Overlay { @@ -427,6 +437,7 @@ class Overlay { pubkeys::Overlay pub() const { return pubkeys::Overlay{data_.clone()}; } + td::Result> create_decryptor() const; }; } // namespace privkeys @@ -450,6 +461,9 @@ class PrivateKey { PublicKey pub() const { UNREACHABLE(); } + td::Result> create_decryptor() const { + UNREACHABLE(); + } }; td::Variant priv_key_{Empty{}}; From a71d4132022fb4743a275de4ddefbd1fe9e649a1 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Thu, 15 Aug 2024 15:26:35 +0300 Subject: [PATCH 03/19] Improve dht lookup in overlays (#1104) Continue dht lookup even if value was found --- dht/dht-in.hpp | 1 + dht/dht-query.cpp | 34 +++++++++++++++++++++++--- dht/dht-query.hpp | 51 +++++++++++++++++++++++++++++++++------ dht/dht.cpp | 17 ++++++++++++- dht/dht.h | 1 + overlay/overlay.cpp | 49 +++++++++++++++++++------------------ overlay/overlay.hpp | 3 ++- tdutils/td/utils/Status.h | 6 +++++ 8 files changed, 127 insertions(+), 35 deletions(-) diff --git a/dht/dht-in.hpp b/dht/dht-in.hpp index c2d20455f..0d668d438 100644 --- a/dht/dht-in.hpp +++ b/dht/dht-in.hpp @@ -179,6 +179,7 @@ class DhtMemberImpl : public DhtMember { void get_value(DhtKey key, td::Promise result) override { get_value_in(key.compute_key_id(), std::move(result)); } + void get_value_many(DhtKey key, std::function callback, td::Promise promise) override; void alarm() override { alarm_timestamp() = td::Timestamp::in(1.0); diff --git a/dht/dht-query.cpp b/dht/dht-query.cpp index b84ef8c37..3d43b1069 100644 --- a/dht/dht-query.cpp +++ b/dht/dht-query.cpp @@ -210,8 +210,11 @@ void DhtQueryFindValue::on_result(td::Result R, adnl::AdnlNodeI send_get_nodes = true; return; } - promise_.set_value(std::move(value)); - need_stop = true; + if (on_value_found(std::move(value))) { + send_get_nodes = true; + } else { + need_stop = true; + } }, [&](ton_api::dht_valueNotFound &v) { add_nodes(DhtNodesList{std::move(v.nodes_), our_network_id()}); @@ -244,7 +247,32 @@ void DhtQueryFindValue::on_result_nodes(td::Result R, adnl::Adn } void DhtQueryFindValue::finish(DhtNodesList list) { - promise_.set_error(td::Status::Error(ErrorCode::notready, "dht key not found")); +} + +bool DhtQueryFindValueSingle::on_value_found(DhtValue value) { + promise_.set_value(std::move(value)); + found_ = true; + return false; +} + +void DhtQueryFindValueSingle::tear_down() { + if (!found_) { + promise_.set_error(td::Status::Error(ErrorCode::notready, "dht key not found")); + } +} + +bool DhtQueryFindValueMany::on_value_found(DhtValue value) { + callback_(std::move(value)); + found_ = true; + return true; +} + +void DhtQueryFindValueMany::tear_down() { + if (found_) { + promise_.set_value(td::Unit()); + } else { + promise_.set_error(td::Status::Error(ErrorCode::notready, "dht key not found")); + } } DhtQueryStore::DhtQueryStore(DhtValue key_value, DhtMember::PrintId print_id, adnl::AdnlNodeIdShort src, diff --git a/dht/dht-query.hpp b/dht/dht-query.hpp index e47403618..e305f107a 100644 --- a/dht/dht-query.hpp +++ b/dht/dht-query.hpp @@ -126,16 +126,11 @@ class DhtQueryFindNodes : public DhtQuery { }; class DhtQueryFindValue : public DhtQuery { - private: - td::Promise promise_; - public: DhtQueryFindValue(DhtKeyId key, DhtMember::PrintId print_id, adnl::AdnlNodeIdShort src, DhtNodesList list, td::uint32 k, td::uint32 a, td::int32 our_network_id, DhtNode self, bool client_only, - td::actor::ActorId node, td::actor::ActorId adnl, - td::Promise promise) - : DhtQuery(key, print_id, src, k, a, our_network_id, std::move(self), client_only, node, adnl) - , promise_(std::move(promise)) { + td::actor::ActorId node, td::actor::ActorId adnl) + : DhtQuery(key, print_id, src, k, a, our_network_id, std::move(self), client_only, node, adnl) { add_nodes(std::move(list)); } void send_one_query(adnl::AdnlNodeIdShort id) override; @@ -146,6 +141,48 @@ class DhtQueryFindValue : public DhtQuery { std::string get_name() const override { return "find value"; } + + virtual bool on_value_found(DhtValue value) = 0; +}; + +class DhtQueryFindValueSingle : public DhtQueryFindValue { + public: + DhtQueryFindValueSingle(DhtKeyId key, DhtMember::PrintId print_id, adnl::AdnlNodeIdShort src, DhtNodesList list, + td::uint32 k, td::uint32 a, td::int32 our_network_id, DhtNode self, bool client_only, + td::actor::ActorId node, td::actor::ActorId adnl, + td::Promise promise) + : DhtQueryFindValue(key, print_id, src, std::move(list), k, a, our_network_id, std::move(self), client_only, node, + adnl) + , promise_(std::move(promise)) { + add_nodes(std::move(list)); + } + bool on_value_found(DhtValue value) override; + void tear_down() override; + + private: + td::Promise promise_; + bool found_ = false; +}; + +class DhtQueryFindValueMany : public DhtQueryFindValue { + public: + DhtQueryFindValueMany(DhtKeyId key, DhtMember::PrintId print_id, adnl::AdnlNodeIdShort src, DhtNodesList list, + td::uint32 k, td::uint32 a, td::int32 our_network_id, DhtNode self, bool client_only, + td::actor::ActorId node, td::actor::ActorId adnl, + std::function callback, td::Promise promise) + : DhtQueryFindValue(key, print_id, src, std::move(list), k, a, our_network_id, std::move(self), client_only, node, + adnl) + , callback_(std::move(callback)) + , promise_(std::move(promise)) { + add_nodes(std::move(list)); + } + bool on_value_found(DhtValue value) override; + void tear_down() override; + + private: + std::function callback_; + td::Promise promise_; + bool found_ = false; }; class DhtQueryStore : public td::actor::Actor { diff --git a/dht/dht.cpp b/dht/dht.cpp index 8d7b02b7d..b774b5f91 100644 --- a/dht/dht.cpp +++ b/dht/dht.cpp @@ -470,7 +470,7 @@ void DhtMemberImpl::get_value_in(DhtKeyId key, td::Promise result) { network_id = network_id_, id = id_, client_only = client_only_](td::Result R) mutable { R.ensure(); - td::actor::create_actor("FindValueQuery", key, print_id, id, std::move(list), k, a, network_id, + td::actor::create_actor("FindValueQuery", key, print_id, id, std::move(list), k, a, network_id, R.move_as_ok(), client_only, SelfId, adnl, std::move(promise)) .release(); }); @@ -478,6 +478,21 @@ void DhtMemberImpl::get_value_in(DhtKeyId key, td::Promise result) { get_self_node(std::move(P)); } +void DhtMemberImpl::get_value_many(DhtKey key, std::function callback, td::Promise promise) { + DhtKeyId key_id = key.compute_key_id(); + auto P = td::PromiseCreator::lambda( + [key = key_id, callback = std::move(callback), promise = std::move(promise), SelfId = actor_id(this), + print_id = print_id(), adnl = adnl_, list = get_nearest_nodes(key_id, k_ * 2), k = k_, a = a_, + network_id = network_id_, id = id_, client_only = client_only_](td::Result R) mutable { + R.ensure(); + td::actor::create_actor("FindValueManyQuery", key, print_id, id, std::move(list), k, a, + network_id, R.move_as_ok(), client_only, SelfId, adnl, + std::move(callback), std::move(promise)) + .release(); + }); + get_self_node(std::move(P)); +} + void DhtMemberImpl::register_reverse_connection(adnl::AdnlNodeIdFull client, td::Promise promise) { auto client_short = client.compute_short_id(); td::uint32 ttl = (td::uint32)td::Clocks::system() + 300; diff --git a/dht/dht.h b/dht/dht.h index b9c65c8a7..5abff94a1 100644 --- a/dht/dht.h +++ b/dht/dht.h @@ -53,6 +53,7 @@ class Dht : public td::actor::Actor { virtual void set_value(DhtValue key_value, td::Promise result) = 0; virtual void get_value(DhtKey key, td::Promise result) = 0; + virtual void get_value_many(DhtKey key, std::function callback, td::Promise promise) = 0; virtual void register_reverse_connection(adnl::AdnlNodeIdFull client, td::Promise promise) = 0; virtual void request_reverse_ping(adnl::AdnlNode target, adnl::AdnlNodeIdShort client, diff --git a/overlay/overlay.cpp b/overlay/overlay.cpp index f964ccc5c..43b3b7e5a 100644 --- a/overlay/overlay.cpp +++ b/overlay/overlay.cpp @@ -283,11 +283,14 @@ void OverlayImpl::alarm() { } if (next_dht_query_ && next_dht_query_.is_in_past()) { next_dht_query_ = td::Timestamp::never(); - auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result res) { - td::actor::send_closure(SelfId, &OverlayImpl::receive_dht_nodes, std::move(res), true); - }); - td::actor::send_closure(dht_node_, &dht::Dht::get_value, dht::DhtKey{overlay_id_.pubkey_hash(), "nodes", 0}, - std::move(P)); + std::function callback = [SelfId = actor_id(this)](dht::DhtValue value) { + td::actor::send_closure(SelfId, &OverlayImpl::receive_dht_nodes, std::move(value)); + }; + td::Promise on_finish = [SelfId = actor_id(this)](td::Result R) { + td::actor::send_closure(SelfId, &OverlayImpl::dht_lookup_finished, R.move_as_status()); + }; + td::actor::send_closure(dht_node_, &dht::Dht::get_value_many, dht::DhtKey{overlay_id_.pubkey_hash(), "nodes", 0}, + std::move(callback), std::move(on_finish)); } if (update_db_at_.is_in_past()) { if (peers_.size() > 0) { @@ -311,30 +314,30 @@ void OverlayImpl::alarm() { } } -void OverlayImpl::receive_dht_nodes(td::Result res, bool dummy) { +void OverlayImpl::receive_dht_nodes(dht::DhtValue v) { CHECK(public_); - if (res.is_ok()) { - auto v = res.move_as_ok(); - auto R = fetch_tl_object(v.value().clone(), true); - if (R.is_ok()) { - auto r = R.move_as_ok(); - VLOG(OVERLAY_INFO) << this << ": received " << r->nodes_.size() << " nodes from overlay"; - VLOG(OVERLAY_EXTRA_DEBUG) << this << ": nodes: " << ton_api::to_string(r); - std::vector nodes; - for (auto &n : r->nodes_) { - auto N = OverlayNode::create(n); - if (N.is_ok()) { - nodes.emplace_back(N.move_as_ok()); - } + auto R = fetch_tl_object(v.value().clone(), true); + if (R.is_ok()) { + auto r = R.move_as_ok(); + VLOG(OVERLAY_INFO) << this << ": received " << r->nodes_.size() << " nodes from overlay"; + VLOG(OVERLAY_EXTRA_DEBUG) << this << ": nodes: " << ton_api::to_string(r); + std::vector nodes; + for (auto &n : r->nodes_) { + auto N = OverlayNode::create(n); + if (N.is_ok()) { + nodes.emplace_back(N.move_as_ok()); } - add_peers(std::move(nodes)); - } else { - VLOG(OVERLAY_WARNING) << this << ": incorrect value in DHT for overlay nodes: " << R.move_as_error(); } + add_peers(std::move(nodes)); } else { - VLOG(OVERLAY_NOTICE) << this << ": can not get value from DHT: " << res.move_as_error(); + VLOG(OVERLAY_WARNING) << this << ": incorrect value in DHT for overlay nodes: " << R.move_as_error(); } +} +void OverlayImpl::dht_lookup_finished(td::Status S) { + if (S.is_error()) { + VLOG(OVERLAY_NOTICE) << this << ": can not get value from DHT: " << S; + } if (!(next_dht_store_query_ && next_dht_store_query_.is_in_past())) { finish_dht_query(); return; diff --git a/overlay/overlay.hpp b/overlay/overlay.hpp index 90fcc43d7..8fb3d91d7 100644 --- a/overlay/overlay.hpp +++ b/overlay/overlay.hpp @@ -166,7 +166,8 @@ class OverlayImpl : public Overlay { certs_[key] = std::move(cert); } - void receive_dht_nodes(td::Result res, bool dummy); + void receive_dht_nodes(dht::DhtValue v); + void dht_lookup_finished(td::Status S); void update_dht_nodes(OverlayNode node); void update_neighbours(td::uint32 nodes_to_change); diff --git a/tdutils/td/utils/Status.h b/tdutils/td/utils/Status.h index 9c4f1b7df..8bc210dba 100644 --- a/tdutils/td/utils/Status.h +++ b/tdutils/td/utils/Status.h @@ -555,6 +555,12 @@ class Result { }; return status_.move_as_error_suffix(suffix); } + Status move_as_status() TD_WARN_UNUSED_RESULT { + if (status_.is_error()) { + return move_as_error(); + } + return Status::OK(); + } const T &ok() const { LOG_CHECK(status_.is_ok()) << status_; return value_; From 622dc8676ade370f84a241d621c13b84dfa247b6 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Fri, 16 Aug 2024 10:23:41 +0300 Subject: [PATCH 04/19] Limit the number of threads to 127 (#1111) --- dht-server/dht-server.cpp | 12 ++++++++---- validator-engine/validator-engine.cpp | 12 ++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/dht-server/dht-server.cpp b/dht-server/dht-server.cpp index 025cf7d51..eb183cad6 100644 --- a/dht-server/dht-server.cpp +++ b/dht-server/dht-server.cpp @@ -1231,15 +1231,19 @@ int main(int argc, char *argv[]) { }); td::uint32 threads = 7; p.add_checked_option( - 't', "threads", PSTRING() << "number of threads (default=" << threads << ")", [&](td::Slice fname) { + 't', "threads", PSTRING() << "number of threads (default=" << threads << ")", [&](td::Slice arg) { td::int32 v; try { - v = std::stoi(fname.str()); + v = std::stoi(arg.str()); } catch (...) { return td::Status::Error(ton::ErrorCode::error, "bad value for --threads: not a number"); } - if (v < 1 || v > 256) { - return td::Status::Error(ton::ErrorCode::error, "bad value for --threads: should be in range [1..256]"); + if (v <= 0) { + return td::Status::Error(ton::ErrorCode::error, "bad value for --threads: should be > 0"); + } + if (v > 127) { + LOG(WARNING) << "`--threads " << v << "` is too big, effective value will be 127"; + v = 127; } threads = v; return td::Status::OK(); diff --git a/validator-engine/validator-engine.cpp b/validator-engine/validator-engine.cpp index 88cef8d49..a9a3b21d4 100644 --- a/validator-engine/validator-engine.cpp +++ b/validator-engine/validator-engine.cpp @@ -4157,15 +4157,19 @@ int main(int argc, char *argv[]) { }); td::uint32 threads = 7; p.add_checked_option( - 't', "threads", PSTRING() << "number of threads (default=" << threads << ")", [&](td::Slice fname) { + 't', "threads", PSTRING() << "number of threads (default=" << threads << ")", [&](td::Slice arg) { td::int32 v; try { - v = std::stoi(fname.str()); + v = std::stoi(arg.str()); } catch (...) { return td::Status::Error(ton::ErrorCode::error, "bad value for --threads: not a number"); } - if (v < 1 || v > 256) { - return td::Status::Error(ton::ErrorCode::error, "bad value for --threads: should be in range [1..256]"); + if (v <= 0) { + return td::Status::Error(ton::ErrorCode::error, "bad value for --threads: should be > 0"); + } + if (v > 127) { + LOG(WARNING) << "`--threads " << v << "` is too big, effective value will be 127"; + v = 127; } threads = v; return td::Status::OK(); From b70090dae35c737d2f7246ee44a307d9b565ee4e Mon Sep 17 00:00:00 2001 From: krigga Date: Tue, 20 Aug 2024 19:50:59 +0300 Subject: [PATCH 05/19] Disable testing and fuzzing for openssl when building WASM packages (#1116) Co-authored-by: EmelyanenkoK --- assembly/wasm/fift-func-wasm-build-ubuntu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assembly/wasm/fift-func-wasm-build-ubuntu.sh b/assembly/wasm/fift-func-wasm-build-ubuntu.sh index e7a54d16f..2d3507b23 100644 --- a/assembly/wasm/fift-func-wasm-build-ubuntu.sh +++ b/assembly/wasm/fift-func-wasm-build-ubuntu.sh @@ -85,7 +85,7 @@ cd .. if [ ! -f "openssl/openssl_em" ]; then cd openssl make clean - emconfigure ./Configure linux-generic32 no-shared no-dso no-engine no-unit-test + emconfigure ./Configure linux-generic32 no-shared no-dso no-engine no-unit-test no-tests no-fuzz-afl no-fuzz-libfuzzer sed -i 's/CROSS_COMPILE=.*/CROSS_COMPILE=/g' Makefile sed -i 's/-ldl//g' Makefile sed -i 's/-O3/-Os/g' Makefile From dc26c3be671b13cd7d07f232df8be4ccb8cdc2e0 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Tue, 20 Aug 2024 19:54:16 +0300 Subject: [PATCH 06/19] Improve validator session stats (#1117) * Improve validator session stats * Collator stats: block limits, number of processed external messages * Collator and validator work time * Last key block seqno * Approvers and signers * End validator session stats --- catchain/catchain.h | 1 + catchain/catchain.hpp | 9 ++ tdutils/td/utils/Timer.cpp | 43 +++++++++ tdutils/td/utils/Timer.h | 18 ++++ tl/generate/scheme/ton_api.tl | 25 +++-- tl/generate/scheme/ton_api.tlo | Bin 91004 -> 92472 bytes validator-session/validator-session-types.h | 14 +++ validator-session/validator-session.cpp | 29 ++++++ validator-session/validator-session.h | 1 + validator-session/validator-session.hpp | 1 + validator/impl/collator-impl.h | 5 + validator/impl/collator.cpp | 63 ++++++++++++ validator/impl/validate-query.cpp | 21 ++++ validator/impl/validate-query.hpp | 4 + validator/interfaces/validator-manager.h | 17 ++++ validator/manager-disk.hpp | 3 + validator/manager-hardfork.hpp | 3 + validator/manager.cpp | 102 +++++++++++++++++--- validator/manager.hpp | 19 +++- validator/validator-group.cpp | 11 +++ validator/validator-group.hpp | 11 ++- 21 files changed, 373 insertions(+), 27 deletions(-) diff --git a/catchain/catchain.h b/catchain/catchain.h index 912957e56..c5c8af28d 100644 --- a/catchain/catchain.h +++ b/catchain/catchain.h @@ -96,6 +96,7 @@ class CatChain : public td::actor::Actor { virtual void send_query_via(const PublicKeyHash &dst, std::string name, td::Promise promise, td::Timestamp timeout, td::BufferSlice query, td::uint64 max_answer_size, td::actor::ActorId via) = 0; + virtual void get_source_heights(td::Promise> promise) = 0; virtual void destroy() = 0; static td::actor::ActorOwn create(std::unique_ptr callback, const CatChainOptions &opts, diff --git a/catchain/catchain.hpp b/catchain/catchain.hpp index 8c8bb99ae..586cf4744 100644 --- a/catchain/catchain.hpp +++ b/catchain/catchain.hpp @@ -115,6 +115,15 @@ class CatChainImpl : public CatChain { td::actor::send_closure(receiver_, &CatChainReceiverInterface::send_custom_query_data_via, dst, name, std::move(promise), timeout, std::move(query), max_answer_size, via); } + void get_source_heights(td::Promise> promise) override { + std::vector heights(top_source_blocks_.size(), 0); + for (size_t i = 0; i < top_source_blocks_.size(); ++i) { + if (top_source_blocks_[i]) { + heights[i] = top_source_blocks_[i]->height(); + } + } + promise.set_result(std::move(heights)); + } void destroy() override; CatChainImpl(std::unique_ptr callback, const CatChainOptions &opts, td::actor::ActorId keyring, td::actor::ActorId adnl, diff --git a/tdutils/td/utils/Timer.cpp b/tdutils/td/utils/Timer.cpp index 1f72fba96..24de099aa 100644 --- a/tdutils/td/utils/Timer.cpp +++ b/tdutils/td/utils/Timer.cpp @@ -91,4 +91,47 @@ double PerfWarningTimer::elapsed() const { return Time::now() - start_at_; } +static double thread_cpu_clock() { +#if defined(CLOCK_THREAD_CPUTIME_ID) + timespec ts; + int result = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts); + CHECK(result == 0); + return (double)ts.tv_sec + (double)ts.tv_nsec * 1e-9; +#else + return 0.0; // TODO: MacOS and Windows support (currently cpu timer is used only in validators) +#endif +} + +ThreadCpuTimer::ThreadCpuTimer(bool is_paused) : is_paused_(is_paused) { + if (is_paused_) { + start_time_ = 0; + } else { + start_time_ = thread_cpu_clock(); + } +} + +void ThreadCpuTimer::pause() { + if (is_paused_) { + return; + } + elapsed_ += thread_cpu_clock() - start_time_; + is_paused_ = true; +} + +void ThreadCpuTimer::resume() { + if (!is_paused_) { + return; + } + start_time_ = thread_cpu_clock(); + is_paused_ = false; +} + +double ThreadCpuTimer::elapsed() const { + double res = elapsed_; + if (!is_paused_) { + res += thread_cpu_clock() - start_time_; + } + return res; +} + } // namespace td diff --git a/tdutils/td/utils/Timer.h b/tdutils/td/utils/Timer.h index 3e0cafbf5..a27cac8a7 100644 --- a/tdutils/td/utils/Timer.h +++ b/tdutils/td/utils/Timer.h @@ -62,4 +62,22 @@ class PerfWarningTimer { std::function callback_; }; +class ThreadCpuTimer { + public: + ThreadCpuTimer() : ThreadCpuTimer(false) { + } + explicit ThreadCpuTimer(bool is_paused); + ThreadCpuTimer(const ThreadCpuTimer &other) = default; + ThreadCpuTimer &operator=(const ThreadCpuTimer &other) = default; + + double elapsed() const; + void pause(); + void resume(); + + private: + double elapsed_{0}; + double start_time_; + bool is_paused_{false}; +}; + } // namespace td diff --git a/tl/generate/scheme/ton_api.tl b/tl/generate/scheme/ton_api.tl index b33ca5425..bf919b0fd 100644 --- a/tl/generate/scheme/ton_api.tl +++ b/tl/generate/scheme/ton_api.tl @@ -388,8 +388,8 @@ tonNode.newShardBlock block:tonNode.blockIdExt cc_seqno:int data:bytes = tonNode tonNode.blockBroadcastCompressed.data signatures:(vector tonNode.blockSignature) proof_data:bytes = tonNode.blockBroadcaseCompressed.Data; -tonNode.blockBroadcast id:tonNode.blockIdExt catchain_seqno:int validator_set_hash:int - signatures:(vector tonNode.blockSignature) +tonNode.blockBroadcast id:tonNode.blockIdExt catchain_seqno:int validator_set_hash:int + signatures:(vector tonNode.blockSignature) proof:bytes data:bytes = tonNode.Broadcast; tonNode.blockBroadcastCompressed id:tonNode.blockIdExt catchain_seqno:int validator_set_hash:int flags:# compressed:bytes = tonNode.Broadcast; @@ -769,13 +769,19 @@ http.server.config dhs:(vector http.server.dnsEntry) local_hosts:(vector http.se ---types--- -validatorSession.statsProducer id:int256 candidate_id:int256 block_status:int comment:string - block_timestamp:double is_accepted:Bool is_ours:Bool got_submit_at:double +validatorSession.collationStats bytes:int gas:int lt_delta:int cat_bytes:int cat_gas:int cat_lt_delta:int + limits_log:string ext_msgs_total:int ext_msgs_filtered:int ext_msgs_accepted:int ext_msgs_rejected:int = validadorSession.CollationStats; + +validatorSession.statsProducer id:int256 candidate_id:int256 block_status:int root_hash:int256 file_hash:int256 + comment:string block_timestamp:double is_accepted:Bool is_ours:Bool got_submit_at:double collation_time:double collated_at:double collation_cached:Bool + collation_work_time:double collation_cpu_work_time:double + collation_stats:validatorSession.collationStats validation_time:double validated_at:double validation_cached:Bool + validation_work_time:double validation_cpu_work_time:double gen_utime:double - approved_weight:long approved_33pct_at:double approved_66pct_at:double - signed_weight:long signed_33pct_at:double signed_66pct_at:double + approved_weight:long approved_33pct_at:double approved_66pct_at:double approvers:string + signed_weight:long signed_33pct_at:double signed_66pct_at:double signers:string serialize_time:double deserialize_time:double serialized_size:int = validatorSession.StatsProducer; validatorSession.statsRound timestamp:double producers:(vector validatorSession.statsProducer) = validatorSession.StatsRound; @@ -786,9 +792,14 @@ validatorSession.stats success:Bool id:tonNode.blockIdExt timestamp:double self: first_round:int rounds:(vector validatorSession.statsRound) = validatorSession.Stats; validatorSession.newValidatorGroupStats.node id:int256 weight:long = validatorSession.newValidatorGroupStats.Node; -validatorSession.newValidatorGroupStats session_id:int256 workchain:int shard:long cc_seqno:int timestamp:double +validatorSession.newValidatorGroupStats session_id:int256 workchain:int shard:long cc_seqno:int + last_key_block_seqno:int timestamp:double self_idx:int nodes:(vector validatorSession.newValidatorGroupStats.node) = validatorSession.NewValidatorGroupStats; +validatorSession.endValidatorGroupStats.node id:int256 catchain_blocks:int = validatorSession.endValidatorGroupStats.Node; +validatorSession.endValidatorGroupStats session_id:int256 timestamp:double + nodes:(vector validatorSession.endValidatorGroupStats.node) = validatorSession.EndValidatorGroupStats; + ---functions--- ---types--- diff --git a/tl/generate/scheme/ton_api.tlo b/tl/generate/scheme/ton_api.tlo index da1aa331d2048803f582b99ede705270f28a22ea..337dd071e3f746cd822236e2dcb63c194b379de4 100644 GIT binary patch delta 841 zcmex!jCIE)R^CUm^{p77fNLY~d3}|4min3UWr;bNDTyihMZu}X#hLkedd~SdIf*4e zR&YsTN%6!D5}WVn?_iXi_R1(;9iq4dq1ZJqW%@;aM&Zdn3?(K{F|=S=WW;%J@&&^c zES-~cgeKP*6-c&d`z+GI(3lF;xKM&oWU{2O{Ny)AKNx#AKQ*>L#R_)Q^ouNv!ivbw z014kbCgj5n^nmoU`4q$f(n!LI$;qh&B^bg*sadJX5MlAj2^JhIAonkp_$P=X7(t-{3Pu)C zC{I^#VpN!JAjBvz{e~c;%H$QkDiWYTWQ0auku#ePhi%(Iuv#8|#x<;+`kvg1oKb5kb=%#)B1M@S?Wl%h(?@*^aRL1_o%UXVjUo@boC zRg|$^R0Nt3fXNF`3spvW|5Mycsh;*50=|J@O1?0~Q`Bp4O&r$K2JQ0PuR5U3%*4@@b^8Ht&B@ku%P T$=St|57^3pWtZ1TFnR+3<5Dl9 delta 209 zcmdmSiS^GhR^CUm^{p77fPEwHdHu~220Iuhix^q3>_0DYW^#bh3KmawcJ;}Bj0z_2 zF#f^VwRxk7{i&^27{3TipCZJ#fEA=neezyobxsiL<}o23_UQ*j8MP-@6uC@)Bg&{U zT}Onm0c_g#4I+#mm{>vP$xT-jWfYsfTbz-T6T}DUlbAd)NO}4PamIk{9TJQ>jMFWo p7;C0WNHZ>({y>_sVtR!P&>WEB>3?JxJ0PqMSw@BJE^>@tya3`PN1gxx diff --git a/validator-session/validator-session-types.h b/validator-session/validator-session-types.h index e13c36d24..78a9b2460 100644 --- a/validator-session/validator-session-types.h +++ b/validator-session/validator-session-types.h @@ -77,6 +77,8 @@ struct ValidatorSessionStats { ValidatorSessionCandidateId candidate_id = ValidatorSessionCandidateId::zero(); int block_status = status_none; double block_timestamp = -1.0; + td::Bits256 root_hash = td::Bits256::zero(); + td::Bits256 file_hash = td::Bits256::zero(); std::string comment; bool is_accepted = false; @@ -159,11 +161,23 @@ struct NewValidatorGroupStats { ValidatorSessionId session_id = ValidatorSessionId::zero(); ShardIdFull shard{masterchainId}; CatchainSeqno cc_seqno = 0; + BlockSeqno last_key_block_seqno = 0; double timestamp = -1.0; td::uint32 self_idx = 0; std::vector nodes; }; +struct EndValidatorGroupStats { + struct Node { + PublicKeyHash id = PublicKeyHash::zero(); + td::uint32 catchain_blocks = 0; + }; + + ValidatorSessionId session_id = ValidatorSessionId::zero(); + double timestamp = -1.0; + std::vector nodes; +}; + } // namespace validatorsession } // namespace ton diff --git a/validator-session/validator-session.cpp b/validator-session/validator-session.cpp index 46dd44403..be5443785 100644 --- a/validator-session/validator-session.cpp +++ b/validator-session/validator-session.cpp @@ -270,6 +270,8 @@ void ValidatorSessionImpl::process_broadcast(PublicKeyHash src, td::BufferSlice } stat->deserialize_time = deserialize_time; stat->serialized_size = data.size(); + stat->root_hash = candidate->root_hash_; + stat->file_hash = file_hash; } if ((td::int32)block_round < (td::int32)cur_round_ - MAX_PAST_ROUND_BLOCK || @@ -468,6 +470,8 @@ void ValidatorSessionImpl::generated_block(td::uint32 round, ValidatorSessionCan stat->collated_at = td::Clocks::system(); stat->block_timestamp = td::Clocks::system(); stat->collation_cached = collation_cached; + stat->root_hash = root_hash; + stat->file_hash = file_hash; } if (round != cur_round_) { return; @@ -602,6 +606,8 @@ void ValidatorSessionImpl::try_approve_block(const SentBlock *block) { if (stat->block_timestamp <= 0.0) { stat->block_timestamp = td::Clocks::system(); } + stat->root_hash = B->root_hash_; + stat->file_hash = td::sha256_bits256(B->data_); } auto P = td::PromiseCreator::lambda([round = cur_round_, hash = block_id, root_hash = block->get_root_hash(), @@ -997,6 +1003,29 @@ void ValidatorSessionImpl::get_current_stats(td::Promise promise.set_result(cur_stats_); } +void ValidatorSessionImpl::get_end_stats(td::Promise promise) { + if (!started_) { + promise.set_error(td::Status::Error(ErrorCode::notready, "not started")); + return; + } + EndValidatorGroupStats stats; + stats.session_id = unique_hash_; + stats.timestamp = td::Clocks::system(); + stats.nodes.resize(description().get_total_nodes()); + for (size_t i = 0; i < stats.nodes.size(); ++i) { + stats.nodes[i].id = description().get_source_id(i); + } + td::actor::send_closure(catchain_, &catchain::CatChain::get_source_heights, + [promise = std::move(promise), + stats = std::move(stats)](td::Result> R) mutable { + TRY_RESULT_PROMISE(promise, heights, std::move(R)); + for (size_t i = 0; i < std::min(heights.size(), stats.nodes.size()); ++i) { + stats.nodes[i].catchain_blocks = heights[i]; + } + promise.set_result(std::move(stats)); + }); +} + void ValidatorSessionImpl::get_validator_group_info_for_litequery( td::uint32 cur_round, td::Promise>> promise) { diff --git a/validator-session/validator-session.h b/validator-session/validator-session.h index 0870f6718..2e1ed9b13 100644 --- a/validator-session/validator-session.h +++ b/validator-session/validator-session.h @@ -105,6 +105,7 @@ class ValidatorSession : public td::actor::Actor { virtual void start() = 0; virtual void destroy() = 0; virtual void get_current_stats(td::Promise promise) = 0; + virtual void get_end_stats(td::Promise promise) = 0; virtual void get_validator_group_info_for_litequery( td::uint32 cur_round, td::Promise>> promise) = 0; diff --git a/validator-session/validator-session.hpp b/validator-session/validator-session.hpp index 580582824..2ee4885b9 100644 --- a/validator-session/validator-session.hpp +++ b/validator-session/validator-session.hpp @@ -187,6 +187,7 @@ class ValidatorSessionImpl : public ValidatorSession { void start() override; void destroy() override; void get_current_stats(td::Promise promise) override; + void get_end_stats(td::Promise promise) override; void get_validator_group_info_for_litequery( td::uint32 cur_round, td::Promise>> promise) override; diff --git a/validator/impl/collator-impl.h b/validator/impl/collator-impl.h index 913a0ed87..b8d9e56d3 100644 --- a/validator/impl/collator-impl.h +++ b/validator/impl/collator-impl.h @@ -356,6 +356,11 @@ class Collator final : public td::actor::Actor { public: static td::uint32 get_skip_externals_queue_size(); + + private: + td::Timer work_timer_{true}; + td::ThreadCpuTimer cpu_work_timer_{true}; + CollationStats stats_; }; } // namespace validator diff --git a/validator/impl/collator.cpp b/validator/impl/collator.cpp index c6dd7caf2..f465c0f55 100644 --- a/validator/impl/collator.cpp +++ b/validator/impl/collator.cpp @@ -1772,6 +1772,12 @@ bool Collator::register_shard_block_creators(std::vector creator_li * @returns True if collation is successful, false otherwise. */ bool Collator::try_collate() { + work_timer_.resume(); + cpu_work_timer_.resume(); + SCOPE_EXIT { + work_timer_.pause(); + cpu_work_timer_.pause(); + }; if (!preinit_complete) { LOG(WARNING) << "running do_preinit()"; if (!do_preinit()) { @@ -3481,6 +3487,29 @@ bool Collator::process_inbound_message(Ref enq_msg, ton::LogicalT return true; } +/** + * Creates a string that explains which limit is exceeded. Used for collator stats. + * + * @param block_limit_status Status of block limits. + * @param cls Which limit class is exceeded. + * + * @returns String for collator stats. + */ +static std::string block_full_comment(const block::BlockLimitStatus& block_limit_status, unsigned cls) { + auto bytes = block_limit_status.estimate_block_size(); + if (!block_limit_status.limits.bytes.fits(cls, bytes)) { + return PSTRING() << "block_full bytes " << bytes; + } + if (!block_limit_status.limits.gas.fits(cls, block_limit_status.gas_used)) { + return PSTRING() << "block_full gas " << block_limit_status.gas_used; + } + auto lt_delta = block_limit_status.cur_lt - block_limit_status.limits.start_lt; + if (!block_limit_status.limits.lt_delta.fits(cls, lt_delta)) { + return PSTRING() << "block_full lt_delta " << lt_delta; + } + return ""; +} + /** * Processes inbound internal messages from message queues of the neighbors. * Messages are processed until the normal limit is reached, soft timeout is reached or there are no more messages. @@ -3495,11 +3524,14 @@ bool Collator::process_inbound_internal_messages() { block_full_ = !block_limit_status_->fits(block::ParamLimits::cl_normal); if (block_full_) { LOG(INFO) << "BLOCK FULL, stop processing inbound internal messages"; + stats_.limits_log += PSTRING() << "INBOUND_INT_MESSAGES: " + << block_full_comment(*block_limit_status_, block::ParamLimits::cl_normal) << "\n"; break; } if (soft_timeout_.is_in_past(td::Timestamp::now())) { block_full_ = true; LOG(WARNING) << "soft timeout reached, stop processing inbound internal messages"; + stats_.limits_log += PSTRING() << "INBOUND_INT_MESSAGES: timeout\n"; break; } auto kv = nb_out_msgs_->extract_cur(); @@ -3547,15 +3579,23 @@ bool Collator::process_inbound_external_messages() { } if (full) { LOG(INFO) << "BLOCK FULL, stop processing external messages"; + stats_.limits_log += PSTRING() << "INBOUND_EXT_MESSAGES: " + << block_full_comment(*block_limit_status_, block::ParamLimits::cl_soft) << "\n"; break; } if (medium_timeout_.is_in_past(td::Timestamp::now())) { LOG(WARNING) << "medium timeout reached, stop processing inbound external messages"; + stats_.limits_log += PSTRING() << "INBOUND_EXT_MESSAGES: timeout\n"; break; } auto ext_msg = ext_msg_struct.cell; ton::Bits256 hash{ext_msg->get_hash().bits()}; int r = process_external_message(std::move(ext_msg)); + if (r > 0) { + ++stats_.ext_msgs_accepted; + } else { + ++stats_.ext_msgs_rejected; + } if (r < 0) { bad_ext_msgs_.emplace_back(ext_msg_struct.hash); return false; @@ -3661,11 +3701,15 @@ bool Collator::process_dispatch_queue() { block_full_ = !block_limit_status_->fits(block::ParamLimits::cl_normal); if (block_full_) { LOG(INFO) << "BLOCK FULL, stop processing dispatch queue"; + stats_.limits_log += PSTRING() << "DISPATCH_QUEUE_STAGE_" << iter << ": " + << block_full_comment(*block_limit_status_, block::ParamLimits::cl_normal) + << "\n"; return true; } if (soft_timeout_.is_in_past(td::Timestamp::now())) { block_full_ = true; LOG(WARNING) << "soft timeout reached, stop processing dispatch queue"; + stats_.limits_log += PSTRING() << "DISPATCH_QUEUE_STAGE_" << iter << ": timeout\n"; return true; } StdSmcAddress src_addr; @@ -3715,6 +3759,7 @@ bool Collator::process_dispatch_queue() { ++total_count; if (total_count >= max_total_count[iter]) { dispatch_queue_total_limit_reached_ = true; + stats_.limits_log += PSTRING() << "DISPATCH_QUEUE_STAGE_" << iter << ": total limit reached\n"; break; } } @@ -4064,6 +4109,8 @@ bool Collator::process_new_messages(bool enqueue_only) { if ((block_full_ || have_unprocessed_account_dispatch_queue_) && !enqueue_only) { LOG(INFO) << "BLOCK FULL, enqueue all remaining new messages"; enqueue_only = true; + stats_.limits_log += PSTRING() << "NEW_MESSAGES: " + << block_full_comment(*block_limit_status_, block::ParamLimits::cl_normal) << "\n"; } LOG(DEBUG) << "have message with lt=" << msg.lt; int res = process_one_new_message(std::move(msg), enqueue_only); @@ -4072,6 +4119,8 @@ bool Collator::process_new_messages(bool enqueue_only) { } else if (res == 3) { LOG(INFO) << "All remaining new messages must be enqueued (BLOCK FULL)"; enqueue_only = true; + stats_.limits_log += PSTRING() << "NEW_MESSAGES: " + << block_full_comment(*block_limit_status_, block::ParamLimits::cl_normal) << "\n"; } } return true; @@ -5435,6 +5484,18 @@ bool Collator::create_block_candidate() { td::actor::send_closure_later(manager, &ValidatorManager::complete_external_messages, std::move(delay_ext_msgs_), std::move(bad_ext_msgs_)); } + + double work_time = work_timer_.elapsed(); + double cpu_work_time = cpu_work_timer_.elapsed(); + LOG(WARNING) << "Collate query work time = " << work_time << "s, cpu time = " << cpu_work_time << "s"; + stats_.bytes = block_limit_status_->estimate_block_size(); + stats_.gas = block_limit_status_->gas_used; + stats_.lt_delta = block_limit_status_->cur_lt - block_limit_status_->limits.start_lt; + stats_.cat_bytes = block_limit_status_->limits.classify_size(stats_.bytes); + stats_.cat_gas = block_limit_status_->limits.classify_gas(stats_.gas); + stats_.cat_lt_delta = block_limit_status_->limits.classify_lt(block_limit_status_->cur_lt); + td::actor::send_closure(manager, &ValidatorManager::record_collate_query_stats, block_candidate->id, work_time, + cpu_work_time, std::move(stats_)); return true; } @@ -5539,6 +5600,7 @@ void Collator::after_get_external_messages(td::Result ext_msg_cell = ext_msg->root_cell(); @@ -5550,6 +5612,7 @@ void Collator::after_get_external_messages(td::Resulthash()); } } diff --git a/validator/impl/validate-query.cpp b/validator/impl/validate-query.cpp index 8c39a1ab4..003b7f9f7 100644 --- a/validator/impl/validate-query.cpp +++ b/validator/impl/validate-query.cpp @@ -118,6 +118,7 @@ bool ValidateQuery::reject_query(std::string error, td::BufferSlice reason) { error = error_ctx() + error; LOG(ERROR) << "REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error; if (main_promise) { + record_stats(); errorlog::ErrorLog::log(PSTRING() << "REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error << ": data=" << block_candidate.id.file_hash.to_hex() << " collated_data=" << block_candidate.collated_file_hash.to_hex()); @@ -155,6 +156,7 @@ bool ValidateQuery::soft_reject_query(std::string error, td::BufferSlice reason) error = error_ctx() + error; LOG(ERROR) << "SOFT REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error; if (main_promise) { + record_stats(); errorlog::ErrorLog::log(PSTRING() << "SOFT REJECT: aborting validation of block candidate for " << shard_.to_str() << " : " << error << ": data=" << block_candidate.id.file_hash.to_hex() << " collated_data=" << block_candidate.collated_file_hash.to_hex()); @@ -177,6 +179,7 @@ bool ValidateQuery::fatal_error(td::Status error) { error.ensure_error(); LOG(ERROR) << "aborting validation of block candidate for " << shard_.to_str() << " : " << error.to_string(); if (main_promise) { + record_stats(); auto c = error.code(); if (c <= -667 && c >= -670) { errorlog::ErrorLog::log(PSTRING() << "FATAL ERROR: aborting validation of block candidate for " << shard_.to_str() @@ -234,6 +237,7 @@ bool ValidateQuery::fatal_error(std::string err_msg, int err_code) { */ void ValidateQuery::finish_query() { if (main_promise) { + record_stats(); LOG(WARNING) << "validate query done"; main_promise.set_result(now_); } @@ -6764,6 +6768,12 @@ bool ValidateQuery::try_validate() { if (pending) { return true; } + work_timer_.resume(); + cpu_work_timer_.resume(); + SCOPE_EXIT { + work_timer_.pause(); + cpu_work_timer_.pause(); + }; try { if (!stage_) { LOG(WARNING) << "try_validate stage 0"; @@ -6903,6 +6913,17 @@ void ValidateQuery::written_candidate() { finish_query(); } +/** + * Sends validation work time to manager. + */ +void ValidateQuery::record_stats() { + double work_time = work_timer_.elapsed(); + double cpu_work_time = cpu_work_timer_.elapsed(); + LOG(WARNING) << "Validate query work time = " << work_time << "s, cpu time = " << cpu_work_time << "s"; + td::actor::send_closure(manager, &ValidatorManager::record_validate_query_stats, block_candidate.id, work_time, + cpu_work_time); +} + } // namespace validator } // namespace ton diff --git a/validator/impl/validate-query.hpp b/validator/impl/validate-query.hpp index 824afb49d..104950938 100644 --- a/validator/impl/validate-query.hpp +++ b/validator/impl/validate-query.hpp @@ -398,6 +398,10 @@ class ValidateQuery : public td::actor::Actor { } return true; } + + td::Timer work_timer_{true}; + td::ThreadCpuTimer cpu_work_timer_{true}; + void record_stats(); }; } // namespace validator diff --git a/validator/interfaces/validator-manager.h b/validator/interfaces/validator-manager.h index 0e9fab73b..b6016bc2b 100644 --- a/validator/interfaces/validator-manager.h +++ b/validator/interfaces/validator-manager.h @@ -52,6 +52,16 @@ struct AsyncSerializerState { UnixTime last_written_block_ts; }; +struct CollationStats { + td::uint32 bytes, gas, lt_delta; + int cat_bytes, cat_gas, cat_lt_delta; + std::string limits_log; + td::uint32 ext_msgs_total = 0; + td::uint32 ext_msgs_filtered = 0; + td::uint32 ext_msgs_accepted = 0; + td::uint32 ext_msgs_rejected = 0; +}; + using ValidateCandidateResult = td::Variant; class ValidatorManager : public ValidatorManagerInterface { @@ -173,6 +183,7 @@ class ValidatorManager : public ValidatorManagerInterface { virtual void log_validator_session_stats(BlockIdExt block_id, validatorsession::ValidatorSessionStats stats) = 0; virtual void log_new_validator_group_stats(validatorsession::NewValidatorGroupStats stats) = 0; + virtual void log_end_validator_group_stats(validatorsession::EndValidatorGroupStats stats) = 0; virtual void get_block_handle_for_litequery(BlockIdExt block_id, td::Promise promise) = 0; virtual void get_block_data_for_litequery(BlockIdExt block_id, td::Promise> promise) = 0; @@ -192,6 +203,12 @@ class ValidatorManager : public ValidatorManagerInterface { virtual void add_lite_query_stats(int lite_query_id) { } + virtual void record_collate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, + CollationStats stats) { + } + virtual void record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time) { + } + static bool is_persistent_state(UnixTime ts, UnixTime prev_ts) { return ts / (1 << 17) != prev_ts / (1 << 17); } diff --git a/validator/manager-disk.hpp b/validator/manager-disk.hpp index a77be2725..3a77f2301 100644 --- a/validator/manager-disk.hpp +++ b/validator/manager-disk.hpp @@ -388,6 +388,9 @@ class ValidatorManagerImpl : public ValidatorManager { void log_new_validator_group_stats(validatorsession::NewValidatorGroupStats stats) override { UNREACHABLE(); } + void log_end_validator_group_stats(validatorsession::EndValidatorGroupStats stats) override { + UNREACHABLE(); + } void get_out_msg_queue_size(BlockIdExt block_id, td::Promise promise) override { if (queue_size_counter_.empty()) { queue_size_counter_ = diff --git a/validator/manager-hardfork.hpp b/validator/manager-hardfork.hpp index e7175b77b..cf4d3799f 100644 --- a/validator/manager-hardfork.hpp +++ b/validator/manager-hardfork.hpp @@ -450,6 +450,9 @@ class ValidatorManagerImpl : public ValidatorManager { void log_new_validator_group_stats(validatorsession::NewValidatorGroupStats stats) override { UNREACHABLE(); } + void log_end_validator_group_stats(validatorsession::EndValidatorGroupStats stats) override { + UNREACHABLE(); + } void get_out_msg_queue_size(BlockIdExt block_id, td::Promise promise) override { if (queue_size_counter_.empty()) { queue_size_counter_ = diff --git a/validator/manager.cpp b/validator/manager.cpp index eb082d91e..fa592a788 100644 --- a/validator/manager.cpp +++ b/validator/manager.cpp @@ -44,6 +44,7 @@ #include "td/utils/JsonBuilder.h" #include "common/delay.h" +#include "td/utils/filesystem.h" #include "validator/stats-merger.h" @@ -2044,7 +2045,7 @@ void ValidatorManagerImpl::update_shards() { } new_validator_groups_.emplace(val_group_id, std::move(it2->second)); } else { - auto G = create_validator_group(val_group_id, shard, val_set, opts, started_); + auto G = create_validator_group(val_group_id, shard, val_set, key_seqno, opts, started_); if (!G.empty()) { td::actor::send_closure(G, &ValidatorGroup::start, prev, last_masterchain_block_id_, last_masterchain_state_->get_unix_time()); @@ -2100,7 +2101,7 @@ void ValidatorManagerImpl::update_shards() { } new_validator_groups_.emplace(val_group_id, std::move(it2->second)); } else { - auto G = create_validator_group(val_group_id, shard, val_set, opts, started_); + auto G = create_validator_group(val_group_id, shard, val_set, key_seqno, opts, started_); if (!G.empty()) { td::actor::send_closure(G, &ValidatorGroup::start, prev, last_masterchain_block_id_, last_masterchain_state_->get_unix_time()); @@ -2127,7 +2128,7 @@ void ValidatorManagerImpl::update_shards() { } else { new_next_validator_groups_.emplace( val_group_id, - ValidatorGroupEntry{create_validator_group(val_group_id, shard, val_set, opts, started_), shard}); + ValidatorGroupEntry{create_validator_group(val_group_id, shard, val_set, key_seqno, opts, started_), shard}); } } } @@ -2230,7 +2231,7 @@ ValidatorSessionId ValidatorManagerImpl::get_validator_set_id(ShardIdFull shard, } td::actor::ActorOwn ValidatorManagerImpl::create_validator_group( - ValidatorSessionId session_id, ShardIdFull shard, td::Ref validator_set, + ValidatorSessionId session_id, ShardIdFull shard, td::Ref validator_set, BlockSeqno key_seqno, validatorsession::ValidatorSessionOptions opts, bool init_session) { if (check_gc_list_.count(session_id) == 1) { return td::actor::ActorOwn{}; @@ -2241,8 +2242,8 @@ td::actor::ActorOwn ValidatorManagerImpl::create_validator_group auto validator_id = get_validator(shard, validator_set); CHECK(!validator_id.is_zero()); auto G = td::actor::create_actor( - "validatorgroup", shard, validator_id, session_id, validator_set, opts, keyring_, adnl_, rldp_, overlays_, - db_root_, actor_id(this), init_session, + "validatorgroup", shard, validator_id, session_id, validator_set, key_seqno, opts, keyring_, adnl_, rldp_, + overlays_, db_root_, actor_id(this), init_session, opts_->check_unsafe_resync_allowed(validator_set->get_catchain_seqno()), opts_); return G; } @@ -2831,13 +2832,35 @@ void ValidatorManagerImpl::log_validator_session_stats(BlockIdExt block_id, for (const auto &round : stats.rounds) { std::vector> producers; for (const auto &producer : round.producers) { + BlockIdExt cur_block_id{block_id.id, producer.root_hash, producer.file_hash}; + auto it = recorded_block_stats_.find(cur_block_id); + tl_object_ptr collation_stats; + if (it != recorded_block_stats_.end() && it->second.collator_stats_) { + auto &stats = it->second.collator_stats_.value(); + collation_stats = create_tl_object( + stats.bytes, stats.gas, stats.lt_delta, stats.cat_bytes, stats.cat_gas, stats.cat_lt_delta, + stats.limits_log, stats.ext_msgs_total, stats.ext_msgs_filtered, stats.ext_msgs_accepted, + stats.ext_msgs_rejected); + } + std::string approvers, signers; + for (bool x : producer.approvers) { + approvers += (x ? '1' : '0'); + } + for (bool x : producer.signers) { + signers += (x ? '1' : '0'); + } producers.push_back(create_tl_object( - producer.id.bits256_value(), producer.candidate_id, producer.block_status, producer.comment, - producer.block_timestamp, producer.is_accepted, producer.is_ours, producer.got_submit_at, - producer.collation_time, producer.collated_at, producer.collation_cached, producer.validation_time, - producer.validated_at, producer.validation_cached, producer.gen_utime, producer.approved_weight, - producer.approved_33pct_at, producer.approved_66pct_at, producer.signed_weight, producer.signed_33pct_at, - producer.signed_66pct_at, producer.serialize_time, producer.deserialize_time, producer.serialized_size)); + producer.id.bits256_value(), producer.candidate_id, producer.block_status, producer.root_hash, + producer.file_hash, producer.comment, producer.block_timestamp, producer.is_accepted, producer.is_ours, + producer.got_submit_at, producer.collation_time, producer.collated_at, producer.collation_cached, + it == recorded_block_stats_.end() ? -1.0 : it->second.collator_work_time_, + it == recorded_block_stats_.end() ? -1.0 : it->second.collator_cpu_work_time_, std::move(collation_stats), + producer.validation_time, producer.validated_at, producer.validation_cached, + it == recorded_block_stats_.end() ? -1.0 : it->second.validator_work_time_, + it == recorded_block_stats_.end() ? -1.0 : it->second.validator_cpu_work_time_, producer.gen_utime, + producer.approved_weight, producer.approved_33pct_at, producer.approved_66pct_at, std::move(approvers), + producer.signed_weight, producer.signed_33pct_at, producer.signed_66pct_at, std::move(signers), + producer.serialize_time, producer.deserialize_time, producer.serialized_size)); } rounds.push_back(create_tl_object(round.timestamp, std::move(producers))); } @@ -2869,8 +2892,8 @@ void ValidatorManagerImpl::log_new_validator_group_stats(validatorsession::NewVa create_tl_object(node.id.bits256_value(), node.weight)); } auto obj = create_tl_object( - stats.session_id, stats.shard.workchain, stats.shard.shard, stats.cc_seqno, stats.timestamp, stats.self_idx, - std::move(nodes)); + stats.session_id, stats.shard.workchain, stats.shard.shard, stats.cc_seqno, stats.last_key_block_seqno, + stats.timestamp, stats.self_idx, std::move(nodes)); auto s = td::json_encode(td::ToJson(*obj.get()), false); s.erase(std::remove_if(s.begin(), s.end(), [](char c) { return c == '\n' || c == '\r'; }), s.end()); @@ -2879,7 +2902,31 @@ void ValidatorManagerImpl::log_new_validator_group_stats(validatorsession::NewVa file << s << "\n"; file.close(); - LOG(INFO) << "Writing new validator group stats for " << stats.shard.to_str(); + LOG(INFO) << "Writing new validator group stats for " << stats.session_id << " shard=" << stats.shard.to_str() + << " cc_seqno=" << stats.cc_seqno; +} + +void ValidatorManagerImpl::log_end_validator_group_stats(validatorsession::EndValidatorGroupStats stats) { + std::string fname = opts_->get_session_logs_file(); + if (fname.empty()) { + return; + } + std::vector> nodes; + for (const auto &node : stats.nodes) { + nodes.push_back(create_tl_object(node.id.bits256_value(), + node.catchain_blocks)); + } + auto obj = create_tl_object(stats.session_id, stats.timestamp, + std::move(nodes)); + auto s = td::json_encode(td::ToJson(*obj.get()), false); + s.erase(std::remove_if(s.begin(), s.end(), [](char c) { return c == '\n' || c == '\r'; }), s.end()); + + std::ofstream file; + file.open(fname, std::ios_base::app); + file << s << "\n"; + file.close(); + + LOG(INFO) << "Writing end validator group stats for " << stats.session_id; } void ValidatorManagerImpl::get_block_handle_for_litequery(BlockIdExt block_id, td::Promise promise) { @@ -3165,6 +3212,31 @@ td::actor::ActorOwn ValidatorManagerFactory::create( rldp, overlays); } +void ValidatorManagerImpl::record_collate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, + CollationStats stats) { + auto &record = new_block_stats_record(block_id); + record.collator_work_time_ = work_time; + record.collator_cpu_work_time_ = cpu_work_time; + record.collator_stats_ = std::move(stats); +} + +void ValidatorManagerImpl::record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time) { + auto &record = new_block_stats_record(block_id); + record.validator_work_time_ = work_time; + record.validator_cpu_work_time_ = cpu_work_time; +} + +ValidatorManagerImpl::RecordedBlockStats &ValidatorManagerImpl::new_block_stats_record(BlockIdExt block_id) { + if (!recorded_block_stats_.count(block_id)) { + recorded_block_stats_lru_.push(block_id); + if (recorded_block_stats_lru_.size() > 4096) { + recorded_block_stats_.erase(recorded_block_stats_lru_.front()); + recorded_block_stats_lru_.pop(); + } + } + return recorded_block_stats_[block_id]; +} + size_t ValidatorManagerImpl::CheckedExtMsgCounter::get_msg_count(WorkchainId wc, StdSmcAddress addr) { before_query(); auto it1 = counter_cur_.find({wc, addr}); diff --git a/validator/manager.hpp b/validator/manager.hpp index 12354c634..99aa4e0e1 100644 --- a/validator/manager.hpp +++ b/validator/manager.hpp @@ -38,6 +38,7 @@ #include #include #include +#include namespace ton { @@ -261,7 +262,7 @@ class ValidatorManagerImpl : public ValidatorManager { BlockSeqno last_key_block_seqno, const validatorsession::ValidatorSessionOptions &opts); td::actor::ActorOwn create_validator_group(ValidatorSessionId session_id, ShardIdFull shard, - td::Ref validator_set, + td::Ref validator_set, BlockSeqno key_seqno, validatorsession::ValidatorSessionOptions opts, bool create_catchain); struct ValidatorGroupEntry { @@ -589,6 +590,7 @@ class ValidatorManagerImpl : public ValidatorManager { void log_validator_session_stats(BlockIdExt block_id, validatorsession::ValidatorSessionStats stats) override; void log_new_validator_group_stats(validatorsession::NewValidatorGroupStats stats) override; + void log_end_validator_group_stats(validatorsession::EndValidatorGroupStats stats) override; void update_options(td::Ref opts) override; @@ -708,6 +710,21 @@ class ValidatorManagerImpl : public ValidatorManager { td::uint32 ls_stats_check_ext_messages_{0}; td::actor::ActorOwn candidates_buffer_; + + struct RecordedBlockStats { + double collator_work_time_ = -1.0; + double collator_cpu_work_time_ = -1.0; + td::optional collator_stats_; + double validator_work_time_ = -1.0; + double validator_cpu_work_time_ = -1.0; + }; + std::map recorded_block_stats_; + std::queue recorded_block_stats_lru_; + + void record_collate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time, + CollationStats stats) override; + void record_validate_query_stats(BlockIdExt block_id, double work_time, double cpu_work_time) override; + RecordedBlockStats &new_block_stats_record(BlockIdExt block_id); }; } // namespace validator diff --git a/validator/validator-group.cpp b/validator/validator-group.cpp index fc3ebe541..4b61c07cd 100644 --- a/validator/validator-group.cpp +++ b/validator/validator-group.cpp @@ -386,6 +386,7 @@ void ValidatorGroup::start(std::vector prev, BlockIdExt min_masterch stats.session_id = session_id_; stats.shard = shard_; stats.cc_seqno = validator_set_->get_catchain_seqno(); + stats.last_key_block_seqno = last_key_block_seqno_; stats.timestamp = td::Clocks::system(); td::uint32 idx = 0; for (const auto& node : validator_set_->export_vector()) { @@ -417,6 +418,16 @@ void ValidatorGroup::destroy() { td::actor::send_closure(manager, &ValidatorManager::log_validator_session_stats, block_id, std::move(stats)); }); + td::actor::send_closure(session_, &validatorsession::ValidatorSession::get_end_stats, + [manager = manager_](td::Result R) { + if (R.is_error()) { + LOG(DEBUG) << "Failed to get validator session end stats: " << R.move_as_error(); + return; + } + auto stats = R.move_as_ok(); + td::actor::send_closure(manager, &ValidatorManager::log_end_validator_group_stats, + std::move(stats)); + }); auto ses = session_.release(); delay_action([ses]() mutable { td::actor::send_closure(ses, &validatorsession::ValidatorSession::destroy); }, td::Timestamp::in(10.0)); diff --git a/validator/validator-group.hpp b/validator/validator-group.hpp index 3499da9d7..936d2fdc7 100644 --- a/validator/validator-group.hpp +++ b/validator/validator-group.hpp @@ -69,15 +69,17 @@ class ValidatorGroup : public td::actor::Actor { } ValidatorGroup(ShardIdFull shard, PublicKeyHash local_id, ValidatorSessionId session_id, - td::Ref validator_set, validatorsession::ValidatorSessionOptions config, - td::actor::ActorId keyring, td::actor::ActorId adnl, - td::actor::ActorId rldp, td::actor::ActorId overlays, - std::string db_root, td::actor::ActorId validator_manager, bool create_session, + td::Ref validator_set, BlockSeqno last_key_block_seqno, + validatorsession::ValidatorSessionOptions config, td::actor::ActorId keyring, + td::actor::ActorId adnl, td::actor::ActorId rldp, + td::actor::ActorId overlays, std::string db_root, + td::actor::ActorId validator_manager, bool create_session, bool allow_unsafe_self_blocks_resync, td::Ref opts) : shard_(shard) , local_id_(std::move(local_id)) , session_id_(session_id) , validator_set_(std::move(validator_set)) + , last_key_block_seqno_(last_key_block_seqno) , config_(std::move(config)) , keyring_(keyring) , adnl_(adnl) @@ -115,6 +117,7 @@ class ValidatorGroup : public td::actor::Actor { UnixTime min_ts_; td::Ref validator_set_; + BlockSeqno last_key_block_seqno_; validatorsession::ValidatorSessionOptions config_; td::actor::ActorId keyring_; From 9803d004c44efcdf55820149270030e91ff57cc6 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Tue, 20 Aug 2024 19:55:01 +0300 Subject: [PATCH 07/19] Fix getting creator stats in lite-client (#1115) --- lite-client/lite-client.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lite-client/lite-client.cpp b/lite-client/lite-client.cpp index 020aca705..02a5fab67 100644 --- a/lite-client/lite-client.cpp +++ b/lite-client/lite-client.cpp @@ -3429,9 +3429,7 @@ void TestNode::got_creator_stats(ton::BlockIdExt req_blkid, ton::BlockIdExt blki promise.set_error(td::Status::Error(PSLICE() << "invalid CreatorStats record with key " << key.to_hex())); return; } - if (mc_cnt.modified_since(min_utime) || shard_cnt.modified_since(min_utime)) { - func(key, mc_cnt, shard_cnt); - } + func(key, mc_cnt, shard_cnt); allow_eq = false; } if (complete) { From 18305ab2e6767e9de7820cdac5a43ce8cc108eef Mon Sep 17 00:00:00 2001 From: neodix42 Date: Fri, 23 Aug 2024 11:59:40 +0300 Subject: [PATCH 08/19] * update links to docker image (#1109) * include fift and func into Docker image Co-authored-by: EmelyanenkoK Co-authored-by: neodiX --- Dockerfile | 6 +++++- docker/ton-ali.yaml | 2 +- docker/ton-aws.yaml | 2 +- docker/ton-gcp.yaml | 2 +- docker/ton-metal-lb.yaml | 2 +- docker/ton-node-port.yaml | 2 +- 6 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index cf4187630..f4ea43759 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,7 +27,7 @@ RUN apt-get update && \ apt-get install -y wget curl libatomic1 openssl libsecp256k1-dev libsodium-dev libmicrohttpd-dev liblz4-dev libjemalloc-dev htop net-tools netcat iptraf-ng jq tcpdump pv plzip && \ rm -rf /var/lib/apt/lists/* -RUN mkdir -p /var/ton-work/db /var/ton-work/scripts +RUN mkdir -p /var/ton-work/db /var/ton-work/scripts /usr/share/ton/smartcont/ /usr/lib/fift/ COPY --from=builder /ton/build/storage/storage-daemon/storage-daemon /usr/local/bin/ COPY --from=builder /ton/build/storage/storage-daemon/storage-daemon-cli /usr/local/bin/ @@ -35,6 +35,10 @@ COPY --from=builder /ton/build/lite-client/lite-client /usr/local/bin/ COPY --from=builder /ton/build/validator-engine/validator-engine /usr/local/bin/ COPY --from=builder /ton/build/validator-engine-console/validator-engine-console /usr/local/bin/ COPY --from=builder /ton/build/utils/generate-random-id /usr/local/bin/ +COPY --from=builder /ton/build/crypto/fift /usr/local/bin/ +COPY --from=builder /ton/build/crypto/func /usr/local/bin/ +COPY --from=builder /ton/crypto/smartcont/* /usr/share/ton/smartcont/ +COPY --from=builder /ton/crypto/fift/lib/* /usr/lib/fift/ WORKDIR /var/ton-work/db COPY ./docker/init.sh ./docker/control.template /var/ton-work/scripts/ diff --git a/docker/ton-ali.yaml b/docker/ton-ali.yaml index 03ffbdb0f..2dd5daf6a 100644 --- a/docker/ton-ali.yaml +++ b/docker/ton-ali.yaml @@ -27,7 +27,7 @@ spec: spec: containers: - name: validator-engine-container - image: ghcr.io/neodix42/ton:latest + image: ghcr.io/ton-blockchain/ton:latest env: - name: PUBLIC_IP value: "" diff --git a/docker/ton-aws.yaml b/docker/ton-aws.yaml index da16cbae9..4cab1b556 100644 --- a/docker/ton-aws.yaml +++ b/docker/ton-aws.yaml @@ -27,7 +27,7 @@ spec: spec: containers: - name: validator-engine-container - image: ghcr.io/neodix42/ton:latest + image: ghcr.io/ton-blockchain/ton:latest env: - name: PUBLIC_IP value: "" diff --git a/docker/ton-gcp.yaml b/docker/ton-gcp.yaml index 0ded5a794..e3d89a06e 100644 --- a/docker/ton-gcp.yaml +++ b/docker/ton-gcp.yaml @@ -27,7 +27,7 @@ spec: spec: containers: - name: validator-engine-container - image: ghcr.io/neodix42/ton:latest + image: ghcr.io/ton-blockchain/ton:latest env: - name: PUBLIC_IP value: "" diff --git a/docker/ton-metal-lb.yaml b/docker/ton-metal-lb.yaml index ceaf3a7c0..1e62a45ab 100644 --- a/docker/ton-metal-lb.yaml +++ b/docker/ton-metal-lb.yaml @@ -11,7 +11,7 @@ spec: claimName: validator-engine-pvc containers: - name: validator-engine-container - image: ghcr.io/neodix42/ton:latest + image: ghcr.io/ton-blockchain/ton:latest env: - name: PUBLIC_IP value: "" diff --git a/docker/ton-node-port.yaml b/docker/ton-node-port.yaml index ec594031f..a9f1fe5c8 100644 --- a/docker/ton-node-port.yaml +++ b/docker/ton-node-port.yaml @@ -21,7 +21,7 @@ spec: claimName: validator-engine-pvc containers: - name: validator-engine-container - image: ghcr.io/neodix42/ton:latest + image: ghcr.io/ton-blockchain/ton:latest env: - name: PUBLIC_IP value: "" From 31840a7aa3808c7e40c2b9c77222962561ddbb19 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Mon, 26 Aug 2024 17:53:42 +0300 Subject: [PATCH 09/19] Set default state ttl to 86400, set serializer delay to up to 6h (#1125) --- validator/state-serializer.cpp | 2 +- validator/validator.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/validator/state-serializer.cpp b/validator/state-serializer.cpp index b27561b63..f7ea7efe9 100644 --- a/validator/state-serializer.cpp +++ b/validator/state-serializer.cpp @@ -160,7 +160,7 @@ void AsyncStateSerializer::next_iteration() { LOG(ERROR) << "started serializing persistent state for " << masterchain_handle_->id().id.to_str(); // block next attempts immediately, but send actual request later running_ = true; - double delay = td::Random::fast(0, 3600); + double delay = td::Random::fast(0, 3600 * 6); LOG(WARNING) << "serializer delay = " << delay << "s"; delay_action( [SelfId = actor_id(this)]() { diff --git a/validator/validator.h b/validator/validator.h index 3bceec6fe..bbe22bb43 100644 --- a/validator/validator.h +++ b/validator/validator.h @@ -145,7 +145,7 @@ struct ValidatorManagerOptions : public td::CntObject { std::function check_shard = [](ShardIdFull, CatchainSeqno, ShardCheckMode) { return true; }, bool allow_blockchain_init = false, double sync_blocks_before = 3600, double block_ttl = 86400, - double state_ttl = 3600, double archive_ttl = 86400 * 7, double key_proof_ttl = 86400 * 3650, + double state_ttl = 86400, double archive_ttl = 86400 * 7, double key_proof_ttl = 86400 * 3650, double max_mempool_num = 999999, bool initial_sync_disabled = false); }; From b5fd8fa610f6043b2373171f71f469232bb8d415 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Tue, 27 Aug 2024 18:10:17 +0300 Subject: [PATCH 10/19] Disable state serializer on masterchain validators (#1129) --- validator/manager.cpp | 12 ++++++ validator/state-serializer.cpp | 72 ++++++++++++++++++++-------------- validator/state-serializer.hpp | 13 ++++-- 3 files changed, 64 insertions(+), 33 deletions(-) diff --git a/validator/manager.cpp b/validator/manager.cpp index fa592a788..8b7d0eb1d 100644 --- a/validator/manager.cpp +++ b/validator/manager.cpp @@ -2057,6 +2057,7 @@ void ValidatorManagerImpl::update_shards() { } } + bool validating_masterchain = false; if (allow_validate_) { for (auto &desc : new_shards) { auto shard = desc.first; @@ -2073,6 +2074,9 @@ void ValidatorManagerImpl::update_shards() { auto validator_id = get_validator(shard, val_set); if (!validator_id.is_zero()) { + if (shard.is_masterchain()) { + validating_masterchain = true; + } auto val_group_id = get_validator_set_id(shard, val_set, opts_hash, key_seqno, opts); if (force_recover) { @@ -2167,6 +2171,14 @@ void ValidatorManagerImpl::update_shards() { td::actor::send_closure(SelfId, &ValidatorManagerImpl::written_destroyed_validator_sessions, std::move(gc)); }); td::actor::send_closure(db_, &Db::update_destroyed_validator_sessions, gc_list_, std::move(P)); + + if (!serializer_.empty()) { + td::actor::send_closure( + serializer_, &AsyncStateSerializer::auto_disable_serializer, + validating_masterchain && + last_masterchain_state_->get_validator_set(ShardIdFull{masterchainId})->export_vector().size() * 2 <= + last_masterchain_state_->get_total_validator_set(0)->export_vector().size()); + } } } // namespace validator diff --git a/validator/state-serializer.cpp b/validator/state-serializer.cpp index f7ea7efe9..ef79d33cb 100644 --- a/validator/state-serializer.cpp +++ b/validator/state-serializer.cpp @@ -95,7 +95,8 @@ void AsyncStateSerializer::request_previous_state_files() { } void AsyncStateSerializer::got_previous_state_files(std::vector> files) { - previous_state_files_ = std::move(files); + previous_state_cache_ = std::make_shared(); + previous_state_cache_->state_files = std::move(files); request_masterchain_state(); } @@ -151,7 +152,10 @@ void AsyncStateSerializer::next_iteration() { need_serialize(masterchain_handle_)) { if (!have_masterchain_state_ && !opts_->get_state_serializer_enabled()) { LOG(ERROR) << "skipping serializing persistent state for " << masterchain_handle_->id().id.to_str() - << ": serializer is disabled"; + << ": serializer is disabled (by user)"; + } else if (!have_masterchain_state_ && auto_disabled_) { + LOG(ERROR) << "skipping serializing persistent state for " << masterchain_handle_->id().id.to_str() + << ": serializer is disabled (automatically)"; } else if (!have_masterchain_state_ && have_newer_persistent_state(masterchain_handle_->unix_time())) { LOG(ERROR) << "skipping serializing persistent state for " << masterchain_handle_->id().id.to_str() << ": newer key block with ts=" << last_known_key_block_ts_ << " exists"; @@ -182,9 +186,7 @@ void AsyncStateSerializer::next_iteration() { } last_key_block_ts_ = masterchain_handle_->unix_time(); last_key_block_id_ = masterchain_handle_->id(); - previous_state_files_ = {}; previous_state_cache_ = {}; - previous_state_cur_shards_ = {}; } if (!saved_to_db_) { running_ = true; @@ -252,27 +254,24 @@ class CachedCellDbReader : public vm::CellDbReader { td::uint64 cached_reqs_ = 0; }; -void AsyncStateSerializer::prepare_previous_state_cache(ShardIdFull shard) { - if (!opts_->get_fast_state_serializer_enabled()) { - return; - } +void AsyncStateSerializer::PreviousStateCache::prepare_cache(ShardIdFull shard) { std::vector prev_shards; - for (const auto& [_, prev_shard] : previous_state_files_) { + for (const auto& [_, prev_shard] : state_files) { if (shard_intersects(shard, prev_shard)) { prev_shards.push_back(prev_shard); } } - if (prev_shards == previous_state_cur_shards_) { + if (prev_shards == cur_shards) { return; } - previous_state_cur_shards_ = std::move(prev_shards); - previous_state_cache_ = {}; - if (previous_state_cur_shards_.empty()) { + cur_shards = std::move(prev_shards); + cache = {}; + if (cur_shards.empty()) { return; } td::Timer timer; LOG(WARNING) << "Preloading previous persistent state for shard " << shard.to_str() << " (" - << previous_state_cur_shards_.size() << " files)"; + << cur_shards.size() << " files)"; std::map> cells; std::function)> dfs = [&](td::Ref cell) { td::Bits256 hash = cell->get_hash().bits(); @@ -285,7 +284,7 @@ void AsyncStateSerializer::prepare_previous_state_cache(ShardIdFull shard) { dfs(cs.prefetch_ref(i)); } }; - for (const auto& [file, prev_shard] : previous_state_files_) { + for (const auto& [file, prev_shard] : state_files) { if (!shard_intersects(shard, prev_shard)) { continue; } @@ -300,22 +299,20 @@ void AsyncStateSerializer::prepare_previous_state_cache(ShardIdFull shard) { LOG(WARNING) << "Deserialize error : " << r_root.move_as_error(); continue; } - r_data = {}; + r_data.clear(); dfs(r_root.move_as_ok()); } LOG(WARNING) << "Preloaded previous state: " << cells.size() << " cells in " << timer.elapsed() << "s"; - previous_state_cache_ = std::make_shared>>(std::move(cells)); + cache = std::make_shared>>(std::move(cells)); } void AsyncStateSerializer::got_masterchain_state(td::Ref state, std::shared_ptr cell_db_reader) { - if (!opts_->get_state_serializer_enabled()) { + if (!opts_->get_state_serializer_enabled() || auto_disabled_) { stored_masterchain_state(); return; } LOG(ERROR) << "serializing masterchain state " << masterchain_handle_->id().id.to_str(); - prepare_previous_state_cache(state->get_shard()); - auto new_cell_db_reader = std::make_shared(cell_db_reader, previous_state_cache_); have_masterchain_state_ = true; CHECK(next_idx_ == 0); CHECK(shards_.size() == 0); @@ -325,10 +322,16 @@ void AsyncStateSerializer::got_masterchain_state(td::Ref state shards_.push_back(v->top_block_id()); } - auto write_data = [hash = state->root_cell()->get_hash(), cell_db_reader = new_cell_db_reader, + auto write_data = [shard = state->get_shard(), hash = state->root_cell()->get_hash(), cell_db_reader, + previous_state_cache = previous_state_cache_, + fast_serializer_enabled = opts_->get_fast_state_serializer_enabled(), cancellation_token = cancellation_token_source_.get_cancellation_token()](td::FileFd& fd) mutable { - auto res = vm::std_boc_serialize_to_file_large(cell_db_reader, hash, fd, 31, std::move(cancellation_token)); - cell_db_reader->print_stats(); + if (fast_serializer_enabled) { + previous_state_cache->prepare_cache(shard); + } + auto new_cell_db_reader = std::make_shared(cell_db_reader, previous_state_cache->cache); + auto res = vm::std_boc_serialize_to_file_large(new_cell_db_reader, hash, fd, 31, std::move(cancellation_token)); + new_cell_db_reader->print_stats(); return res; }; auto P = td::PromiseCreator::lambda([SelfId = actor_id(this)](td::Result R) { @@ -375,17 +378,21 @@ void AsyncStateSerializer::got_shard_handle(BlockHandle handle) { void AsyncStateSerializer::got_shard_state(BlockHandle handle, td::Ref state, std::shared_ptr cell_db_reader) { - if (!opts_->get_state_serializer_enabled()) { + if (!opts_->get_state_serializer_enabled() || auto_disabled_) { success_handler(); return; } LOG(ERROR) << "serializing shard state " << handle->id().id.to_str(); - prepare_previous_state_cache(state->get_shard()); - auto new_cell_db_reader = std::make_shared(cell_db_reader, previous_state_cache_); - auto write_data = [hash = state->root_cell()->get_hash(), cell_db_reader = new_cell_db_reader, + auto write_data = [shard = state->get_shard(), hash = state->root_cell()->get_hash(), cell_db_reader, + previous_state_cache = previous_state_cache_, + fast_serializer_enabled = opts_->get_fast_state_serializer_enabled(), cancellation_token = cancellation_token_source_.get_cancellation_token()](td::FileFd& fd) mutable { - auto res = vm::std_boc_serialize_to_file_large(cell_db_reader, hash, fd, 31, std::move(cancellation_token)); - cell_db_reader->print_stats(); + if (fast_serializer_enabled) { + previous_state_cache->prepare_cache(shard); + } + auto new_cell_db_reader = std::make_shared(cell_db_reader, previous_state_cache->cache); + auto res = vm::std_boc_serialize_to_file_large(new_cell_db_reader, hash, fd, 31, std::move(cancellation_token)); + new_cell_db_reader->print_stats(); return res; }; auto P = td::PromiseCreator::lambda([SelfId = actor_id(this), handle](td::Result R) { @@ -427,6 +434,13 @@ void AsyncStateSerializer::update_options(td::Ref opts) } } +void AsyncStateSerializer::auto_disable_serializer(bool disabled) { + auto_disabled_ = disabled; + if (auto_disabled_) { + cancellation_token_source_.cancel(); + } +} + bool AsyncStateSerializer::need_monitor(ShardIdFull shard) { return opts_->need_monitor(shard); diff --git a/validator/state-serializer.hpp b/validator/state-serializer.hpp index 6d966f930..68606d1ea 100644 --- a/validator/state-serializer.hpp +++ b/validator/state-serializer.hpp @@ -37,6 +37,7 @@ class AsyncStateSerializer : public td::actor::Actor { bool saved_to_db_ = true; td::Ref opts_; + bool auto_disabled_ = false; td::CancellationTokenSource cancellation_token_source_; UnixTime last_known_key_block_ts_ = 0; @@ -48,11 +49,14 @@ class AsyncStateSerializer : public td::actor::Actor { bool have_masterchain_state_ = false; std::vector shards_; - std::vector> previous_state_files_; - std::shared_ptr>> previous_state_cache_; - std::vector previous_state_cur_shards_; + struct PreviousStateCache { + std::vector> state_files; + std::shared_ptr>> cache; + std::vector cur_shards; - void prepare_previous_state_cache(ShardIdFull shard); + void prepare_cache(ShardIdFull shard); + }; + std::shared_ptr previous_state_cache_; public: AsyncStateSerializer(BlockIdExt block_id, td::Ref opts, @@ -105,6 +109,7 @@ class AsyncStateSerializer : public td::actor::Actor { void success_handler(); void update_options(td::Ref opts); + void auto_disable_serializer(bool disabled); }; } // namespace validator From e9bd4823824efea4b16248de41dbebe5c73987c8 Mon Sep 17 00:00:00 2001 From: EmelyanenkoK Date: Tue, 27 Aug 2024 18:17:43 +0300 Subject: [PATCH 11/19] Increase moderate misbehavior threshold --- lite-client/lite-client.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lite-client/lite-client.cpp b/lite-client/lite-client.cpp index 02a5fab67..da2fd6ff9 100644 --- a/lite-client/lite-client.cpp +++ b/lite-client/lite-client.cpp @@ -3737,10 +3737,10 @@ void TestNode::continue_check_validator_load3(std::unique_ptr Date: Wed, 28 Aug 2024 14:09:22 +0300 Subject: [PATCH 12/19] Allow unlimited catchain.getBlock requests (#1132) --- catchain/catchain-receiver-source.h | 2 +- catchain/catchain-receiver.cpp | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/catchain/catchain-receiver-source.h b/catchain/catchain-receiver-source.h index 55035e779..136906a02 100644 --- a/catchain/catchain-receiver-source.h +++ b/catchain/catchain-receiver-source.h @@ -61,7 +61,7 @@ class CatChainReceiverSource { virtual td::BufferSlice fork_proof() const = 0; virtual bool fork_is_found() const = 0; - // One block can be sent to one node only a limited number of times to prevent DoS + // One block can be sent to one node in catchain.getDifference only a limited number of times to prevent DoS virtual bool allow_send_block(CatChainBlockHash hash) = 0; static td::Result> create(CatChainReceiver *chain, PublicKey pub_key, diff --git a/catchain/catchain-receiver.cpp b/catchain/catchain-receiver.cpp index 82779e3be..ade4726fb 100644 --- a/catchain/catchain-receiver.cpp +++ b/catchain/catchain-receiver.cpp @@ -697,12 +697,8 @@ void CatChainReceiverImpl::process_query(adnl::AdnlNodeIdShort src, ton_api::cat } else { CatChainReceiverSource *S = get_source_by_adnl_id(src); CHECK(S != nullptr); - if (S->allow_send_block(it->second->get_hash())) { - promise.set_value(serialize_tl_object(create_tl_object(it->second->export_tl()), - true, it->second->get_payload().as_slice())); - } else { - promise.set_error(td::Status::Error("block was requested too many times")); - } + promise.set_value(serialize_tl_object(create_tl_object(it->second->export_tl()), + true, it->second->get_payload().as_slice())); } } From 97c57c3386e54efc33836ce1095baa08cce9344a Mon Sep 17 00:00:00 2001 From: neodix42 Date: Wed, 28 Aug 2024 18:59:28 +0400 Subject: [PATCH 13/19] add gh action to create docker image with a specified branch. Branch name will be used as image's tag name. (#1133) Co-authored-by: neodiX --- .../workflows/docker-ubuntu-branch-image.yml | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 .github/workflows/docker-ubuntu-branch-image.yml diff --git a/.github/workflows/docker-ubuntu-branch-image.yml b/.github/workflows/docker-ubuntu-branch-image.yml new file mode 100644 index 000000000..d749afa2e --- /dev/null +++ b/.github/workflows/docker-ubuntu-branch-image.yml @@ -0,0 +1,48 @@ +name: Docker Ubuntu 22.04 branch image + +on: + workflow_dispatch: + push: + branches-ignore: + - master + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + build-and-push: + runs-on: ubuntu-22.04 + steps: + - name: Check out repository + uses: actions/checkout@v3 + with: + submodules: 'recursive' + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Get tag as branch name + id: tag + run: | + echo "TAG=${GITHUB_REF##*/}" >> $GITHUB_OUTPUT + + - name: Build and push + id: docker_build + uses: docker/build-push-action@v6 + with: + platforms: linux/amd64 + push: true + context: ./ + tags: | + ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tag.outputs.TAG }} From e08111159f7aa46219ed5bc198ac13a4caa8782d Mon Sep 17 00:00:00 2001 From: EmelyanenkoK Date: Fri, 30 Aug 2024 17:00:06 +0300 Subject: [PATCH 14/19] Dynamic catchain delays, state serialization improvements (#1140) * Validator improvements * Fix cancelling state serialization * Disable state serializer on all mainnet validators * Flag --catchain-max-block-delay-slow * Set default catchain-max-block-delay to 0.4, delay-slow to 1.0 --------- Co-authored-by: SpyCheese --- catchain/catchain-receiver.cpp | 6 ++++++ validator-engine/validator-engine.cpp | 17 +++++++++++++++-- validator-engine/validator-engine.hpp | 5 ++++- validator-session/validator-session-state.h | 8 ++++++++ validator-session/validator-session.cpp | 18 +++++++++++++++--- validator-session/validator-session.h | 2 +- validator-session/validator-session.hpp | 8 ++++++-- validator/impl/shard.cpp | 2 ++ validator/impl/shard.hpp | 4 ++++ validator/interfaces/shard.h | 1 + validator/manager.cpp | 18 ++++++------------ validator/state-serializer.cpp | 2 +- validator/validator-group.cpp | 10 ++++++---- validator/validator-options.hpp | 8 +++++++- validator/validator.h | 2 ++ 15 files changed, 84 insertions(+), 27 deletions(-) diff --git a/catchain/catchain-receiver.cpp b/catchain/catchain-receiver.cpp index ade4726fb..a6ecf0611 100644 --- a/catchain/catchain-receiver.cpp +++ b/catchain/catchain-receiver.cpp @@ -368,6 +368,12 @@ void CatChainReceiverImpl::add_block(td::BufferSlice payload, std::vectorheight_ + 1; + auto max_block_height = get_max_block_height(opts_, sources_.size()); + if (height > max_block_height) { + VLOG(CATCHAIN_WARNING) << this << ": cannot create block: max height exceeded (" << max_block_height << ")"; + active_send_ = false; + return; + } auto block_data = create_tl_object(std::move(prev), std::move(deps_arr)); auto block = create_tl_object(incarnation_, local_idx_, height, std::move(block_data), td::BufferSlice()); diff --git a/validator-engine/validator-engine.cpp b/validator-engine/validator-engine.cpp index a9a3b21d4..11f1a9a28 100644 --- a/validator-engine/validator-engine.cpp +++ b/validator-engine/validator-engine.cpp @@ -1448,6 +1448,9 @@ td::Status ValidatorEngine::load_global_config() { if (catchain_max_block_delay_) { validator_options_.write().set_catchain_max_block_delay(catchain_max_block_delay_.value()); } + if (catchain_max_block_delay_slow_) { + validator_options_.write().set_catchain_max_block_delay_slow(catchain_max_block_delay_slow_.value()); + } std::vector h; for (auto &x : conf.validator_->hardforks_) { @@ -4072,7 +4075,7 @@ int main(int argc, char *argv[]) { logger_ = td::TsFileLog::create(fname.str()).move_as_ok(); td::log_interface = logger_.get(); }); - p.add_checked_option('s', "state-ttl", "state will be gc'd after this time (in seconds) default=3600", + p.add_checked_option('s', "state-ttl", "state will be gc'd after this time (in seconds) default=86400", [&](td::Slice fname) { auto v = td::to_double(fname); if (v <= 0) { @@ -4233,7 +4236,7 @@ int main(int argc, char *argv[]) { "preload all cells from CellDb on startup (recommended to use with big enough celldb-cache-size and celldb-direct-io)", [&]() { acts.push_back([&x]() { td::actor::send_closure(x, &ValidatorEngine::set_celldb_preload_all, true); }); }); p.add_checked_option( - '\0', "catchain-max-block-delay", "delay before creating a new catchain block, in seconds (default: 0.5)", + '\0', "catchain-max-block-delay", "delay before creating a new catchain block, in seconds (default: 0.4)", [&](td::Slice s) -> td::Status { auto v = td::to_double(s); if (v < 0) { @@ -4242,6 +4245,16 @@ int main(int argc, char *argv[]) { acts.push_back([&x, v]() { td::actor::send_closure(x, &ValidatorEngine::set_catchain_max_block_delay, v); }); return td::Status::OK(); }); + p.add_checked_option( + '\0', "catchain-max-block-delay-slow", "max extended catchain block delay (for too long rounds), (default: 1.0)", + [&](td::Slice s) -> td::Status { + auto v = td::to_double(s); + if (v < 0) { + return td::Status::Error("catchain-max-block-delay-slow should be non-negative"); + } + acts.push_back([&x, v]() { td::actor::send_closure(x, &ValidatorEngine::set_catchain_max_block_delay_slow, v); }); + return td::Status::OK(); + }); p.add_option( '\0', "fast-state-serializer", "faster persistent state serializer, but requires more RAM (enabled automatically on machines with >= 90GB RAM)", diff --git a/validator-engine/validator-engine.hpp b/validator-engine/validator-engine.hpp index b00c97130..7212a0b50 100644 --- a/validator-engine/validator-engine.hpp +++ b/validator-engine/validator-engine.hpp @@ -214,7 +214,7 @@ class ValidatorEngine : public td::actor::Actor { td::optional celldb_cache_size_ = 1LL << 30; bool celldb_direct_io_ = false; bool celldb_preload_all_ = false; - td::optional catchain_max_block_delay_; + td::optional catchain_max_block_delay_, catchain_max_block_delay_slow_; bool read_config_ = false; bool started_keyring_ = false; bool started_ = false; @@ -300,6 +300,9 @@ class ValidatorEngine : public td::actor::Actor { void set_catchain_max_block_delay(double value) { catchain_max_block_delay_ = value; } + void set_catchain_max_block_delay_slow(double value) { + catchain_max_block_delay_slow_ = value; + } void set_fast_state_serializer_enabled(bool value) { fast_state_serializer_enabled_ = value; } diff --git a/validator-session/validator-session-state.h b/validator-session/validator-session-state.h index 35910535d..4efaf77ff 100644 --- a/validator-session/validator-session-state.h +++ b/validator-session/validator-session-state.h @@ -478,6 +478,14 @@ class ValidatorSessionState : public ValidatorSessionDescription::RootObject { auto get_ts(td::uint32 src_idx) const { return att_->at(src_idx); } + td::uint32 cur_attempt_in_round(const ValidatorSessionDescription& desc) const { + td::uint32 first_attempt = cur_round_->get_first_attempt(desc.get_self_idx()); + td::uint32 cur_attempt = desc.get_attempt_seqno(desc.get_ts()); + if (cur_attempt < first_attempt || first_attempt == 0) { + return 0; + } + return cur_attempt - first_attempt; + } const SentBlock* choose_block_to_sign(ValidatorSessionDescription& desc, td::uint32 src_idx, bool& found) const; const SentBlock* get_committed_block(ValidatorSessionDescription& desc, td::uint32 seqno) const; diff --git a/validator-session/validator-session.cpp b/validator-session/validator-session.cpp index be5443785..246f7e58a 100644 --- a/validator-session/validator-session.cpp +++ b/validator-session/validator-session.cpp @@ -813,13 +813,25 @@ void ValidatorSessionImpl::request_new_block(bool now) { } else { double lambda = 10.0 / description().get_total_nodes(); double x = -1 / lambda * log(td::Random::fast(1, 999) * 0.001); - if (x > catchain_max_block_delay_) { // default = 0.5 - x = catchain_max_block_delay_; - } + x = std::min(x, get_current_max_block_delay()); // default = 0.4 td::actor::send_closure(catchain_, &catchain::CatChain::need_new_block, td::Timestamp::in(x)); } } +double ValidatorSessionImpl::get_current_max_block_delay() const { + td::uint32 att = real_state_->cur_attempt_in_round(*description_); + td::uint32 att1 = description_->opts().max_round_attempts; + if (att <= att1) { + return catchain_max_block_delay_; + } + td::uint32 att2 = att1 + 4; + if (att >= att2) { + return catchain_max_block_delay_slow_; + } + return catchain_max_block_delay_ + + (catchain_max_block_delay_slow_ - catchain_max_block_delay_) * (double)(att - att1) / (double)(att2 - att1); +} + void ValidatorSessionImpl::on_new_round(td::uint32 round) { if (round != 0) { CHECK(cur_round_ < round); diff --git a/validator-session/validator-session.h b/validator-session/validator-session.h index 2e1ed9b13..e60330b09 100644 --- a/validator-session/validator-session.h +++ b/validator-session/validator-session.h @@ -109,7 +109,7 @@ class ValidatorSession : public td::actor::Actor { virtual void get_validator_group_info_for_litequery( td::uint32 cur_round, td::Promise>> promise) = 0; - virtual void set_catchain_max_block_delay(double value) = 0; + virtual void set_catchain_max_block_delay(double delay, double delay_slow) = 0; static td::actor::ActorOwn create( catchain::CatChainSessionId session_id, ValidatorSessionOptions opts, PublicKeyHash local_id, diff --git a/validator-session/validator-session.hpp b/validator-session/validator-session.hpp index 2ee4885b9..39f196d8b 100644 --- a/validator-session/validator-session.hpp +++ b/validator-session/validator-session.hpp @@ -91,6 +91,7 @@ class ValidatorSessionImpl : public ValidatorSession { std::unique_ptr description_; double catchain_max_block_delay_ = 0.4; + double catchain_max_block_delay_slow_ = 1.0; void on_new_round(td::uint32 round); void on_catchain_started(); @@ -150,6 +151,7 @@ class ValidatorSessionImpl : public ValidatorSession { } void request_new_block(bool now); + double get_current_max_block_delay() const; void get_broadcast_p2p(PublicKeyHash node, ValidatorSessionFileHash file_hash, ValidatorSessionCollatedDataFileHash collated_data_file_hash, PublicKeyHash src, td::uint32 round, ValidatorSessionRootHash root_hash, td::Promise promise, @@ -191,8 +193,10 @@ class ValidatorSessionImpl : public ValidatorSession { void get_validator_group_info_for_litequery( td::uint32 cur_round, td::Promise>> promise) override; - void set_catchain_max_block_delay(double value) override { - catchain_max_block_delay_ = value; + + void set_catchain_max_block_delay(double delay, double delay_slow) override { + catchain_max_block_delay_ = delay; + catchain_max_block_delay_slow_ = delay_slow; } void process_blocks(std::vector blocks); diff --git a/validator/impl/shard.cpp b/validator/impl/shard.cpp index e899926a0..9c4245b6a 100644 --- a/validator/impl/shard.cpp +++ b/validator/impl/shard.cpp @@ -44,6 +44,7 @@ ShardStateQ::ShardStateQ(const ShardStateQ& other) , root(other.root) , lt(other.lt) , utime(other.utime) + , global_id_(other.global_id_) , before_split_(other.before_split_) , fake_split_(other.fake_split_) , fake_merge_(other.fake_merge_) { @@ -121,6 +122,7 @@ td::Status ShardStateQ::init() { } lt = info.gen_lt; utime = info.gen_utime; + global_id_ = info.global_id; before_split_ = info.before_split; block::ShardId id{info.shard_id}; ton::BlockId hdr_id{ton::ShardIdFull(id), info.seq_no}; diff --git a/validator/impl/shard.hpp b/validator/impl/shard.hpp index 99a9e8b08..d9a7dd655 100644 --- a/validator/impl/shard.hpp +++ b/validator/impl/shard.hpp @@ -38,6 +38,7 @@ class ShardStateQ : virtual public ShardState { Ref root; LogicalTime lt{0}; UnixTime utime{0}; + td::int32 global_id_{0}; bool before_split_{false}; bool fake_split_{false}; bool fake_merge_{false}; @@ -81,6 +82,9 @@ class ShardStateQ : virtual public ShardState { LogicalTime get_logical_time() const override { return lt; } + td::int32 get_global_id() const override { + return global_id_; + } td::optional get_master_ref() const override { return master_ref; } diff --git a/validator/interfaces/shard.h b/validator/interfaces/shard.h index 35fe4bc9a..85022e6dd 100644 --- a/validator/interfaces/shard.h +++ b/validator/interfaces/shard.h @@ -39,6 +39,7 @@ class ShardState : public td::CntObject { virtual UnixTime get_unix_time() const = 0; virtual LogicalTime get_logical_time() const = 0; + virtual td::int32 get_global_id() const = 0; virtual ShardIdFull get_shard() const = 0; virtual BlockSeqno get_seqno() const = 0; virtual BlockIdExt get_block_id() const = 0; diff --git a/validator/manager.cpp b/validator/manager.cpp index 8b7d0eb1d..79927d0ef 100644 --- a/validator/manager.cpp +++ b/validator/manager.cpp @@ -2057,7 +2057,6 @@ void ValidatorManagerImpl::update_shards() { } } - bool validating_masterchain = false; if (allow_validate_) { for (auto &desc : new_shards) { auto shard = desc.first; @@ -2074,9 +2073,6 @@ void ValidatorManagerImpl::update_shards() { auto validator_id = get_validator(shard, val_set); if (!validator_id.is_zero()) { - if (shard.is_masterchain()) { - validating_masterchain = true; - } auto val_group_id = get_validator_set_id(shard, val_set, opts_hash, key_seqno, opts); if (force_recover) { @@ -2171,16 +2167,14 @@ void ValidatorManagerImpl::update_shards() { td::actor::send_closure(SelfId, &ValidatorManagerImpl::written_destroyed_validator_sessions, std::move(gc)); }); td::actor::send_closure(db_, &Db::update_destroyed_validator_sessions, gc_list_, std::move(P)); + } - if (!serializer_.empty()) { - td::actor::send_closure( - serializer_, &AsyncStateSerializer::auto_disable_serializer, - validating_masterchain && - last_masterchain_state_->get_validator_set(ShardIdFull{masterchainId})->export_vector().size() * 2 <= - last_masterchain_state_->get_total_validator_set(0)->export_vector().size()); - } + if (!serializer_.empty()) { + td::actor::send_closure( + serializer_, &AsyncStateSerializer::auto_disable_serializer, + !validator_groups_.empty() && last_masterchain_state_->get_global_id() == -239); // mainnet only } -} // namespace validator +} void ValidatorManagerImpl::written_destroyed_validator_sessions(std::vector> list) { for (auto &v : list) { diff --git a/validator/state-serializer.cpp b/validator/state-serializer.cpp index ef79d33cb..516d8177f 100644 --- a/validator/state-serializer.cpp +++ b/validator/state-serializer.cpp @@ -378,6 +378,7 @@ void AsyncStateSerializer::got_shard_handle(BlockHandle handle) { void AsyncStateSerializer::got_shard_state(BlockHandle handle, td::Ref state, std::shared_ptr cell_db_reader) { + next_idx_++; if (!opts_->get_state_serializer_enabled() || auto_disabled_) { success_handler(); return; @@ -406,7 +407,6 @@ void AsyncStateSerializer::got_shard_state(BlockHandle handle, td::Refid(), masterchain_handle_->id(), write_data, std::move(P)); - next_idx_++; } void AsyncStateSerializer::fail_handler(td::Status reason) { diff --git a/validator/validator-group.cpp b/validator/validator-group.cpp index 4b61c07cd..bd545f607 100644 --- a/validator/validator-group.cpp +++ b/validator/validator-group.cpp @@ -348,10 +348,12 @@ void ValidatorGroup::create_session() { << ".", allow_unsafe_self_blocks_resync_); } - if (opts_->get_catchain_max_block_delay()) { - td::actor::send_closure(session_, &validatorsession::ValidatorSession::set_catchain_max_block_delay, - opts_->get_catchain_max_block_delay().value()); - } + double catchain_delay = opts_->get_catchain_max_block_delay() ? opts_->get_catchain_max_block_delay().value() : 0.4; + double catchain_delay_slow = + std::max(catchain_delay, + opts_->get_catchain_max_block_delay_slow() ? opts_->get_catchain_max_block_delay_slow().value() : 1.0); + td::actor::send_closure(session_, &validatorsession::ValidatorSession::set_catchain_max_block_delay, catchain_delay, + catchain_delay_slow); if (started_) { td::actor::send_closure(session_, &validatorsession::ValidatorSession::start); } diff --git a/validator/validator-options.hpp b/validator/validator-options.hpp index 04aeb69bb..900a682fd 100644 --- a/validator/validator-options.hpp +++ b/validator/validator-options.hpp @@ -141,6 +141,9 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions { td::optional get_catchain_max_block_delay() const override { return catchain_max_block_delay_; } + td::optional get_catchain_max_block_delay_slow() const override { + return catchain_max_block_delay_slow_; + } bool get_state_serializer_enabled() const override { return state_serializer_enabled_; } @@ -230,6 +233,9 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions { void set_catchain_max_block_delay(double value) override { catchain_max_block_delay_ = value; } + void set_catchain_max_block_delay_slow(double value) override { + catchain_max_block_delay_slow_ = value; + } void set_state_serializer_enabled(bool value) override { state_serializer_enabled_ = value; } @@ -289,7 +295,7 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions { td::optional celldb_cache_size_; bool celldb_direct_io_ = false; bool celldb_preload_all_ = false; - td::optional catchain_max_block_delay_; + td::optional catchain_max_block_delay_, catchain_max_block_delay_slow_; bool state_serializer_enabled_ = true; td::Ref collator_options_{true}; bool fast_state_serializer_enabled_ = false; diff --git a/validator/validator.h b/validator/validator.h index bbe22bb43..2171954a3 100644 --- a/validator/validator.h +++ b/validator/validator.h @@ -105,6 +105,7 @@ struct ValidatorManagerOptions : public td::CntObject { virtual bool get_celldb_direct_io() const = 0; virtual bool get_celldb_preload_all() const = 0; virtual td::optional get_catchain_max_block_delay() const = 0; + virtual td::optional get_catchain_max_block_delay_slow() const = 0; virtual bool get_state_serializer_enabled() const = 0; virtual td::Ref get_collator_options() const = 0; virtual bool get_fast_state_serializer_enabled() const = 0; @@ -136,6 +137,7 @@ struct ValidatorManagerOptions : public td::CntObject { virtual void set_celldb_direct_io(bool value) = 0; virtual void set_celldb_preload_all(bool value) = 0; virtual void set_catchain_max_block_delay(double value) = 0; + virtual void set_catchain_max_block_delay_slow(double value) = 0; virtual void set_state_serializer_enabled(bool value) = 0; virtual void set_collator_options(td::Ref value) = 0; virtual void set_fast_state_serializer_enabled(bool value) = 0; From b2b79fead1e90708b514d6d8dfd0dc63f1a4fe92 Mon Sep 17 00:00:00 2001 From: EmelyanenkoK Date: Tue, 3 Sep 2024 13:34:31 +0300 Subject: [PATCH 15/19] Ratelimit nochannel ADNL packets (#1147) * Get ADNL stats in validator console * Add timestamp to stats * Limit nochannel adnl packets --------- Co-authored-by: SpyCheese --- adnl/adnl-channel.cpp | 20 +- adnl/adnl-local-id.cpp | 97 +++- adnl/adnl-local-id.h | 19 + adnl/adnl-peer-table.cpp | 86 +++- adnl/adnl-peer-table.h | 2 +- adnl/adnl-peer-table.hpp | 4 +- adnl/adnl-peer.cpp | 436 ++++++++++++------ adnl/adnl-peer.h | 8 +- adnl/adnl-peer.hpp | 41 +- adnl/adnl.h | 2 + adnl/utils.hpp | 34 ++ tl/generate/scheme/ton_api.tl | 23 +- tl/generate/scheme/ton_api.tlo | Bin 92472 -> 94732 bytes .../validator-engine-console-query.cpp | 163 +++++++ .../validator-engine-console-query.h | 44 ++ .../validator-engine-console.cpp | 2 + validator-engine/validator-engine.cpp | 22 + validator-engine/validator-engine.hpp | 2 + 18 files changed, 834 insertions(+), 171 deletions(-) diff --git a/adnl/adnl-channel.cpp b/adnl/adnl-channel.cpp index 5c8229ca4..4da9d2eed 100644 --- a/adnl/adnl-channel.cpp +++ b/adnl/adnl-channel.cpp @@ -112,16 +112,16 @@ void AdnlChannelImpl::send_message(td::uint32 priority, td::actor::ActorId R) { - if (R.is_error()) { - VLOG(ADNL_WARNING) << id << ": dropping IN message: can not decrypt: " << R.move_as_error(); - } else { - auto packet = R.move_as_ok(); - packet.set_remote_addr(addr); - td::actor::send_closure(peer, &AdnlPeerPair::receive_packet_from_channel, channel_id, std::move(packet)); - } - }); + auto P = td::PromiseCreator::lambda([peer = peer_pair_, channel_id = channel_in_id_, addr, id = print_id(), + size = data.size()](td::Result R) { + if (R.is_error()) { + VLOG(ADNL_WARNING) << id << ": dropping IN message: can not decrypt: " << R.move_as_error(); + } else { + auto packet = R.move_as_ok(); + packet.set_remote_addr(addr); + td::actor::send_closure(peer, &AdnlPeerPair::receive_packet_from_channel, channel_id, std::move(packet), size); + } + }); decrypt(std::move(data), std::move(P)); } diff --git a/adnl/adnl-local-id.cpp b/adnl/adnl-local-id.cpp index b48182763..d72fc7bcc 100644 --- a/adnl/adnl-local-id.cpp +++ b/adnl/adnl-local-id.cpp @@ -41,20 +41,34 @@ AdnlAddressList AdnlLocalId::get_addr_list() const { } void AdnlLocalId::receive(td::IPAddress addr, td::BufferSlice data) { - auto P = td::PromiseCreator::lambda( - [peer_table = peer_table_, dst = short_id_, addr, id = print_id()](td::Result R) { - if (R.is_error()) { - VLOG(ADNL_WARNING) << id << ": dropping IN message: cannot decrypt: " << R.move_as_error(); - } else { - auto packet = R.move_as_ok(); - packet.set_remote_addr(addr); - td::actor::send_closure(peer_table, &AdnlPeerTable::receive_decrypted_packet, dst, std::move(packet)); - } - }); - + InboundRateLimiter& rate_limiter = inbound_rate_limiter_[addr]; + if (!rate_limiter.rate_limiter.take()) { + VLOG(ADNL_NOTICE) << this << ": dropping IN message: rate limit exceeded"; + add_dropped_packet_stats(addr); + return; + } + ++rate_limiter.currently_decrypting_packets; + auto P = td::PromiseCreator::lambda([SelfId = actor_id(this), peer_table = peer_table_, dst = short_id_, addr, + id = print_id(), size = data.size()](td::Result R) { + td::actor::send_closure(SelfId, &AdnlLocalId::decrypt_packet_done, addr); + if (R.is_error()) { + VLOG(ADNL_WARNING) << id << ": dropping IN message: cannot decrypt: " << R.move_as_error(); + } else { + auto packet = R.move_as_ok(); + packet.set_remote_addr(addr); + td::actor::send_closure(peer_table, &AdnlPeerTable::receive_decrypted_packet, dst, std::move(packet), size); + } + }); decrypt(std::move(data), std::move(P)); } +void AdnlLocalId::decrypt_packet_done(td::IPAddress addr) { + auto it = inbound_rate_limiter_.find(addr); + CHECK(it != inbound_rate_limiter_.end()); + --it->second.currently_decrypting_packets; + add_decrypted_packet_stats(addr); +} + void AdnlLocalId::deliver(AdnlNodeIdShort src, td::BufferSlice data) { auto s = std::move(data); for (auto &cb : cb_) { @@ -292,6 +306,67 @@ void AdnlLocalId::update_packet(AdnlPacket packet, bool update_id, bool sign, td } } +void AdnlLocalId::get_stats(td::Promise> promise) { + auto stats = create_tl_object(); + stats->short_id_ = short_id_.bits256_value(); + for (auto &[ip, x] : inbound_rate_limiter_) { + if (x.currently_decrypting_packets != 0) { + stats->current_decrypt_.push_back(create_tl_object( + ip.is_valid() ? PSTRING() << ip.get_ip_str() << ":" << ip.get_port() : "", x.currently_decrypting_packets)); + } + } + prepare_packet_stats(); + stats->packets_recent_ = packet_stats_prev_.tl(); + stats->packets_total_ = packet_stats_total_.tl(); + stats->packets_total_->ts_start_ = (double)Adnl::adnl_start_time(); + stats->packets_total_->ts_end_ = td::Clocks::system(); + promise.set_result(std::move(stats)); +} + +void AdnlLocalId::add_decrypted_packet_stats(td::IPAddress addr) { + prepare_packet_stats(); + ++packet_stats_cur_.decrypted_packets[addr]; + ++packet_stats_total_.decrypted_packets[addr]; +} + +void AdnlLocalId::add_dropped_packet_stats(td::IPAddress addr) { + prepare_packet_stats(); + ++packet_stats_cur_.dropped_packets[addr]; + ++packet_stats_total_.dropped_packets[addr]; +} + +void AdnlLocalId::prepare_packet_stats() { + double now = td::Clocks::system(); + if (now >= packet_stats_cur_.ts_end) { + packet_stats_prev_ = std::move(packet_stats_cur_); + packet_stats_cur_ = {}; + auto now_int = (int)td::Clocks::system(); + packet_stats_cur_.ts_start = (double)(now_int / 60 * 60); + packet_stats_cur_.ts_end = packet_stats_cur_.ts_start + 60.0; + if (packet_stats_prev_.ts_end < now - 60.0) { + packet_stats_prev_ = {}; + packet_stats_prev_.ts_end = packet_stats_cur_.ts_start; + packet_stats_prev_.ts_start = packet_stats_prev_.ts_end - 60.0; + } + } +} + +tl_object_ptr AdnlLocalId::PacketStats::tl() const { + auto obj = create_tl_object(); + obj->ts_start_ = ts_start; + obj->ts_end_ = ts_end; + for (const auto &[ip, packets] : decrypted_packets) { + obj->decrypted_packets_.push_back(create_tl_object( + ip.is_valid() ? PSTRING() << ip.get_ip_str() << ":" << ip.get_port() : "", packets)); + } + for (const auto &[ip, packets] : dropped_packets) { + obj->dropped_packets_.push_back(create_tl_object( + ip.is_valid() ? PSTRING() << ip.get_ip_str() << ":" << ip.get_port() : "", packets)); + } + return obj; +} + + } // namespace adnl } // namespace ton diff --git a/adnl/adnl-local-id.h b/adnl/adnl-local-id.h index c9ecfff16..be9d79d2c 100644 --- a/adnl/adnl-local-id.h +++ b/adnl/adnl-local-id.h @@ -55,6 +55,7 @@ class AdnlLocalId : public td::actor::Actor { void deliver(AdnlNodeIdShort src, td::BufferSlice data); void deliver_query(AdnlNodeIdShort src, td::BufferSlice data, td::Promise promise); void receive(td::IPAddress addr, td::BufferSlice data); + void decrypt_packet_done(td::IPAddress addr); void subscribe(std::string prefix, std::unique_ptr callback); void unsubscribe(std::string prefix); @@ -77,6 +78,8 @@ class AdnlLocalId : public td::actor::Actor { void update_packet(AdnlPacket packet, bool update_id, bool sign, td::int32 update_addr_list_if, td::int32 update_priority_addr_list_if, td::Promise promise); + void get_stats(td::Promise> promise); + td::uint32 get_mode() { return mode_; } @@ -101,6 +104,22 @@ class AdnlLocalId : public td::actor::Actor { td::uint32 mode_; + struct InboundRateLimiter { + RateLimiter rate_limiter = RateLimiter(75, 0.33); + td::uint64 currently_decrypting_packets = 0; + }; + std::map inbound_rate_limiter_; + struct PacketStats { + double ts_start = 0.0, ts_end = 0.0; + std::map decrypted_packets; + std::map dropped_packets; + + tl_object_ptr tl() const; + } packet_stats_cur_, packet_stats_prev_, packet_stats_total_; + void add_decrypted_packet_stats(td::IPAddress addr); + void add_dropped_packet_stats(td::IPAddress addr); + void prepare_packet_stats(); + void publish_address_list(); }; diff --git a/adnl/adnl-peer-table.cpp b/adnl/adnl-peer-table.cpp index 548915159..d885623a9 100644 --- a/adnl/adnl-peer-table.cpp +++ b/adnl/adnl-peer-table.cpp @@ -84,7 +84,7 @@ void AdnlPeerTableImpl::receive_packet(td::IPAddress addr, AdnlCategoryMask cat_ << " (len=" << (data.size() + 32) << ")"; } -void AdnlPeerTableImpl::receive_decrypted_packet(AdnlNodeIdShort dst, AdnlPacket packet) { +void AdnlPeerTableImpl::receive_decrypted_packet(AdnlNodeIdShort dst, AdnlPacket packet, td::uint64 serialized_size) { packet.run_basic_checks().ensure(); if (!packet.inited_from_short()) { @@ -119,7 +119,7 @@ void AdnlPeerTableImpl::receive_decrypted_packet(AdnlNodeIdShort dst, AdnlPacket return; } td::actor::send_closure(it->second, &AdnlPeer::receive_packet, dst, it2->second.mode, it2->second.local_id.get(), - std::move(packet)); + std::move(packet), serialized_size); } void AdnlPeerTableImpl::add_peer(AdnlNodeIdShort local_id, AdnlNodeIdFull id, AdnlAddressList addr_list) { @@ -385,6 +385,88 @@ void AdnlPeerTableImpl::get_conn_ip_str(AdnlNodeIdShort l_id, AdnlNodeIdShort p_ td::actor::send_closure(it->second, &AdnlPeer::get_conn_ip_str, l_id, std::move(promise)); } +void AdnlPeerTableImpl::get_stats(td::Promise> promise) { + class Cb : public td::actor::Actor { + public: + explicit Cb(td::Promise> promise) : promise_(std::move(promise)) { + } + + void got_local_id_stats(tl_object_ptr local_id) { + auto &local_id_stats = local_id_stats_[local_id->short_id_]; + if (local_id_stats) { + local_id->peers_ = std::move(local_id_stats->peers_); + } + local_id_stats = std::move(local_id); + dec_pending(); + } + + void got_peer_stats(std::vector> peer_pairs) { + for (auto &peer_pair : peer_pairs) { + auto &local_id_stats = local_id_stats_[peer_pair->local_id_]; + if (local_id_stats == nullptr) { + local_id_stats = create_tl_object(); + local_id_stats->short_id_ = peer_pair->local_id_; + } + local_id_stats->peers_.push_back(std::move(peer_pair)); + } + dec_pending(); + } + + void inc_pending() { + ++pending_; + } + + void dec_pending() { + CHECK(pending_ > 0); + --pending_; + if (pending_ == 0) { + auto stats = create_tl_object(); + stats->timestamp_ = td::Clocks::system(); + for (auto &[id, local_id_stats] : local_id_stats_) { + stats->local_ids_.push_back(std::move(local_id_stats)); + } + promise_.set_result(std::move(stats)); + stop(); + } + } + + private: + td::Promise> promise_; + size_t pending_ = 1; + + std::map> local_id_stats_; + }; + auto callback = td::actor::create_actor("adnlstats", std::move(promise)).release(); + + for (auto &[id, local_id] : local_ids_) { + td::actor::send_closure(callback, &Cb::inc_pending); + td::actor::send_closure(local_id.local_id, &AdnlLocalId::get_stats, + [id = id, callback](td::Result> R) { + if (R.is_error()) { + VLOG(ADNL_NOTICE) + << "failed to get stats for local id " << id << " : " << R.move_as_error(); + td::actor::send_closure(callback, &Cb::dec_pending); + } else { + td::actor::send_closure(callback, &Cb::got_local_id_stats, R.move_as_ok()); + } + }); + } + for (auto &[id, peer] : peers_) { + td::actor::send_closure(callback, &Cb::inc_pending); + td::actor::send_closure( + peer, &AdnlPeer::get_stats, + [id = id, callback](td::Result>> R) { + if (R.is_error()) { + VLOG(ADNL_NOTICE) << "failed to get stats for peer " << id << " : " << R.move_as_error(); + td::actor::send_closure(callback, &Cb::dec_pending); + } else { + td::actor::send_closure(callback, &Cb::got_peer_stats, R.move_as_ok()); + } + }); + } + td::actor::send_closure(callback, &Cb::dec_pending); +} + } // namespace adnl } // namespace ton diff --git a/adnl/adnl-peer-table.h b/adnl/adnl-peer-table.h index cb7da6135..055f32ac1 100644 --- a/adnl/adnl-peer-table.h +++ b/adnl/adnl-peer-table.h @@ -90,7 +90,7 @@ class AdnlPeerTable : public Adnl { virtual void answer_query(AdnlNodeIdShort src, AdnlNodeIdShort dst, AdnlQueryId query_id, td::BufferSlice data) = 0; virtual void receive_packet(td::IPAddress addr, AdnlCategoryMask cat_mask, td::BufferSlice data) = 0; - virtual void receive_decrypted_packet(AdnlNodeIdShort dst, AdnlPacket packet) = 0; + virtual void receive_decrypted_packet(AdnlNodeIdShort dst, AdnlPacket packet, td::uint64 serialized_size) = 0; virtual void send_message_in(AdnlNodeIdShort src, AdnlNodeIdShort dst, AdnlMessage message, td::uint32 flags) = 0; virtual void register_channel(AdnlChannelIdShort id, AdnlNodeIdShort local_id, diff --git a/adnl/adnl-peer-table.hpp b/adnl/adnl-peer-table.hpp index 1c30b84c7..12f64fcb2 100644 --- a/adnl/adnl-peer-table.hpp +++ b/adnl/adnl-peer-table.hpp @@ -44,7 +44,7 @@ class AdnlPeerTableImpl : public AdnlPeerTable { void add_static_nodes_from_config(AdnlNodesList nodes) override; void receive_packet(td::IPAddress addr, AdnlCategoryMask cat_mask, td::BufferSlice data) override; - void receive_decrypted_packet(AdnlNodeIdShort dst, AdnlPacket data) override; + void receive_decrypted_packet(AdnlNodeIdShort dst, AdnlPacket data, td::uint64 serialized_size) override; void send_message_in(AdnlNodeIdShort src, AdnlNodeIdShort dst, AdnlMessage message, td::uint32 flags) override; void send_message(AdnlNodeIdShort src, AdnlNodeIdShort dst, td::BufferSlice data) override { send_message_ex(src, dst, std::move(data), 0); @@ -108,6 +108,8 @@ class AdnlPeerTableImpl : public AdnlPeerTable { td::Promise, AdnlAddress>> promise) override; void get_conn_ip_str(AdnlNodeIdShort l_id, AdnlNodeIdShort p_id, td::Promise promise) override; + void get_stats(td::Promise> promise) override; + struct PrintId {}; PrintId print_id() const { return PrintId{}; diff --git a/adnl/adnl-peer.cpp b/adnl/adnl-peer.cpp index d82486fed..2b0077a8e 100644 --- a/adnl/adnl-peer.cpp +++ b/adnl/adnl-peer.cpp @@ -26,6 +26,7 @@ #include "td/utils/base64.h" #include "td/utils/Random.h" #include "auto/tl/ton_api.h" +#include "td/utils/overloaded.h" namespace ton { @@ -50,9 +51,13 @@ void AdnlPeerPairImpl::start_up() { } void AdnlPeerPairImpl::alarm() { - if (next_dht_query_at_ && next_dht_query_at_.is_in_past()) { - next_dht_query_at_ = td::Timestamp::never(); - discover(); + if (!disable_dht_query_) { + disable_dht_query_ = true; + if (next_dht_query_at_ && next_dht_query_at_.is_in_past()) { + next_dht_query_at_ = td::Timestamp::never(); + discover(); + } + alarm_timestamp().relax(next_dht_query_at_); } if (next_db_update_at_ && next_db_update_at_.is_in_past()) { if (received_from_db_ && received_from_static_nodes_ && !peer_id_.empty()) { @@ -68,11 +73,8 @@ void AdnlPeerPairImpl::alarm() { } if (retry_send_at_ && retry_send_at_.is_in_past()) { retry_send_at_ = td::Timestamp::never(); - auto messages = std::move(pending_messages_); - pending_messages_.clear(); - send_messages_in(std::move(messages), false); + send_messages_from_queue(); } - alarm_timestamp().relax(next_dht_query_at_); alarm_timestamp().relax(next_db_update_at_); alarm_timestamp().relax(retry_send_at_); } @@ -207,18 +209,24 @@ void AdnlPeerPairImpl::receive_packet_checked(AdnlPacket packet) { } } -void AdnlPeerPairImpl::receive_packet_from_channel(AdnlChannelIdShort id, AdnlPacket packet) { +void AdnlPeerPairImpl::receive_packet_from_channel(AdnlChannelIdShort id, AdnlPacket packet, + td::uint64 serialized_size) { + add_packet_stats(serialized_size, /* in = */ true, /* channel = */ true); if (id != channel_in_id_) { VLOG(ADNL_NOTICE) << this << ": dropping IN message: outdated channel id" << id; return; } - if (channel_inited_) { + if (channel_inited_ && !channel_ready_) { channel_ready_ = true; + if (!out_messages_queue_.empty()) { + td::actor::send_closure(actor_id(this), &AdnlPeerPairImpl::send_messages_from_queue); + } } receive_packet_checked(std::move(packet)); } -void AdnlPeerPairImpl::receive_packet(AdnlPacket packet) { +void AdnlPeerPairImpl::receive_packet(AdnlPacket packet, td::uint64 serialized_size) { + add_packet_stats(serialized_size, /* in = */ true, /* channel = */ false); packet.run_basic_checks().ensure(); if (!encryptor_) { @@ -239,132 +247,132 @@ void AdnlPeerPairImpl::deliver_message(AdnlMessage message) { message.visit([&](const auto &obj) { this->process_message(obj); }); } -void AdnlPeerPairImpl::send_messages_in(std::vector messages, bool allow_postpone) { - for (td::int32 idx = 0; idx < 2; idx++) { - std::vector not_sent; +void AdnlPeerPairImpl::send_messages_from_queue() { + while (!out_messages_queue_.empty() && out_messages_queue_.front().second.is_in_past()) { + out_messages_queue_total_size_ -= out_messages_queue_.front().first.size(); + add_expired_msg_stats(out_messages_queue_.front().first.size()); + out_messages_queue_.pop(); + VLOG(ADNL_NOTICE) << this << ": dropping OUT message: message in queue expired"; + } + if (out_messages_queue_.empty()) { + return; + } - auto connR = get_conn(idx == 1); - if (connR.is_error()) { - if (!allow_postpone) { - VLOG(ADNL_NOTICE) << this << ": dropping OUT messages: cannot get conn: " << connR.move_as_error(); - return; - } - VLOG(ADNL_INFO) << this << ": delaying OUT messages: cannot get conn: " << connR.move_as_error(); - if (!retry_send_at_) { - retry_send_at_.relax(td::Timestamp::in(10.0)); - alarm_timestamp().relax(retry_send_at_); - } - for (auto &m : messages) { - pending_messages_.push_back(std::move(m)); - } + auto connR = get_conn(); + if (connR.is_error()) { + disable_dht_query_ = false; + retry_send_at_.relax(td::Timestamp::in(message_in_queue_ttl_ - 1.0)); + alarm_timestamp().relax(retry_send_at_); + VLOG(ADNL_INFO) << this << ": delaying OUT messages: cannot get conn: " << connR.move_as_error(); + return; + } + disable_dht_query_ = true; + auto C = connR.move_as_ok(); + auto conn = std::move(C.first); + bool is_direct = C.second; + + bool first = !skip_init_packet_; + while (!out_messages_queue_.empty()) { + bool try_reinit = try_reinit_at_ && try_reinit_at_.is_in_past(); + bool via_channel = channel_ready_ && !try_reinit; + if (!via_channel && !nochannel_rate_limiter_.take()) { + alarm_timestamp().relax(retry_send_at_ = nochannel_rate_limiter_.ready_at()); return; } - auto C = connR.move_as_ok(); - bool is_direct = C.second; - auto conn = std::move(C.first); - if (idx == 1) { - CHECK(is_direct); + if (try_reinit) { + try_reinit_at_ = td::Timestamp::in(td::Random::fast(0.5, 1.5)); } + respond_with_nop_after_ = td::Timestamp::in(td::Random::fast(1.0, 2.0)); - size_t ptr = 0; - bool first = true; - do { - respond_with_nop_after_ = td::Timestamp::in(td::Random::fast(1.0, 2.0)); - bool try_reinit = try_reinit_at_ && try_reinit_at_.is_in_past(); - if (try_reinit) { - try_reinit_at_ = td::Timestamp::in(td::Random::fast(0.5, 1.5)); - } - bool via_channel = channel_ready_ && !try_reinit; - size_t s = (via_channel ? channel_packet_header_max_size() : packet_header_max_size()); - if (first) { - s += 2 * addr_list_max_size(); - } - - AdnlPacket packet; - packet.set_seqno(++out_seqno_); - packet.set_confirm_seqno(in_seqno_); + size_t s = (via_channel ? channel_packet_header_max_size() : packet_header_max_size()); + if (first) { + s += 2 * addr_list_max_size(); + } - if (first) { - if (!channel_inited_) { - auto M = adnlmessage::AdnlMessageCreateChannel{channel_pub_, channel_pk_date_}; - s += M.size(); - packet.add_message(std::move(M)); - } else if (!channel_ready_) { - auto M = adnlmessage::AdnlMessageConfirmChannel{channel_pub_, peer_channel_pub_, channel_pk_date_}; - s += M.size(); - packet.add_message(std::move(M)); - } + AdnlPacket packet; + packet.set_seqno(++out_seqno_); + packet.set_confirm_seqno(in_seqno_); + + if (first) { + if (!channel_inited_) { + auto M = adnlmessage::AdnlMessageCreateChannel{channel_pub_, channel_pk_date_}; + s += M.size(); + packet.add_message(std::move(M)); + } else if (!channel_ready_) { + auto M = adnlmessage::AdnlMessageConfirmChannel{channel_pub_, peer_channel_pub_, channel_pk_date_}; + s += M.size(); + packet.add_message(std::move(M)); } + } - if (!addr_list_.empty()) { - packet.set_received_addr_list_version(addr_list_.version()); + if (!addr_list_.empty()) { + packet.set_received_addr_list_version(addr_list_.version()); + } + if (!priority_addr_list_.empty()) { + packet.set_received_priority_addr_list_version(priority_addr_list_.version()); + } + + skip_init_packet_ = true; + while (!out_messages_queue_.empty()) { + auto &M = out_messages_queue_.front().first; + if (!is_direct && (M.flags() & Adnl::SendFlags::direct_only)) { + out_messages_queue_total_size_ -= M.size(); + out_messages_queue_.pop(); + continue; } - if (!priority_addr_list_.empty()) { - packet.set_received_priority_addr_list_version(priority_addr_list_.version()); + CHECK(M.size() <= get_mtu()); + if (s + M.size() <= AdnlNetworkManager::get_mtu()) { + s += M.size(); + out_messages_queue_total_size_ -= M.size(); + packet.add_message(M.release()); + out_messages_queue_.pop(); + skip_init_packet_ = false; + } else { + break; } + } + + if (!via_channel) { + packet.set_reinit_date(Adnl::adnl_start_time(), reinit_date_); + packet.set_source(local_id_); + } - while (ptr < messages.size()) { - auto &M = messages[ptr]; - if (!is_direct && (M.flags() & Adnl::SendFlags::direct_only)) { - not_sent.push_back(std::move(M)); - continue; + if (!first) { + if (!channel_inited_) { + auto M = adnlmessage::AdnlMessageCreateChannel{channel_pub_, channel_pk_date_}; + if (s + M.size() <= AdnlNetworkManager::get_mtu()) { + s += M.size(); + packet.add_message(std::move(M)); } - CHECK(M.size() <= get_mtu()); + } else if (!channel_ready_) { + auto M = adnlmessage::AdnlMessageConfirmChannel{channel_pub_, peer_channel_pub_, channel_pk_date_}; if (s + M.size() <= AdnlNetworkManager::get_mtu()) { s += M.size(); - packet.add_message(M.release()); - ptr++; - } else { - break; + packet.add_message(std::move(M)); } } + } - if (!via_channel) { - packet.set_reinit_date(Adnl::adnl_start_time(), reinit_date_); - packet.set_source(local_id_); - } - - if (!first) { - if (!channel_inited_) { - auto M = adnlmessage::AdnlMessageCreateChannel{channel_pub_, channel_pk_date_}; - if (s + M.size() <= AdnlNetworkManager::get_mtu()) { - s += M.size(); - packet.add_message(std::move(M)); - } - } else if (!channel_ready_) { - auto M = adnlmessage::AdnlMessageConfirmChannel{channel_pub_, peer_channel_pub_, channel_pk_date_}; - if (s + M.size() <= AdnlNetworkManager::get_mtu()) { - s += M.size(); - packet.add_message(std::move(M)); - } - } + packet.run_basic_checks().ensure(); + auto P = td::PromiseCreator::lambda([SelfId = actor_id(this), conn, id = print_id(), + via_channel](td::Result res) { + if (res.is_error()) { + LOG(ERROR) << id << ": dropping OUT message: error while creating packet: " << res.move_as_error(); + } else { + td::actor::send_closure(SelfId, &AdnlPeerPairImpl::send_packet_continue, res.move_as_ok(), conn, via_channel); } - - packet.run_basic_checks().ensure(); - auto P = td::PromiseCreator::lambda([SelfId = actor_id(this), conn, id = print_id(), - via_channel](td::Result res) { - if (res.is_error()) { - LOG(ERROR) << id << ": dropping OUT message: error while creating packet: " << res.move_as_error(); - } else { - td::actor::send_closure(SelfId, &AdnlPeerPairImpl::send_packet_continue, res.move_as_ok(), conn, via_channel); - } - }); - - td::actor::send_closure(local_actor_, &AdnlLocalId::update_packet, std::move(packet), - (!channel_ready_ && ack_seqno_ == 0 && in_seqno_ == 0) || try_reinit, !via_channel, - (first || s + addr_list_max_size() <= AdnlNetworkManager::get_mtu()) - ? (try_reinit ? 0 : peer_recv_addr_list_version_) - : 0x7fffffff, - (first || s + 2 * addr_list_max_size() <= AdnlNetworkManager::get_mtu()) - ? peer_recv_priority_addr_list_version_ - : 0x7fffffff, - std::move(P)); - first = false; - } while (ptr < messages.size()); - messages = std::move(not_sent); - if (!messages.size()) { - break; - } + }); + + td::actor::send_closure(local_actor_, &AdnlLocalId::update_packet, std::move(packet), + (!channel_ready_ && ack_seqno_ == 0 && in_seqno_ == 0) || try_reinit, !via_channel, + (first || s + addr_list_max_size() <= AdnlNetworkManager::get_mtu()) + ? (try_reinit ? 0 : peer_recv_addr_list_version_) + : 0x7fffffff, + (first || s + 2 * addr_list_max_size() <= AdnlNetworkManager::get_mtu()) + ? peer_recv_priority_addr_list_version_ + : 0x7fffffff, + std::move(P)); + first = false; } } @@ -395,7 +403,11 @@ void AdnlPeerPairImpl::send_messages(std::vector messages) } } } - send_messages_in(std::move(new_vec), true); + for (auto &m : new_vec) { + out_messages_queue_total_size_ += m.size(); + out_messages_queue_.emplace(std::move(m), td::Timestamp::in(message_in_queue_ttl_)); + } + send_messages_from_queue(); } void AdnlPeerPairImpl::send_packet_continue(AdnlPacket packet, td::actor::ActorId conn, @@ -407,6 +419,7 @@ void AdnlPeerPairImpl::send_packet_continue(AdnlPacket packet, td::actor::ActorI auto B = serialize_tl_object(packet.tl(), true); if (via_channel) { if (channel_ready_) { + add_packet_stats(B.size(), /* in = */ false, /* channel = */ true); td::actor::send_closure(channel_, &AdnlChannel::send_message, priority_, conn, std::move(B)); } else { VLOG(ADNL_WARNING) << this << ": dropping OUT message [" << local_id_ << "->" << peer_id_short_ @@ -434,6 +447,7 @@ void AdnlPeerPairImpl::send_packet_continue(AdnlPacket packet, td::actor::ActorI S.remove_prefix(32); S.copy_from(X.as_slice()); + add_packet_stats(B.size(), /* in = */ false, /* channel = */ false); td::actor::send_closure(conn, &AdnlNetworkConnection::send, local_id_, peer_id_short_, priority_, std::move(enc)); } @@ -520,7 +534,10 @@ void AdnlPeerPairImpl::process_message(const adnlmessage::AdnlMessageConfirmChan VLOG(ADNL_NOTICE) << this << ": received adnl.message.confirmChannel with old key"; return; } - channel_ready_ = true; + if (!channel_ready_) { + channel_ready_ = true; + send_messages_from_queue(); + } } void AdnlPeerPairImpl::process_message(const adnlmessage::AdnlMessageCustom &message) { @@ -674,7 +691,7 @@ void AdnlPeerPairImpl::reinit(td::int32 date) { } } -td::Result, bool>> AdnlPeerPairImpl::get_conn(bool direct_only) { +td::Result, bool>> AdnlPeerPairImpl::get_conn() { if (!priority_addr_list_.empty() && priority_addr_list_.expire_at() < td::Clocks::system()) { priority_addr_list_ = AdnlAddressList{}; priority_conns_.clear(); @@ -692,14 +709,18 @@ td::Result, bool>> AdnlPeerP } } - for (auto &conn : priority_conns_) { - if (conn.ready() && (!direct_only || conn.is_direct())) { - return std::make_pair(conn.conn.get(), conn.is_direct()); + for (int direct_only = 1; direct_only >= 0; --direct_only) { + for (auto &conn : priority_conns_) { + if (conn.ready() && (!direct_only || conn.is_direct())) { + return std::make_pair(conn.conn.get(), conn.is_direct()); + } } } - for (auto &conn : conns_) { - if (conn.ready() && (!direct_only || conn.is_direct())) { - return std::make_pair(conn.conn.get(), conn.is_direct()); + for (int direct_only = 1; direct_only >= 0; --direct_only) { + for (auto &conn : conns_) { + if (conn.ready() && (!direct_only || conn.is_direct())) { + return std::make_pair(conn.conn.get(), conn.is_direct()); + } } } return td::Status::Error(ErrorCode::notready, "no active connections"); @@ -787,6 +808,47 @@ void AdnlPeerPairImpl::get_conn_ip_str(td::Promise promise) { promise.set_value("undefined"); } +void AdnlPeerPairImpl::get_stats(td::Promise> promise) { + auto stats = create_tl_object(); + stats->local_id_ = local_id_.bits256_value(); + stats->peer_id_ = peer_id_short_.bits256_value(); + for (const AdnlAddress &addr : addr_list_.addrs()) { + ton_api::downcast_call(*addr->tl(), td::overloaded( + [&](const ton_api::adnl_address_udp &obj) { + stats->ip_str_ = PSTRING() << td::IPAddress::ipv4_to_str(obj.ip_) << ":" + << obj.port_; + }, + [&](const auto &) {})); + if (!stats->ip_str_.empty()) { + break; + } + } + + prepare_packet_stats(); + stats->last_in_packet_ts_ = last_in_packet_ts_; + stats->last_out_packet_ts_ = last_out_packet_ts_; + stats->packets_total_ = packet_stats_total_.tl(); + stats->packets_total_->ts_start_ = started_ts_; + stats->packets_total_->ts_end_ = td::Clocks::system(); + stats->packets_recent_ = packet_stats_prev_.tl(); + + if (channel_ready_) { + stats->channel_status_ = 2; + } else if (channel_inited_) { + stats->channel_status_ = 1; + } else { + stats->channel_status_ = 0; + } + stats->try_reinit_at_ = (try_reinit_at_ ? try_reinit_at_.at_unix() : 0.0); + stats->connection_ready_ = + std::any_of(conns_.begin(), conns_.end(), [](const Conn &conn) { return conn.ready(); }) || + std::any_of(priority_conns_.begin(), priority_conns_.end(), [](const Conn &conn) { return conn.ready(); }); + stats->out_queue_messages_ = out_messages_queue_.size(); + stats->out_queue_bytes_ = out_messages_queue_total_size_; + + promise.set_result(std::move(stats)); +} + void AdnlPeerImpl::update_id(AdnlNodeIdFull id) { CHECK(id.compute_short_id() == peer_id_short_); if (!peer_id_.empty()) { @@ -810,10 +872,8 @@ void AdnlPeerPairImpl::Conn::create_conn(td::actor::ActorId pe void AdnlPeerPairImpl::conn_change_state(AdnlConnectionIdShort id, bool ready) { if (ready) { - if (pending_messages_.size() > 0) { - auto messages = std::move(pending_messages_); - pending_messages_.clear(); - send_messages_in(std::move(messages), true); + if (out_messages_queue_.empty()) { + send_messages_from_queue(); } } } @@ -835,7 +895,7 @@ td::actor::ActorOwn AdnlPeer::create(td::actor::ActorId dst_actor, - AdnlPacket packet) { + AdnlPacket packet, td::uint64 serialized_size) { if (packet.inited_from()) { update_id(packet.from()); } @@ -853,7 +913,7 @@ void AdnlPeerImpl::receive_packet(AdnlNodeIdShort dst, td::uint32 dst_mode, td:: } } - td::actor::send_closure(it->second.get(), &AdnlPeerPair::receive_packet, std::move(packet)); + td::actor::send_closure(it->second.get(), &AdnlPeerPair::receive_packet, std::move(packet), serialized_size); } void AdnlPeerImpl::send_messages(AdnlNodeIdShort src, td::uint32 src_mode, td::actor::ActorId src_actor, @@ -933,6 +993,56 @@ void AdnlPeerImpl::update_addr_list(AdnlNodeIdShort local_id, td::uint32 local_m td::actor::send_closure(it->second, &AdnlPeerPair::update_addr_list, std::move(addr_list)); } +void AdnlPeerImpl::get_stats(td::Promise>> promise) { + class Cb : public td::actor::Actor { + public: + explicit Cb(td::Promise>> promise) + : promise_(std::move(promise)) { + } + + void got_peer_pair_stats(tl_object_ptr peer_pair) { + result_.push_back(std::move(peer_pair)); + dec_pending(); + } + + void inc_pending() { + ++pending_; + } + + void dec_pending() { + CHECK(pending_ > 0); + --pending_; + if (pending_ == 0) { + promise_.set_result(std::move(result_)); + stop(); + } + } + + private: + td::Promise>> promise_; + size_t pending_ = 1; + std::vector> result_; + }; + auto callback = td::actor::create_actor("adnlpeerstats", std::move(promise)).release(); + + for (auto &[local_id, peer_pair] : peer_pairs_) { + td::actor::send_closure(callback, &Cb::inc_pending); + td::actor::send_closure(peer_pair, &AdnlPeerPair::get_stats, + [local_id = local_id, peer_id = peer_id_short_, + callback](td::Result> R) { + if (R.is_error()) { + VLOG(ADNL_NOTICE) << "failed to get stats for peer pair " << peer_id << "->" << local_id + << " : " << R.move_as_error(); + td::actor::send_closure(callback, &Cb::dec_pending); + } else { + td::actor::send_closure(callback, &Cb::got_peer_pair_stats, R.move_as_ok()); + } + }); + } + td::actor::send_closure(callback, &Cb::dec_pending); +} + + void AdnlPeerPairImpl::got_data_from_db(td::Result R) { received_from_db_ = false; if (R.is_error()) { @@ -1016,6 +1126,66 @@ void AdnlPeerPairImpl::request_reverse_ping_result(td::Result R) { } } +void AdnlPeerPairImpl::add_packet_stats(td::uint64 bytes, bool in, bool channel) { + prepare_packet_stats(); + auto add_stats = [&](PacketStats &stats) { + if (in) { + ++stats.in_packets; + stats.in_bytes += bytes; + if (channel) { + ++stats.in_packets_channel; + stats.in_bytes_channel += bytes; + } + } else { + ++stats.out_packets; + stats.out_bytes += bytes; + if (channel) { + ++stats.out_packets_channel; + stats.out_bytes_channel += bytes; + } + } + }; + add_stats(packet_stats_cur_); + add_stats(packet_stats_total_); + if (in) { + last_in_packet_ts_ = td::Clocks::system(); + } else { + last_out_packet_ts_ = td::Clocks::system(); + } +} + +void AdnlPeerPairImpl::add_expired_msg_stats(td::uint64 bytes) { + prepare_packet_stats(); + auto add_stats = [&](PacketStats &stats) { + ++stats.out_expired_messages; + stats.out_expired_bytes += bytes; + }; + add_stats(packet_stats_cur_); + add_stats(packet_stats_total_); +} + +void AdnlPeerPairImpl::prepare_packet_stats() { + double now = td::Clocks::system(); + if (now >= packet_stats_cur_.ts_end) { + packet_stats_prev_ = std::move(packet_stats_cur_); + packet_stats_cur_ = {}; + auto now_int = (int)now; + packet_stats_cur_.ts_start = (double)(now_int / 60 * 60); + packet_stats_cur_.ts_end = packet_stats_cur_.ts_start + 60.0; + if (packet_stats_prev_.ts_end < now - 60.0) { + packet_stats_prev_ = {}; + packet_stats_prev_.ts_end = packet_stats_cur_.ts_start; + packet_stats_prev_.ts_start = packet_stats_prev_.ts_end - 60.0; + } + } +} + +tl_object_ptr AdnlPeerPairImpl::PacketStats::tl() const { + return create_tl_object(ts_start, ts_end, in_packets, in_bytes, in_packets_channel, + in_bytes_channel, out_packets, out_bytes, out_packets_channel, + out_bytes_channel, out_expired_messages, out_expired_bytes); +} + } // namespace adnl } // namespace ton diff --git a/adnl/adnl-peer.h b/adnl/adnl-peer.h index 8488e82ee..b7d6adc0f 100644 --- a/adnl/adnl-peer.h +++ b/adnl/adnl-peer.h @@ -39,9 +39,9 @@ class AdnlPeer; class AdnlPeerPair : public td::actor::Actor { public: - virtual void receive_packet_from_channel(AdnlChannelIdShort id, AdnlPacket packet) = 0; + virtual void receive_packet_from_channel(AdnlChannelIdShort id, AdnlPacket packet, td::uint64 serialized_size) = 0; virtual void receive_packet_checked(AdnlPacket packet) = 0; - virtual void receive_packet(AdnlPacket packet) = 0; + virtual void receive_packet(AdnlPacket packet, td::uint64 serialized_size) = 0; virtual void send_messages(std::vector message) = 0; inline void send_message(OutboundAdnlMessage message) { @@ -59,6 +59,7 @@ class AdnlPeerPair : public td::actor::Actor { virtual void update_peer_id(AdnlNodeIdFull id) = 0; virtual void update_addr_list(AdnlAddressList addr_list) = 0; virtual void get_conn_ip_str(td::Promise promise) = 0; + virtual void get_stats(td::Promise> promise) = 0; static td::actor::ActorOwn create(td::actor::ActorId network_manager, td::actor::ActorId peer_table, td::uint32 local_mode, @@ -71,7 +72,7 @@ class AdnlPeerPair : public td::actor::Actor { class AdnlPeer : public td::actor::Actor { public: virtual void receive_packet(AdnlNodeIdShort dst, td::uint32 dst_mode, td::actor::ActorId dst_actor, - AdnlPacket message) = 0; + AdnlPacket message, td::uint64 serialized_size) = 0; virtual void send_messages(AdnlNodeIdShort src, td::uint32 src_mode, td::actor::ActorId src_actor, std::vector messages) = 0; virtual void send_query(AdnlNodeIdShort src, td::uint32 src_mode, td::actor::ActorId src_actor, @@ -100,6 +101,7 @@ class AdnlPeer : public td::actor::Actor { td::actor::ActorId local_actor, AdnlAddressList addr_list) = 0; virtual void update_dht_node(td::actor::ActorId dht_node) = 0; virtual void get_conn_ip_str(AdnlNodeIdShort l_id, td::Promise promise) = 0; + virtual void get_stats(td::Promise>> promise) = 0; }; } // namespace adnl diff --git a/adnl/adnl-peer.hpp b/adnl/adnl-peer.hpp index 40c9eb088..d25a24cf4 100644 --- a/adnl/adnl-peer.hpp +++ b/adnl/adnl-peer.hpp @@ -20,6 +20,7 @@ #include #include +#include #include "adnl-peer.h" #include "adnl-peer-table.h" @@ -66,12 +67,12 @@ class AdnlPeerPairImpl : public AdnlPeerPair { void discover(); - void receive_packet_from_channel(AdnlChannelIdShort id, AdnlPacket packet) override; + void receive_packet_from_channel(AdnlChannelIdShort id, AdnlPacket packet, td::uint64 serialized_size) override; void receive_packet_checked(AdnlPacket packet) override; - void receive_packet(AdnlPacket packet) override; + void receive_packet(AdnlPacket packet, td::uint64 serialized_size) override; void deliver_message(AdnlMessage message); - void send_messages_in(std::vector messages, bool allow_postpone); + void send_messages_from_queue(); void send_messages(std::vector messages) override; void send_packet_continue(AdnlPacket packet, td::actor::ActorId conn, bool via_channel); void send_query(std::string name, td::Promise promise, td::Timestamp timeout, td::BufferSlice data, @@ -89,6 +90,7 @@ class AdnlPeerPairImpl : public AdnlPeerPair { void update_peer_id(AdnlNodeIdFull id) override; void get_conn_ip_str(td::Promise promise) override; + void get_stats(td::Promise> promise) override; void got_data_from_db(td::Result R); void got_data_from_static_nodes(td::Result R); @@ -124,7 +126,7 @@ class AdnlPeerPairImpl : public AdnlPeerPair { private: void respond_with_nop(); void reinit(td::int32 date); - td::Result, bool>> get_conn(bool direct_only); + td::Result, bool>> get_conn(); void create_channel(pubkeys::Ed25519 pub, td::uint32 date); bool received_packet(td::uint64 seqno) const { @@ -183,11 +185,11 @@ class AdnlPeerPairImpl : public AdnlPeerPair { Conn() { } - bool ready() { + bool ready() const { return !conn.empty() && conn.get_actor_unsafe().is_active(); } - bool is_direct() { + bool is_direct() const { return addr->is_public(); } @@ -195,7 +197,14 @@ class AdnlPeerPairImpl : public AdnlPeerPair { td::actor::ActorId adnl); }; - std::vector pending_messages_; + // Messages waiting for connection or for nochannel rate limiter + std::queue> out_messages_queue_; + td::uint64 out_messages_queue_total_size_ = 0; + RateLimiter nochannel_rate_limiter_ = RateLimiter(50, 0.5); // max 50, period = 0.5s + td::Timestamp retry_send_at_ = td::Timestamp::never(); + bool disable_dht_query_ = false; + bool skip_init_packet_ = false; + double message_in_queue_ttl_ = 10.0; td::actor::ActorId network_manager_; td::actor::ActorId peer_table_; @@ -254,7 +263,6 @@ class AdnlPeerPairImpl : public AdnlPeerPair { td::Timestamp next_dht_query_at_ = td::Timestamp::never(); td::Timestamp next_db_update_at_ = td::Timestamp::never(); - td::Timestamp retry_send_at_ = td::Timestamp::never(); td::Timestamp last_received_packet_ = td::Timestamp::never(); td::Timestamp try_reinit_at_ = td::Timestamp::never(); @@ -262,12 +270,26 @@ class AdnlPeerPairImpl : public AdnlPeerPair { bool has_reverse_addr_ = false; td::Timestamp request_reverse_ping_after_ = td::Timestamp::now(); bool request_reverse_ping_active_ = false; + + struct PacketStats { + double ts_start = 0.0, ts_end = 0.0; + td::uint64 in_packets = 0, in_bytes = 0, in_packets_channel = 0, in_bytes_channel = 0; + td::uint64 out_packets = 0, out_bytes = 0, out_packets_channel = 0, out_bytes_channel = 0; + td::uint64 out_expired_messages = 0, out_expired_bytes = 0; + + tl_object_ptr tl() const; + } packet_stats_cur_, packet_stats_prev_, packet_stats_total_; + double last_in_packet_ts_ = 0.0, last_out_packet_ts_ = 0.0; + double started_ts_ = td::Clocks::system(); + void add_packet_stats(td::uint64 bytes, bool in, bool channel); + void add_expired_msg_stats(td::uint64 bytes); + void prepare_packet_stats(); }; class AdnlPeerImpl : public AdnlPeer { public: void receive_packet(AdnlNodeIdShort dst, td::uint32 dst_mode, td::actor::ActorId dst_actor, - AdnlPacket packet) override; + AdnlPacket packet, td::uint64 serialized_size) override; void send_messages(AdnlNodeIdShort src, td::uint32 src_mode, td::actor::ActorId src_actor, std::vector messages) override; void send_query(AdnlNodeIdShort src, td::uint32 src_mode, td::actor::ActorId src_actor, std::string name, @@ -280,6 +302,7 @@ class AdnlPeerImpl : public AdnlPeer { AdnlAddressList addr_list) override; void update_dht_node(td::actor::ActorId dht_node) override; void get_conn_ip_str(AdnlNodeIdShort l_id, td::Promise promise) override; + void get_stats(td::Promise>> promise) override; //void check_signature(td::BufferSlice data, td::BufferSlice signature, td::Promise promise) override; AdnlPeerImpl(td::actor::ActorId network_manager, td::actor::ActorId peer_table, diff --git a/adnl/adnl.h b/adnl/adnl.h index a1c39d5e4..a276e0c21 100644 --- a/adnl/adnl.h +++ b/adnl/adnl.h @@ -121,6 +121,8 @@ class Adnl : public AdnlSenderInterface { virtual void create_tunnel(AdnlNodeIdShort dst, td::uint32 size, td::Promise, AdnlAddress>> promise) = 0; + virtual void get_stats(td::Promise> promise) = 0; + static td::actor::ActorOwn create(std::string db, td::actor::ActorId keyring); static std::string int_to_bytestring(td::int32 id) { diff --git a/adnl/utils.hpp b/adnl/utils.hpp index 50aec2efb..18d3f2074 100644 --- a/adnl/utils.hpp +++ b/adnl/utils.hpp @@ -40,6 +40,40 @@ inline bool adnl_node_is_older(AdnlNode &a, AdnlNode &b) { return a.addr_list().version() < b.addr_list().version(); } +class RateLimiter { +public: + explicit RateLimiter(td::uint32 capacity, double period) : capacity_(capacity), period_(period), remaining_(capacity) { + } + + bool take() { + while (remaining_ < capacity_ && increment_at_.is_in_past()) { + ++remaining_; + increment_at_ += period_; + } + if (remaining_) { + --remaining_; + if (increment_at_.is_in_past()) { + increment_at_ = td::Timestamp::in(period_); + } + return true; + } + return false; + } + + td::Timestamp ready_at() const { + if (remaining_) { + return td::Timestamp::now(); + } + return increment_at_; + } + +private: + td::uint32 capacity_; + double period_; + td::uint32 remaining_; + td::Timestamp increment_at_ = td::Timestamp::never(); +}; + } // namespace adnl } // namespace ton diff --git a/tl/generate/scheme/ton_api.tl b/tl/generate/scheme/ton_api.tl index bf919b0fd..e15e18612 100644 --- a/tl/generate/scheme/ton_api.tl +++ b/tl/generate/scheme/ton_api.tl @@ -144,7 +144,26 @@ adnl.message.part hash:int256 total_size:int offset:int data:bytes = adnl.Messag ---types--- adnl.db.node.key local_id:int256 peer_id:int256 = adnl.db.Key; -adnl.db.node.value date:int id:PublicKey addr_list:adnl.addressList priority_addr_list:adnl.addressList = adnl.db.node.Value; +adnl.db.node.value date:int id:PublicKey addr_list:adnl.addressList priority_addr_list:adnl.addressList = adnl.db.node.Value; + +adnl.stats.packets ts_start:double ts_end:double + in_packets:long in_bytes:long in_packets_channel:long in_bytes_channel:long + out_packets:long out_bytes:long out_packets_channel:long out_bytes_channel:long + out_expired_messages:long out_expired_bytes:long = adnl.stats.Packets; +adnl.stats.peerPair local_id:int256 peer_id:int256 ip_str:string + packets_recent:adnl.stats.packets packets_total:adnl.stats.packets + last_out_packet_ts:double last_in_packet_ts:double + connection_ready:Bool channel_status:int try_reinit_at:double + out_queue_messages:long out_queue_bytes:long + = adnl.stats.PeerPair; +adnl.stats.ipPackets ip_str:string packets:long = adnl.stats.IpPackets; +adnl.stats.localIdPackets ts_start:double ts_end:double + decrypted_packets:(vector adnl.stats.ipPackets) dropped_packets:(vector adnl.stats.ipPackets) = adnl.stats.LocalIdPackets; +adnl.stats.localId short_id:int256 + current_decrypt:(vector adnl.stats.ipPackets) + packets_recent:adnl.stats.localIdPackets packets_total:adnl.stats.localIdPackets + peers:(vector adnl.stats.peerPair) = adnl.stats.LocalId; +adnl.stats timestamp:double local_ids:(vector adnl.stats.localId) = adnl.Stats; ---functions--- @@ -723,6 +742,8 @@ engine.validator.setStateSerializerEnabled enabled:Bool = engine.validator.Succe engine.validator.setCollatorOptionsJson json:string = engine.validator.Success; engine.validator.getCollatorOptionsJson = engine.validator.JsonConfig; +engine.validator.getAdnlStats = adnl.Stats; + ---types--- storage.pong = storage.Pong; diff --git a/tl/generate/scheme/ton_api.tlo b/tl/generate/scheme/ton_api.tlo index 337dd071e3f746cd822236e2dcb63c194b379de4..3d39734ef92df8a772a892713ffbe0493d8a4015 100644 GIT binary patch delta 1339 zcmZ`(O=uHA7~R#RjcJ>vNlLT%Ypa$b)|R3m_)`!IQd%XVQYwXIyF1umy1VYqiUBWz zcj@riyDxU57KO)n3Uk#vDUWMTD%P zQnD5vr)w|69|uKJTRE__Gu7bhk`rKE9mtErq*v`v138iDvyL|nM2_BA2Sjaj^7{6^ zps*4bMkk>6=2>E58`-hZA89t3;<%F=ffhIIRUtYZbfD}xAV3|q2vczO^M^eVd)BZp zIWd7f9EtMn=rUnJ%Qs9y!uG z)7=vk*sfWIY1T0|xNz1F;ymG8KhI!pm;nORfnV!C_(d2$;pS4hC0f!MF`Eqi%tYvN zrh*Yo^owj~qS2JqSw+_oHIKr}=8{5izyv7=823!22e+ZwNR9Zt@7itFe0Zu(-DGH|DI$D*ylh delta 64 zcmeBq!n)%UEAOM(`c@23z_pS0I?H4p);F8KuszUY?A?6TbiUGNmdFa`%^}G@PBAuY Q_tj%OF1kI&g>i!}0NGC$ZU6uP diff --git a/validator-engine-console/validator-engine-console-query.cpp b/validator-engine-console/validator-engine-console-query.cpp index 41721ab96..372fa8121 100644 --- a/validator-engine-console/validator-engine-console-query.cpp +++ b/validator-engine-console/validator-engine-console-query.cpp @@ -1263,3 +1263,166 @@ td::Status GetCollatorOptionsJsonQuery::receive(td::BufferSlice data) { td::TerminalIO::out() << "saved config to " << file_name_ << "\n"; return td::Status::OK(); } + +td::Status GetAdnlStatsJsonQuery::run() { + TRY_RESULT_ASSIGN(file_name_, tokenizer_.get_token()); + TRY_STATUS(tokenizer_.check_endl()); + return td::Status::OK(); +} + +td::Status GetAdnlStatsJsonQuery::send() { + auto b = + ton::create_serialize_tl_object(); + td::actor::send_closure(console_, &ValidatorEngineConsole::envelope_send_query, std::move(b), create_promise()); + return td::Status::OK(); +} + +td::Status GetAdnlStatsJsonQuery::receive(td::BufferSlice data) { + TRY_RESULT_PREFIX(f, ton::fetch_tl_object(data.as_slice(), true), + "received incorrect answer: "); + auto s = td::json_encode(td::ToJson(*f), true); + TRY_STATUS(td::write_file(file_name_, s)); + td::TerminalIO::out() << "saved adnl stats to " << file_name_ << "\n"; + return td::Status::OK(); +} + +td::Status GetAdnlStatsQuery::run() { + TRY_STATUS(tokenizer_.check_endl()); + return td::Status::OK(); +} + +td::Status GetAdnlStatsQuery::send() { + auto b = + ton::create_serialize_tl_object(); + td::actor::send_closure(console_, &ValidatorEngineConsole::envelope_send_query, std::move(b), create_promise()); + return td::Status::OK(); +} + +td::Status GetAdnlStatsQuery::receive(td::BufferSlice data) { + TRY_RESULT_PREFIX(stats, ton::fetch_tl_object(data.as_slice(), true), + "received incorrect answer: "); + td::StringBuilder sb; + sb << "================================= ADNL STATS =================================\n"; + bool first = true; + double now = td::Clocks::system(); + for (auto &local_id : stats->local_ids_) { + if (first) { + first = false; + } else { + sb << "\n"; + } + sb << "LOCAL ID " << local_id->short_id_ << "\n"; + if (!local_id->current_decrypt_.empty()) { + std::sort(local_id->current_decrypt_.begin(), local_id->current_decrypt_.end(), + [](const ton::tl_object_ptr &a, + const ton::tl_object_ptr &b) { + return a->packets_ > b->packets_; + }); + td::uint64 total = 0; + for (auto &x : local_id->current_decrypt_) { + total += x->packets_; + } + sb << " Packets in decryptor: total=" << total; + for (auto &x : local_id->current_decrypt_) { + sb << " " << (x->ip_str_.empty() ? "unknown" : x->ip_str_) << "=" << x->packets_; + } + sb << "\n"; + } + auto print_local_id_packets = [&](const std::string &name, + std::vector> &vec) { + if (vec.empty()) { + return; + } + std::sort(vec.begin(), vec.end(), + [](const ton::tl_object_ptr &a, + const ton::tl_object_ptr &b) { + return a->packets_ > b->packets_; + }); + td::uint64 total = 0; + for (auto &x : vec) { + total += x->packets_; + } + sb << " " << name << ": total=" << total; + int cnt = 0; + for (auto &x : vec) { + ++cnt; + if (cnt >= 8) { + sb << " ..."; + break; + } + sb << " " << (x->ip_str_.empty() ? "unknown" : x->ip_str_) << "=" << x->packets_; + } + sb << "\n"; + }; + print_local_id_packets("Decrypted packets (recent)", local_id->packets_recent_->decrypted_packets_); + print_local_id_packets("Dropped packets (recent)", local_id->packets_recent_->dropped_packets_); + print_local_id_packets("Decrypted packets (total)", local_id->packets_total_->decrypted_packets_); + print_local_id_packets("Dropped packets (total)", local_id->packets_total_->dropped_packets_); + sb << " PEERS (" << local_id->peers_.size() << "):\n"; + std::sort(local_id->peers_.begin(), local_id->peers_.end(), + [](const ton::tl_object_ptr &a, + const ton::tl_object_ptr &b) { + return a->packets_recent_->in_bytes_ + a->packets_recent_->out_bytes_ > + b->packets_recent_->in_bytes_ + b->packets_recent_->out_bytes_; + }); + for (auto &peer : local_id->peers_) { + sb << " PEER " << peer->peer_id_ << "\n"; + sb << " Address: " << (peer->ip_str_.empty() ? "unknown" : peer->ip_str_) << "\n"; + sb << " Connection " << (peer->connection_ready_ ? "ready" : "not ready") << ", "; + switch (peer->channel_status_) { + case 0: + sb << "channel: none\n"; + break; + case 1: + sb << "channel: inited\n"; + break; + case 2: + sb << "channel: ready\n"; + break; + default: + sb << "\n"; + } + + auto print_packets = [&](const std::string &name, + const ton::tl_object_ptr &obj) { + if (obj->in_packets_) { + sb << " In (" << name << "): " << obj->in_packets_ << " packets (" + << td::format::as_size(obj->in_bytes_) << "), channel: " << obj->in_packets_channel_ << " packets (" + << td::format::as_size(obj->in_bytes_channel_) << ")\n"; + } + if (obj->out_packets_) { + sb << " Out (" << name << "): " << obj->out_packets_ << " packets (" + << td::format::as_size(obj->out_bytes_) << "), channel: " << obj->out_packets_channel_ << " packets (" + << td::format::as_size(obj->out_bytes_channel_) << ")\n"; + } + if (obj->out_expired_messages_) { + sb << " Out expired (" << name << "): " << obj->out_expired_messages_ << " messages (" + << td::format::as_size(obj->out_expired_bytes_) << ")\n"; + } + }; + print_packets("recent", peer->packets_recent_); + print_packets("total", peer->packets_total_); + + sb << " Last in packet: "; + if (peer->last_in_packet_ts_) { + sb << now - peer->last_in_packet_ts_ << " s ago"; + } else { + sb << "never"; + } + sb << " Last out packet: "; + if (peer->last_out_packet_ts_) { + sb << now - peer->last_out_packet_ts_ << " s ago"; + } else { + sb << "never"; + } + sb << "\n"; + if (peer->out_queue_messages_) { + sb << " Out message queue: " << peer->out_queue_messages_ << " messages (" + << td::format::as_size(peer->out_queue_bytes_) << ")\n"; + } + } + } + sb << "==============================================================================\n"; + td::TerminalIO::out() << sb.as_cslice(); + return td::Status::OK(); +} diff --git a/validator-engine-console/validator-engine-console-query.h b/validator-engine-console/validator-engine-console-query.h index 08ac1572a..6314d6199 100644 --- a/validator-engine-console/validator-engine-console-query.h +++ b/validator-engine-console/validator-engine-console-query.h @@ -1292,3 +1292,47 @@ class GetCollatorOptionsJsonQuery : public Query { private: std::string file_name_; }; + +class GetAdnlStatsJsonQuery : public Query { + public: + GetAdnlStatsJsonQuery(td::actor::ActorId console, Tokenizer tokenizer) + : Query(console, std::move(tokenizer)) { + } + td::Status run() override; + td::Status send() override; + td::Status receive(td::BufferSlice data) override; + static std::string get_name() { + return "getadnlstatsjson"; + } + static std::string get_help() { + return "getadnlstatsjson \tsave adnl stats to "; + } + std::string name() const override { + return get_name(); + } + + private: + std::string file_name_; +}; + +class GetAdnlStatsQuery : public Query { + public: + GetAdnlStatsQuery(td::actor::ActorId console, Tokenizer tokenizer) + : Query(console, std::move(tokenizer)) { + } + td::Status run() override; + td::Status send() override; + td::Status receive(td::BufferSlice data) override; + static std::string get_name() { + return "getadnlstats"; + } + static std::string get_help() { + return "getadnlstats\tdisplay adnl stats"; + } + std::string name() const override { + return get_name(); + } + + private: + std::string file_name_; +}; diff --git a/validator-engine-console/validator-engine-console.cpp b/validator-engine-console/validator-engine-console.cpp index d8a230801..1ec0f3803 100644 --- a/validator-engine-console/validator-engine-console.cpp +++ b/validator-engine-console/validator-engine-console.cpp @@ -150,6 +150,8 @@ void ValidatorEngineConsole::run() { add_query_runner(std::make_unique>()); add_query_runner(std::make_unique>()); add_query_runner(std::make_unique>()); + add_query_runner(std::make_unique>()); + add_query_runner(std::make_unique>()); } bool ValidatorEngineConsole::envelope_send_query(td::BufferSlice query, td::Promise promise) { diff --git a/validator-engine/validator-engine.cpp b/validator-engine/validator-engine.cpp index 11f1a9a28..31f79275e 100644 --- a/validator-engine/validator-engine.cpp +++ b/validator-engine/validator-engine.cpp @@ -3866,6 +3866,28 @@ void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_getCollat } } +void ValidatorEngine::run_control_query(ton::ton_api::engine_validator_getAdnlStats &query, td::BufferSlice data, + ton::PublicKeyHash src, td::uint32 perm, td::Promise promise) { + if (!(perm & ValidatorEnginePermissions::vep_default)) { + promise.set_value(create_control_query_error(td::Status::Error(ton::ErrorCode::error, "not authorized"))); + return; + } + if (adnl_.empty()) { + promise.set_value(create_control_query_error(td::Status::Error(ton::ErrorCode::notready, "not started"))); + return; + } + td::actor::send_closure( + adnl_, &ton::adnl::Adnl::get_stats, + [promise = std::move(promise)](td::Result> R) mutable { + if (R.is_ok()) { + promise.set_value(ton::serialize_tl_object(R.move_as_ok(), true)); + } else { + promise.set_value( + create_control_query_error(td::Status::Error(ton::ErrorCode::notready, "failed to get adnl stats"))); + } + }); +} + void ValidatorEngine::process_control_query(td::uint16 port, ton::adnl::AdnlNodeIdShort src, ton::adnl::AdnlNodeIdShort dst, td::BufferSlice data, td::Promise promise) { diff --git a/validator-engine/validator-engine.hpp b/validator-engine/validator-engine.hpp index 7212a0b50..2e94dd1ef 100644 --- a/validator-engine/validator-engine.hpp +++ b/validator-engine/validator-engine.hpp @@ -492,6 +492,8 @@ class ValidatorEngine : public td::actor::Actor { ton::PublicKeyHash src, td::uint32 perm, td::Promise promise); void run_control_query(ton::ton_api::engine_validator_getCollatorOptionsJson &query, td::BufferSlice data, ton::PublicKeyHash src, td::uint32 perm, td::Promise promise); + void run_control_query(ton::ton_api::engine_validator_getAdnlStats &query, td::BufferSlice data, + ton::PublicKeyHash src, td::uint32 perm, td::Promise promise); template void run_control_query(T &query, td::BufferSlice data, ton::PublicKeyHash src, td::uint32 perm, td::Promise promise) { From e35b34de22109596a54d1357dcce92d63002ba95 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Wed, 4 Sep 2024 11:38:29 +0300 Subject: [PATCH 16/19] Don't deserialize continuations in LS runSmcMethod (#1151) --- validator/impl/liteserver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validator/impl/liteserver.cpp b/validator/impl/liteserver.cpp index d6fad7ee2..7bedf7fe4 100644 --- a/validator/impl/liteserver.cpp +++ b/validator/impl/liteserver.cpp @@ -870,7 +870,7 @@ void LiteQuery::perform_runSmcMethod(BlockIdExt blkid, WorkchainId workchain, St vm::FakeVmStateLimits fstate(1000); // limit recursive (de)serialization calls vm::VmStateInterface::Guard guard(&fstate); auto cs = vm::load_cell_slice(res.move_as_ok()); - if (!(vm::Stack::deserialize_to(cs, stack_, 0) && cs.empty_ext())) { + if (!(vm::Stack::deserialize_to(cs, stack_, 2 /* no continuations */) && cs.empty_ext())) { fatal_error("parameter list boc cannot be deserialized as a VmStack"); return; } From cb69f307e9bd58506b048534df0593379878a949 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Thu, 5 Sep 2024 13:04:57 +0300 Subject: [PATCH 17/19] Add "lastgcdmasterchainstate" to validator stats (#1154) --- validator/db/celldb.cpp | 13 +++++++++++++ validator/db/celldb.hpp | 3 +++ validator/db/rootdb.cpp | 4 ++++ validator/db/rootdb.hpp | 1 + validator/interfaces/db.h | 1 + validator/manager.cpp | 17 +++++++++++++++++ 6 files changed, 39 insertions(+) diff --git a/validator/db/celldb.cpp b/validator/db/celldb.cpp index 463e6e34a..1701ae588 100644 --- a/validator/db/celldb.cpp +++ b/validator/db/celldb.cpp @@ -188,12 +188,17 @@ void CellDbIn::store_cell(BlockIdExt block_id, td::Ref cell, td::Promi if (!opts_->get_disable_rocksdb_stats()) { cell_db_statistics_.store_cell_time_.insert(timer.elapsed() * 1e6); } + LOG(DEBUG) << "Stored state " << block_id.to_str(); } void CellDbIn::get_cell_db_reader(td::Promise> promise) { promise.set_result(boc_->get_cell_db_reader()); } +void CellDbIn::get_last_deleted_mc_state(td::Promise promise) { + promise.set_result(last_deleted_mc_state_); +} + void CellDbIn::flush_db_stats() { if (opts_->get_disable_rocksdb_stats()) { return; @@ -320,6 +325,10 @@ void CellDbIn::gc_cont2(BlockHandle handle) { if (!opts_->get_disable_rocksdb_stats()) { cell_db_statistics_.gc_cell_time_.insert(timer.elapsed() * 1e6); } + if (handle->id().is_masterchain()) { + last_deleted_mc_state_ = handle->id().seqno(); + } + LOG(DEBUG) << "Deleted state " << handle->id().to_str(); } void CellDbIn::skip_gc() { @@ -453,6 +462,10 @@ void CellDb::get_cell_db_reader(td::Promise> p td::actor::send_closure(cell_db_, &CellDbIn::get_cell_db_reader, std::move(promise)); } +void CellDb::get_last_deleted_mc_state(td::Promise promise) { + td::actor::send_closure(cell_db_, &CellDbIn::get_last_deleted_mc_state, std::move(promise)); +} + void CellDb::start_up() { CellDbBase::start_up(); boc_ = vm::DynamicBagOfCellsDb::create(); diff --git a/validator/db/celldb.hpp b/validator/db/celldb.hpp index b3857971c..335d8a08e 100644 --- a/validator/db/celldb.hpp +++ b/validator/db/celldb.hpp @@ -61,6 +61,7 @@ class CellDbIn : public CellDbBase { void load_cell(RootHash hash, td::Promise> promise); void store_cell(BlockIdExt block_id, td::Ref cell, td::Promise> promise); void get_cell_db_reader(td::Promise> promise); + void get_last_deleted_mc_state(td::Promise promise); void migrate_cell(td::Bits256 hash); @@ -143,6 +144,7 @@ class CellDbIn : public CellDbBase { std::shared_ptr snapshot_statistics_; CellDbStatistics cell_db_statistics_; td::Timestamp statistics_flush_at_ = td::Timestamp::never(); + BlockSeqno last_deleted_mc_state_ = 0; public: class MigrationProxy : public td::actor::Actor { @@ -167,6 +169,7 @@ class CellDb : public CellDbBase { boc_->set_loader(std::make_unique(std::move(snapshot), on_load_callback_)).ensure(); } void get_cell_db_reader(td::Promise> promise); + void get_last_deleted_mc_state(td::Promise promise); CellDb(td::actor::ActorId root_db, std::string path, td::Ref opts) : root_db_(root_db), path_(path), opts_(opts) { diff --git a/validator/db/rootdb.cpp b/validator/db/rootdb.cpp index 93dcfc91f..3071f565d 100644 --- a/validator/db/rootdb.cpp +++ b/validator/db/rootdb.cpp @@ -274,6 +274,10 @@ void RootDb::get_cell_db_reader(td::Promise> p td::actor::send_closure(cell_db_, &CellDb::get_cell_db_reader, std::move(promise)); } +void RootDb::get_last_deleted_mc_state(td::Promise promise) { + td::actor::send_closure(cell_db_, &CellDb::get_last_deleted_mc_state, std::move(promise)); +} + void RootDb::store_persistent_state_file(BlockIdExt block_id, BlockIdExt masterchain_block_id, td::BufferSlice state, td::Promise promise) { td::actor::send_closure(archive_db_, &ArchiveManager::add_persistent_state, block_id, masterchain_block_id, diff --git a/validator/db/rootdb.hpp b/validator/db/rootdb.hpp index 45044e4f8..061e9add8 100644 --- a/validator/db/rootdb.hpp +++ b/validator/db/rootdb.hpp @@ -63,6 +63,7 @@ class RootDb : public Db { td::Promise> promise) override; void get_block_state(ConstBlockHandle handle, td::Promise> promise) override; void get_cell_db_reader(td::Promise> promise) override; + void get_last_deleted_mc_state(td::Promise promise) override; void store_block_handle(BlockHandle handle, td::Promise promise) override; void get_block_handle(BlockIdExt id, td::Promise promise) override; diff --git a/validator/interfaces/db.h b/validator/interfaces/db.h index 8bbf7f31f..e0d88e4e7 100644 --- a/validator/interfaces/db.h +++ b/validator/interfaces/db.h @@ -51,6 +51,7 @@ class Db : public td::actor::Actor { td::Promise> promise) = 0; virtual void get_block_state(ConstBlockHandle handle, td::Promise> promise) = 0; virtual void get_cell_db_reader(td::Promise> promise) = 0; + virtual void get_last_deleted_mc_state(td::Promise promise) = 0; virtual void store_persistent_state_file(BlockIdExt block_id, BlockIdExt masterchain_block_id, td::BufferSlice state, td::Promise promise) = 0; diff --git a/validator/manager.cpp b/validator/manager.cpp index 79927d0ef..9058be76a 100644 --- a/validator/manager.cpp +++ b/validator/manager.cpp @@ -2767,6 +2767,23 @@ void ValidatorManagerImpl::prepare_stats(td::Promiseid().seqno()](td::Result R) mutable { + TRY_RESULT_PROMISE(promise, seqno, std::move(R)); + std::string s; + if (seqno == 0) { + s = "none"; + } else if (seqno <= gc_seqno) { + s = PSTRING() << seqno << " (gc_seqno-" << (gc_seqno - seqno) << ")"; + } else { + s = PSTRING() << seqno << " (gc_seqno+" << (seqno - gc_seqno) << ")"; + } + std::vector> vec; + vec.emplace_back("lastgcdmasterchainstate", std::move(s)); + promise.set_value(std::move(vec)); + }); } if (!shard_client_.empty()) { From e32a74e9c5bfb084fa460a78c2e12718e1361637 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Thu, 5 Sep 2024 15:56:07 +0300 Subject: [PATCH 18/19] Collator: change deferring behavior when out queue size is big (#1135) --- tl/generate/scheme/ton_api.tl | 3 ++- tl/generate/scheme/ton_api.tlo | Bin 94732 -> 94848 bytes validator-engine/validator-engine.cpp | 8 +++++++ validator/impl/collator.cpp | 32 ++++++++++++++++++++++---- validator/validator.h | 5 ++++ 5 files changed, 43 insertions(+), 5 deletions(-) diff --git a/tl/generate/scheme/ton_api.tl b/tl/generate/scheme/ton_api.tl index e15e18612..10b1a9762 100644 --- a/tl/generate/scheme/ton_api.tl +++ b/tl/generate/scheme/ton_api.tl @@ -628,7 +628,8 @@ engine.validator.customOverlaysConfig overlays:(vector engine.validator.customOv engine.validator.collatorOptions deferring_enabled:Bool defer_messages_after:int defer_out_queue_size_limit:long dispatch_phase_2_max_total:int dispatch_phase_3_max_total:int - dispatch_phase_2_max_per_initiator:int dispatch_phase_3_max_per_initiator:int = engine.validator.CollatorOptions; + dispatch_phase_2_max_per_initiator:int dispatch_phase_3_max_per_initiator:int + whitelist:(vector string) prioritylist:(vector string) = engine.validator.CollatorOptions; ---functions--- ---types--- diff --git a/tl/generate/scheme/ton_api.tlo b/tl/generate/scheme/ton_api.tlo index 3d39734ef92df8a772a892713ffbe0493d8a4015..7ee378c1a1e3b7d56de9074fdc07a848e7c50ca1 100644 GIT binary patch delta 105 zcmeBq!rJhZbwi5?%Zn+mCv5HzF=%52u@t6Xv}P2Zd|{RVCy0CVn2-> parse_collator_optio } else { opts.dispatch_phase_3_max_per_initiator = {}; } + for (const std::string& s : f.whitelist_) { + TRY_RESULT(addr, block::StdAddress::parse(s)); + opts.whitelist.emplace(addr.workchain, addr.addr); + } + for (const std::string& s : f.prioritylist_) { + TRY_RESULT(addr, block::StdAddress::parse(s)); + opts.prioritylist.emplace(addr.workchain, addr.addr); + } return ref; } diff --git a/validator/impl/collator.cpp b/validator/impl/collator.cpp index f465c0f55..2cf77bfa3 100644 --- a/validator/impl/collator.cpp +++ b/validator/impl/collator.cpp @@ -3066,7 +3066,7 @@ int Collator::process_one_new_message(block::NewOutMsg msg, bool enqueue_only, R bool defer = false; if (!from_dispatch_queue) { if (deferring_messages_enabled_ && collator_opts_->deferring_enabled && !is_special && !is_special_account && - msg.msg_idx != 0) { + !collator_opts_->whitelist.count({src_wc, src_addr}) && msg.msg_idx != 0) { if (++sender_generated_messages_count_[src_addr] >= collator_opts_->defer_messages_after || out_msg_queue_size_ > defer_out_queue_size_limit_) { defer = true; @@ -3697,6 +3697,8 @@ bool Collator::process_dispatch_queue() { vm::AugmentedDictionary cur_dispatch_queue{dispatch_queue_->get_root(), 256, block::tlb::aug_DispatchQueue}; std::map, size_t> count_per_initiator; size_t total_count = 0; + auto prioritylist = collator_opts_->prioritylist; + auto prioritylist_iter = prioritylist.begin(); while (!cur_dispatch_queue.is_empty()) { block_full_ = !block_limit_status_->fits(block::ParamLimits::cl_normal); if (block_full_) { @@ -3713,9 +3715,30 @@ bool Collator::process_dispatch_queue() { return true; } StdSmcAddress src_addr; - auto account_dispatch_queue = block::get_dispatch_queue_min_lt_account(cur_dispatch_queue, src_addr); + td::Ref account_dispatch_queue; + while (!prioritylist.empty()) { + if (prioritylist_iter == prioritylist.end()) { + prioritylist_iter = prioritylist.begin(); + } + auto priority_addr = *prioritylist_iter; + if (priority_addr.first != workchain() || !is_our_address(priority_addr.second)) { + prioritylist_iter = prioritylist.erase(prioritylist_iter); + continue; + } + src_addr = priority_addr.second; + account_dispatch_queue = cur_dispatch_queue.lookup(src_addr); + if (account_dispatch_queue.is_null()) { + prioritylist_iter = prioritylist.erase(prioritylist_iter); + } else { + ++prioritylist_iter; + break; + } + } if (account_dispatch_queue.is_null()) { - return fatal_error("invalid dispatch queue in shard state"); + account_dispatch_queue = block::get_dispatch_queue_min_lt_account(cur_dispatch_queue, src_addr); + if (account_dispatch_queue.is_null()) { + return fatal_error("invalid dispatch queue in shard state"); + } } vm::Dictionary dict{64}; td::uint64 dict_size; @@ -3735,7 +3758,8 @@ bool Collator::process_dispatch_queue() { // Remove message from DispatchQueue bool ok; if (iter == 0 || - (iter == 1 && sender_generated_messages_count_[src_addr] >= collator_opts_->defer_messages_after)) { + (iter == 1 && sender_generated_messages_count_[src_addr] >= collator_opts_->defer_messages_after && + !collator_opts_->whitelist.count({workchain(), src_addr}))) { ok = cur_dispatch_queue.lookup_delete(src_addr).not_null(); } else { dict.lookup_delete(key); diff --git a/validator/validator.h b/validator/validator.h index 2171954a3..afc32f3b9 100644 --- a/validator/validator.h +++ b/validator/validator.h @@ -64,6 +64,11 @@ struct CollatorOptions : public td::CntObject { td::uint32 dispatch_phase_3_max_total = 150; td::uint32 dispatch_phase_2_max_per_initiator = 20; td::optional dispatch_phase_3_max_per_initiator; // Default - depends on out msg queue size + + // Don't defer messages from these accounts + std::set> whitelist; + // Prioritize these accounts on each phase of process_dispatch_queue + std::set> prioritylist; }; struct ValidatorManagerOptions : public td::CntObject { From 89e1cd9738fe04aba95b992490a7b44cc12e68d4 Mon Sep 17 00:00:00 2001 From: SpyCheese Date: Thu, 5 Sep 2024 16:08:52 +0300 Subject: [PATCH 19/19] Adapt test-adnl to rate limits --- test/test-adnl.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/test/test-adnl.cpp b/test/test-adnl.cpp index 85e965a4e..45011fbc0 100644 --- a/test/test-adnl.cpp +++ b/test/test-adnl.cpp @@ -225,13 +225,19 @@ int main() { auto f = td::Clocks::system(); scheduler.run_in_context([&] { - for (td::uint32 i = 1; i <= ton::adnl::Adnl::huge_packet_max_size(); i++) { + // Don't send too many packets + // Channels are disabled, so packet rate is limited + for (td::uint32 i : {1, 2, 3, 4, 100, 500, 900}) { + remaining++; + td::actor::send_closure(adnl, &ton::adnl::Adnl::send_message, src, dst, send_packet(i)); + } + for (td::uint32 i = 1024; i <= ton::adnl::Adnl::huge_packet_max_size() /* 1024 * 8 */; i += 1024) { remaining++; td::actor::send_closure(adnl, &ton::adnl::Adnl::send_message, src, dst, send_packet(i)); } }); - auto t = td::Timestamp::in(320.0); + auto t = td::Timestamp::in(60.0); while (scheduler.run(1)) { if (!remaining) { break; @@ -241,7 +247,7 @@ int main() { } } - LOG(ERROR) << "successfully tested delivering of packets of all sizes. Time=" << (td::Clocks::system() - f); + LOG(ERROR) << "successfully tested delivering of packets of various sizes. Time=" << (td::Clocks::system() - f); scheduler.run_in_context([&] { td::actor::send_closure(network_manager, &ton::adnl::TestLoopbackNetworkManager::add_node_id, src, true, true);