Skip to content

Commit

Permalink
Fix estimating block size, repeat collation on error
Browse files Browse the repository at this point in the history
  • Loading branch information
SpyCheese committed Sep 16, 2024
1 parent eea95ae commit 1f5e490
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 31 deletions.
6 changes: 6 additions & 0 deletions crypto/block/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,12 @@ struct ParamLimits {
bool deserialize(vm::CellSlice& cs);
int classify(td::uint64 value) const;
bool fits(unsigned cls, td::uint64 value) const;
void multiply_by(double x) {
CHECK(x > 0.0);
for (td::uint32& y : limits_) {
y = (td::uint32)std::min<double>(y * x, 1e9);
}
}

private:
std::array<td::uint32, limits_cnt> limits_;
Expand Down
2 changes: 1 addition & 1 deletion validator/fabric.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ void run_validate_query(ShardIdFull shard, BlockIdExt min_masterchain_block_id,
void run_collate_query(ShardIdFull shard, const BlockIdExt& min_masterchain_block_id, std::vector<BlockIdExt> prev,
Ed25519_PublicKey creator, td::Ref<ValidatorSet> validator_set,
td::Ref<CollatorOptions> collator_opts, td::actor::ActorId<ValidatorManager> manager,
td::Timestamp timeout, td::Promise<BlockCandidate> promise);
td::Timestamp timeout, td::Promise<BlockCandidate> promise, int attempt_idx = 0);
void run_collate_hardfork(ShardIdFull shard, const BlockIdExt& min_masterchain_block_id, std::vector<BlockIdExt> prev,
td::actor::ActorId<ValidatorManager> manager, td::Timestamp timeout,
td::Promise<BlockCandidate> promise);
Expand Down
6 changes: 5 additions & 1 deletion validator/impl/collator-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ class Collator final : public td::actor::Actor {
td::Timestamp timeout;
td::Timestamp queue_cleanup_timeout_, soft_timeout_, medium_timeout_;
td::Promise<BlockCandidate> main_promise;
int attempt_idx_;
bool allow_repeat_collation_ = false;
ton::BlockSeqno last_block_seqno{0};
ton::BlockSeqno prev_mc_block_seqno{0};
ton::BlockSeqno new_block_seqno{0};
Expand All @@ -90,7 +92,8 @@ class Collator final : public td::actor::Actor {
public:
Collator(ShardIdFull shard, bool is_hardfork, BlockIdExt min_masterchain_block_id, std::vector<BlockIdExt> prev,
Ref<ValidatorSet> validator_set, Ed25519_PublicKey collator_id, Ref<CollatorOptions> collator_opts,
td::actor::ActorId<ValidatorManager> manager, td::Timestamp timeout, td::Promise<BlockCandidate> promise);
td::actor::ActorId<ValidatorManager> manager, td::Timestamp timeout, td::Promise<BlockCandidate> promise,
int attempt_idx);
~Collator() override = default;
bool is_busy() const {
return busy_;
Expand Down Expand Up @@ -318,6 +321,7 @@ class Collator final : public td::actor::Actor {
bool insert_out_msg(Ref<vm::Cell> out_msg);
bool insert_out_msg(Ref<vm::Cell> out_msg, td::ConstBitPtr msg_hash);
bool register_out_msg_queue_op(bool force = false);
bool register_dispatch_queue_op(bool force = false);
bool update_min_mc_seqno(ton::BlockSeqno some_mc_seqno);
bool combine_account_transactions();
bool update_public_libraries();
Expand Down
88 changes: 64 additions & 24 deletions validator/impl/collator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,13 @@ using td::Ref;
using namespace std::literals::string_literals;

// Don't increase MERGE_MAX_QUEUE_SIZE too much: merging requires cleaning the whole queue in out_msg_queue_cleanup
static const td::uint32 FORCE_SPLIT_QUEUE_SIZE = 4096;
static const td::uint32 SPLIT_MAX_QUEUE_SIZE = 100000;
static const td::uint32 MERGE_MAX_QUEUE_SIZE = 2047;
static const td::uint32 SKIP_EXTERNALS_QUEUE_SIZE = 8000;
static const int HIGH_PRIORITY_EXTERNAL = 10; // don't skip high priority externals when queue is big
static constexpr td::uint32 FORCE_SPLIT_QUEUE_SIZE = 4096;
static constexpr td::uint32 SPLIT_MAX_QUEUE_SIZE = 100000;
static constexpr td::uint32 MERGE_MAX_QUEUE_SIZE = 2047;
// When out_msg_queue_size_ exceeds this value, only high-priority inbound external messages are processed
static constexpr td::uint32 SKIP_EXTERNALS_QUEUE_SIZE = 8000;
static constexpr int HIGH_PRIORITY_EXTERNAL = 10; // don't skip high priority externals when queue is big

// Collation is repeated after a fatal error only while attempt_idx_ + 1 < MAX_ATTEMPTS (attempt indices start at 0)
static constexpr int MAX_ATTEMPTS = 4;

#define DBG(__n) dbg(__n)&&
#define DSTART int __dcnt = 0;
Expand All @@ -74,11 +76,12 @@ static inline bool dbg(int c) {
* @param manager The ActorId of the ValidatorManager.
* @param timeout The timeout for the collator.
* @param promise The promise to return the result.
* @param attempt_idx The index of the attempt, starting from 0. On later attempts collator decreases block limits and skips some steps.
*/
Collator::Collator(ShardIdFull shard, bool is_hardfork, BlockIdExt min_masterchain_block_id,
std::vector<BlockIdExt> prev, td::Ref<ValidatorSet> validator_set, Ed25519_PublicKey collator_id,
Ref<CollatorOptions> collator_opts, td::actor::ActorId<ValidatorManager> manager,
td::Timestamp timeout, td::Promise<BlockCandidate> promise)
td::Timestamp timeout, td::Promise<BlockCandidate> promise, int attempt_idx)
: shard_(shard)
, is_hardfork_(is_hardfork)
, min_mc_block_id{min_masterchain_block_id}
Expand All @@ -93,6 +96,7 @@ Collator::Collator(ShardIdFull shard, bool is_hardfork, BlockIdExt min_mastercha
, soft_timeout_(td::Timestamp::at(timeout.at() - 3.0))
, medium_timeout_(td::Timestamp::at(timeout.at() - 1.5))
, main_promise(std::move(promise))
, attempt_idx_(attempt_idx)
, perf_timer_("collate", 0.1, [manager](double duration) {
send_closure(manager, &ValidatorManager::add_perf_timer_stat, "collate", duration);
}) {
Expand All @@ -107,7 +111,8 @@ Collator::Collator(ShardIdFull shard, bool is_hardfork, BlockIdExt min_mastercha
* The results of these queries are handled by corresponding callback functions.
*/
void Collator::start_up() {
LOG(WARNING) << "Collator for shard " << shard_.to_str() << " started";
LOG(WARNING) << "Collator for shard " << shard_.to_str() << " started"
<< (attempt_idx_ ? PSTRING() << " (attempt #" << attempt_idx_ << ")" : "");
LOG(DEBUG) << "Previous block #1 is " << prev_blocks.at(0).to_str();
if (prev_blocks.size() > 1) {
LOG(DEBUG) << "Previous block #2 is " << prev_blocks.at(1).to_str();
Expand Down Expand Up @@ -340,7 +345,13 @@ bool Collator::fatal_error(td::Status error) {
error.ensure_error();
LOG(ERROR) << "cannot generate block candidate for " << show_shard(shard_) << " : " << error.to_string();
if (busy_) {
main_promise(std::move(error));
if (allow_repeat_collation_ && attempt_idx_ + 1 < MAX_ATTEMPTS && !is_hardfork_ && !timeout.is_in_past()) {
LOG(WARNING) << "Repeating collation (attempt #" << attempt_idx_ + 1 << ")";
run_collate_query(shard_, min_mc_block_id, prev_blocks, created_by_, validator_set_, collator_opts_, manager,
td::Timestamp::in(10.0), std::move(main_promise), attempt_idx_ + 1);
} else {
main_promise(std::move(error));
}
busy_ = false;
}
stop();
Expand Down Expand Up @@ -712,6 +723,15 @@ bool Collator::unpack_last_mc_state() {
return fatal_error(limits.move_as_error());
}
block_limits_ = limits.move_as_ok();
if (attempt_idx_ == 2) {
LOG(INFO) << "Attempt #2: bytes, gas limits /= 2";
block_limits_->bytes.multiply_by(0.5);
block_limits_->gas.multiply_by(0.5);
} else if (attempt_idx_ == 3) {
LOG(INFO) << "Attempt #3: bytes, gas limits /= 4";
block_limits_->bytes.multiply_by(0.25);
block_limits_->gas.multiply_by(0.25);
}
LOG(DEBUG) << "block limits: bytes [" << block_limits_->bytes.underload() << ", " << block_limits_->bytes.soft()
<< ", " << block_limits_->bytes.hard() << "]";
LOG(DEBUG) << "block limits: gas [" << block_limits_->gas.underload() << ", " << block_limits_->gas.soft() << ", "
Expand Down Expand Up @@ -2093,6 +2113,7 @@ bool Collator::do_collate() {
if (max_lt == start_lt) {
++max_lt;
}
allow_repeat_collation_ = true;
// NB: interchanged 1.2 and 1.1 (is this always correct?)
// 1.1. re-adjust neighbors' out_msg_queues (for oneself)
if (!add_trivial_neighbor()) {
Expand Down Expand Up @@ -3565,6 +3586,10 @@ bool Collator::process_inbound_external_messages() {
LOG(INFO) << "skipping processing of inbound external messages";
return true;
}
if (attempt_idx_ >= 2) {
LOG(INFO) << "Attempt #" << attempt_idx_ << ": skip external messages";
return true;
}
if (out_msg_queue_size_ > SKIP_EXTERNALS_QUEUE_SIZE) {
LOG(INFO) << "skipping processing of inbound external messages (except for high-priority) because out_msg_queue is "
"too big ("
Expand Down Expand Up @@ -3692,6 +3717,10 @@ bool Collator::process_dispatch_queue() {
if (max_per_initiator[iter] == 0 || max_total_count[iter] == 0) {
continue;
}
if (iter > 0 && attempt_idx_ >= 1) {
LOG(INFO) << "Attempt #" << attempt_idx_ << ": skip process_dispatch_queue";
break;
}
vm::AugmentedDictionary cur_dispatch_queue{dispatch_queue_->get_root(), 256, block::tlb::aug_DispatchQueue};
std::map<std::tuple<WorkchainId, StdSmcAddress, LogicalTime>, size_t> count_per_initiator;
size_t total_count = 0;
Expand All @@ -3704,13 +3733,13 @@ bool Collator::process_dispatch_queue() {
stats_.limits_log += PSTRING() << "DISPATCH_QUEUE_STAGE_" << iter << ": "
<< block_full_comment(*block_limit_status_, block::ParamLimits::cl_normal)
<< "\n";
return true;
return register_dispatch_queue_op(true);
}
if (soft_timeout_.is_in_past(td::Timestamp::now())) {
block_full_ = true;
LOG(WARNING) << "soft timeout reached, stop processing dispatch queue";
stats_.limits_log += PSTRING() << "DISPATCH_QUEUE_STAGE_" << iter << ": timeout\n";
return true;
return register_dispatch_queue_op(true);
}
StdSmcAddress src_addr;
td::Ref<vm::CellSlice> account_dispatch_queue;
Expand Down Expand Up @@ -3788,6 +3817,7 @@ bool Collator::process_dispatch_queue() {
if (iter == 0) {
have_unprocessed_account_dispatch_queue_ = false;
}
register_dispatch_queue_op(true);
}
return true;
}
Expand All @@ -3811,12 +3841,7 @@ bool Collator::process_deferred_message(Ref<vm::CellSlice> enq_msg, StdSmcAddres
return fatal_error(PSTRING() << "failed to delete message from DispatchQueue: address=" << src_addr.to_hex()
<< ", lt=" << lt);
}
++dispatch_queue_ops_;
if (!(dispatch_queue_ops_ & 63)) {
if (!block_limit_status_->add_proof(dispatch_queue_->get_root_cell())) {
return false;
}
}
register_dispatch_queue_op();
++sender_generated_messages_count_[src_addr];

LogicalTime enqueued_lt = 0;
Expand Down Expand Up @@ -3909,6 +3934,7 @@ bool Collator::process_deferred_message(Ref<vm::CellSlice> enq_msg, StdSmcAddres
++unprocessed_deferred_messages_[src_addr];
LOG(INFO) << "delivering deferred message from account " << src_addr.to_hex() << ", lt=" << lt
<< ", emitted_lt=" << emitted_lt;
block_limit_status_->add_cell(msg_env);
register_new_msg(std::move(new_msg));
msg_metadata = std::move(env.metadata);
return true;
Expand Down Expand Up @@ -4088,11 +4114,7 @@ bool Collator::enqueue_message(block::NewOutMsg msg, td::RefInt256 fwd_fees_rema
}
++dispatch_dict_size;
dispatch_queue_->set(src_addr, block::pack_account_dispatch_queue(dispatch_dict, dispatch_dict_size));
++dispatch_queue_ops_;
if (!(dispatch_queue_ops_ & 63)) {
return block_limit_status_->add_proof(dispatch_queue_->get_root_cell());
}
return true;
return register_dispatch_queue_op();
}

auto next_hop = block::interpolate_addr(src_prefix, dest_prefix, route_info.second);
Expand Down Expand Up @@ -4973,6 +4995,23 @@ bool Collator::register_out_msg_queue_op(bool force) {
}
}

/**
 * Counts one dispatch queue operation and, when due, accounts for the dispatch queue proof.
 *
 * The operation counter is always incremented. The dispatch queue root cell is added
 * as a proof to the block limit status on every 64th operation, or unconditionally
 * when `force` is set.
 *
 * @param force If true, the proof is added to the block limit status regardless of the counter.
 *
 * @returns The result of add_proof() if the proof was added, true otherwise.
 */
bool Collator::register_dispatch_queue_op(bool force) {
  ++dispatch_queue_ops_;
  const bool checkpoint_due = (dispatch_queue_ops_ & 63) == 0;
  if (!force && !checkpoint_due) {
    return true;
  }
  return block_limit_status_->add_proof(dispatch_queue_->get_root_cell());
}

/**
* Creates a new shard state and the Merkle update.
*
Expand Down Expand Up @@ -5098,9 +5137,10 @@ bool Collator::compute_out_msg_queue_info(Ref<vm::Cell>& out_msg_queue_info) {
vm::CellSlice maybe_extra = cb.as_cellslice();
cb.reset();

return register_out_msg_queue_op(true) && out_msg_queue_->append_dict_to_bool(cb) // _ out_queue:OutMsgQueue
&& processed_upto_->pack(cb) // proc_info:ProcessedInfo
&& cb.append_cellslice_bool(maybe_extra) // extra:(Maybe OutMsgQueueExtra)
return register_out_msg_queue_op(true) && register_dispatch_queue_op(true) &&
out_msg_queue_->append_dict_to_bool(cb) // _ out_queue:OutMsgQueue
&& processed_upto_->pack(cb) // proc_info:ProcessedInfo
&& cb.append_cellslice_bool(maybe_extra) // extra:(Maybe OutMsgQueueExtra)
&& cb.finalize_to(out_msg_queue_info);
}

Expand Down
12 changes: 7 additions & 5 deletions validator/impl/fabric.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,16 +214,18 @@ void run_validate_query(ShardIdFull shard, BlockIdExt min_masterchain_block_id,
/**
 * Creates a Collator actor for a regular (non-hardfork) collation and calls release() on the created actor.
 *
 * The actor name is "collate" + shard + ":" + (highest seqno among `prev`, plus one); when attempt_idx is
 * non-zero, "_" followed by attempt_idx is appended to the name.
 *
 * @param shard The shard passed to the Collator.
 * @param min_masterchain_block_id Passed through to the Collator.
 * @param prev The previous blocks; their highest seqno is used for the actor name.
 * @param creator Passed through to the Collator.
 * @param validator_set Passed through to the Collator.
 * @param collator_opts Passed through to the Collator.
 * @param manager The ActorId of the ValidatorManager.
 * @param timeout The timeout passed to the Collator.
 * @param promise The promise passed to the Collator to return the result.
 * @param attempt_idx The index of the attempt; forwarded to the Collator and used in the actor name.
 */
void run_collate_query(ShardIdFull shard, const BlockIdExt& min_masterchain_block_id, std::vector<BlockIdExt> prev,
Ed25519_PublicKey creator, td::Ref<ValidatorSet> validator_set,
td::Ref<CollatorOptions> collator_opts, td::actor::ActorId<ValidatorManager> manager,
td::Timestamp timeout, td::Promise<BlockCandidate> promise) {
td::Timestamp timeout, td::Promise<BlockCandidate> promise, int attempt_idx) {
// Find the highest seqno among the previous blocks; seqno + 1 is used in the actor name below.
BlockSeqno seqno = 0;
for (auto& p : prev) {
if (p.seqno() > seqno) {
seqno = p.seqno();
}
}
td::actor::create_actor<Collator>(PSTRING() << "collate" << shard.to_str() << ":" << (seqno + 1), shard, false,
min_masterchain_block_id, std::move(prev), std::move(validator_set), creator,
std::move(collator_opts), std::move(manager), timeout, std::move(promise))
td::actor::create_actor<Collator>(PSTRING() << "collate" << shard.to_str() << ":" << (seqno + 1)
<< (attempt_idx ? "_" + td::to_string(attempt_idx) : ""),
shard, false, min_masterchain_block_id, std::move(prev), std::move(validator_set),
creator, std::move(collator_opts), std::move(manager), timeout, std::move(promise),
attempt_idx)
.release();
}

Expand All @@ -239,7 +241,7 @@ void run_collate_hardfork(ShardIdFull shard, const BlockIdExt& min_masterchain_b
td::actor::create_actor<Collator>(PSTRING() << "collate" << shard.to_str() << ":" << (seqno + 1), shard, true,
min_masterchain_block_id, std::move(prev), td::Ref<ValidatorSet>{},
Ed25519_PublicKey{Bits256::zero()}, td::Ref<CollatorOptions>{true},
std::move(manager), timeout, std::move(promise))
std::move(manager), timeout, std::move(promise), 0)
.release();
}

Expand Down

0 comments on commit 1f5e490

Please sign in to comment.