diff --git a/tdutils/td/utils/Timer.cpp b/tdutils/td/utils/Timer.cpp index 0f6a7d6ed..1f72fba96 100644 --- a/tdutils/td/utils/Timer.cpp +++ b/tdutils/td/utils/Timer.cpp @@ -87,4 +87,8 @@ void PerfWarningTimer::reset() { start_at_ = 0; } +double PerfWarningTimer::elapsed() const { + return Time::now() - start_at_; +} + } // namespace td diff --git a/tdutils/td/utils/Timer.h b/tdutils/td/utils/Timer.h index 18ea35d7c..3e0cafbf5 100644 --- a/tdutils/td/utils/Timer.h +++ b/tdutils/td/utils/Timer.h @@ -53,6 +53,7 @@ class PerfWarningTimer { PerfWarningTimer &operator=(PerfWarningTimer &&) = delete; ~PerfWarningTimer(); void reset(); + double elapsed() const; private: string name_; diff --git a/validator/collator-node.cpp b/validator/collator-node.cpp index 516555fae..2cc06b340 100644 --- a/validator/collator-node.cpp +++ b/validator/collator-node.cpp @@ -233,11 +233,13 @@ void CollatorNode::receive_query(adnl::AdnlNodeIdShort src, td::BufferSlice data cache_entry = cache_[cache_key] = std::make_shared(); } if (cache_entry->result) { + LOG(INFO) << "Using cached result"; new_promise.set_result(cache_entry->result.value().clone()); return; } cache_entry->promises.push_back(std::move(new_promise)); if (cache_entry->started) { + LOG(INFO) << "Collating of this block is already in progress, waiting"; return; } cache_entry->started = true; diff --git a/validator/full-node-shard.cpp b/validator/full-node-shard.cpp index 02bf0e39d..b34432a93 100644 --- a/validator/full-node-shard.cpp +++ b/validator/full-node-shard.cpp @@ -927,7 +927,6 @@ void FullNodeShardImpl::download_archive(BlockSeqno masterchain_seqno, std::stri void FullNodeShardImpl::download_out_msg_queue_proof(BlockIdExt block_id, ShardIdFull dst_shard, td::Timestamp timeout, td::Promise> promise) { // TODO: maybe more complex download (like other requests here) - // TODO: estimate max size auto &b = choose_neighbour(true); if (b.adnl_id == adnl::AdnlNodeIdShort::zero()) { promise.set_error(td::Status::Error(ErrorCode::notready, "no nodes")); diff --git a/validator/impl/collator.cpp b/validator/impl/collator.cpp index fc31d2645..2d91489e0 100644 --- a/validator/impl/collator.cpp +++ b/validator/impl/collator.cpp @@ -279,6 +279,7 @@ bool Collator::fatal_error(td::Status error) { error.ensure_error(); LOG(ERROR) << "cannot generate block candidate for " << show_shard(shard_) << " : " << error.to_string(); if (busy_) { + LOG(INFO) << "collation took " << perf_timer_.elapsed() << " s"; main_promise(std::move(error)); busy_ = false; } @@ -4226,6 +4227,7 @@ void Collator::return_block_candidate(td::Result saved) { } else { CHECK(block_candidate); LOG(INFO) << "sending new BlockCandidate to Promise"; + LOG(INFO) << "collation took " << perf_timer_.elapsed() << " s"; main_promise(block_candidate->clone()); busy_ = false; stop(); diff --git a/validator/impl/validate-query.cpp b/validator/impl/validate-query.cpp index f9e5be275..4c7384ee9 100644 --- a/validator/impl/validate-query.cpp +++ b/validator/impl/validate-query.cpp @@ -88,6 +88,7 @@ bool ValidateQuery::reject_query(std::string error, td::BufferSlice reason) { << " collated_data=" << block_candidate.collated_file_hash.to_hex()); errorlog::ErrorLog::log_file(block_candidate.data.clone()); errorlog::ErrorLog::log_file(block_candidate.collated_data.clone()); + LOG(INFO) << "validation took " << perf_timer_.elapsed() << " s"; main_promise.set_result(CandidateReject{std::move(error), std::move(reason)}); } stop(); @@ -108,6 +109,7 @@ bool ValidateQuery::soft_reject_query(std::string error, td::BufferSlice reason) << " collated_data=" << block_candidate.collated_file_hash.to_hex()); errorlog::ErrorLog::log_file(block_candidate.data.clone()); errorlog::ErrorLog::log_file(block_candidate.collated_data.clone()); + LOG(INFO) << "validation took " << perf_timer_.elapsed() << " s"; main_promise.set_result(CandidateReject{std::move(error), std::move(reason)}); } stop(); @@ -126,6 +128,7 @@ bool ValidateQuery::fatal_error(td::Status error) { errorlog::ErrorLog::log_file(block_candidate.data.clone()); errorlog::ErrorLog::log_file(block_candidate.collated_data.clone()); } + LOG(INFO) << "validation took " << perf_timer_.elapsed() << " s"; main_promise(std::move(error)); } stop(); @@ -147,6 +150,7 @@ bool ValidateQuery::fatal_error(std::string err_msg, int err_code) { void ValidateQuery::finish_query() { if (main_promise) { + LOG(INFO) << "validation took " << perf_timer_.elapsed() << " s"; main_promise.set_result(now_); } stop(); diff --git a/validator/validator-group.cpp b/validator/validator-group.cpp index 7cc92f804..bc6a9812c 100644 --- a/validator/validator-group.cpp +++ b/validator/validator-group.cpp @@ -133,7 +133,7 @@ void ValidatorGroup::validate_block_candidate(td::uint32 round_id, BlockCandidat P.set_error(td::Status::Error(ErrorCode::notready, "validator group not started")); return; } - VLOG(VALIDATOR_DEBUG) << "validating block candidate " << next_block_id; + VLOG(VALIDATOR_DEBUG) << "validating block candidate " << next_block_id.to_str(); block.id = next_block_id; run_validate_query(shard_, min_masterchain_block_id_, prev_block_ids_, std::move(block), validator_set_, manager_, td::Timestamp::in(15.0), std::move(P), @@ -397,6 +397,7 @@ void ValidatorGroup::send_collate_query(td::uint32 round_id, td::Timestamp timeo promise.set_error(td::Status::Error("too old")); return; } + BlockId next_block_id = create_next_block_id_simple(); adnl::AdnlNodeIdShort collator = adnl::AdnlNodeIdShort::zero(); // TODO: some way to choose node (similar to "unreliability" in full-node) int cnt = 0; @@ -413,20 +414,23 @@ void ValidatorGroup::send_collate_query(td::uint32 round_id, td::Timestamp timeo return; } - if (max_retries > 0) { - promise = td::PromiseCreator::lambda( - [=, SelfId = actor_id(this), promise = std::move(promise)](td::Result R) mutable { - if (R.is_ok()) { - promise.set_result(R.move_as_ok()); - } else if (timeout && timeout.is_in_past()) { - promise.set_result(R.move_as_error()); - } else { - LOG(WARNING) << "collate query error, retrying: " << R.move_as_error(); - td::actor::send_closure(SelfId, &ValidatorGroup::send_collate_query, round_id, timeout, std::move(promise), - max_retries - 1); - } - }); - } + promise = td::PromiseCreator::lambda([=, SelfId = actor_id(this), promise = std::move(promise), + timer = td::Timer()](td::Result R) mutable { + if (R.is_ok()) { + LOG(WARNING) << "collate query for " << next_block_id.to_str() << ": success, time=" << timer.elapsed() << "s"; + promise.set_result(R.move_as_ok()); + return; + } + bool retry = (!timeout || !timeout.is_in_past()) && max_retries > 0; + LOG(WARNING) << "collate query for " << next_block_id.to_str() << ": " << R.error() << ", time=" << timer.elapsed() + << "s, " << (retry ? "retrying" : "giving up"); + if (retry) { + td::actor::send_closure(SelfId, &ValidatorGroup::send_collate_query, round_id, timeout, std::move(promise), + max_retries - 1); + } else { + promise.set_result(td::Status::Error(ErrorCode::timeout, "timeout")); + } + }); std::vector> prev_blocks; for (const BlockIdExt &p : prev_block_ids_) { @@ -445,7 +449,7 @@ void ValidatorGroup::send_collate_query(td::uint32 round_id, td::Timestamp timeo td::actor::send_closure(SelfId, &ValidatorGroup::receive_collate_query_response, round_id, R.move_as_ok(), std::move(promise)); }); - LOG(INFO) << "collate query for " << create_next_block_id_simple().to_str() << ": send query to " << collator; + LOG(INFO) << "sending collate query for " << next_block_id.to_str() << ": send to " << collator; size_t max_answer_size = config_.max_block_size + config_.max_collated_data_size + 256; td::Timestamp query_timeout = td::Timestamp::in(10.0); query_timeout.relax(timeout);