Skip to content

Commit

Permalink
More logs for collators and validators
Browse files Browse the repository at this point in the history
  • Loading branch information
SpyCheese committed Jul 18, 2023
1 parent 3265e39 commit aa4f576
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 17 deletions.
4 changes: 4 additions & 0 deletions tdutils/td/utils/Timer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,8 @@ void PerfWarningTimer::reset() {
start_at_ = 0;
}

double PerfWarningTimer::elapsed() const {
return Time::now() - start_at_;
}

} // namespace td
1 change: 1 addition & 0 deletions tdutils/td/utils/Timer.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class PerfWarningTimer {
PerfWarningTimer &operator=(PerfWarningTimer &&) = delete;
~PerfWarningTimer();
void reset();
double elapsed() const;

private:
string name_;
Expand Down
2 changes: 2 additions & 0 deletions validator/collator-node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,11 +233,13 @@ void CollatorNode::receive_query(adnl::AdnlNodeIdShort src, td::BufferSlice data
cache_entry = cache_[cache_key] = std::make_shared<CacheEntry>();
}
if (cache_entry->result) {
LOG(INFO) << "Using cached result";
new_promise.set_result(cache_entry->result.value().clone());
return;
}
cache_entry->promises.push_back(std::move(new_promise));
if (cache_entry->started) {
LOG(INFO) << "Collating of this block is already in progress, waiting";
return;
}
cache_entry->started = true;
Expand Down
1 change: 0 additions & 1 deletion validator/full-node-shard.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -927,7 +927,6 @@ void FullNodeShardImpl::download_archive(BlockSeqno masterchain_seqno, std::stri
void FullNodeShardImpl::download_out_msg_queue_proof(BlockIdExt block_id, ShardIdFull dst_shard, td::Timestamp timeout,
td::Promise<td::Ref<OutMsgQueueProof>> promise) {
// TODO: maybe more complex download (like other requests here)
// TODO: estimate max size
auto &b = choose_neighbour(true);
if (b.adnl_id == adnl::AdnlNodeIdShort::zero()) {
promise.set_error(td::Status::Error(ErrorCode::notready, "no nodes"));
Expand Down
2 changes: 2 additions & 0 deletions validator/impl/collator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ bool Collator::fatal_error(td::Status error) {
error.ensure_error();
LOG(ERROR) << "cannot generate block candidate for " << show_shard(shard_) << " : " << error.to_string();
if (busy_) {
LOG(INFO) << "collation took " << perf_timer_.elapsed() << " s";
main_promise(std::move(error));
busy_ = false;
}
Expand Down Expand Up @@ -4226,6 +4227,7 @@ void Collator::return_block_candidate(td::Result<td::Unit> saved) {
} else {
CHECK(block_candidate);
LOG(INFO) << "sending new BlockCandidate to Promise";
LOG(INFO) << "collation took " << perf_timer_.elapsed() << " s";
main_promise(block_candidate->clone());
busy_ = false;
stop();
Expand Down
4 changes: 4 additions & 0 deletions validator/impl/validate-query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ bool ValidateQuery::reject_query(std::string error, td::BufferSlice reason) {
<< " collated_data=" << block_candidate.collated_file_hash.to_hex());
errorlog::ErrorLog::log_file(block_candidate.data.clone());
errorlog::ErrorLog::log_file(block_candidate.collated_data.clone());
LOG(INFO) << "validation took " << perf_timer_.elapsed() << " s";
main_promise.set_result(CandidateReject{std::move(error), std::move(reason)});
}
stop();
Expand All @@ -108,6 +109,7 @@ bool ValidateQuery::soft_reject_query(std::string error, td::BufferSlice reason)
<< " collated_data=" << block_candidate.collated_file_hash.to_hex());
errorlog::ErrorLog::log_file(block_candidate.data.clone());
errorlog::ErrorLog::log_file(block_candidate.collated_data.clone());
LOG(INFO) << "validation took " << perf_timer_.elapsed() << " s";
main_promise.set_result(CandidateReject{std::move(error), std::move(reason)});
}
stop();
Expand All @@ -126,6 +128,7 @@ bool ValidateQuery::fatal_error(td::Status error) {
errorlog::ErrorLog::log_file(block_candidate.data.clone());
errorlog::ErrorLog::log_file(block_candidate.collated_data.clone());
}
LOG(INFO) << "validation took " << perf_timer_.elapsed() << " s";
main_promise(std::move(error));
}
stop();
Expand All @@ -147,6 +150,7 @@ bool ValidateQuery::fatal_error(std::string err_msg, int err_code) {

void ValidateQuery::finish_query() {
if (main_promise) {
LOG(INFO) << "validation took " << perf_timer_.elapsed() << " s";
main_promise.set_result(now_);
}
stop();
Expand Down
36 changes: 20 additions & 16 deletions validator/validator-group.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ void ValidatorGroup::validate_block_candidate(td::uint32 round_id, BlockCandidat
P.set_error(td::Status::Error(ErrorCode::notready, "validator group not started"));
return;
}
VLOG(VALIDATOR_DEBUG) << "validating block candidate " << next_block_id;
VLOG(VALIDATOR_DEBUG) << "validating block candidate " << next_block_id.to_str();
block.id = next_block_id;
run_validate_query(shard_, min_masterchain_block_id_, prev_block_ids_, std::move(block), validator_set_, manager_,
td::Timestamp::in(15.0), std::move(P),
Expand Down Expand Up @@ -397,6 +397,7 @@ void ValidatorGroup::send_collate_query(td::uint32 round_id, td::Timestamp timeo
promise.set_error(td::Status::Error("too old"));
return;
}
BlockId next_block_id = create_next_block_id_simple();
adnl::AdnlNodeIdShort collator = adnl::AdnlNodeIdShort::zero();
// TODO: some way to choose node (similar to "unreliability" in full-node)
int cnt = 0;
Expand All @@ -413,20 +414,23 @@ void ValidatorGroup::send_collate_query(td::uint32 round_id, td::Timestamp timeo
return;
}

if (max_retries > 0) {
promise = td::PromiseCreator::lambda(
[=, SelfId = actor_id(this), promise = std::move(promise)](td::Result<BlockCandidate> R) mutable {
if (R.is_ok()) {
promise.set_result(R.move_as_ok());
} else if (timeout && timeout.is_in_past()) {
promise.set_result(R.move_as_error());
} else {
LOG(WARNING) << "collate query error, retrying: " << R.move_as_error();
td::actor::send_closure(SelfId, &ValidatorGroup::send_collate_query, round_id, timeout, std::move(promise),
max_retries - 1);
}
});
}
promise = td::PromiseCreator::lambda([=, SelfId = actor_id(this), promise = std::move(promise),
timer = td::Timer()](td::Result<BlockCandidate> R) mutable {
if (R.is_ok()) {
LOG(WARNING) << "collate query for " << next_block_id.to_str() << ": success, time=" << timer.elapsed() << "s";
promise.set_result(R.move_as_ok());
return;
}
bool retry = (!timeout || !timeout.is_in_past()) && max_retries > 0;
LOG(WARNING) << "collate query for " << next_block_id.to_str() << ": " << R.error() << ", time=" << timer.elapsed()
<< "s, " << (retry ? "retrying" : "giving up");
if (retry) {
td::actor::send_closure(SelfId, &ValidatorGroup::send_collate_query, round_id, timeout, std::move(promise),
max_retries - 1);
} else {
promise.set_result(td::Status::Error(ErrorCode::timeout, "timeout"));
}
});

std::vector<tl_object_ptr<ton_api::tonNode_blockIdExt>> prev_blocks;
for (const BlockIdExt &p : prev_block_ids_) {
Expand All @@ -445,7 +449,7 @@ void ValidatorGroup::send_collate_query(td::uint32 round_id, td::Timestamp timeo
td::actor::send_closure(SelfId, &ValidatorGroup::receive_collate_query_response, round_id, R.move_as_ok(),
std::move(promise));
});
LOG(INFO) << "collate query for " << create_next_block_id_simple().to_str() << ": send query to " << collator;
LOG(INFO) << "sending collate query for " << next_block_id.to_str() << ": send to " << collator;
size_t max_answer_size = config_.max_block_size + config_.max_collated_data_size + 256;
td::Timestamp query_timeout = td::Timestamp::in(10.0);
query_timeout.relax(timeout);
Expand Down

0 comments on commit aa4f576

Please sign in to comment.