From 0cc3232d6fc1d6cf74b5cfd46172c7d613b63576 Mon Sep 17 00:00:00 2001 From: lihangyu <15605149486@163.com> Date: Fri, 28 Jul 2023 17:56:18 +0800 Subject: [PATCH] [Improve](topn opt) modify fetch rpc timeout from 20s to 30s, since fetch is quite heavy sometimes (#22163) --- be/src/common/config.cpp | 2 +- be/src/service/internal_service.cpp | 50 +++++++++++++++++++++++++---- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 38065e34deca13..bbf7c3c6b0eb82 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -237,7 +237,7 @@ DEFINE_Int32(doris_max_remote_scanner_thread_pool_thread_num, "512"); DEFINE_Int32(doris_scanner_thread_pool_queue_size, "102400"); // default thrift client connect timeout(in seconds) DEFINE_mInt32(thrift_connect_timeout_seconds, "3"); -DEFINE_mInt32(fetch_rpc_timeout_seconds, "20"); +DEFINE_mInt32(fetch_rpc_timeout_seconds, "30"); // default thrift client retry interval (in milliseconds) DEFINE_mInt64(thrift_client_retry_interval_ms, "1000"); // max row count number for single scan range, used in segmentv1 diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp index 924e8295393743..2d0b20ce8f9ff5 100644 --- a/be/src/service/internal_service.cpp +++ b/be/src/service/internal_service.cpp @@ -1476,10 +1476,23 @@ void PInternalServiceImpl::response_slave_tablet_pull_rowset( } } +template <typename Func> +auto scope_timer_run(Func fn, int64_t* cost) -> decltype(fn()) { + MonotonicStopWatch watch; + watch.start(); + auto res = fn(); + *cost += watch.elapsed_time() / 1000 / 1000; + return res; +} + Status PInternalServiceImpl::_multi_get(const PMultiGetRequest& request, PMultiGetResponse* response) { OlapReaderStatistics stats; vectorized::Block result_block; + int64_t acquire_tablet_ms = 0; + int64_t acquire_rowsets_ms = 0; + int64_t acquire_segments_ms = 0; + int64_t lookup_row_data_ms = 0; // init desc TupleDescriptor desc(request.desc()); @@ 
-1501,16 +1514,21 @@ Status PInternalServiceImpl::_multi_get(const PMultiGetRequest& request, const auto& row_loc = request.row_locs(i); MonotonicStopWatch watch; watch.start(); - TabletSharedPtr tablet = StorageEngine::instance()->tablet_manager()->get_tablet( - row_loc.tablet_id(), true /*include deleted*/); + TabletSharedPtr tablet = scope_timer_run( + [&]() { + return StorageEngine::instance()->tablet_manager()->get_tablet( + row_loc.tablet_id(), true /*include deleted*/); + }, + &acquire_tablet_ms); RowsetId rowset_id; rowset_id.init(row_loc.rowset_id()); if (!tablet) { continue; } // We ensured it's rowset is not released when init Tablet reader param, rowset->update_delayed_expired_timestamp(); - BetaRowsetSharedPtr rowset = std::static_pointer_cast<BetaRowset>( - StorageEngine::instance()->get_quering_rowset(rowset_id)); + BetaRowsetSharedPtr rowset = std::static_pointer_cast<BetaRowset>(scope_timer_run( + [&]() { return StorageEngine::instance()->get_quering_rowset(rowset_id); }, + &acquire_rowsets_ms)); if (!rowset) { LOG(INFO) << "no such rowset " << rowset_id; continue; @@ -1523,7 +1541,11 @@ Status PInternalServiceImpl::_multi_get(const PMultiGetRequest& request, *response->add_row_locs() = row_loc; }); SegmentCacheHandle segment_cache; - RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(rowset, &segment_cache, true)); + RETURN_IF_ERROR(scope_timer_run( + [&]() { + return SegmentLoader::instance()->load_segments(rowset, &segment_cache, true); + }, + &acquire_segments_ms)); // find segment auto it = std::find_if(segment_cache.get_segments().begin(), segment_cache.get_segments().end(), @@ -1541,7 +1563,11 @@ Status PInternalServiceImpl::_multi_get(const PMultiGetRequest& request, CHECK(tablet->tablet_schema()->store_row_column()); RowLocation loc(rowset_id, segment->id(), row_loc.ordinal_id()); string* value = response->add_binary_row_data(); - RETURN_IF_ERROR(tablet->lookup_row_data({}, loc, rowset, &desc, stats, *value)); + RETURN_IF_ERROR(scope_timer_run( + [&]() { + 
return tablet->lookup_row_data({}, loc, rowset, &desc, stats, *value); + }, + &lookup_row_data_ms)); row_size = value->size(); continue; } @@ -1570,7 +1596,6 @@ Status PInternalServiceImpl::_multi_get(const PMultiGetRequest& request, RETURN_IF_ERROR( segment->new_column_iterator(full_read_schema.column(index), &column_iterator)); segment_v2::ColumnIteratorOptions opt; - OlapReaderStatistics stats; opt.file_reader = segment->file_reader().get(); opt.stats = &stats; opt.use_page_cache = !config::disable_storage_page_cache; @@ -1591,6 +1616,17 @@ Status PInternalServiceImpl::_multi_get(const PMultiGetRequest& request, &uncompressed_size, &compressed_size, segment_v2::CompressionTypePB::LZ4)); } + + LOG(INFO) << "Query stats: " + << fmt::format( + "hit_cached_pages:{}, total_pages_read:{}, compressed_bytes_read:{}, " + "io_latency:{}ns, " + "uncompressed_bytes_read:{}," + "acquire_tablet_ms:{}, acquire_rowsets_ms:{}, acquire_segments_ms:{}, " + "lookup_row_data_ms:{}", + stats.cached_pages_num, stats.total_pages_num, stats.compressed_bytes_read, + stats.io_ns, stats.uncompressed_bytes_read, acquire_tablet_ms, + acquire_rowsets_ms, acquire_segments_ms, lookup_row_data_ms); return Status::OK(); }