From ecd06cfef86f72a16af834c47ff2d6ea025e427c Mon Sep 17 00:00:00 2001
From: JinyongHa
Date: Tue, 22 Feb 2022 06:13:16 +0000
Subject: [PATCH] dedup_tool: fixed mistaken exception handling and defined
 debug out macro

---
 src/tools/ceph_dedup_tool.cc | 166 ++++++++++++++++++-----------------
 1 file changed, 84 insertions(+), 82 deletions(-)

diff --git a/src/tools/ceph_dedup_tool.cc b/src/tools/ceph_dedup_tool.cc
index ab1ea3b884de74..02c9ba0f152ecb 100644
--- a/src/tools/ceph_dedup_tool.cc
+++ b/src/tools/ceph_dedup_tool.cc
@@ -563,6 +563,8 @@ void ChunkScrub::chunk_scrub_common()
   cout << "--done--" << std::endl;
 }
 
+#define DEBUG_OUT(x) if(debug==1){std::cout<<x;}
+
@@ ... @@ class SampleDedup : public CrawlerThread
   static std::unordered_set<std::string> flushed_objects;
   static std::shared_mutex flushed_lock;
-  std::list<ObjectItem> dedupable_objects;
   size_t chunk_size;
   fp_type_t fp_type;
 };
@@ -701,61 +702,68 @@ SampleDedup::fp_type_t SampleDedup::get_fp_type(string fp_algo) {
 }
 
 void SampleDedup::crawl() {
-  prepare_rados();
-  ObjectCursor shard_start;
-  ObjectCursor shard_end;
-  std::tie(shard_start, shard_end) = get_shard_boundary();
-  cout << "new iteration thread: " << n << std::endl;
-
-  ObjectCursor current_object = shard_start;
-  while (current_object < shard_end) {
-    std::vector<ObjectItem> objects;
-    // Get the list of object IDs to deduplicate
-    std::tie(objects, current_object) = get_objects(
-      current_object,
-      shard_end,
-      100);
-
-    // Pick few objects to be processed. Crawling mode decides how many
-    // objects to pick (sampling ratio). Lower sampling ratio makes crawler
-    // have lower crawling overhead but find less duplication.
-    std::set<size_t> sampled_indexes = sample_object(objects.size());
-    for (size_t index : sampled_indexes) {
-      ObjectItem target = objects[index];
-      // Only process dirty objects which are expected not processed yet
-      if (is_dirty(target)) {
-        try_dedup_and_accumulate_result(target);
+  try {
+    if (fp_type == fp_type_t::UNKNOWN) {
+      throw std::logic_error("unknown fingerprint algorithm");
+    }
+    prepare_rados();
+    ObjectCursor shard_start;
+    ObjectCursor shard_end;
+    std::tie(shard_start, shard_end) = get_shard_boundary();
+    cout << "new iteration thread: " << n << std::endl;
+
+    ObjectCursor current_object = shard_start;
+    while (current_object < shard_end) {
+      std::vector<ObjectItem> objects;
+      // Get the list of object IDs to deduplicate
+      std::tie(objects, current_object) = get_objects(
+        current_object,
+        shard_end,
+        100);
+
+      // Pick few objects to be processed. Crawling mode decides how many
+      // objects to pick (sampling ratio). Lower sampling ratio makes crawler
+      // have lower crawling overhead but find less duplication.
+      std::set<size_t> sampled_indexes = sample_object(objects.size());
+      for (size_t index : sampled_indexes) {
+        ObjectItem target = objects[index];
+        // Only process dirty objects which are expected not processed yet
+        if (is_dirty(target)) {
+          try_dedup_and_accumulate_result(target);
+        }
       }
     }
-  }
 
-  map<std::string, AioCompletion*> set_chunk_completions;
-  // Do set_chunk to make found duplicable chunks can be evicted by tier_evict()
-  for (auto& duplicable_chunk : duplicable_chunks) {
-    auto completion = set_chunk(duplicable_chunk);
-    if (completion != nullptr) {
-      set_chunk_completions[duplicable_chunk.oid] = completion;
+    map<std::string, AioCompletion*> set_chunk_completions;
+    // Do set_chunk to make found duplicable chunks can be evicted by tier_evict()
+    for (auto& duplicable_chunk : duplicable_chunks) {
+      auto completion = set_chunk(duplicable_chunk);
+      if (completion != nullptr) {
+        set_chunk_completions[duplicable_chunk.oid] = completion;
+      }
     }
-  }
 
-  vector<AioCompletion*> evict_completions;
-  for (auto& oid : oid_for_evict) {
-    auto completion_iter = set_chunk_completions.find(oid);
-    // Related set_chunk should be completed before tier_evict because
-    // tier_evict() only evict data processed by set_chunk() or tier_flush()
-    if (completion_iter != set_chunk_completions.end()) {
-      auto completion = completion_iter->second;
+    vector<AioCompletion*> evict_completions;
+    for (auto& oid : oid_for_evict) {
+      auto completion_iter = set_chunk_completions.find(oid);
+      // Related set_chunk should be completed before tier_evict because
+      // tier_evict() only evict data processed by set_chunk() or tier_flush()
+      if (completion_iter != set_chunk_completions.end()) {
+        auto completion = completion_iter->second;
+        completion->wait_for_complete();
+        delete completion;
+      }
+      auto completion = do_evict(oid);
+      evict_completions.push_back(completion);
+    }
+    for (auto& completion : evict_completions) {
       completion->wait_for_complete();
       delete completion;
     }
-    auto completion = do_evict(oid);
-    evict_completions.push_back(completion);
+  } catch (std::exception& e) {
+    cerr << "exception : " << e.what() << std::endl;
   }
-  for (auto& completion : evict_completions) {
-    completion->wait_for_complete();
-    delete completion;
-  }
-  cout << "done iteration thread: " << n << std::endl;
+  cout << "done iteration thread: " << n << std::endl;
 }
 
@@ ... @@ std::tuple<std::vector<ObjectItem>, ObjectCursor> SampleDedup::get_objects(
     &objects,
     &next);
   if (ret < 0 ) {
-    cerr << "error object_list : " << cpp_strerror(ret) << std::endl;
-    throw std::exception();
+    throw system_error(ret, generic_category(), "error object_list");
   }
 
   return std::make_tuple(objects, next);
@@ -886,16 +899,13 @@ void SampleDedup::try_dedup_and_accumulate_result(ObjectItem& object) {
       .fingerprint = fingerprint,
       .data = chunk_data
     };
-    if (debug) {
-      cout << "check " << chunk_info.oid << " fp " << fingerprint << " " <<
-        chunk_info.start << ", " << chunk_info.size << std::endl;
-    }
+
+    DEBUG_OUT("check " << chunk_info.oid << " fp " << fingerprint << " "
+      << chunk_info.start << ", " << chunk_info.size << std::endl);
     if (check_duplicated(fingerprint)) {
-      if (debug) {
-        cout << "duplication oid " << chunk_info.oid << " " <<
-          chunk_info.fingerprint << " " << chunk_info.start <<
-          ", " << chunk_info.size << std::endl;
-      }
+      DEBUG_OUT("duplication oid " << chunk_info.oid << " "
+        << chunk_info.fingerprint << " " << chunk_info.start << ", "
+        << chunk_info.size << std::endl);
 
       add_duplication(chunk_info);
       duplicated_size += chunk_data.length();
@@ -905,16 +915,13 @@ void SampleDedup::try_dedup_and_accumulate_result(ObjectItem& object) {
   }
 
   size_t object_size = data.length();
-  if (debug) {
-    cout << "oid " << object.oid << " object_size " << object_size <<
" dup size " << duplicated_size << std::endl; - } + + DEBUG_OUT("oid " << object.oid << " object_size " << object_size + << " dup size " << duplicated_size << std::endl); // if the chunks in an object are duplicated higher than object_dedup_threshold, // try deduplicate whole object via tier_flush if (check_whole_object_dedupable(duplicated_size, object_size)) { - if (debug) { - cout << "dedup object " << object.oid << std::endl; - } + DEBUG_OUT("dedup object " << object.oid << std::endl); flush_duplicable_object(object); } @@ -926,16 +933,15 @@ void SampleDedup::try_dedup_and_accumulate_result(ObjectItem& object) { bufferlist SampleDedup::read_object(ObjectItem& object) { bufferlist whole_data; size_t offset = 0; - if (debug) { - cout << "read object " << object.oid << std::endl; - } + DEBUG_OUT("read object " << object.oid << std::endl); int ret = -1; while (ret != 0) { bufferlist partial_data; ret = io_ctx.read(object.oid, partial_data, max_read_size, offset); if (ret < 0) { cerr << "read object error " << object.oid << " offset " << offset - << " size " << max_read_size << std::endl; + << " size " << max_read_size << " error(" << cpp_strerror(ret) + << std::endl; bufferlist empty_buf; return empty_buf; } @@ -1034,9 +1040,7 @@ bool SampleDedup::check_whole_object_dedupable( void SampleDedup::flush_duplicable_object(ObjectItem& object) { ObjectReadOperation op; op.tier_flush(); - if (debug) { - cout << "try flush " << object.oid << " " << &flushed_objects< &opts, iterative = true; } string base_pool_name; - auto i = opts.find("pool"); + i = opts.find("pool"); if (i != opts.end()) { base_pool_name = i->second.c_str(); } else {