From cc86a22eb1ee7fbb0b015b4517006a79c5b58af0 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Wed, 12 Jun 2024 11:15:10 -0700 Subject: [PATCH] WIP: EnforceReadOpts Summary: Follow-up from #12757. New infrastructure in DEBUG builds to catch certain ReadOptions being ignored in a context where they should be in effect. This currently only applies to checking that no IO happens when read_tier == kBlockCacheTier. The check is in effect for unit tests and for stress/crash tests. Specifically, an `EnforceReadOpts` object on the stack establishes a thread-local context under which we assert no IOs are performed if the provided ReadOptions said it should be forbidden. Test Plan: Reports failure before production code fix in #12757 --- CMakeLists.txt | 1 + TARGETS | 1 + src.mk | 1 + table/block_based/block_based_table_reader.cc | 8 +++-- table/block_based/block_prefetcher.cc | 1 + table/block_fetcher.cc | 2 ++ table/table_reader.cc | 35 +++++++++++++++++++ table/table_reader.h | 34 ++++++++++++++++++ 8 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 table/table_reader.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index c2f5ecd011f..10c0afb59ad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -851,6 +851,7 @@ set(SOURCES table/sst_file_writer.cc table/table_factory.cc table/table_properties.cc + table/table_reader.cc table/two_level_iterator.cc table/unique_id.cc test_util/sync_point.cc diff --git a/TARGETS b/TARGETS index a274da1518b..09711875bf1 100644 --- a/TARGETS +++ b/TARGETS @@ -229,6 +229,7 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[ "table/sst_file_writer.cc", "table/table_factory.cc", "table/table_properties.cc", + "table/table_reader.cc", "table/two_level_iterator.cc", "table/unique_id.cc", "test_util/sync_point.cc", diff --git a/src.mk b/src.mk index 23cf348e1eb..81fe095c750 100644 --- a/src.mk +++ b/src.mk @@ -221,6 +221,7 @@ LIB_SOURCES = \ table/sst_file_writer.cc \ table/table_factory.cc \ table/table_properties.cc \ + 
table/table_reader.cc \ table/two_level_iterator.cc \ table/unique_id.cc \ test_util/sync_point.cc \ diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index af46b4dffba..92c0e070a08 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -138,6 +138,9 @@ extern const std::string kHashIndexPrefixesMetadataBlock; BlockBasedTable::~BlockBasedTable() { auto ua = rep_->uncache_aggressiveness.LoadRelaxed(); if (ua > 0 && rep_->table_options.block_cache) { + ReadOptions ropts; + ropts.read_tier = kBlockCacheTier; // No I/O + EnforceReadOpts enforce(ropts); if (rep_->filter) { rep_->filter->EraseFromCacheBeforeDestruction(ua); } @@ -147,8 +150,6 @@ BlockBasedTable::~BlockBasedTable() { // index. Right now the iterator errors out as soon as there's an // index partition not in cache. IndexBlockIter iiter_on_stack; - ReadOptions ropts; - ropts.read_tier = kBlockCacheTier; // No I/O auto iiter = NewIndexIterator( ropts, /*disable_prefix_seek=*/false, &iiter_on_stack, /*get_context=*/nullptr, /*lookup_context=*/nullptr); @@ -194,6 +195,7 @@ Status ReadAndParseBlockFromFile( MemoryAllocator* memory_allocator, bool for_compaction, bool async_read) { assert(result); + EnforceReadOpts enforce(options); BlockContents contents; BlockFetcher block_fetcher( file, prefetch_buffer, footer, options, handle, &contents, ioptions, @@ -1520,6 +1522,7 @@ InternalIteratorBase* BlockBasedTable::NewIndexIterator( BlockCacheLookupContext* lookup_context) const { assert(rep_ != nullptr); assert(rep_->index_reader != nullptr); + EnforceReadOpts enforce(read_options); // We don't return pinned data from index blocks, so no need // to set `block_contents_pinned`. 
@@ -1622,6 +1625,7 @@ BlockBasedTable::MaybeReadBlockAndLoadToCache( GetContext* get_context, BlockCacheLookupContext* lookup_context, BlockContents* contents, bool async_read, bool use_block_cache_for_lookup) const { + EnforceReadOpts enforce(ro); assert(out_parsed_block != nullptr); const bool no_io = (ro.read_tier == kBlockCacheTier); BlockCacheInterface block_cache{ diff --git a/table/block_based/block_prefetcher.cc b/table/block_based/block_prefetcher.cc index ec8a91868ce..1871ae0a320 100644 --- a/table/block_based/block_prefetcher.cc +++ b/table/block_based/block_prefetcher.cc @@ -37,6 +37,7 @@ void BlockPrefetcher::PrefetchIfNeeded( if (!s.ok()) { return; } + EnforceReadOpts::UsedIO(); s = rep->file->Prefetch(opts, offset, len + compaction_readahead_size_); if (s.ok()) { readahead_limit_ = offset + len + compaction_readahead_size_; diff --git a/table/block_fetcher.cc b/table/block_fetcher.cc index 1316d7302dd..cbce8acf516 100644 --- a/table/block_fetcher.cc +++ b/table/block_fetcher.cc @@ -249,6 +249,7 @@ void BlockFetcher::ReadBlock(bool retry) { read_req.status.PermitUncheckedError(); // Actual file read if (io_status_.ok()) { + EnforceReadOpts::UsedIO(); if (file_->use_direct_io()) { PERF_TIMER_GUARD(block_read_time); PERF_CPU_TIMER_GUARD( @@ -410,6 +411,7 @@ IOStatus BlockFetcher::ReadAsyncBlockContents() { if (!io_s.ok()) { return io_s; } + EnforceReadOpts::UsedIO(); io_s = status_to_io_status(prefetch_buffer_->PrefetchAsync( opts, file_, handle_.offset(), block_size_with_trailer_, &slice_)); if (io_s.IsTryAgain()) { diff --git a/table/table_reader.cc b/table/table_reader.cc new file mode 100644 index 00000000000..745f0288c1c --- /dev/null +++ b/table/table_reader.cc @@ -0,0 +1,35 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include "table/table_reader.h" + +namespace ROCKSDB_NAMESPACE { + +#ifndef NDEBUG +namespace { +// Thread local flags to connect different instances and calls to +// EnforceReadOpts within a thread. +thread_local bool tl_enforce_block_cache_tier = false; +} // namespace + +EnforceReadOpts::EnforceReadOpts(const ReadOptions& read_opts) { + saved_enforce_block_cache_tier = tl_enforce_block_cache_tier; + if (read_opts.read_tier == ReadTier::kBlockCacheTier) { + tl_enforce_block_cache_tier = true; + } else { + // We should not be entertaining a full read in a context only allowing + // memory-only reads. + assert(tl_enforce_block_cache_tier == false); + } +} + +EnforceReadOpts::~EnforceReadOpts() { + tl_enforce_block_cache_tier = saved_enforce_block_cache_tier; +} + +void EnforceReadOpts::UsedIO() { assert(!tl_enforce_block_cache_tier); } +#endif + +} // namespace ROCKSDB_NAMESPACE diff --git a/table/table_reader.h b/table/table_reader.h index 9faf8c1c3e5..26ba5c84d01 100644 --- a/table/table_reader.h +++ b/table/table_reader.h @@ -199,4 +199,38 @@ class TableReader { } }; +// A checker that ReadOptions are respected within a call, only in DEBUG +// builds. An EnforceReadOpts should be created on the stack and (with RAII) +// only destroyed when the function (or the ReadOptions) are finished. +// Currently this only checks that IO functions are not called within a call +// using ReadOptions::read_tier = kBlockCacheTier. 
+class EnforceReadOpts { + public: + // no copies + EnforceReadOpts(const EnforceReadOpts&) = delete; + EnforceReadOpts& operator=(const EnforceReadOpts&) = delete; + +#ifdef NDEBUG + // Optimize away to nothing in release build + explicit EnforceReadOpts(const ReadOptions&) {} + static void UsedIO() {} + + private: +#else + // This thread enters a context in which these read options are enforced + explicit EnforceReadOpts(const ReadOptions& read_opts); + + // Restores old context + ~EnforceReadOpts(); + + // Called from IO functions to check that context permits them + static void UsedIO(); + + private: + // For saving previous values of thread-local flags, to be restored in + // destructor. + bool saved_enforce_block_cache_tier; +#endif +}; + } // namespace ROCKSDB_NAMESPACE