Skip to content

Commit

Permalink
WIP: EnforceReadOpts
Browse files Browse the repository at this point in the history
Summary: Follow-up from facebook#12757. New infrastructure in DEBUG builds to
catch certain ReadOptions being ignored in a context where they should
be in effect. This currently only applies to checking that no IO happens
when read_tier == kBlockCacheTier. The check is in effect for unit tests
and for stress/crash tests.

Specifically, an `EnforceReadOpts` object on the stack establishes a
thread-local context under which we assert that no IO is performed when
the provided ReadOptions forbid it.

Test Plan: Reports failure before production code fix in facebook#12757
  • Loading branch information
pdillinger committed Jun 12, 2024
1 parent d64eac2 commit cc86a22
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 2 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -851,6 +851,7 @@ set(SOURCES
table/sst_file_writer.cc
table/table_factory.cc
table/table_properties.cc
table/table_reader.cc
table/two_level_iterator.cc
table/unique_id.cc
test_util/sync_point.cc
Expand Down
1 change: 1 addition & 0 deletions TARGETS
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ cpp_library_wrapper(name="rocksdb_lib", srcs=[
"table/sst_file_writer.cc",
"table/table_factory.cc",
"table/table_properties.cc",
"table/table_reader.cc",
"table/two_level_iterator.cc",
"table/unique_id.cc",
"test_util/sync_point.cc",
Expand Down
1 change: 1 addition & 0 deletions src.mk
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ LIB_SOURCES = \
table/sst_file_writer.cc \
table/table_factory.cc \
table/table_properties.cc \
table/table_reader.cc \
table/two_level_iterator.cc \
table/unique_id.cc \
test_util/sync_point.cc \
Expand Down
8 changes: 6 additions & 2 deletions table/block_based/block_based_table_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,9 @@ extern const std::string kHashIndexPrefixesMetadataBlock;
BlockBasedTable::~BlockBasedTable() {
auto ua = rep_->uncache_aggressiveness.LoadRelaxed();
if (ua > 0 && rep_->table_options.block_cache) {
ReadOptions ropts;
ropts.read_tier = kBlockCacheTier; // No I/O
EnforceReadOpts enforce(ropts);
if (rep_->filter) {
rep_->filter->EraseFromCacheBeforeDestruction(ua);
}
Expand All @@ -147,8 +150,6 @@ BlockBasedTable::~BlockBasedTable() {
// index. Right now the iterator errors out as soon as there's an
// index partition not in cache.
IndexBlockIter iiter_on_stack;
ReadOptions ropts;
ropts.read_tier = kBlockCacheTier; // No I/O
auto iiter = NewIndexIterator(
ropts, /*disable_prefix_seek=*/false, &iiter_on_stack,
/*get_context=*/nullptr, /*lookup_context=*/nullptr);
Expand Down Expand Up @@ -194,6 +195,7 @@ Status ReadAndParseBlockFromFile(
MemoryAllocator* memory_allocator, bool for_compaction, bool async_read) {
assert(result);

EnforceReadOpts enforce(options);
BlockContents contents;
BlockFetcher block_fetcher(
file, prefetch_buffer, footer, options, handle, &contents, ioptions,
Expand Down Expand Up @@ -1520,6 +1522,7 @@ InternalIteratorBase<IndexValue>* BlockBasedTable::NewIndexIterator(
BlockCacheLookupContext* lookup_context) const {
assert(rep_ != nullptr);
assert(rep_->index_reader != nullptr);
EnforceReadOpts enforce(read_options);

// We don't return pinned data from index blocks, so no need
// to set `block_contents_pinned`.
Expand Down Expand Up @@ -1622,6 +1625,7 @@ BlockBasedTable::MaybeReadBlockAndLoadToCache(
GetContext* get_context, BlockCacheLookupContext* lookup_context,
BlockContents* contents, bool async_read,
bool use_block_cache_for_lookup) const {
EnforceReadOpts enforce(ro);
assert(out_parsed_block != nullptr);
const bool no_io = (ro.read_tier == kBlockCacheTier);
BlockCacheInterface<TBlocklike> block_cache{
Expand Down
1 change: 1 addition & 0 deletions table/block_based/block_prefetcher.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ void BlockPrefetcher::PrefetchIfNeeded(
if (!s.ok()) {
return;
}
EnforceReadOpts::UsedIO();
s = rep->file->Prefetch(opts, offset, len + compaction_readahead_size_);
if (s.ok()) {
readahead_limit_ = offset + len + compaction_readahead_size_;
Expand Down
2 changes: 2 additions & 0 deletions table/block_fetcher.cc
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ void BlockFetcher::ReadBlock(bool retry) {
read_req.status.PermitUncheckedError();
// Actual file read
if (io_status_.ok()) {
EnforceReadOpts::UsedIO();
if (file_->use_direct_io()) {
PERF_TIMER_GUARD(block_read_time);
PERF_CPU_TIMER_GUARD(
Expand Down Expand Up @@ -410,6 +411,7 @@ IOStatus BlockFetcher::ReadAsyncBlockContents() {
if (!io_s.ok()) {
return io_s;
}
EnforceReadOpts::UsedIO();
io_s = status_to_io_status(prefetch_buffer_->PrefetchAsync(
opts, file_, handle_.offset(), block_size_with_trailer_, &slice_));
if (io_s.IsTryAgain()) {
Expand Down
35 changes: 35 additions & 0 deletions table/table_reader.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#include "table/table_reader.h"

namespace ROCKSDB_NAMESPACE {

#ifndef NDEBUG
namespace {
// Per-thread flag tying together nested EnforceReadOpts scopes and the
// UsedIO() checks made on the same thread.
thread_local bool tl_enforce_block_cache_tier = false;
}  // namespace

EnforceReadOpts::EnforceReadOpts(const ReadOptions& read_opts)
    : saved_enforce_block_cache_tier(tl_enforce_block_cache_tier) {
  const bool memory_only = read_opts.read_tier == ReadTier::kBlockCacheTier;
  if (memory_only) {
    tl_enforce_block_cache_tier = true;
  } else {
    // A read that may do IO must not be initiated while an enclosing scope
    // has restricted this thread to memory-only (block cache tier) reads.
    assert(!tl_enforce_block_cache_tier);
  }
}

EnforceReadOpts::~EnforceReadOpts() {
  // Restore the flag captured at construction, so nested scopes unwind
  // correctly.
  tl_enforce_block_cache_tier = saved_enforce_block_cache_tier;
}

void EnforceReadOpts::UsedIO() {
  // Performing IO is a violation when this thread is inside a
  // kBlockCacheTier-only (no-IO) scope.
  assert(!tl_enforce_block_cache_tier);
}
#endif

}  // namespace ROCKSDB_NAMESPACE
34 changes: 34 additions & 0 deletions table/table_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,4 +199,38 @@ class TableReader {
}
};

// A checker that ReadOptions are respected within a call, active only in
// DEBUG builds (compiled away to a no-op under NDEBUG). An EnforceReadOpts
// should be created on the stack and (with RAII) only destroyed when the
// function (or the ReadOptions) are finished. The enforcement context is
// thread-local, and the constructor/destructor save and restore the prior
// state, so instances may nest on the same thread.
// Currently this only checks that IO functions are not called within a call
// using ReadOptions::read_tier = kBlockCacheTier.
class EnforceReadOpts {
 public:
  // no copies (also suppresses implicit moves)
  EnforceReadOpts(const EnforceReadOpts&) = delete;
  EnforceReadOpts& operator=(const EnforceReadOpts&) = delete;

#ifdef NDEBUG
  // Optimize away to nothing in release build
  explicit EnforceReadOpts(const ReadOptions&) {}
  static void UsedIO() {}

 private:
#else
  // This thread enters a context in which these read options are enforced.
  // If the options forbid IO (read_tier == kBlockCacheTier), asserts fire on
  // any subsequent UsedIO() call from this thread until destruction.
  explicit EnforceReadOpts(const ReadOptions& read_opts);

  // Restores old context
  ~EnforceReadOpts();

  // Called from IO functions to check that context permits them
  static void UsedIO();

 private:
  // For saving previous values of thread-local flags, to be restored in
  // destructor.
  bool saved_enforce_block_cache_tier;
#endif
};

} // namespace ROCKSDB_NAMESPACE

0 comments on commit cc86a22

Please sign in to comment.