From 5893d5e2a1f27dcdc2ca74f06a7fdb207be0237a Mon Sep 17 00:00:00 2001 From: Andrew Kryczka Date: Fri, 5 Feb 2021 15:55:34 -0800 Subject: [PATCH] Allow range deletions in `*TransactionDB` only when safe (#7929) Summary: Explicitly reject all range deletions on `TransactionDB` or `OptimisticTransactionDB`, except when the user provides sufficient promises that allow us to proceed safely. The necessary promises are described in the API doc for `TransactionDB::DeleteRange()`. There is currently no way to provide enough promises to make it safe in `OptimisticTransactionDB`. Fixes https://github.com/facebook/rocksdb/issues/7913. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7929 Test Plan: unit tests covering the cases it's permitted/rejected Reviewed By: ltamasi Differential Revision: D26240254 Pulled By: ajkr fbshipit-source-id: 2834a0ce64cc3e4c3799e35b885a5e79c2f4f6d9 --- HISTORY.md | 5 ++ .../utilities/optimistic_transaction_db.h | 2 + include/rocksdb/utilities/transaction_db.h | 11 ++++ .../optimistic_transaction_db_impl.h | 16 ++++++ .../optimistic_transaction_test.cc | 11 ++++ utilities/transactions/transaction_test.cc | 50 +++++++++++++++++++ .../transactions/write_prepared_txn_db.cc | 4 +- 7 files changed, 98 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 3233385ab59..643c745d639 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,4 +1,9 @@ # Rocksdb Change Log +## Unreleased +### Bug Fixes +* Since 6.15.0, `TransactionDB` returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees. There are certain cases where range deletion can still be used on such DBs; see the API doc on `TransactionDB::DeleteRange()` for details. +* `OptimisticTransactionDB` now returns error `Status`es from calls to `DeleteRange()` and calls to `Write()` where the `WriteBatch` contains a range deletion. Previously such operations may have succeeded while not providing the expected transactional guarantees. + ## 6.15.4 (01/21/2021) ### Bug Fixes * Fix a race condition between DB startups and shutdowns in managing the periodic background worker threads. One effect of this race condition could be the process being terminated. diff --git a/include/rocksdb/utilities/optimistic_transaction_db.h b/include/rocksdb/utilities/optimistic_transaction_db.h index 5356df71f39..c070e49a309 100644 --- a/include/rocksdb/utilities/optimistic_transaction_db.h +++ b/include/rocksdb/utilities/optimistic_transaction_db.h @@ -51,6 +51,8 @@ struct OptimisticTransactionDBOptions { uint32_t occ_lock_buckets = (1 << 20); }; +// Range deletions (including those in `WriteBatch`es passed to `Write()`) are +// incompatible with `OptimisticTransactionDB` and will return a non-OK `Status` class OptimisticTransactionDB : public StackableDB { public: // Open an OptimisticTransactionDB similar to DB::Open(). diff --git a/include/rocksdb/utilities/transaction_db.h b/include/rocksdb/utilities/transaction_db.h index 2e1a0a17139..71416f55b3d 100644 --- a/include/rocksdb/utilities/transaction_db.h +++ b/include/rocksdb/utilities/transaction_db.h @@ -244,6 +244,17 @@ class TransactionDB : public StackableDB { // falls back to the un-optimized version of ::Write return Write(opts, updates); } + // Transactional `DeleteRange()` is not yet supported. + // However, users who know their deleted range does not conflict with + // anything can still use it via the `Write()` API. In all cases, the + // `Write()` overload specifying `TransactionDBWriteOptimizations` must be + // used and `skip_concurrency_control` must be set. When using either + // WRITE_PREPARED or WRITE_UNPREPARED , `skip_duplicate_key_check` must + // additionally be set. + virtual Status DeleteRange(const WriteOptions&, ColumnFamilyHandle*, + const Slice&, const Slice&) override { + return Status::NotSupported(); + } // Open a TransactionDB similar to DB::Open(). // Internally call PrepareWrap() and WrapDB() // If the return status is not ok, then dbptr is set to nullptr. diff --git a/utilities/transactions/optimistic_transaction_db_impl.h b/utilities/transactions/optimistic_transaction_db_impl.h index d895d49b896..a23d9a06d7c 100644 --- a/utilities/transactions/optimistic_transaction_db_impl.h +++ b/utilities/transactions/optimistic_transaction_db_impl.h @@ -46,6 +46,22 @@ class OptimisticTransactionDBImpl : public OptimisticTransactionDB { const OptimisticTransactionOptions& txn_options, Transaction* old_txn) override; + // Transactional `DeleteRange()` is not yet supported. + virtual Status DeleteRange(const WriteOptions&, ColumnFamilyHandle*, + const Slice&, const Slice&) override { + return Status::NotSupported(); + } + + // Range deletions also must not be snuck into `WriteBatch`es as they are + // incompatible with `OptimisticTransactionDB`. + virtual Status Write(const WriteOptions& write_opts, + WriteBatch* batch) override { + if (batch->HasDeleteRange()) { + return Status::NotSupported(); + } + return OptimisticTransactionDB::Write(write_opts, batch); + } + size_t GetLockBucketsSize() const { return bucketed_locks_.size(); } OccValidationPolicy GetValidatePolicy() const { return validate_policy_; } diff --git a/utilities/transactions/optimistic_transaction_test.cc b/utilities/transactions/optimistic_transaction_test.cc index 21c8cdac4ee..3256c2405ff 100644 --- a/utilities/transactions/optimistic_transaction_test.cc +++ b/utilities/transactions/optimistic_transaction_test.cc @@ -1098,6 +1098,17 @@ TEST_P(OptimisticTransactionTest, IteratorTest) { delete txn; } +TEST_P(OptimisticTransactionTest, DeleteRangeSupportTest) { + // `OptimisticTransactionDB` does not allow range deletion in any API. + ASSERT_TRUE( + txn_db + ->DeleteRange(WriteOptions(), txn_db->DefaultColumnFamily(), "a", "b") + .IsNotSupported()); + WriteBatch wb; + ASSERT_OK(wb.DeleteRange("a", "b")); + ASSERT_NOK(txn_db->Write(WriteOptions(), &wb)); +} + TEST_P(OptimisticTransactionTest, SavepointTest) { WriteOptions write_options; ReadOptions read_options, snapshot_read_options; diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc index 72068cb85d6..f52c0be942f 100644 --- a/utilities/transactions/transaction_test.cc +++ b/utilities/transactions/transaction_test.cc @@ -4835,6 +4835,56 @@ TEST_P(TransactionTest, MergeTest) { ASSERT_EQ("a,3", value); } +TEST_P(TransactionTest, DeleteRangeSupportTest) { + // The `DeleteRange()` API is banned everywhere. + ASSERT_TRUE( + db->DeleteRange(WriteOptions(), db->DefaultColumnFamily(), "a", "b") + .IsNotSupported()); + + // But range deletions can be added via the `Write()` API by specifying the + // proper flags to promise there are no conflicts according to the DB type + // (see `TransactionDB::DeleteRange()` API doc for details). + for (bool skip_concurrency_control : {false, true}) { + for (bool skip_duplicate_key_check : {false, true}) { + ASSERT_OK(db->Put(WriteOptions(), "a", "val")); + WriteBatch wb; + ASSERT_OK(wb.DeleteRange("a", "b")); + TransactionDBWriteOptimizations flags; + flags.skip_concurrency_control = skip_concurrency_control; + flags.skip_duplicate_key_check = skip_duplicate_key_check; + Status s = db->Write(WriteOptions(), flags, &wb); + std::string value; + switch (txn_db_options.write_policy) { + case WRITE_COMMITTED: + if (skip_concurrency_control) { + ASSERT_OK(s); + ASSERT_TRUE(db->Get(ReadOptions(), "a", &value).IsNotFound()); + } else { + ASSERT_NOK(s); + ASSERT_OK(db->Get(ReadOptions(), "a", &value)); + } + break; + case WRITE_PREPARED: + // Intentional fall-through + case WRITE_UNPREPARED: + if (skip_concurrency_control && skip_duplicate_key_check) { + ASSERT_OK(s); + ASSERT_TRUE(db->Get(ReadOptions(), "a", &value).IsNotFound()); + } else { + ASSERT_NOK(s); + ASSERT_OK(db->Get(ReadOptions(), "a", &value)); + } + break; + } + // Without any promises from the user, range deletion via other `Write()` + // APIs are still banned. + ASSERT_OK(db->Put(WriteOptions(), "a", "val")); + ASSERT_NOK(db->Write(WriteOptions(), &wb)); + ASSERT_OK(db->Get(ReadOptions(), "a", &value)); + } + } +} + TEST_P(TransactionTest, DeferSnapshotTest) { WriteOptions write_options; ReadOptions read_options; diff --git a/utilities/transactions/write_prepared_txn_db.cc b/utilities/transactions/write_prepared_txn_db.cc index a1b67bd1ff7..e26b1f0746e 100644 --- a/utilities/transactions/write_prepared_txn_db.cc +++ b/utilities/transactions/write_prepared_txn_db.cc @@ -157,7 +157,9 @@ Status WritePreparedTxnDB::WriteInternal(const WriteOptions& write_options_orig, // TODO(myabandeh): add an option to allow user skipping this cost SubBatchCounter counter(*GetCFComparatorMap()); auto s = batch->Iterate(&counter); - assert(s.ok()); + if (!s.ok()) { + return s; + } batch_cnt = counter.BatchCount(); WPRecordTick(TXN_DUPLICATE_KEY_OVERHEAD); ROCKS_LOG_DETAILS(info_log_, "Duplicate key overhead: %" PRIu64 " batches",