From 8e46b3a0c52c32bf5c86723cf79d0d6b8c4fa502 Mon Sep 17 00:00:00 2001 From: Giuseppe Baccini Date: Thu, 26 Oct 2023 15:30:29 +0200 Subject: [PATCH] rgw/sfs: honor retry_raced_bucket_write mechanism Updating bucket's metadata concurrently by two or more threads is allowed in radosgw. There is a retry mechanism: retry_raced_bucket_write(), that expects the bucket references to fetch the latest data from the persistent store. rgw/sfs driver didn't implement try_refresh_info() in its bucket class definition; this could cause two references to the same bucket to potentially lead to partial metadata updates. Fixes: https://github.com/aquarist-labs/s3gw/issues/637 Signed-off-by: Giuseppe Baccini --- src/rgw/driver/sfs/bucket.cc | 32 ++- src/test/rgw/sfs/test_rgw_sfs_sfs_bucket.cc | 234 +++++++++++++++++++- 2 files changed, 263 insertions(+), 3 deletions(-) diff --git a/src/rgw/driver/sfs/bucket.cc b/src/rgw/driver/sfs/bucket.cc index 93e906ad3db2c4..fbfe93074cdebe 100644 --- a/src/rgw/driver/sfs/bucket.cc +++ b/src/rgw/driver/sfs/bucket.cc @@ -534,11 +534,39 @@ int SFSBucket::abort_multiparts( return sfs::SFSMultipartUploadV2::abort_multiparts(dpp, store, this); } +/** + * @brief Refresh this bucket's rep with the state obtained from the store. + * Indeed it can happen that the rep of this bucket is obsolete due to + * concurrent threads updating metadata using their own SFSBucket instance. + */ int SFSBucket::try_refresh_info( const DoutPrefixProvider* dpp, ceph::real_time* /*pmtime*/ ) { - ldpp_dout(dpp, 10) << __func__ << ": TODO" << dendl; - return -ENOTSUP; + auto bref = store->get_bucket_ref(get_name()); + // maybe excessively safe approach: instead of directly assign the returned + // value to this.bucket let's check first if bref is valid. + if (!bref) { + lsfs_dout(dpp, 0) << fmt::format("no such bucket! {}", get_name()) << dendl; + return -ERR_NO_SUCH_BUCKET; + } + bucket = bref; + + // update views like we do in the constructor + + // info view + get_info() = bucket->get_info(); + + // attrs view + set_attrs(bucket->get_attrs()); + + // acls view + auto it = attrs.find(RGW_ATTR_ACL); + if (it != attrs.end()) { + auto lval = it->second.cbegin(); + acls.decode(lval); + } + + return 0; } int SFSBucket::read_usage( diff --git a/src/test/rgw/sfs/test_rgw_sfs_sfs_bucket.cc b/src/test/rgw/sfs/test_rgw_sfs_sfs_bucket.cc index e37781f2c0ab5b..fecf2ee593930a 100644 --- a/src/test/rgw/sfs/test_rgw_sfs_sfs_bucket.cc +++ b/src/test/rgw/sfs/test_rgw_sfs_sfs_bucket.cc @@ -15,6 +15,49 @@ #include "rgw/driver/sfs/sqlite/sqlite_users.h" #include "rgw/rgw_sal_sfs.h" +/* + These structs are in-memory mockable versions of actual structs/classes + that have a private rep. + Real types normally populate their rep via encode/decode methods. + For the sake of convenience, we define binary equivalent types with + public editable members. +*/ +namespace mockable { +struct DefaultRetention { + std::string mode; + int days; + int years; + + bool operator==(const DefaultRetention& other) const { + return this->mode == other.mode && this->days == other.days && + this->years == other.years; + } +}; + +struct ObjectLockRule { + mockable::DefaultRetention defaultRetention; + + bool operator==(const ObjectLockRule& other) const { + return this->defaultRetention == other.defaultRetention; + } +}; + +struct RGWObjectLock { + bool enabled; + bool rule_exist; + mockable::ObjectLockRule rule; + + bool operator==(const RGWObjectLock& other) const { + return this->enabled == other.enabled && + this->rule_exist == other.rule_exist && this->rule == other.rule; + } +}; + +mockable::RGWObjectLock& actual2mock(::RGWObjectLock& actual) { + return (mockable::RGWObjectLock&)actual; +} +} // namespace mockable + /* HINT s3gw.db will create here: /tmp/rgw_sfs_tests @@ -1437,7 +1480,8 @@ TEST_F(TestSFSBucket, ListNamespaceMultipartsBasics) { .object_name = "object_name", .path_uuid = uuid, .meta_str = "metastr", - .mtime = now}; + .mtime = now + }; int id = multipart.insert(mpop); ASSERT_GE(id, 0); @@ -1453,3 +1497,191 @@ TEST_F(TestSFSBucket, ListNamespaceMultipartsBasics) { EXPECT_EQ(results.objs[0].key.name, std::to_string(id)); EXPECT_EQ(results.objs[0].meta.mtime, now); } + +TEST_F(TestSFSBucket, RacedBucketMetadataWriteOperations) { + auto ceph_context = std::make_shared(CEPH_ENTITY_TYPE_CLIENT); + ceph_context->_conf.set_val("rgw_sfs_data_path", getTestDir()); + ceph_context->_log->start(); + auto store = new rgw::sal::SFStore(ceph_context.get(), getTestDir()); + + NoDoutPrefix ndp(ceph_context.get(), 1); + RGWEnv env; + env.init(ceph_context.get()); + createUser("usr_id", store->db_conn); + + rgw_user arg_user("", "usr_id", ""); + auto user = store->get_user(arg_user); + + rgw_bucket arg_bucket("t_id", "b_name", ""); + rgw_placement_rule arg_pl_rule("default", "STANDARD"); + std::string arg_swift_ver_location; + RGWQuotaInfo arg_quota_info; + RGWAccessControlPolicy arg_aclp_d = get_aclp_default(); + rgw::sal::Attrs arg_attrs; + + RGWBucketInfo arg_info = get_binfo(); + obj_version arg_objv; + bool existed = false; + req_info arg_req_info(ceph_context.get(), &env); + + std::unique_ptr bucket_from_create; + + EXPECT_EQ( + user->create_bucket( + &ndp, //dpp + arg_bucket, //b + "zg1", //zonegroup_id + arg_pl_rule, //placement_rule + arg_swift_ver_location, //swift_ver_location + &arg_quota_info, //pquota_info + arg_aclp_d, //policy + arg_attrs, //attrs + arg_info, //info + arg_objv, //ep_objv + false, //exclusive + false, //obj_lock_enabled + &existed, //existed + arg_req_info, //req_info + &bucket_from_create, //bucket + null_yield //optional_yield + ), + 0 + ); + + std::unique_ptr bucket_from_store_1; + + EXPECT_EQ( + store->get_bucket( + &ndp, user.get(), arg_info.bucket, &bucket_from_store_1, null_yield + ), + 0 + ); + + std::unique_ptr bucket_from_store_2; + + EXPECT_EQ( + store->get_bucket( + &ndp, user.get(), arg_info.bucket, &bucket_from_store_2, null_yield + ), + 0 + ); + + EXPECT_EQ(*bucket_from_store_1, *bucket_from_store_2); + + // merge_and_store_attrs + + rgw::sal::Attrs new_attrs; + RGWAccessControlPolicy arg_aclp = get_aclp_1(); + { + bufferlist acl_bl; + arg_aclp.encode(acl_bl); + new_attrs[RGW_ATTR_ACL] = acl_bl; + } + + EXPECT_EQ( + bucket_from_store_1->merge_and_store_attrs(&ndp, new_attrs, null_yield), 0 + ); + + // assert b1 contains the RGW_ATTR_ACL attribute + auto acl_bl_1 = bucket_from_store_1->get_attrs().find(RGW_ATTR_ACL); + EXPECT_NE(bucket_from_store_1->get_attrs().end(), acl_bl_1); + + // assert b2 does not contain the RGW_ATTR_ACL attribute + auto acl_bl_2 = bucket_from_store_2->get_attrs().find(RGW_ATTR_ACL); + EXPECT_EQ(bucket_from_store_2->get_attrs().end(), acl_bl_2); + + // put_info + + RGWObjectLock obj_lock; + mockable::RGWObjectLock& ol = mockable::actual2mock(obj_lock); + ol.enabled = true; + ol.rule.defaultRetention.years = 12; + ol.rule.defaultRetention.days = 31; + ol.rule.defaultRetention.mode = "GOVERNANCE"; + ol.rule_exist = true; + + bucket_from_store_2->get_info().obj_lock = obj_lock; + EXPECT_EQ(bucket_from_store_2->put_info(&ndp, false, real_time()), 0); + + auto& ol1 = mockable::actual2mock(bucket_from_store_1->get_info().obj_lock); + auto& ol2 = mockable::actual2mock(bucket_from_store_2->get_info().obj_lock); + + // obj lock structure in memory cannot be equal at this point in memory for the + // two b1 and b2 objects; this simulates two threads updating metadata over the + // same bucket using their own bucket reference (as it happens actually when 2 + // concurrent calls are issued from one or more S3 clients). + EXPECT_NE(ol1, ol2); + + // Getting now a third object from the backing store should fetch an image equal to + // b2 since that object is the latest one that did a put_info(). + // merge_and_store_attrs() done with b1 should now be lost due to b2.put_info(). + std::unique_ptr bucket_from_store_3; + EXPECT_EQ( + store->get_bucket( + &ndp, user.get(), arg_info.bucket, &bucket_from_store_3, null_yield + ), + 0 + ); + + // ol2 and ol3 should be the same. + auto& ol3 = mockable::actual2mock(bucket_from_store_3->get_info().obj_lock); + EXPECT_EQ(ol2, ol3); + + // We expect to have lost RGW_ATTR_ACL attribute in the backing store. + auto acl_bl_3 = bucket_from_store_3->get_attrs().find(RGW_ATTR_ACL); + EXPECT_EQ(bucket_from_store_3->get_attrs().end(), acl_bl_3); + + // Now we repeat the updates interposing the try_refresh_info() on b2. + // try_refresh_info() refreshes b2's rep with the state obtained + // from the store. + EXPECT_EQ( + bucket_from_store_1->merge_and_store_attrs(&ndp, new_attrs, null_yield), 0 + ); + EXPECT_EQ(bucket_from_store_2->try_refresh_info(&ndp, nullptr), 0); + EXPECT_EQ(bucket_from_store_2->put_info(&ndp, false, real_time()), 0); + + // let's refetch b3 from store. + EXPECT_EQ( + store->get_bucket( + &ndp, user.get(), arg_info.bucket, &bucket_from_store_3, null_yield + ), + 0 + ); + + // Now all the views over b1, b2 and b3 should be the same, given that the + // underlying reps are (hopefully) the same. + + // get_info() view + ol1 = mockable::actual2mock(bucket_from_store_1->get_info().obj_lock); + ol2 = mockable::actual2mock(bucket_from_store_2->get_info().obj_lock); + ol3 = mockable::actual2mock(bucket_from_store_3->get_info().obj_lock); + EXPECT_EQ(ol1, ol2); + EXPECT_EQ(ol2, ol3); + + // get_attrs() view and acls views + acl_bl_1 = bucket_from_store_1->get_attrs().find(RGW_ATTR_ACL); + EXPECT_NE(bucket_from_store_1->get_attrs().end(), acl_bl_1); + acl_bl_2 = bucket_from_store_2->get_attrs().find(RGW_ATTR_ACL); + EXPECT_NE(bucket_from_store_2->get_attrs().end(), acl_bl_2); + acl_bl_3 = bucket_from_store_3->get_attrs().find(RGW_ATTR_ACL); + EXPECT_NE(bucket_from_store_3->get_attrs().end(), acl_bl_3); + + { + RGWAccessControlPolicy aclp; + auto ci_lval = acl_bl_1->second.cbegin(); + aclp.decode(ci_lval); + EXPECT_EQ(aclp, arg_aclp); + } + { + RGWAccessControlPolicy aclp; + auto ci_lval = acl_bl_2->second.cbegin(); + aclp.decode(ci_lval); + EXPECT_EQ(aclp, arg_aclp); + } + { + RGWAccessControlPolicy aclp; + auto ci_lval = acl_bl_3->second.cbegin(); + aclp.decode(ci_lval); + EXPECT_EQ(aclp, arg_aclp); + } +}