From a66f8283ac4a07a8cf17ff1d2085c6a6e7deb672 Mon Sep 17 00:00:00 2001 From: "J. Eric Ivancich" Date: Mon, 10 Apr 2023 13:40:32 -0400 Subject: [PATCH 1/8] rgw: dencode "removed" flag in RGWOLHInfo When an OLH structure was dencoded the "removed" flag was not included. That flag specifies whether the OLH is a delete marker. This adds that flag to the dencoding. Signed-off-by: J. Eric Ivancich --- src/rgw/driver/rados/rgw_rados.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc index b714cbd56601..09a762de29a4 100644 --- a/src/rgw/driver/rados/rgw_rados.cc +++ b/src/rgw/driver/rados/rgw_rados.cc @@ -10073,6 +10073,7 @@ void RGWOLHInfo::generate_test_instances(list &o) void RGWOLHInfo::dump(Formatter *f) const { encode_json("target", target, f); + encode_json("removed", removed, f); } void RGWOLHPendingInfo::dump(Formatter *f) const From 0865174cca641923c7b78fafd75acb597cb37399 Mon Sep 17 00:00:00 2001 From: "J. Eric Ivancich" Date: Mon, 22 May 2023 15:46:17 -0400 Subject: [PATCH 2/8] rgw: allow object locator to be specified in cls_rgw_bucket_complete_op When a tag is not specified in bucket operation (i.e., it's not transactional), the prepare function does not have to be called. But then there was no way to specify a locator for the object since that wasn't a parameter in the complete phase. This adds an object locator parameter to complete. Signed-off-by: J. 
Eric Ivancich --- src/cls/rgw/cls_rgw_client.cc | 4 +++- src/cls/rgw/cls_rgw_client.h | 3 ++- src/rgw/driver/rados/rgw_rados.cc | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/cls/rgw/cls_rgw_client.cc b/src/cls/rgw/cls_rgw_client.cc index 73a79490a2d5..5e7fba88f24a 100644 --- a/src/cls/rgw/cls_rgw_client.cc +++ b/src/cls/rgw/cls_rgw_client.cc @@ -286,7 +286,8 @@ void cls_rgw_bucket_complete_op(ObjectWriteOperation& o, RGWModifyOp op, const s const rgw_bucket_dir_entry_meta& dir_meta, const list *remove_objs, bool log_op, uint16_t bilog_flags, - const rgw_zone_set *zones_trace) + const rgw_zone_set *zones_trace, + const std::string& obj_locator) { bufferlist in; @@ -295,6 +296,7 @@ void cls_rgw_bucket_complete_op(ObjectWriteOperation& o, RGWModifyOp op, const s call.tag = tag; call.key = key; call.ver = ver; + call.locator = obj_locator; call.meta = dir_meta; call.log_op = log_op; call.bilog_flags = bilog_flags; diff --git a/src/cls/rgw/cls_rgw_client.h b/src/cls/rgw/cls_rgw_client.h index 139dbdb19395..1ae49c877bb4 100644 --- a/src/cls/rgw/cls_rgw_client.h +++ b/src/cls/rgw/cls_rgw_client.h @@ -355,7 +355,8 @@ void cls_rgw_bucket_complete_op(librados::ObjectWriteOperation& o, RGWModifyOp o const cls_rgw_obj_key& key, const rgw_bucket_dir_entry_meta& dir_meta, const std::list *remove_objs, bool log_op, - uint16_t bilog_op, const rgw_zone_set *zones_trace); + uint16_t bilog_op, const rgw_zone_set *zones_trace, + const std::string& obj_locator = ""); // ignored if it's the empty string void cls_rgw_remove_obj(librados::ObjectWriteOperation& o, std::list& keep_attr_prefixes); void cls_rgw_obj_store_pg_ver(librados::ObjectWriteOperation& o, const std::string& attr); diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc index 09a762de29a4..bb17ff3c6317 100644 --- a/src/rgw/driver/rados/rgw_rados.cc +++ b/src/rgw/driver/rados/rgw_rados.cc @@ -8839,7 +8839,7 @@ int RGWRados::cls_obj_complete_op(BucketShard& bs, const 
rgw_obj& obj, RGWModify cls_rgw_obj_key key(ent.key.name, ent.key.instance); cls_rgw_guard_bucket_resharding(o, -ERR_BUSY_RESHARDING); cls_rgw_bucket_complete_op(o, op, tag, ver, key, dir_meta, remove_objs, - svc.zone->need_to_log_data(), bilog_flags, &zones_trace); + svc.zone->need_to_log_data(), bilog_flags, &zones_trace, obj.key.get_loc()); complete_op_data *arg; index_completion_manager->create_completion(obj, op, tag, ver, key, dir_meta, remove_objs, svc.zone->need_to_log_data(), bilog_flags, &zones_trace, &arg); From 535b87ff5e5ad10987f577cb639becb7b81c1879 Mon Sep 17 00:00:00 2001 From: "J. Eric Ivancich" Date: Mon, 12 Jun 2023 12:53:52 -0400 Subject: [PATCH 3/8] rgw: allow Object::Read:prepare to return epoch to caller The data is readily available and we have the Params structure to optionally request information back from the caller, so we'll use this to optionally surface the epoch. Signed-off-by: J. Eric Ivancich --- src/rgw/driver/rados/rgw_rados.cc | 9 +++++++-- src/rgw/driver/rados/rgw_rados.h | 4 +++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc index bb17ff3c6317..4f1eea44dc3e 100644 --- a/src/rgw/driver/rados/rgw_rados.cc +++ b/src/rgw/driver/rados/rgw_rados.cc @@ -6359,10 +6359,15 @@ int RGWRados::Object::Read::prepare(optional_yield y, const DoutPrefixProvider * } } - if (params.obj_size) + if (params.obj_size) { *params.obj_size = astate->size; - if (params.lastmod) + } + if (params.lastmod) { *params.lastmod = astate->mtime; + } + if (params.epoch) { + *params.epoch = astate->epoch; + } return 0; } diff --git a/src/rgw/driver/rados/rgw_rados.h b/src/rgw/driver/rados/rgw_rados.h index 29e0e70cc490..76e5b6c13e19 100644 --- a/src/rgw/driver/rados/rgw_rados.h +++ b/src/rgw/driver/rados/rgw_rados.h @@ -767,9 +767,11 @@ class RGWRados uint64_t *obj_size; std::map *attrs; rgw_obj *target_obj; + uint64_t *epoch; Params() : lastmod(nullptr), obj_size(nullptr), 
attrs(nullptr), - target_obj(nullptr) {} + target_obj(nullptr), epoch(nullptr) + {} } params; explicit Read(RGWRados::Object *_source) : source(_source) {} From f731570bb11a473d4acc6ad75dce77c3be0204aa Mon Sep 17 00:00:00 2001 From: "J. Eric Ivancich" Date: Mon, 12 Jun 2023 13:41:02 -0400 Subject: [PATCH 4/8] rgw: enhance functionality of `radosgw-admin object reindex ...` Adds ability to handle versioned buckets to reindexing. Also, it completes the addition of the index entries without the subsequent need to list the bucket. Signed-off-by: J. Eric Ivancich --- doc/man/8/radosgw-admin.rst | 3 + src/rgw/driver/rados/rgw_rados.cc | 272 +++++++++++++++++++++++++++--- src/rgw/driver/rados/rgw_rados.h | 22 ++- src/rgw/rgw_admin.cc | 12 +- 4 files changed, 282 insertions(+), 27 deletions(-) diff --git a/doc/man/8/radosgw-admin.rst b/doc/man/8/radosgw-admin.rst index 54a66e17a0b8..e477ccb84f39 100644 --- a/doc/man/8/radosgw-admin.rst +++ b/doc/man/8/radosgw-admin.rst @@ -146,6 +146,9 @@ which are as follows: :command:`object rewrite` Rewrite the specified object. +:command:`object reindex` + Add an object to its bucket's index. Used rarely for emergency repairs. + :command:`objects expire` Run expired objects cleanup. diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc index 4f1eea44dc3e..8048f1bcab47 100644 --- a/src/rgw/driver/rados/rgw_rados.cc +++ b/src/rgw/driver/rados/rgw_rados.cc @@ -117,6 +117,32 @@ static string default_storage_extra_pool_suffix = "rgw.buckets.non-ec"; static RGWObjCategory main_category = RGWObjCategory::Main; #define RGW_USAGE_OBJ_PREFIX "usage." 
+// reads attribute as std::string +static inline void read_attr(std::map& attrs, + const std::string& attr_name, + std::string& dest, + bool* found = nullptr) { + auto i = attrs.find(attr_name); + if (i != attrs.end()) { + dest = rgw_bl_str(i->second); + } + if (found) *found = i != attrs.end(); +} + +// reads attribute as bufferlist +static inline void read_attr(std::map& attrs, + const std::string& attr_name, + bufferlist& dest, + bool* found = nullptr) { + auto i = attrs.find(attr_name); + if (i != attrs.cend()) { + dest = i->second; // copy + } + if (found) { + *found = i != attrs.end(); + } +} + rgw_raw_obj rgw_obj_select::get_raw_obj(RGWRados* store) const { if (!is_raw) { @@ -3620,33 +3646,235 @@ int RGWRados::rewrite_obj(RGWBucketInfo& dest_bucket_info, const rgw_obj& obj, c attrset, 0, real_time(), NULL, dpp, y); } -int RGWRados::reindex_obj(const RGWBucketInfo& bucket_info, - const rgw_obj& obj, + +int RGWRados::reindex_obj(rgw::sal::Driver* driver, + RGWBucketInfo& bucket_info, + const rgw_obj& head_obj, const DoutPrefixProvider* dpp, optional_yield y) { - if (bucket_info.versioned()) { - ldpp_dout(dpp, 10) << "WARNING: " << __func__ << - ": cannot process versioned bucket \"" << - bucket_info.bucket.get_key() << "\"" << - dendl; - return -ENOTSUP; + // used for trimming pending entries; max value means all versions trimmed + const uint64_t max_ver = std::numeric_limits::max(); + // used for linking an olh + const std::string empty_op_tag = ""; + + int ret; + RGWObjectCtx obj_ctx(driver); + + // aids in printing out name of bucket/object + auto p = [](const rgw_obj& o) -> std::string { + std::stringstream ss; + ss << o.bucket.name << ':' << o.key; + return ss.str(); + }; + + // since the code for linking a versioned object and adding a delete + // marker is so similar, we bring the common OLH-handling code into + // this lambda + auto link_helper = [&](const bool is_delete_marker, + rgw_bucket_dir_entry_meta& meta, + const std::string& log_tag) -> int { 
+ int ret = 0; + + // convert the head object name into the OLH object by removing + // the instance info + rgw_obj olh_obj = head_obj; + olh_obj.key.instance.clear(); + + RGWObjState* olh_state { nullptr }; + RGWObjManifest* olh_manifest { nullptr }; // we don't use, but must send in + ret = get_obj_state(dpp, &obj_ctx, bucket_info, olh_obj, + &olh_state, &olh_manifest, + false, // don't follow olh + y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << + ": during " << log_tag << " get_obj_state on OLH object " << + olh_obj.key << " returned: " << cpp_strerror(-ret) << dendl; + return ret; + } + + // In order to update the data in the OLH object we're calling + // bucket_index_link_olh followed by bucket_index_trim_olh_log + // since that churns metadata less than a call to set_olh + // would. bucket_index_link_olh does leave entries in the OLH + // object's pending log since normally OLH updates are paired with + // other ops, but we remove such entries below. + ret = bucket_index_link_olh(dpp, + bucket_info, + *olh_state, + head_obj, + is_delete_marker, + empty_op_tag, + &meta, + 0, // zero olh_epoch means calculated in CLS + ceph::real_clock::zero(), // unmod_since + true, // high_precision_time + y, + nullptr, // zones trace + false); // log data change + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << + ": during " << log_tag << " set_index_link_olh returned: " << + cpp_strerror(-ret) << dendl; + return ret; + } + + // bucket_)index_link_olh leaves a pending_log entry in the OLH; + // this trims it out + ret = bucket_index_trim_olh_log(dpp, + bucket_info, + *olh_state, + head_obj, + max_ver, + y); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << + ": during " << log_tag << + " bucket_index_trim_olh_log returned: " << + cpp_strerror(-ret) << dendl; + return ret; + } + + return 0; + }; // link_helper lambda + + librados::IoCtx head_obj_ctx; + ret = get_obj_head_ioctx(dpp, bucket_info, head_obj, &head_obj_ctx); + if 
(ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << + ": get_obj_head_ioctx for " << p(head_obj) << " returned: " << + cpp_strerror(-ret) << dendl; + return ret; } - Bucket target(this, bucket_info); - RGWRados::Bucket::UpdateIndex update_idx(&target, obj); - const std::string* no_write_tag = nullptr; + const int64_t pool_id = head_obj_ctx.get_id(); + const bool is_versioned = bucket_info.versioned(); + const bool has_instance = ! head_obj.key.instance.empty(); + + ldpp_dout(dpp, 20) << "INFO: " << __func__ << ": reindexing " << + p(head_obj) << dendl; + + RGWObjState *head_state { nullptr }; + RGWObjManifest *head_manifest { nullptr }; - int ret = update_idx.prepare(dpp, RGWModifyOp::CLS_RGW_OP_ADD, no_write_tag, y); + // if head_obj does not exist does not return -ENOENT but instead + // sets head_state->exists to false + ret = get_obj_state(dpp, &obj_ctx, bucket_info, head_obj, + &head_state, &head_manifest, + false, // don't follow olh + y); if (ret < 0) { ldpp_dout(dpp, 0) << "ERROR: " << __func__ << - ": update index prepare for \"" << obj << "\" returned: " << + ": get_obj_state on " << p(head_obj) << " returned: " << cpp_strerror(-ret) << dendl; return ret; } - return 0; -} + if (! 
head_state->exists && is_versioned && has_instance) { + // head object does not exist if it's a delete marker; handle here + // and return + ldpp_dout(dpp, 20) << "INFO: " << __func__ << ": indexing " << + p(head_obj) << " as delete marker" << dendl; + + // empty metadata object is fine for delete marker + rgw_bucket_dir_entry_meta meta; + + return link_helper(true, meta, "set delete marker"); + } else if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << + ": unable to complete stat of " << p(head_obj) << "; returned: " << + cpp_strerror(-ret) << dendl; + return ret; + } + + // data we'll pull from head object xattrs + std::string etag; + std::string content_type; + std::string storage_class; + bufferlist acl_bl; + bool found_olh_info { false }; + bufferlist olh_info_bl; + bool appendable { false }; + bufferlist part_num_bl; + + rgw::sal::Attrs& attr_set = head_state->attrset; + read_attr(attr_set, RGW_ATTR_ETAG, etag); + read_attr(attr_set, RGW_ATTR_CONTENT_TYPE, content_type); + read_attr(attr_set, RGW_ATTR_STORAGE_CLASS, storage_class); + read_attr(attr_set, RGW_ATTR_ACL, acl_bl); + read_attr(attr_set, RGW_ATTR_OLH_INFO, olh_info_bl, &found_olh_info); + read_attr(attr_set, RGW_ATTR_APPEND_PART_NUM, part_num_bl, &appendable); + + // check for a pure OLH object and if so exit early + if (found_olh_info) { + try { + auto iter = olh_info_bl.cbegin(); + RGWOLHInfo info; + decode(info, iter); + if (! 
info.target.key.instance.empty()) { + // since there is a listed instance this appears to be a pure + // OLH (i.e., no data); we won't index as we index actual + // objects with data and set the OLH then + ldpp_dout(dpp, 20) << "INFO: " << __func__ << ": " << + p(head_obj) << " appears to be a pure OLH object; ignoring" << dendl; + return 0; + } + } catch (buffer::error& err) { + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << + ": unable to decode OLH info for " << p(head_obj) << dendl; + return -EIO; + } + } + + Bucket bkt(this, bucket_info); + RGWRados::Bucket::UpdateIndex update_idx(&bkt, head_obj); + + // note: we can skip calling prepare() since there's no transaction + // and we don't specify a write tag (i.e., transaction tag) + ret = update_idx.complete(dpp, + pool_id, + 0, // bucket index epoch + head_state->size, + head_state->accounted_size, + head_state->mtime, + etag, + content_type, + storage_class, + &acl_bl, + RGWObjCategory::Main, // RGWObjCategory category, + nullptr, // remove_objs list + y, + nullptr, // user data string + appendable); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << + ": update index complete for " << p(head_obj) << " returned: " << + cpp_strerror(-ret) << dendl; + return ret; + } + + if (bucket_info.versioned()) { + ldpp_dout(dpp, 20) << "INFO: " << __func__ << ": since " << + bucket_info.bucket << " appears to be versioned, setting OLH for " << + p(head_obj) << dendl; + + // write OLH and instance entries + rgw_bucket_dir_entry_meta meta; + meta.category = RGWObjCategory::Main; + meta.mtime = head_state->mtime; + meta.size = head_state->size; + meta.accounted_size = head_state->accounted_size; + meta.etag = etag; + meta.content_type = content_type; + meta.appendable = appendable; + + ret = link_helper(false, meta, "linking version"); + } // if bucket is versioned + + return ret; +} // RGWRados::reindex_obj + struct obj_time_weight { real_time mtime; @@ -7487,7 +7715,7 @@ int RGWRados::repair_olh(const 
DoutPrefixProvider *dpp, RGWObjState* state, cons return r; } return 0; -} +} // RGWRados::repair_olh int RGWRados::bucket_index_trim_olh_log(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, @@ -7703,7 +7931,7 @@ int RGWRados::apply_olh_log(const DoutPrefixProvider *dpp, /* update olh object */ r = rgw_rados_operate(dpp, ref.pool.ioctx(), ref.obj.oid, &op, y); if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: could not apply olh update, r=" << r << dendl; + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << ": could not apply olh update to oid \"" << ref.obj.oid << "\", r=" << r << dendl; return r; } @@ -7822,7 +8050,8 @@ int RGWRados::set_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, const rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta, uint64_t olh_epoch, real_time unmod_since, bool high_precision_time, - optional_yield y, rgw_zone_set *zones_trace, bool log_data_change) + optional_yield y, rgw_zone_set *zones_trace, bool log_data_change, + bool skip_olh_obj_update) { string op_tag; @@ -7884,6 +8113,11 @@ int RGWRados::set_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, return -EIO; } + // exit early if we're skipping the olh update and just updating the index + if (skip_olh_obj_update) { + return 0; + } + ret = update_olh(dpp, obj_ctx, state, bucket_info, olh_obj, y); if (ret == -ECANCELED) { /* already did what we needed, no need to retry, raced with another user */ ret = 0; @@ -8053,7 +8287,7 @@ int RGWRados::remove_olh_pending_entries(const DoutPrefixProvider *dpp, const RG return 0; } if (r < 0) { - ldpp_dout(dpp, 0) << "ERROR: could not apply olh update, r=" << r << dendl; + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << ": could not apply olh update to oid \"" << ref.obj.oid << "\", r=" << r << dendl; return r; } } diff --git a/src/rgw/driver/rados/rgw_rados.h b/src/rgw/driver/rados/rgw_rados.h index 76e5b6c13e19..47b6bc61710f 100644 --- a/src/rgw/driver/rados/rgw_rados.h +++ b/src/rgw/driver/rados/rgw_rados.h 
@@ -1091,7 +1091,8 @@ class RGWRados D3nDataCache* d3n_data_cache{nullptr}; int rewrite_obj(RGWBucketInfo& dest_bucket_info, const rgw_obj& obj, const DoutPrefixProvider *dpp, optional_yield y); - int reindex_obj(const RGWBucketInfo& dest_bucket_info, + int reindex_obj(rgw::sal::Driver* driver, + RGWBucketInfo& dest_bucket_info, const rgw_obj& obj, const DoutPrefixProvider* dpp, optional_yield y); @@ -1339,7 +1340,8 @@ class RGWRados int apply_olh_log(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWObjState& obj_state, RGWBucketInfo& bucket_info, const rgw_obj& obj, bufferlist& obj_tag, std::map >& log, uint64_t *plast_ver, optional_yield y, rgw_zone_set *zones_trace = nullptr); - int update_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWObjState *state, RGWBucketInfo& bucket_info, const rgw_obj& obj, optional_yield y, rgw_zone_set *zones_trace = nullptr); + int update_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWObjState *state, RGWBucketInfo& bucket_info, const rgw_obj& obj, optional_yield y, + rgw_zone_set *zones_trace = nullptr); int clear_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, const rgw_obj& obj, @@ -1347,9 +1349,19 @@ class RGWRados const std::string& tag, const uint64_t ver, optional_yield y); - int set_olh(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, bool delete_marker, rgw_bucket_dir_entry_meta *meta, - uint64_t olh_epoch, ceph::real_time unmod_since, bool high_precision_time, - optional_yield y, rgw_zone_set *zones_trace = nullptr, bool log_data_change = false); + int set_olh(const DoutPrefixProvider *dpp, + RGWObjectCtx& obj_ctx, + RGWBucketInfo& bucket_info, + const rgw_obj& target_obj, + bool delete_marker, + rgw_bucket_dir_entry_meta *meta, + uint64_t olh_epoch, + ceph::real_time unmod_since, + bool high_precision_time, + optional_yield y, + rgw_zone_set *zones_trace = nullptr, + bool log_data_change = false, + bool 
skip_olh_obj_update = false); // can skip the OLH object update if, for example, repairing index int repair_olh(const DoutPrefixProvider *dpp, RGWObjState* state, const RGWBucketInfo& bucket_info, const rgw_obj& obj, optional_yield y); int unlink_obj_instance(const DoutPrefixProvider *dpp, RGWObjectCtx& obj_ctx, RGWBucketInfo& bucket_info, const rgw_obj& target_obj, diff --git a/src/rgw/rgw_admin.cc b/src/rgw/rgw_admin.cc index 15bdaba87a56..7237fe8ed1fe 100644 --- a/src/rgw/rgw_admin.cc +++ b/src/rgw/rgw_admin.cc @@ -7872,7 +7872,7 @@ int main(int argc, const char **argv) auto process = [&](const std::string& p_object, const std::string& p_object_version) -> int { std::unique_ptr obj = bucket->get_object(p_object); obj->set_instance(p_object_version); - ret = store->reindex_obj(bucket->get_info(), obj->get_obj(), dpp(), null_yield); + ret = store->reindex_obj(driver, bucket->get_info(), obj->get_obj(), dpp(), null_yield); if (ret < 0) { return ret; } @@ -7894,9 +7894,15 @@ int main(int argc, const char **argv) } std::string obj_name; - const std::string empty_version; while (std::getline(file, obj_name)) { - ret = process(obj_name, empty_version); + std::string version; + auto pos = obj_name.find('\t'); + if (pos != std::string::npos) { + version = obj_name.substr(1 + pos); + obj_name = obj_name.substr(0, pos); + } + + ret = process(obj_name, version); if (ret < 0) { std::cerr << "ERROR: while processing \"" << obj_name << "\", received " << cpp_strerror(-ret) << "." << std::endl; From c02906ae6463425ef3f00000ba1ce78fcc9e2478 Mon Sep 17 00:00:00 2001 From: "J. Eric Ivancich" Date: Wed, 14 Jun 2023 15:53:19 -0400 Subject: [PATCH 5/8] rgw: enhances rgw-restore-bucket-index script This enhances the script to both process versioned buckets correctly and to handle object names that begin with underscore. If the bucket is versioned it submits each version chronologically (based on mtime) to be reindexed in order to "replay" the modification of objects. 
However mtime is not a perfect indicator. So additionally it looks at the OLH object to determine the most recent version and the script makes sure that it is replayed last. The order of previous versions is likely correct, but not guaranteed to be so. Additional logic is added to handle objects with names that begin with underscore ('_') since that's used as a delimiter and needs to be escaped and rados object locators are also used. A man page for the script is added. Signed-off-by: J. Eric Ivancich --- ceph.spec.in | 1 + debian/radosgw.install | 1 + doc/man/8/CMakeLists.txt | 3 +- doc/man/8/rgw-restore-bucket-index.rst | 91 ++++++++ doc/man_index.rst | 1 + src/rgw/rgw-restore-bucket-index | 303 ++++++++++++++----------- 6 files changed, 272 insertions(+), 128 deletions(-) create mode 100644 doc/man/8/rgw-restore-bucket-index.rst diff --git a/ceph.spec.in b/ceph.spec.in index 1fc998d014db..00410df64ed5 100644 --- a/ceph.spec.in +++ b/ceph.spec.in @@ -1704,6 +1704,7 @@ exit 0 %{_mandir}/man8/rbd-replay-many.8* %{_mandir}/man8/rbd-replay-prep.8* %{_mandir}/man8/rgw-orphan-list.8* +%{_mandir}/man8/rgw-restore-bucket-index.8* %dir %{_datadir}/ceph/ %{_datadir}/ceph/known_hosts_drop.ceph.com %{_datadir}/ceph/id_rsa_drop.ceph.com diff --git a/debian/radosgw.install b/debian/radosgw.install index b0367e8f651f..be74a52c1a9c 100644 --- a/debian/radosgw.install +++ b/debian/radosgw.install @@ -7,3 +7,4 @@ usr/bin/radosgw-token usr/share/man/man8/ceph-diff-sorted.8 usr/share/man/man8/radosgw.8 usr/share/man/man8/rgw-orphan-list.8 +usr/share/man/man8/rgw-restore-bucket-index.8 diff --git a/doc/man/8/CMakeLists.txt b/doc/man/8/CMakeLists.txt index f1df5b4b448e..4026cf9d0950 100644 --- a/doc/man/8/CMakeLists.txt +++ b/doc/man/8/CMakeLists.txt @@ -60,7 +60,8 @@ if(WITH_RADOSGW) radosgw-admin.rst rgw-orphan-list.rst rgw-policy-check.rst - ceph-diff-sorted.rst) + ceph-diff-sorted.rst + rgw-restore-bucket-index.rst) endif() if(WITH_RBD) diff --git 
a/doc/man/8/rgw-restore-bucket-index.rst b/doc/man/8/rgw-restore-bucket-index.rst new file mode 100644 index 000000000000..b297fa753706 --- /dev/null +++ b/doc/man/8/rgw-restore-bucket-index.rst @@ -0,0 +1,91 @@ +:orphan: + +================================================================================== + rgw-restore-bucket-index -- try to restore a bucket's objects to its bucket index +================================================================================== + +.. program:: rgw-restore-bucket-index + +Synopsis +======== + +| **rgw-restore-bucket-index** + +Description +=========== + +:program:`rgw-restore-bucket-index` is an *EXPERIMENTAL* RADOS gateway +user administration utility. It scans the data pool for objects that +belong to a given bucket and tries to add those objects back to the +bucket index. It's intended as a **last resort** after a +**catastrophic** loss of a bucket index. Please thoroughly review the +*Warnings* listed below. + +The utility works with regular (i.e., un-versioned) buckets, versioned +buckets, and buckets where versioning has been suspended. + +Warnings +======== + +This utility is currently considered *EXPERIMENTAL*. + +The results are unpredictable if the bucket is in +active use while this utility is running. + +The results are unpredictable if only some of the bucket's objects are +missing from the bucket index. In such a case, consider using the +"object reindex" subcommand of `radosgw-admin` to restore objects to +the bucket index one-by-one. + +For objects in versioned buckets, if the latest version is a delete +marker, it will be restored. If a delete marker has been written over +with a new version, then that delete marker will not be restored. This +should have minimal impact on results in that it recovers the +latest version and previous versions are all accessible. + +Command-Line Arguments +====================== + +.. option:: -b + + Specify the bucket to be reindexed. + +.. 
option:: -p + + Optional, specify the data pool containing head objects for the + bucket. If omitted the utility will try to determine the data pool + on its own. + +.. option:: -l + + Optional, specify a file containing the output of a rados listing + of the data pool. Since listing the data pool can be an expensive + and time-consuming operation, if trying to recover the indices for + multiple buckets, it could be more efficient to re-use the same + listing. + +.. option:: -y + + Optional, proceed without further prompting. Without this option + the utility will display some information and prompt the user as to + whether to proceed. When provided, the utility will simply + proceed. Please use caution when using this option. + +Examples +======== + +Attempt to restore the index for a bucket named *summer-2023-photos*:: + + $ rgw-restore-bucket-index -b summer-2023-photos + +Availability +============ + +:program:`rgw-restore-bucket-index` is part of Ceph, a massively +scalable, open-source, distributed storage system. Please refer to +the Ceph documentation at https://docs.ceph.com for more information. + +See also +======== + +:doc:`radosgw-admin `\(8) diff --git a/doc/man_index.rst b/doc/man_index.rst index 989ed7e229a0..ca1353f911ec 100644 --- a/doc/man_index.rst +++ b/doc/man_index.rst @@ -48,3 +48,4 @@ man/8/ceph-immutable-object-cache man/8/ceph-diff-sorted man/8/rgw-policy-check + man/8/rgw-restore-bucket-index diff --git a/src/rgw/rgw-restore-bucket-index b/src/rgw/rgw-restore-bucket-index index 72e974c492d8..f9269f55b13f 100755 --- a/src/rgw/rgw-restore-bucket-index +++ b/src/rgw/rgw-restore-bucket-index @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# version 2023-03-21 +# version 2023-07-06 # rgw-restore-bucket-index is an EXPERIMENTAL tool to use in case # bucket index entries for objects in the bucket are somehow lost. 
It @@ -27,39 +27,65 @@ export TOP_PID=$$ # relies on this ordering export LC_ALL=C +# temporary files export bkt_entry=/tmp/rgwrbi-bkt-entry.$$ export bkt_inst=/tmp/rgwrbi-bkt-inst.$$ -export bkt_inst_new=/tmp/rgwrbi-bkt-inst-new.$$ +export marker_ls=/tmp/rgwrbi-marker-ls.$$ export obj_list=/tmp/rgwrbi-object-list.$$ +export obj_list_ver=/tmp/rgwrbi-object-list-ver.$$ +export obj_reindex_script=/tmp/rgwrbi-object-list-script.$$ export zone_info=/tmp/rgwrbi-zone-info.$$ +export olh_info_enc=/tmp/rgwrbi-olh-info-enc.$$ +export olh_info_json=/tmp/rgwrbi-olh-info-json.$$ + export clean_temps=1 + +clean() { + if [ -n "$clean_temps" ] ;then + rm -f $bkt_entry $bkt_inst $marker_ls $obj_list $obj_list_ver $obj_reindex_script $zone_info $olh_info_enc $olh_info_json + fi +} + # number of seconds for a bucket index pending op to be completed via # dir_suggest mechanism -pending_op_secs=120 +export pending_op_secs=120 # -if which radosgw-admin > /dev/null ;then - : -else - echo 'Error: must have command `radosgw-admin` installed and on $PATH for operation.' - exit 1 -fi +# sanity checks +# -# make sure jq is available -if which jq > /dev/null ;then - : -else - echo 'Error: must have command `jq` installed and on $PATH for json parsing.' - exit 1 +export exit_code=0 + +tool_list="radosgw-admin ceph-dencoder jq" +for t in $tool_list ;do + if which $t > /dev/null ;then + : + else + echo "ERROR: must have tool \`$t\` installed and on \$PATH for operation." + exit_code=1 + fi +done +if [ "$exit_code" -ne 0 ] ;then + exit $exit_code fi +dencode_list="RGWOLHInfo" +for t in $dencode_list ;do + if ceph-dencoder list_types | grep -q $t ;then + : + else + echo "ERROR: ceph-dencoder lacking module to decode ${t}." 
+ exit_code=1 + fi +done +if [ "$exit_code" -ne 0 ] ;then + exit $exit_code +fi -clean() { - if [ -n "$clean_temps" ] ;then - rm -f $bkt_entry $bkt_inst $bkt_inst_new $obj_list $zone_info - fi -} +# +# helper functions +# super_exit() { kill -s TERM $TOP_PID @@ -71,41 +97,31 @@ usage() { Usage: $0 -b [-l ] [-p ] [-y] where: - -b Required - The name of the bucket to operate on - -l Optional - A file with the output of 'rados ls -p ' - -p Optional - If not provided, will be inferred from bucket and zone information. - -y Optional - Proceed with correction without prompting the user + -b Required - name of the bucket to operate on + -l Optional - file containing the output of 'rados ls -p ' + -p Optional - data pool; if not provided will be inferred from bucket and zone information + -y Optional - proceed with restoring without confirming with the user USE WITH CAUTION. - + -d Optional - run with debugging output EOF super_exit } -# strips the starting and ending double quotes from a string, so: -# "dog" -> dog -# "dog -> "dog -# d"o"g -> d"o"g -# "do"g" -> do"g -strip_quotes() { - echo "$1" | sed 's/^"\(.*\)"$/\1/' -} - # Determines the name of the data pool. Expects the optional # command-line argument to appear as $1 if there is one. The # command-line has the highest priority, then the "explicit_placement" # in the bucket instance data, and finally the "placement_rule" in the # bucket instance data. 
get_pool() { - # explicit_placement - expl_pool=$(strip_quotes $(jq '.data.bucket_info.bucket.explicit_placement.data_pool' $bkt_inst)) + expl_pool=$(jq -r '.data.bucket_info.bucket.explicit_placement.data_pool' $bkt_inst) if [ -n "$expl_pool" ] ;then echo "$expl_pool" exit 0 fi # placement_rule - plmt_rule=$(strip_quotes $(jq '.data.bucket_info.placement_rule' $bkt_inst)) + plmt_rule=$(jq -r '.data.bucket_info.placement_rule' $bkt_inst) plmt_pool=$(echo "$plmt_rule" | awk -F / '{print $1}') plmt_class=$(echo "$plmt_rule" | awk -F / '{print $2}') if [ -z "$plmt_class" ] ;then @@ -113,7 +129,7 @@ get_pool() { fi radosgw-admin zone get >$zone_info 2>/dev/null - pool=$(strip_quotes $(jq ".placement_pools [] | select(.key | contains(\"${plmt_pool}\")) .val .storage_classes.${plmt_class}.data_pool" $zone_info)) + pool=$(jq -r ".placement_pools [] | select(.key | contains(\"${plmt_pool}\")) .val .storage_classes.${plmt_class}.data_pool" $zone_info) if [ -z "$pool" ] ;then echo ERROR: unable to determine pool. @@ -122,39 +138,97 @@ get_pool() { echo "$pool" } -bucket="" +export bucket="" pool="" lsoutput="" -while getopts ":b:l:p:y" o; do - case "${o}" in - b) - bucket="${OPTARG}" - ;; - l) - if [ -e "${OPTARG}" ]; then - lsoutput="${OPTARG}" - else - echo - echo "ERROR: Provided 'rados ls' output file name does not exist. ${OPTARG}" - exit 1 - fi - ;; - p) - pool="${OPTARG}" - ;; - y) - echo "NOTICE: This tool is currently considered EXPERIMENTAL." - proceed=1 - ;; - *) - echo - echo "ERROR: Unrecognized argument: ${o}" - usage - ;; - esac +debug=0 + +while getopts "b:l:p:yd" o; do + case "${o}" in + b) + bucket="${OPTARG}" + ;; + l) + if [ -e "${OPTARG}" ]; then + lsoutput="${OPTARG}" + else + echo + echo "ERROR: Provided 'rados ls' output file name does not exist. ${OPTARG}" + exit 1 + fi + ;; + p) + pool="${OPTARG}" + ;; + y) + echo "NOTICE: This tool is currently considered EXPERIMENTAL." 
+ proceed=1 + ;; + d) + echo setting debug to 1 + debug=1 + ;; + *) + echo + usage + exit 1 # usage should exit also + ;; + esac done shift $((OPTIND-1)) +if [ "$debug" == 1 ] ;then + export debugging_rgwadmin=" --debug-rgw=20 --debug-ms=20 " +else + export debugging_rgwadmin=" 2>/dev/null " +fi + +# special code path for versioned buckets +handle_versioned() { + while read o ;do + + # determine object and locator for OLH + olh_line=$(awk "/_$o(\t.*)?$/"' && !/__:/' $marker_ls) + olh_obj=$(echo "$olh_line" | sed 's/\t.*//') # obj everything before tab + olh_loc=$(echo "$olh_line" | sed 's/^.*\t\(.*\)/\1/') # locator everything after tab + + # process OLH object; determine final instance or delete-marker + rados -p $pool getxattr $olh_obj user.rgw.olh.info --object-locator "$olh_loc" >$olh_info_enc + ceph-dencoder import $olh_info_enc type RGWOLHInfo decode dump_json >$olh_info_json + last_instance=$(jq -r ".target.key.instance" $olh_info_json) + if [ -z "$last_instance" ] ;then + # filters out entry without an instance + filter_out_last_instance="${marker}_[^_]" + else + # filters out entry with an instance + filter_out_last_instance="$last_instance" + fi + + # we currently don't need the delete marker, but we can have access to it + # delete_marker=$(jq -r ".removed" $olh_info_json) # true or false + + IFS='\t' grep -E "(__:.*[^_])?_$o(\t.*)?$" $marker_ls | # versioned head objects + while read obj loc ;do + rados -p $pool stat2 $obj --object-locator "$loc" + done | # output of stat2, which includes mtime + sort -k 3 | # stat2 but sorted by mtime earlier to later + grep -v "$filter_out_last_instance" | # remove the final instance in case it's not last + + # sed 1) removes pool and marker, 2) removes indicator of + # version id, 3) removes obj name including escaped + # leading underscores, 4) inserts object name and tab at + # front of line, and 5) removes trailing tab.
Note: after + # 3) the line will be empty or contain a version id, so 5) + # is for when that line is empty and 4 inserts a tab + sed -E \ + -e "s/.*${marker}//" \ + -e 's/^__://' \ + -e "s/_+${o}.*//" \ + -e "s/^/${o}\t/" + echo "$o $last_instance" # now add the final instance; could be delete marker; note TAB + done <$obj_list 2>/dev/null | sed 's/\t$//' >$obj_list_ver +} + if [ -z "$bucket" ]; then echo echo "ERROR: Bucket option ( -b ) is required." @@ -162,9 +236,9 @@ if [ -z "$bucket" ]; then fi # read bucket entry metadata -radosgw-admin metadata get bucket:$bucket >$bkt_entry 2>/dev/null -marker=$(strip_quotes $(jq ".data.bucket.marker" $bkt_entry)) -bucket_id=$(strip_quotes $(jq ".data.bucket.bucket_id" $bkt_entry)) +eval "radosgw-admin metadata get bucket:$bucket $debugging_rgwadmin >$bkt_entry" +export marker=$(jq -r ".data.bucket.marker" $bkt_entry) +export bucket_id=$(jq -r ".data.bucket.bucket_id" $bkt_entry) if [ -z "$marker" -o -z "$bucket_id" ] ;then echo "ERROR: unable to read entry-point metadata for bucket \"$bucket\"." clean @@ -175,22 +249,7 @@ echo marker is $marker echo bucket_id is $bucket_id # read bucket instance metadata -radosgw-admin metadata get bucket.instance:${bucket}:$bucket_id >$bkt_inst 2>/dev/null - -# handle versioned buckets -bkt_flags=$(jq ".data.bucket_info.flags" $bkt_inst) -if [ -z "$bkt_flags" ] ;then - echo "ERROR: unable to read instance metadata for bucket \"$bucket\"." - exit 1 -fi - -# mask bit indicating it's a versioned bucket -is_versioned=$(( $bkt_flags & 2)) -if [ "$is_versioned" -ne 0 ] ;then - echo "Error: this bucket appears to be versioned, and this tool cannot work with versioned buckets." 
- clean - exit 1 -fi +eval "radosgw-admin metadata get bucket.instance:${bucket}:$bucket_id $debugging_rgwadmin >$bkt_inst" # examine number of bucket index shards num_shards=$(jq ".data.bucket_info.num_shards" $bkt_inst) @@ -202,17 +261,41 @@ if [ -z "$pool" ]; then fi echo data pool is $pool +# handle versioned buckets +export bkt_flags=$(jq ".data.bucket_info.flags" $bkt_inst) +if [ -z "$bkt_flags" ] ;then + echo "ERROR: unable to read instance metadata for bucket \"$bucket\"." + clean + exit 1 +fi + # search the data pool for all of the head objects that begin with the -# marker that are not in namespaces (indicated by an extra underscore) -# and then strip away all but the rgw object name +# marker that are not in namespaces (indicated by an extra underscore +# and colon) and then strip away all but the rgw object name, +# including optional locator that follows a tab. Initial underscores +# are quoted with an underscore, so swap the first double with a +# single. if [ -z "$lsoutput" ]; then - ( rados -p $pool ls | grep "^${marker}_[^_]" | sed "s/^${marker}_\(.*\)/\1/" >$obj_list ) 2>/dev/null + ( rados -p $pool ls | grep "^${marker}_" >$marker_ls ) 2>/dev/null else - ( grep "^${marker}_[^_]" "${lsoutput}" | sed "s/^${marker}_\(.*\)/\1/" >$obj_list ) 2>/dev/null + ( grep "^${marker}_" "${lsoutput}" >$marker_ls ) 2>/dev/null +fi + +( sed -E 's/\t.*//' $marker_ls | grep -v -E "^${marker}__[^_]+_" | sed -E "s/^${marker}_(.*)/\1/" | sed 's/^__/_/' >$obj_list ) 2>/dev/null + +# mask bit indicating it's a versioned bucket +export is_versioned=$(( $bkt_flags & 2)) +export is_suspended=$(( $bkt_flags & 4)) +if [ "$is_versioned" -ne 0 ] ;then + echo "INFO: this bucket appears to be versioned." 
+ handle_versioned +else + # no additional versioned handling, so just hard link + ln $obj_list $obj_list_ver fi # handle the case where the resulting object list file is empty -if [ -s $obj_list ] ;then +if [ -s $obj_list_ver ] ;then : else echo "NOTICE: No head objects for bucket \"$bucket\" were found in pool \"$pool\", so nothing was recovered." @@ -223,7 +306,7 @@ fi if [ -z "$proceed" ] ;then # warn user and get permission to proceed echo "NOTICE: This tool is currently considered EXPERIMENTAL." - echo "The list of objects that we will attempt to restore can be found in \"$obj_list\"." + echo "The list of objects that we will attempt to restore can be found in \"$obj_list_ver\"." echo "Please review the object names in that file (either below or in another window/terminal) before proceeding." while true ; do read -p "Type \"proceed!\" to proceed, \"view\" to view object list, or \"q\" to quit: " action @@ -233,7 +316,7 @@ if [ -z "$proceed" ] ;then exit 0 elif [ "$action" == "view" ] ;then echo "Viewing..." - less $obj_list + less $obj_list_ver elif [ "$action" == "proceed!" ] ;then echo "Proceeding..." break @@ -243,41 +326,7 @@ if [ -z "$proceed" ] ;then done fi -# execute object rewrite on all of the head objects -radosgw-admin object reindex --bucket=$bucket --objects-file=$obj_list 2>/dev/null -reindex_done=$(date +%s) - -# note: large is 2^30 -export large=1073741824 - -listcmd="radosgw-admin bucket list --bucket=$bucket --allow-unordered --max-entries=$large" - -if [ -n "$proceed" ] ;then - sleep $pending_op_secs - $listcmd >/dev/null 2>/dev/null -else - echo "NOTICE: Bucket stats are currently incorrect. They can be restored with the following command after 2 minutes:" - echo " $listcmd" - - while true ; do - read -p "Would you like to take the time to recalculate bucket stats now? 
[yes/no] " action - if [ "$action" == "no" ] ;then - break - elif [ "$action" == "yes" ] ;then - # make sure at least $pending_op_secs since reindex completed - now=$(date +%s) - sleep_time=$(expr $pending_op_secs - $now + $reindex_done) - if [ "$sleep_time" -gt 0 ] ;then - sleep $sleep_time - fi - - $listcmd >/dev/null 2>/dev/null - break - else - echo "Error: response \"$action\" is not understood." - fi - done -fi +eval "radosgw-admin object reindex --bucket=$bucket --objects-file=$obj_list_ver --yes-i-really-mean-it $debugging_rgwadmin" clean echo Done From bbc74189d0f009c394afc11f0f17313461f9c76c Mon Sep 17 00:00:00 2001 From: "J. Eric Ivancich" Date: Wed, 5 Jul 2023 17:05:06 -0400 Subject: [PATCH 6/8] rgw: a few minor code clean-ups Includes code documentation, formatting fixes, and removing an unimplemented function from an interface. Signed-off-by: J. Eric Ivancich --- src/rgw/driver/rados/rgw_rados.cc | 5 ++--- src/rgw/driver/rados/rgw_rados.h | 32 +++++++++++++++---------------- src/rgw/rgw_common.h | 4 ++-- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/rgw/driver/rados/rgw_rados.cc b/src/rgw/driver/rados/rgw_rados.cc index 8048f1bcab47..2743ff8a8dc2 100644 --- a/src/rgw/driver/rados/rgw_rados.cc +++ b/src/rgw/driver/rados/rgw_rados.cc @@ -6695,18 +6695,17 @@ int RGWRados::Bucket::UpdateIndex::prepare(const DoutPrefixProvider *dpp, RGWMod int r = guard_reshard(dpp, obj, nullptr, [&](BucketShard *bs) -> int { return store->cls_obj_prepare_op(dpp, *bs, op, optag, obj, bilog_flags, y, zones_trace); }, y); - if (r < 0) { return r; } - prepared = true; + prepared = true; return 0; } int RGWRados::Bucket::UpdateIndex::complete(const DoutPrefixProvider *dpp, int64_t poolid, uint64_t epoch, uint64_t size, uint64_t accounted_size, - ceph::real_time& ut, const string& etag, + const ceph::real_time& ut, const string& etag, const string& content_type, const string& storage_class, bufferlist *acl_bl, RGWObjCategory category, diff --git 
a/src/rgw/driver/rados/rgw_rados.h b/src/rgw/driver/rados/rgw_rados.h index 47b6bc61710f..4779ad592095 100644 --- a/src/rgw/driver/rados/rgw_rados.h +++ b/src/rgw/driver/rados/rgw_rados.h @@ -422,7 +422,6 @@ class RGWRados RGWObjManifest** pmanifest, optional_yield y); int update_placement_map(); - int store_bucket_info(RGWBucketInfo& info, std::map *pattrs, RGWObjVersionTracker *objv_tracker, bool exclusive); void remove_rgw_head_obj(librados::ObjectWriteOperation& op); void cls_obj_check_prefix_exist(librados::ObjectOperation& op, const std::string& prefix, bool fail_if_exist); @@ -781,7 +780,7 @@ class RGWRados int read(int64_t ofs, int64_t end, bufferlist& bl, optional_yield y, const DoutPrefixProvider *dpp); int iterate(const DoutPrefixProvider *dpp, int64_t ofs, int64_t end, RGWGetDataCB *cb, optional_yield y); int get_attr(const DoutPrefixProvider *dpp, const char *name, bufferlist& dest, optional_yield y); - }; + }; // struct RGWRados::Object::Read struct Write { RGWRados::Object *target; @@ -827,7 +826,7 @@ class RGWRados const req_state* get_req_state() { return nullptr; /* XXX dang Only used by LTTng, and it handles null anyway */ } - }; + }; // struct RGWRados::Object::Write struct Delete { RGWRados::Object *target; @@ -861,7 +860,7 @@ class RGWRados explicit Delete(RGWRados::Object *_target) : target(_target) {} int delete_obj(optional_yield y, const DoutPrefixProvider *dpp); - }; + }; // struct RGWRados::Object::Delete struct Stat { RGWRados::Object *source; @@ -882,16 +881,15 @@ class RGWRados State() : completion(NULL), ret(0) {} } state; - explicit Stat(RGWRados::Object *_source) : source(_source) {} int stat_async(const DoutPrefixProvider *dpp); int wait(const DoutPrefixProvider *dpp); - int stat(); + private: int finish(const DoutPrefixProvider *dpp); - }; - }; + }; // struct RGWRados::Object::Stat + }; // class RGWRados::Object class Bucket { RGWRados *store; @@ -939,12 +937,14 @@ class RGWRados } int guard_reshard(const DoutPrefixProvider *dpp, 
const rgw_obj& obj_instance, BucketShard **pbs, std::function call, optional_yield y); + public: - UpdateIndex(RGWRados::Bucket *_target, const rgw_obj& _obj) : target(_target), obj(_obj), - bs(target->get_store()) { - blind = (target->get_bucket_info().layout.current_index.layout.type == rgw::BucketIndexType::Indexless); - } + UpdateIndex(RGWRados::Bucket *_target, const rgw_obj& _obj) : target(_target), + obj(_obj), + bs(target->get_store()) { + blind = target->get_bucket_info().layout.current_index.layout.type == rgw::BucketIndexType::Indexless; + } int get_bucket_shard(BucketShard **pbs, const DoutPrefixProvider *dpp, optional_yield y) { if (!bs_initialized) { @@ -967,7 +967,7 @@ class RGWRados int prepare(const DoutPrefixProvider *dpp, RGWModifyOp, const std::string *write_tag, optional_yield y); int complete(const DoutPrefixProvider *dpp, int64_t poolid, uint64_t epoch, uint64_t size, - uint64_t accounted_size, ceph::real_time& ut, + uint64_t accounted_size, const ceph::real_time& ut, const std::string& etag, const std::string& content_type, const std::string& storage_class, bufferlist *acl_bl, RGWObjCategory category, @@ -987,7 +987,7 @@ class RGWRados const std::string *get_optag() { return &optag; } bool is_prepared() { return prepared; } - }; // class UpdateIndex + }; // class RGWRados::Bucket::UpdateIndex class List { protected: @@ -1051,8 +1051,8 @@ class RGWRados rgw_obj_key& get_next_marker() { return next_marker; } - }; // class List - }; // class Bucket + }; // class RGWRados::Bucket::List + }; // class RGWRados::Bucket int on_last_entry_in_listing(const DoutPrefixProvider *dpp, RGWBucketInfo& bucket_info, diff --git a/src/rgw/rgw_common.h b/src/rgw/rgw_common.h index 67333be99885..c1fe2ee2abee 100644 --- a/src/rgw/rgw_common.h +++ b/src/rgw/rgw_common.h @@ -75,8 +75,8 @@ using ceph::crypto::MD5; #define RGW_SYS_PARAM_PREFIX "rgwx-" #define RGW_ATTR_ACL RGW_ATTR_PREFIX "acl" -#define RGW_ATTR_RATELIMIT RGW_ATTR_PREFIX "ratelimit" -#define 
RGW_ATTR_LC RGW_ATTR_PREFIX "lc" +#define RGW_ATTR_RATELIMIT RGW_ATTR_PREFIX "ratelimit" +#define RGW_ATTR_LC RGW_ATTR_PREFIX "lc" #define RGW_ATTR_CORS RGW_ATTR_PREFIX "cors" #define RGW_ATTR_ETAG RGW_ATTR_PREFIX "etag" #define RGW_ATTR_BUCKETS RGW_ATTR_PREFIX "buckets" From 522785e05329d4aa3e2672826032d5bd9a8ad0de Mon Sep 17 00:00:00 2001 From: "J. Eric Ivancich" Date: Wed, 12 Jul 2023 13:54:07 -0400 Subject: [PATCH 7/8] rgw: allow multisite specification used w/ rgw-restore-bucket-index script When the metadata for a bucket is requested only the default realm/zonegroup/zone is currently supported. This adds three new command-line options to rgw-restore-bucket-index: -r -g -z The multisite specification will then be used in invocations of `radosgw-admin`, such as to query the zone, get metadata, and invoke the "object reindex" subcommand. Signed-off-by: J. Eric Ivancich --- doc/man/8/rgw-restore-bucket-index.rst | 15 +++++++++++ src/rgw/rgw-restore-bucket-index | 35 ++++++++++++++++++-------- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/doc/man/8/rgw-restore-bucket-index.rst b/doc/man/8/rgw-restore-bucket-index.rst index b297fa753706..d721dd9702f7 100644 --- a/doc/man/8/rgw-restore-bucket-index.rst +++ b/doc/man/8/rgw-restore-bucket-index.rst @@ -56,6 +56,21 @@ Command-Line Arguments bucket. If omitted the utility will try to determine the data pool on its own. +.. option:: -r + + Optional, specify the realm if the restoration is not being applied + to the default realm. + +.. option:: -g + + Optional, specify the zonegroup if the restoration is not being applied + to the default zonegroup. + +.. option:: -z + + Optional, specify the zone if the restoration is not being applied + to the default zone. + .. 
option:: -l Optional, specify a file containing the output of a rados listing diff --git a/src/rgw/rgw-restore-bucket-index b/src/rgw/rgw-restore-bucket-index index f9269f55b13f..fffd5074dd45 100755 --- a/src/rgw/rgw-restore-bucket-index +++ b/src/rgw/rgw-restore-bucket-index @@ -97,12 +97,15 @@ usage() { Usage: $0 -b [-l ] [-p ] [-y] where: - -b Required - name of the bucket to operate on - -l Optional - file containing the output of 'rados ls -p ' - -p Optional - data pool; if not provided will be inferred from bucket and zone information - -y Optional - proceed with restoring without confirming with the user - USE WITH CAUTION. - -d Optional - run with debugging output + -b Required - name of the bucket to operate on + -l Optional - file containing the output of 'rados ls -p ' + -r Optional - specify the realm if not applying to the default realm" + -g Optional - specify the zonegroup if not applying to the default zonegroup" + -z Optional - specify the zone if not applying to the default zone" + -p Optional - data pool; if not provided will be inferred from bucket and zone information + -y Optional - proceed with restoring without confirming with the user + USE WITH CAUTION. 
+ -d Optional - run with debugging output EOF super_exit } @@ -128,7 +131,7 @@ get_pool() { plmt_class=STANDARD fi - radosgw-admin zone get >$zone_info 2>/dev/null + radosgw-admin zone get $multisite_spec >$zone_info 2>/dev/null pool=$(jq -r ".placement_pools [] | select(.key | contains(\"${plmt_pool}\")) .val .storage_classes.${plmt_class}.data_pool" $zone_info) if [ -z "$pool" ] ;then @@ -140,10 +143,11 @@ get_pool() { export bucket="" pool="" +multisite_spec="" lsoutput="" debug=0 -while getopts "b:l:p:yd" o; do +while getopts "b:l:p:r:g:z:yd" o; do case "${o}" in b) bucket="${OPTARG}" @@ -160,6 +164,15 @@ while getopts "b:l:p:yd" o; do p) pool="${OPTARG}" ;; + r) + multisite_spec="$multisite_spec --rgw-realm=${OPTARG}" + ;; + g) + multisite_spec="$multisite_spec --rgw-zonegroup=${OPTARG}" + ;; + z) + multisite_spec="$multisite_spec --rgw-zone=${OPTARG}" + ;; y) echo "NOTICE: This tool is currently considered EXPERIMENTAL." proceed=1 @@ -236,7 +249,7 @@ if [ -z "$bucket" ]; then fi # read bucket entry metadata -eval "radosgw-admin metadata get bucket:$bucket $debugging_rgwadmin >$bkt_entry" +eval "radosgw-admin metadata get bucket:$bucket $debugging_rgwadmin $multisite_spec >$bkt_entry" export marker=$(jq -r ".data.bucket.marker" $bkt_entry) export bucket_id=$(jq -r ".data.bucket.bucket_id" $bkt_entry) if [ -z "$marker" -o -z "$bucket_id" ] ;then @@ -249,7 +262,7 @@ echo marker is $marker echo bucket_id is $bucket_id # read bucket instance metadata -eval "radosgw-admin metadata get bucket.instance:${bucket}:$bucket_id $debugging_rgwadmin >$bkt_inst" +eval "radosgw-admin metadata get bucket.instance:${bucket}:$bucket_id $multisite_spec $debugging_rgwadmin >$bkt_inst" # examine number of bucket index shards num_shards=$(jq ".data.bucket_info.num_shards" $bkt_inst) @@ -326,7 +339,7 @@ if [ -z "$proceed" ] ;then done fi -eval "radosgw-admin object reindex --bucket=$bucket --objects-file=$obj_list_ver --yes-i-really-mean-it $debugging_rgwadmin" +eval "radosgw-admin 
object reindex --bucket=$bucket --objects-file=$obj_list_ver $multisite_spec --yes-i-really-mean-it $debugging_rgwadmin" clean echo Done From 89f81216a8d8a959ed6766eaf9e867cc9a6bc391 Mon Sep 17 00:00:00 2001 From: "J. Eric Ivancich" Date: Wed, 9 Aug 2023 10:02:30 -0400 Subject: [PATCH 8/8] rgw: make error message more friendly on rgw-restore-bucket-index When the referenced bucket cannot be found, remind the user that they may need to set the realm, zonegroup, and/or zone. This improvement was suggested by Madhavi Kasturi. Signed-off-by: J. Eric Ivancich --- src/rgw/rgw-restore-bucket-index | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/rgw/rgw-restore-bucket-index b/src/rgw/rgw-restore-bucket-index index fffd5074dd45..512ca24d9ec2 100755 --- a/src/rgw/rgw-restore-bucket-index +++ b/src/rgw/rgw-restore-bucket-index @@ -94,10 +94,10 @@ super_exit() { usage() { >&2 cat << EOF -Usage: $0 -b [-l ] [-p ] [-y] +Usage: $0 -b [-l ] [-p ] [-y] where: - -b Required - name of the bucket to operate on + -b Required - name of the bucket to operate on -l Optional - file containing the output of 'rados ls -p ' -r Optional - specify the realm if not applying to the default realm" -g Optional - specify the zonegroup if not applying to the default zonegroup" @@ -253,8 +253,12 @@ eval "radosgw-admin metadata get bucket:$bucket $debugging_rgwadmin $multisite_s export marker=$(jq -r ".data.bucket.marker" $bkt_entry) export bucket_id=$(jq -r ".data.bucket.bucket_id" $bkt_entry) if [ -z "$marker" -o -z "$bucket_id" ] ;then - echo "ERROR: unable to read entry-point metadata for bucket \"$bucket\"." + echo "ERROR: Unable to read entry-point metadata for bucket \"$bucket\"." + echo " Please make sure that is correct and, if not using" + echo " the defaults, that , , and/or" + echo " are correctly specified." clean + usage exit 1 fi