From c02906ae6463425ef3f00000ba1ce78fcc9e2478 Mon Sep 17 00:00:00 2001 From: "J. Eric Ivancich" Date: Wed, 14 Jun 2023 15:53:19 -0400 Subject: [PATCH] rgw: enhances rgw-restore-bucket-index script This enhances the script to both process versioned buckets correctly and to handle object names that begin with underscore. If the bucket is versioned it submits each version chronologically (based on mtime) to be reindexed in order to "replay" the modification of objects. However mtime is not a perfect indicator. So additionally it looks at the OLH object to determine the most recent version and the script makes sure that it is replayed last. The order of previous versions is likely correct, but not guaranteed to be so. Additional logic is added to handle objects with names that begin with underscore ('_') since that's used as a delimiter and needs to be escaped and rados object locators are also used. A man page for the script is added. Signed-off-by: J. Eric Ivancich --- ceph.spec.in | 1 + debian/radosgw.install | 1 + doc/man/8/CMakeLists.txt | 3 +- doc/man/8/rgw-restore-bucket-index.rst | 91 ++++++++ doc/man_index.rst | 1 + src/rgw/rgw-restore-bucket-index | 303 ++++++++++++++----------- 6 files changed, 272 insertions(+), 128 deletions(-) create mode 100644 doc/man/8/rgw-restore-bucket-index.rst diff --git a/ceph.spec.in b/ceph.spec.in index 1fc998d014db..00410df64ed5 100644 --- a/ceph.spec.in +++ b/ceph.spec.in @@ -1704,6 +1704,7 @@ exit 0 %{_mandir}/man8/rbd-replay-many.8* %{_mandir}/man8/rbd-replay-prep.8* %{_mandir}/man8/rgw-orphan-list.8* +%{_mandir}/man8/rgw-restore-bucket-index.8* %dir %{_datadir}/ceph/ %{_datadir}/ceph/known_hosts_drop.ceph.com %{_datadir}/ceph/id_rsa_drop.ceph.com diff --git a/debian/radosgw.install b/debian/radosgw.install index b0367e8f651f..be74a52c1a9c 100644 --- a/debian/radosgw.install +++ b/debian/radosgw.install @@ -7,3 +7,4 @@ usr/bin/radosgw-token usr/share/man/man8/ceph-diff-sorted.8 usr/share/man/man8/radosgw.8 
usr/share/man/man8/rgw-orphan-list.8 +usr/share/man/man8/rgw-restore-bucket-index.8 diff --git a/doc/man/8/CMakeLists.txt b/doc/man/8/CMakeLists.txt index f1df5b4b448e..4026cf9d0950 100644 --- a/doc/man/8/CMakeLists.txt +++ b/doc/man/8/CMakeLists.txt @@ -60,7 +60,8 @@ if(WITH_RADOSGW) radosgw-admin.rst rgw-orphan-list.rst rgw-policy-check.rst - ceph-diff-sorted.rst) + ceph-diff-sorted.rst + rgw-restore-bucket-index.rst) endif() if(WITH_RBD) diff --git a/doc/man/8/rgw-restore-bucket-index.rst b/doc/man/8/rgw-restore-bucket-index.rst new file mode 100644 index 000000000000..b297fa753706 --- /dev/null +++ b/doc/man/8/rgw-restore-bucket-index.rst @@ -0,0 +1,91 @@ +:orphan: + +================================================================================== + rgw-restore-bucket-index -- try to restore a bucket's objects to its bucket index +================================================================================== + +.. program:: rgw-restore-bucket-index + +Synopsis +======== + +| **rgw-restore-bucket-index** + +Description +=========== + +:program:`rgw-restore-bucket-index` is an *EXPERIMENTAL* RADOS gateway +user administration utility. It scans the data pool for objects that +belong to a given bucket and tries to add those objects back to the +bucket index. It's intended as a **last resort** after a +**catastrophic** loss of a bucket index. Please thoroughly review the +*Warnings* listed below. + +The utility works with regular (i.e., un-versioned) buckets, versioned +buckets, and buckets where versioning has been suspended. + +Warnings +======== + +This utility is currently considered *EXPERIMENTAL*. + +The results are unpredictable if the bucket is in +active use while this utility is running. + +The results are unpredictable if only some of the bucket's objects are +missing from the bucket index. In such a case, consider using the +"object reindex" subcommand of `radosgw-admin` to restore objects to +the bucket index one-by-one. 
+ +For objects in versioned buckets, if the latest version is a delete +marker, it will be restored. If a delete marker has been written over +with a new version, then that delete marker will not be restored. This +should have minimal impact on results in that it recovers the +latest version and previous versions are all accessible. + +Command-Line Arguments +====================== + +.. option:: -b + + Specify the bucket to be reindexed. + +.. option:: -p + + Optional, specify the data pool containing head objects for the + bucket. If omitted the utility will try to determine the data pool + on its own. + +.. option:: -l + + Optional, specify a file containing the output of a rados listing + of the data pool. Since listing the data pool can be an expensive + and time-consuming operation, if trying to recover the indices for + multiple buckets, it could be more efficient to re-use the same + listing. + +.. option:: -y + + Optional, proceed without further prompting. Without this option + the utility will display some information and prompt the user as to + whether to proceed. When provided, the utility will simply + proceed. Please use caution when using this option. + +Examples +======== + +Attempt to restore the index for a bucket named *summer-2023-photos*:: + + $ rgw-restore-bucket-index -b summer-2023-photos + +Availability +============ + +:program:`rgw-restore-bucket-index` is part of Ceph, a massively +scalable, open-source, distributed storage system. Please refer to +the Ceph documentation at https://docs.ceph.com for more information. 
+ +See also +======== + +:doc:`radosgw-admin `\(8) diff --git a/doc/man_index.rst b/doc/man_index.rst index 989ed7e229a0..ca1353f911ec 100644 --- a/doc/man_index.rst +++ b/doc/man_index.rst @@ -48,3 +48,4 @@ man/8/ceph-immutable-object-cache man/8/ceph-diff-sorted man/8/rgw-policy-check + man/8/rgw-restore-bucket-index diff --git a/src/rgw/rgw-restore-bucket-index b/src/rgw/rgw-restore-bucket-index index 72e974c492d8..f9269f55b13f 100755 --- a/src/rgw/rgw-restore-bucket-index +++ b/src/rgw/rgw-restore-bucket-index @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# version 2023-03-21 +# version 2023-07-06 # rgw-restore-bucket-index is an EXPERIMENTAL tool to use in case # bucket index entries for objects in the bucket are somehow lost. It @@ -27,39 +27,65 @@ export TOP_PID=$$ # relies on this ordering export LC_ALL=C +# temporary files export bkt_entry=/tmp/rgwrbi-bkt-entry.$$ export bkt_inst=/tmp/rgwrbi-bkt-inst.$$ -export bkt_inst_new=/tmp/rgwrbi-bkt-inst-new.$$ +export marker_ls=/tmp/rgwrbi-marker-ls.$$ export obj_list=/tmp/rgwrbi-object-list.$$ +export obj_list_ver=/tmp/rgwrbi-object-list-ver.$$ +export obj_reindex_script=/tmp/rgwrbi-object-list-script.$$ export zone_info=/tmp/rgwrbi-zone-info.$$ +export olh_info_enc=/tmp/rgwrbi-olh-info-enc.$$ +export olh_info_json=/tmp/rgwrbi-olh-info-json.$$ + export clean_temps=1 + +clean() { + if [ -n "$clean_temps" ] ;then + rm -f $bkt_entry $bkt_inst $marker_ls $obj_list $obj_list_ver $obj_reindex_script $zone_info $olh_info_enc $olh_info_json + fi +} + # number of seconds for a bucket index pending op to be completed via # dir_suggest mechanism -pending_op_secs=120 +export pending_op_secs=120 # -if which radosgw-admin > /dev/null ;then - : -else - echo 'Error: must have command `radosgw-admin` installed and on $PATH for operation.' - exit 1 -fi +# sanity checks +# -# make sure jq is available -if which jq > /dev/null ;then - : -else - echo 'Error: must have command `jq` installed and on $PATH for json parsing.' 
- exit 1 +export exit_code=0 + +tool_list="radosgw-admin ceph-dencoder jq" +for t in $tool_list ;do + if which $t > /dev/null ;then + : + else + echo "ERROR: must have tool \`$t\` installed and on \$PATH for operation." + exit_code=1 + fi +done +if [ "$exit_code" -ne 0 ] ;then + exit $exit_code fi +dencode_list="RGWOLHInfo" +for t in $dencode_list ;do + if ceph-dencoder list_types | grep -q $t ;then + : + else + echo "ERROR: ceph-dencoder lacking module to decode ${t}." + exit_code=1 + fi +done +if [ "$exit_code" -ne 0 ] ;then + exit $exit_code +fi -clean() { - if [ -n "$clean_temps" ] ;then - rm -f $bkt_entry $bkt_inst $bkt_inst_new $obj_list $zone_info - fi -} +# +# helper functions +# super_exit() { kill -s TERM $TOP_PID @@ -71,41 +97,31 @@ usage() { Usage: $0 -b [-l ] [-p ] [-y] where: - -b Required - The name of the bucket to operate on - -l Optional - A file with the output of 'rados ls -p ' - -p Optional - If not provided, will be inferred from bucket and zone information. - -y Optional - Proceed with correction without prompting the user + -b Required - name of the bucket to operate on + -l Optional - file containing the output of 'rados ls -p ' + -p Optional - data pool; if not provided will be inferred from bucket and zone information + -y Optional - proceed with restoring without confirming with the user USE WITH CAUTION. - + -d Optional - run with debugging output EOF super_exit } -# strips the starting and ending double quotes from a string, so: -# "dog" -> dog -# "dog -> "dog -# d"o"g -> d"o"g -# "do"g" -> do"g -strip_quotes() { - echo "$1" | sed 's/^"\(.*\)"$/\1/' -} - # Determines the name of the data pool. Expects the optional # command-line argument to appear as $1 if there is one. The # command-line has the highest priority, then the "explicit_placement" # in the bucket instance data, and finally the "placement_rule" in the # bucket instance data. 
get_pool() { - # explicit_placement - expl_pool=$(strip_quotes $(jq '.data.bucket_info.bucket.explicit_placement.data_pool' $bkt_inst)) + expl_pool=$(jq -r '.data.bucket_info.bucket.explicit_placement.data_pool' $bkt_inst) if [ -n "$expl_pool" ] ;then echo "$expl_pool" exit 0 fi # placement_rule - plmt_rule=$(strip_quotes $(jq '.data.bucket_info.placement_rule' $bkt_inst)) + plmt_rule=$(jq -r '.data.bucket_info.placement_rule' $bkt_inst) plmt_pool=$(echo "$plmt_rule" | awk -F / '{print $1}') plmt_class=$(echo "$plmt_rule" | awk -F / '{print $2}') if [ -z "$plmt_class" ] ;then @@ -113,7 +129,7 @@ get_pool() { fi radosgw-admin zone get >$zone_info 2>/dev/null - pool=$(strip_quotes $(jq ".placement_pools [] | select(.key | contains(\"${plmt_pool}\")) .val .storage_classes.${plmt_class}.data_pool" $zone_info)) + pool=$(jq -r ".placement_pools [] | select(.key | contains(\"${plmt_pool}\")) .val .storage_classes.${plmt_class}.data_pool" $zone_info) if [ -z "$pool" ] ;then echo ERROR: unable to determine pool. @@ -122,39 +138,97 @@ get_pool() { echo "$pool" } -bucket="" +export bucket="" pool="" lsoutput="" -while getopts ":b:l:p:y" o; do - case "${o}" in - b) - bucket="${OPTARG}" - ;; - l) - if [ -e "${OPTARG}" ]; then - lsoutput="${OPTARG}" - else - echo - echo "ERROR: Provided 'rados ls' output file name does not exist. ${OPTARG}" - exit 1 - fi - ;; - p) - pool="${OPTARG}" - ;; - y) - echo "NOTICE: This tool is currently considered EXPERIMENTAL." - proceed=1 - ;; - *) - echo - echo "ERROR: Unrecognized argument: ${o}" - usage - ;; - esac +debug=0 + +while getopts "b:l:p:yd" o; do + case "${o}" in + b) + bucket="${OPTARG}" + ;; + l) + if [ -e "${OPTARG}" ]; then + lsoutput="${OPTARG}" + else + echo + echo "ERROR: Provided 'rados ls' output file name does not exist. ${OPTARG}" + exit 1 + fi + ;; + p) + pool="${OPTARG}" + ;; + y) + echo "NOTICE: This tool is currently considered EXPERIMENTAL." 
+ proceed=1 + ;; + d) + echo setting debug to 1 + debug=1 + ;; + *) + echo + usage + exit 1 # useage should exit also + ;; + esac done shift $((OPTIND-1)) +if [ "$debug" == 1 ] ;then + export debugging_rgwadmin=" --debug-rgw=20 --debug-ms=20 " +else + export debugging_rgwadmin=" 2>/dev/null " +fi + +# special code path for versioned buckets +handle_versioned() { + while read o ;do + + # determine object and locator for OLH + olh_line=$(awk "/_$o(\t.*)?$/"' && !/__:/' $marker_ls) + olh_obj=$(echo "$olh_line" | sed 's/\t.*//') # obj everything before tab + olh_loc=$(echo "$olh_line" | sed 's/^.*\t\(.*\)/\1/') # locator everything after tab + + # process OLH object; determine final instance or delete-marker + rados -p $pool getxattr $olh_obj user.rgw.olh.info --object-locator "$olh_loc" >$olh_info_enc + ceph-dencoder import $olh_info_enc type RGWOLHInfo decode dump_json >$olh_info_json + last_instance=$(jq -r ".target.key.instance" $olh_info_json) + if [ -z "$last_instance" ] ;then + # filters out entry without an instance + filter_out_last_instance="${marker}_[^_]" + else + # filters out entry with an instance + filter_out_last_instance="$last_instance" + fi + + # we currently don't need the delete marker, but we can have access to it + # delete_marker=$(jq -r ".removed" $olh_info_json) # true or false + + IFS='\t' grep -E "(__:.*[^_])?_$o(\t.*)?$" $marker_ls | # versioned head objects + while read obj loc ;do + rados -p $pool stat2 $obj --object-locator "$loc" + done | # output of stat2, which includes mtime + sort -k 3 | # stat2 but sorted by mtime earlier to later + grep -v "$filter_out_last_instance" | # remove the final instance in case it's not last + + # sed 1) removes pool and marker, 2) removes indicator of + # version id, 3) removes obj name including escaped + # leading underscores, 4) inserts object name and tab at + # front of line, and 5) removes trailing tab. 
Note: after + # 3) the line will be empty or contain a version id, so 5) + # is for when that line is empty and 4 inserts a tab + sed -E \ + -e "s/.*${marker}//" \ + -e 's/^__://' \ + -e "s/_+${o}.*//" \ + -e "s/^/${o}\t/" + echo "$o $last_instance" # now add the final instance; could be delete marker; note TAB + done <$obj_list 2>/dev/null | sed 's/\t$//' >$obj_list_ver +} + if [ -z "$bucket" ]; then echo echo "ERROR: Bucket option ( -b ) is required." @@ -162,9 +236,9 @@ if [ -z "$bucket" ]; then fi # read bucket entry metadata -radosgw-admin metadata get bucket:$bucket >$bkt_entry 2>/dev/null -marker=$(strip_quotes $(jq ".data.bucket.marker" $bkt_entry)) -bucket_id=$(strip_quotes $(jq ".data.bucket.bucket_id" $bkt_entry)) +eval "radosgw-admin metadata get bucket:$bucket $debugging_rgwadmin >$bkt_entry" +export marker=$(jq -r ".data.bucket.marker" $bkt_entry) +export bucket_id=$(jq -r ".data.bucket.bucket_id" $bkt_entry) if [ -z "$marker" -o -z "$bucket_id" ] ;then echo "ERROR: unable to read entry-point metadata for bucket \"$bucket\"." clean @@ -175,22 +249,7 @@ echo marker is $marker echo bucket_id is $bucket_id # read bucket instance metadata -radosgw-admin metadata get bucket.instance:${bucket}:$bucket_id >$bkt_inst 2>/dev/null - -# handle versioned buckets -bkt_flags=$(jq ".data.bucket_info.flags" $bkt_inst) -if [ -z "$bkt_flags" ] ;then - echo "ERROR: unable to read instance metadata for bucket \"$bucket\"." - exit 1 -fi - -# mask bit indicating it's a versioned bucket -is_versioned=$(( $bkt_flags & 2)) -if [ "$is_versioned" -ne 0 ] ;then - echo "Error: this bucket appears to be versioned, and this tool cannot work with versioned buckets." 
- clean - exit 1 -fi +eval "radosgw-admin metadata get bucket.instance:${bucket}:$bucket_id $debugging_rgwadmin >$bkt_inst" # examine number of bucket index shards num_shards=$(jq ".data.bucket_info.num_shards" $bkt_inst) @@ -202,17 +261,41 @@ if [ -z "$pool" ]; then fi echo data pool is $pool +# handle versioned buckets +export bkt_flags=$(jq ".data.bucket_info.flags" $bkt_inst) +if [ -z "$bkt_flags" ] ;then + echo "ERROR: unable to read instance metadata for bucket \"$bucket\"." + clean + exit 1 +fi + # search the data pool for all of the head objects that begin with the -# marker that are not in namespaces (indicated by an extra underscore) -# and then strip away all but the rgw object name +# marker that are not in namespaces (indicated by an extra underscore +# and colon) and then strip away all but the rgw object name, +# including optional locator that follows a tab. Initial underscores +# are quoted with an underscore, so swap the first double with a +# single. if [ -z "$lsoutput" ]; then - ( rados -p $pool ls | grep "^${marker}_[^_]" | sed "s/^${marker}_\(.*\)/\1/" >$obj_list ) 2>/dev/null + ( rados -p $pool ls | grep "^${marker}_" >$marker_ls ) 2>/dev/null else - ( grep "^${marker}_[^_]" "${lsoutput}" | sed "s/^${marker}_\(.*\)/\1/" >$obj_list ) 2>/dev/null + ( grep "^${marker}_" "${lsoutput}" >$marker_ls ) 2>/dev/null +fi + +( sed -E 's/\t.*//' $marker_ls | grep -v -E "^${marker}__[^_]+_" | sed -E "s/^${marker}_(.*)/\1/" | sed 's/^__/_/' >$obj_list ) 2>/dev/null + +# mask bit indicating it's a versioned bucket +export is_versioned=$(( $bkt_flags & 2)) +export is_suspended=$(( $bkt_flags & 4)) +if [ "$is_versioned" -ne 0 ] ;then + echo "INFO: this bucket appears to be versioned." 
+ handle_versioned +else + # no additional versioned handling, so just hard link + ln $obj_list $obj_list_ver fi # handle the case where the resulting object list file is empty -if [ -s $obj_list ] ;then +if [ -s $obj_list_ver ] ;then : else echo "NOTICE: No head objects for bucket \"$bucket\" were found in pool \"$pool\", so nothing was recovered." @@ -223,7 +306,7 @@ fi if [ -z "$proceed" ] ;then # warn user and get permission to proceed echo "NOTICE: This tool is currently considered EXPERIMENTAL." - echo "The list of objects that we will attempt to restore can be found in \"$obj_list\"." + echo "The list of objects that we will attempt to restore can be found in \"$obj_list_ver\"." echo "Please review the object names in that file (either below or in another window/terminal) before proceeding." while true ; do read -p "Type \"proceed!\" to proceed, \"view\" to view object list, or \"q\" to quit: " action @@ -233,7 +316,7 @@ if [ -z "$proceed" ] ;then exit 0 elif [ "$action" == "view" ] ;then echo "Viewing..." - less $obj_list + less $obj_list_ver elif [ "$action" == "proceed!" ] ;then echo "Proceeding..." break @@ -243,41 +326,7 @@ if [ -z "$proceed" ] ;then done fi -# execute object rewrite on all of the head objects -radosgw-admin object reindex --bucket=$bucket --objects-file=$obj_list 2>/dev/null -reindex_done=$(date +%s) - -# note: large is 2^30 -export large=1073741824 - -listcmd="radosgw-admin bucket list --bucket=$bucket --allow-unordered --max-entries=$large" - -if [ -n "$proceed" ] ;then - sleep $pending_op_secs - $listcmd >/dev/null 2>/dev/null -else - echo "NOTICE: Bucket stats are currently incorrect. They can be restored with the following command after 2 minutes:" - echo " $listcmd" - - while true ; do - read -p "Would you like to take the time to recalculate bucket stats now? 
[yes/no] " action - if [ "$action" == "no" ] ;then - break - elif [ "$action" == "yes" ] ;then - # make sure at least $pending_op_secs since reindex completed - now=$(date +%s) - sleep_time=$(expr $pending_op_secs - $now + $reindex_done) - if [ "$sleep_time" -gt 0 ] ;then - sleep $sleep_time - fi - - $listcmd >/dev/null 2>/dev/null - break - else - echo "Error: response \"$action\" is not understood." - fi - done -fi +eval "radosgw-admin object reindex --bucket=$bucket --objects-file=$obj_list_ver --yes-i-really-mean-it $debugging_rgwadmin" clean echo Done