-
Notifications
You must be signed in to change notification settings - Fork 50
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adds an experimental script that allows a bucket index of a non-versioned bucket to be restored by applying `radosgw-admin object reindex ...` to all objects in the specified bucket. The objects in the bucket are determined by scanning the data pool for head objects containing the bucket's marker. Signed-off-by: J. Eric Ivancich <[email protected]>
- Loading branch information
Showing
4 changed files
with
196 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
#!/usr/bin/env bash | ||
|
||
# version 2023-03-07 | ||
|
||
# rgw-restore-bucket-index is an EXPERIMENTAL tool to use in case | ||
# bucket index entries for objects in the bucket are somehow lost. It | ||
# is expected to be needed and used rarely. A bucket name is provided | ||
# and the data pool for that bucket is scanned for all head objects | ||
# matching the bucket's marker. The rgw object name is then extracted | ||
# from the rados object name, and `radosgw-admin bucket reindex ...` | ||
# is used to add the bucket index entry. | ||
# | ||
# Because this script must process json objects, the `jq` tool must be | ||
# installed on the system. | ||
# | ||
# Usage: $0 [--proceed] <bucket-name> [data-pool-name] | ||
# | ||
# This tool is designed to be interactive, allowing the user to | ||
# examine the list of objects to be reindexed before | ||
# proceeding. However, if the "--proceed" option is provided, the | ||
# script will not prompt the user and simply proceed. | ||
|
||
trap "clean ; exit 1" TERM | ||
export TOP_PID=$$ | ||
|
||
# IMPORTANT: affects order produced by 'sort' and 'ceph-diff-sorted' | ||
# relies on this ordering | ||
export LC_ALL=C | ||
|
||
export bkt_entry=/tmp/rgwrbi-bkt-entry.$$ | ||
export bkt_inst=/tmp/rgwrbi-bkt-inst.$$ | ||
export bkt_inst_new=/tmp/rgwrbi-bkt-inst-new.$$ | ||
export obj_list=/tmp/rgwrbi-object-list.$$ | ||
export zone_info=/tmp/rgwrbi-zone-info.$$ | ||
export clean_temps=1 | ||
|
||
# make sure jq is available | ||
if which jq > /dev/null ;then | ||
: | ||
else | ||
echo 'Error: must have command `jq` installed and on $PATH for json parsing.' | ||
exit 1 | ||
fi | ||
|
||
clean() { | ||
if [ -n "$clean_temps" ] ;then | ||
rm -f $bkt_entry $bkt_inst $bkt_inst_new $obj_list $zone_info | ||
fi | ||
} | ||
|
||
super_exit() { | ||
kill -s TERM $TOP_PID | ||
} | ||
|
||
usage() { | ||
>&2 cat << EOF | ||
Usage: $0 [--proceed] <bucket-name> [data-pool-name] | ||
NOTE: This tool is currently considered EXPERIMENTAL. | ||
NOTE: If a data-pool-name is not supplied then it will be inferred from bucket and zone information. | ||
NOTE: If --proceed is provided then user will not be prompted to proceed. Use with caution. | ||
EOF | ||
super_exit | ||
} | ||
|
||
# strips the starting and ending double quotes from a string, so: | ||
# "dog" -> dog | ||
# "dog -> "dog | ||
# d"o"g -> d"o"g | ||
# "do"g" -> do"g | ||
strip_quotes() { | ||
echo "$1" | sed 's/^"\(.*\)"$/\1/' | ||
} | ||
|
||
# Determines the name of the data pool. Expects the optional | ||
# command-line argument to appear as $1 if there is one. The | ||
# command-line has the highest priority, then the "explicit_placement" | ||
# in the bucket instance data, and finally the "placement_rule" in the | ||
# bucket instance data. | ||
get_pool() { | ||
# command-line | ||
if [ -n "$1" ] ;then | ||
echo "$1" | ||
exit 0 | ||
fi | ||
|
||
# explicit_placement | ||
expl_pool=$(strip_quotes $(jq '.data.bucket_info.bucket.explicit_placement.data_pool' $bkt_inst)) | ||
if [ -n "$expl_pool" ] ;then | ||
echo "$expl_pool" | ||
exit 0 | ||
fi | ||
|
||
# placement_rule | ||
plmt_rule=$(strip_quotes $(jq '.data.bucket_info.placement_rule' $bkt_inst)) | ||
plmt_pool=$(echo "$plmt_rule" | awk -F / '{print $1}') | ||
plmt_class=$(echo "$plmt_rule" | awk -F / '{print $2}') | ||
if [ -z "$plmt_class" ] ;then | ||
plmt_class=STANDARD | ||
fi | ||
|
||
radosgw-admin zone get >$zone_info 2>/dev/null | ||
pool=$(strip_quotes $(jq ".placement_pools [] | select(.key | contains(\"${plmt_pool}\")) .val .storage_classes.${plmt_class}.data_pool" $zone_info)) | ||
|
||
if [ -z "$pool" ] ;then | ||
echo ERROR: unable to determine pool. | ||
super_exit | ||
fi | ||
echo "$pool" | ||
} | ||
|
||
if [ $1 == "--proceed" ] ;then | ||
echo "NOTICE: This tool is currently considered EXPERIMENTAL." | ||
proceed=1 | ||
shift | ||
fi | ||
|
||
# expect 1 or 2 arguments | ||
if [ $# -eq 0 -o $# -gt 2 ] ;then | ||
usage | ||
fi | ||
|
||
bucket=$1 | ||
|
||
# read bucket entry metadata | ||
radosgw-admin metadata get bucket:$bucket >$bkt_entry 2>/dev/null | ||
marker=$(strip_quotes $(jq ".data.bucket.marker" $bkt_entry)) | ||
bucket_id=$(strip_quotes $(jq ".data.bucket.bucket_id" $bkt_entry)) | ||
echo marker is $marker | ||
echo bucket_id is $bucket_id | ||
|
||
# read bucket instance metadata | ||
radosgw-admin metadata get bucket.instance:${bucket}:$bucket_id >$bkt_inst 2>/dev/null | ||
|
||
# handle versioned buckets | ||
bkt_flags=$(jq ".data.bucket_info.flags" $bkt_inst) | ||
is_versioned=$(( $bkt_flags & 2)) # mask bit indicating it's a versioned bucket | ||
if [ "$is_versioned" -ne 0 ] ;then | ||
echo "Error: this bucket appears to be versioned, and this tool cannot work with versioned buckets." | ||
clean | ||
exit 1 | ||
fi | ||
|
||
# examine number of bucket index shards | ||
num_shards=$(jq ".data.bucket_info.num_shards" $bkt_inst) | ||
echo number of bucket index shards is $num_shards | ||
|
||
# determine data pool | ||
pool=$(get_pool $2) | ||
echo data pool is $pool | ||
|
||
# search the data pool for all of the head objects that begin with the | ||
# marker that are not in namespaces (indicated by an extra underscore) | ||
# and then strip away all but the rgw object name | ||
( rados -p $pool ls | grep "^${marker}_[^_]" | sed "s/^${marker}_\(.*\)/\1/" >$obj_list ) 2>/dev/null | ||
|
||
# handle the case where the resulting object list file is empty | ||
if [ -s $obj_list ] ;then | ||
: | ||
else | ||
echo "NOTICE: No head objects for bucket \"$bucket\" were found in pool \"$pool\", so nothing was recovered." | ||
clean | ||
exit 0 | ||
fi | ||
|
||
if [ -z "$proceed" ] ;then | ||
# warn user and get permission to proceed | ||
echo "NOTICE: This tool is currently considered EXPERIMENTAL." | ||
echo "The list of objects that we will attempt to restore can be found in \"$obj_list\"." | ||
echo "Please review the object names in that file (either below or in another window/terminal) before proceeding." | ||
while true ; do | ||
read -p "Type \"proceed!\" to proceed, \"view\" to view object list, or \"q\" to quit: " action | ||
if [ "$action" == "q" ] ;then | ||
echo "Exiting..." | ||
clean | ||
exit 0 | ||
elif [ "$action" == "view" ] ;then | ||
echo "Viewing..." | ||
less $obj_list | ||
elif [ "$action" == "proceed!" ] ;then | ||
echo "Proceeding..." | ||
break | ||
else | ||
echo "Error: response \"$action\" is not understood." | ||
fi | ||
done | ||
fi | ||
|
||
# execute object rewrite on all of the head objects | ||
radosgw-admin object reindex --bucket=$bucket --objects-file=$obj_list 2>/dev/null | ||
|
||
clean | ||
echo Done |