Skip to content

Commit

Permalink
fix sync script to account for subfolder bucket paths (#135)
Browse files Browse the repository at this point in the history
  • Loading branch information
noyoshi authored Dec 15, 2023
1 parent ce99dbf commit 2fae25e
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion sync.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,13 @@ aws s3 ls "${S3_PATH}" --recursive | awk '{print $4}'
# Pre-create, under the local cache directory, the parent directory of every
# object listed beneath S3_PATH.
# awk keeps only the 4th whitespace-separated column of each listing line, so
# a key that itself contains whitespace is truncated at this step.
# Each key is handed to the inner shell as positional parameter $1 rather than
# being pasted into the script text, so a key containing quotes or $(...) is
# treated as data and never executed as shell code.
aws s3 ls "${S3_PATH}" --recursive \
  | awk '{print $4}' \
  | xargs -I {} bash -c 'sudo mkdir -p "${HUGGINGFACE_HUB_CACHE}/$(dirname -- "$1")"' _ {}

#######################################
# Copy one object from S3 into the local Hugging Face hub cache.
# Globals:
#   HF_CACHE_BUCKET (read)       - bucket name, optionally followed by
#                                  "/<subfolder>/..."
#   HUGGINGFACE_HUB_CACHE (read) - local directory the object is copied under
# Arguments:
#   $1 - object key without the bucket prefix. When HF_CACHE_BUCKET is a
#        bucket plus a subfolder, the key already includes that subfolder.
# Returns:
#   Exit status of the `sudo -E aws s3 cp` command.
#######################################
copy_file() {
  local file="$1"
  # Keep only the bucket name: drop everything from the first "/" onward so
  # the subfolder (already present in ${file}) is not duplicated in the URL.
  local true_bucket="${HF_CACHE_BUCKET%%/*}"
  sudo -E aws s3 cp "s3://${true_bucket}/${file}" "${HUGGINGFACE_HUB_CACHE}/${file}"
}
# Exported so child bash processes can call copy_file.
export -f copy_file

Expand Down

0 comments on commit 2fae25e

Please sign in to comment.