Skip to content

Commit

Permalink
improve performance by processing checksums.txt files in parallel
Browse files Browse the repository at this point in the history
  • Loading branch information
Slach committed Nov 20, 2023
1 parent 0b21347 commit b35b07c
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ FROM ${CLICKHOUSE_IMAGE}:${CLICKHOUSE_VERSION} AS image_full
ARG TARGETPLATFORM
MAINTAINER Eugene Klimov <[email protected]>

RUN apt-get update && apt-get install -y gpg xxd bsdmainutils && wget -qO- https://kopia.io/signing-key | gpg --dearmor -o /usr/share/keyrings/kopia-keyring.gpg && \
RUN apt-get update && apt-get install -y gpg xxd bsdmainutils parallel && wget -qO- https://kopia.io/signing-key | gpg --dearmor -o /usr/share/keyrings/kopia-keyring.gpg && \
echo "deb [signed-by=/usr/share/keyrings/kopia-keyring.gpg] http://packages.kopia.io/apt/ stable main" > /etc/apt/sources.list.d/kopia.list && \
wget -c "https://github.com/mikefarah/yq/releases/latest/download/yq_linux_$(dpkg --print-architecture)" -O /usr/bin/yq && chmod +x /usr/bin/yq && \
apt-get update -y && \
Expand Down
2 changes: 1 addition & 1 deletion test/integration/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ func init() {
// kopia
r.NoError(dockerExec("clickhouse-backup", "bash", "-ce", "curl -sfL https://kopia.io/signing-key | gpg --dearmor -o /usr/share/keyrings/kopia-keyring.gpg"))
r.NoError(dockerExec("clickhouse-backup", "bash", "-ce", "echo 'deb [signed-by=/usr/share/keyrings/kopia-keyring.gpg] https://packages.kopia.io/apt/ stable main' > /etc/apt/sources.list.d/kopia.list"))
installDebIfNotExists(r, "clickhouse-backup", "kopia", "xxd", "bsdmainutils")
installDebIfNotExists(r, "clickhouse-backup", "kopia", "xxd", "bsdmainutils", "parallel")
// restic
r.NoError(dockerExec("clickhouse-backup", "bash", "-xec", "RELEASE_TAG=$(curl -H 'Accept: application/json' -sL https://github.com/restic/restic/releases/latest | jq -c -r -M '.tag_name'); RELEASE=$(echo ${RELEASE_TAG} | sed -e 's/v//'); curl -sfL \"https://github.com/restic/restic/releases/download/${RELEASE_TAG}/restic_${RELEASE}_linux_amd64.bz2\" | bzip2 -d > /bin/restic; chmod +x /bin/restic"))
}
Expand Down
6 changes: 2 additions & 4 deletions test/integration/kopia/download.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@ BACKUP_NAME=$1
${CUR_DIR}/list.sh | grep "${BACKUP_NAME}" | while IFS= read -r line; do
SNAPSHOT_ID=$(echo "${line}" | jq -r -c -M .snapshot_id)
SNAPSHOT_PATH=$(echo "${line}" | jq -r -c -M .snapshot_path)
kopia restore "${SNAPSHOT_ID}" --skip-existing ${SNAPSHOT_PATH}
kopia restore --parallel=$(nproc) "${SNAPSHOT_ID}" --skip-existing ${SNAPSHOT_PATH}
LOCAL_BACKUP_DIR="$(dirname ${SNAPSHOT_PATH})/${BACKUP_NAME}"
rm -rf "${LOCAL_BACKUP_DIR}"
find "${SNAPSHOT_PATH}" -type f -name checksums.txt | while read CHECKSUMS_FILE; do
"${CUR_DIR}/checksum_parser.sh" "${CHECKSUMS_FILE}" "download" "${SNAPSHOT_PATH}"
done
find "${SNAPSHOT_PATH}" -type f -name checksums.txt | parallel -j $(nproc) "${CUR_DIR}/checksum_parser.sh" {} "download" "${SNAPSHOT_PATH}"
# a separate `rm` pass is needed because a hash file can contain multiple copies of the same file from different parts
find ${SNAPSHOT_PATH} -maxdepth 1 -type f -regex '.*/[a-z0-9]\{32\}.*' | while read HASH_FILE; do
rm "${SNAPSHOT_PATH}/${HASH_FILE}"
Expand Down
6 changes: 2 additions & 4 deletions test/integration/kopia/upload.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,12 @@ for dir in $(echo "${LOCAL_PATHS}"); do
UPLOAD_DIR="$(dirname "${dir}")/latest"
rm -rf "${UPLOAD_DIR}"
cp -rl "${dir}" "${UPLOAD_DIR}"
find "${UPLOAD_DIR}" -type f -name checksums.txt | while read CHECKSUMS_FILE; do
"${CUR_DIR}/checksum_parser.sh" "${CHECKSUMS_FILE}" "upload" "${UPLOAD_DIR}"
done
find "${UPLOAD_DIR}" -type f -name checksums.txt | parallel -j $(nproc) "${CUR_DIR}/checksum_parser.sh" {} "upload" "${UPLOAD_DIR}"
SNAPSHOT_SOURCES="${UPLOAD_DIR} ${SNAPSHOT_SOURCES}"
fi
done

kopia snapshot create $DIFF_FROM_REMOTE_CMD --fail-fast --tags="backup_name:${BACKUP_NAME}" $SNAPSHOT_SOURCES
kopia snapshot create $DIFF_FROM_REMOTE_CMD --parallel=$(nproc) --fail-fast --tags="backup_name:${BACKUP_NAME}" $SNAPSHOT_SOURCES

for dir in $(echo "${LOCAL_PATHS}"); do
if [[ -d "${dir}" ]]; then
Expand Down

0 comments on commit b35b07c

Please sign in to comment.