From b35b07c9d2c48b0503d4fe2e3b1b29851a65931d Mon Sep 17 00:00:00 2001 From: Slach Date: Mon, 20 Nov 2023 19:33:37 +0400 Subject: [PATCH] improve performance by processing checksums.txt files with GNU parallel --- Dockerfile | 2 +- test/integration/integration_test.go | 2 +- test/integration/kopia/download.sh | 6 ++---- test/integration/kopia/upload.sh | 6 ++---- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/Dockerfile b/Dockerfile index 50273133..1c3cd888 100644 --- a/Dockerfile +++ b/Dockerfile @@ -101,7 +101,7 @@ FROM ${CLICKHOUSE_IMAGE}:${CLICKHOUSE_VERSION} AS image_full ARG TARGETPLATFORM MAINTAINER Eugene Klimov -RUN apt-get update && apt-get install -y gpg xxd bsdmainutils && wget -qO- https://kopia.io/signing-key | gpg --dearmor -o /usr/share/keyrings/kopia-keyring.gpg && \ +RUN apt-get update && apt-get install -y gpg xxd bsdmainutils parallel && wget -qO- https://kopia.io/signing-key | gpg --dearmor -o /usr/share/keyrings/kopia-keyring.gpg && \ echo "deb [signed-by=/usr/share/keyrings/kopia-keyring.gpg] http://packages.kopia.io/apt/ stable main" > /etc/apt/sources.list.d/kopia.list && \ wget -c "https://github.com/mikefarah/yq/releases/latest/download/yq_linux_$(dpkg --print-architecture)" -O /usr/bin/yq && chmod +x /usr/bin/yq && \ apt-get update -y && \ diff --git a/test/integration/integration_test.go b/test/integration/integration_test.go index 217cf46d..2429a0b1 100644 --- a/test/integration/integration_test.go +++ b/test/integration/integration_test.go @@ -409,7 +409,7 @@ func init() { // kopia r.NoError(dockerExec("clickhouse-backup", "bash", "-ce", "curl -sfL https://kopia.io/signing-key | gpg --dearmor -o /usr/share/keyrings/kopia-keyring.gpg")) r.NoError(dockerExec("clickhouse-backup", "bash", "-ce", "echo 'deb [signed-by=/usr/share/keyrings/kopia-keyring.gpg] https://packages.kopia.io/apt/ stable main' > /etc/apt/sources.list.d/kopia.list")) - installDebIfNotExists(r, "clickhouse-backup", "kopia", "xxd", "bsdmainutils") + 
installDebIfNotExists(r, "clickhouse-backup", "kopia", "xxd", "bsdmainutils", "parallel") // restic r.NoError(dockerExec("clickhouse-backup", "bash", "-xec", "RELEASE_TAG=$(curl -H 'Accept: application/json' -sL https://github.com/restic/restic/releases/latest | jq -c -r -M '.tag_name'); RELEASE=$(echo ${RELEASE_TAG} | sed -e 's/v//'); curl -sfL \"https://github.com/restic/restic/releases/download/${RELEASE_TAG}/restic_${RELEASE}_linux_amd64.bz2\" | bzip2 -d > /bin/restic; chmod +x /bin/restic")) } diff --git a/test/integration/kopia/download.sh b/test/integration/kopia/download.sh index 5bfedb44..e2f78bcc 100755 --- a/test/integration/kopia/download.sh +++ b/test/integration/kopia/download.sh @@ -6,12 +6,10 @@ BACKUP_NAME=$1 ${CUR_DIR}/list.sh | grep "${BACKUP_NAME}" | while IFS= read -r line; do SNAPSHOT_ID=$(echo "${line}" | jq -r -c -M .snapshot_id) SNAPSHOT_PATH=$(echo "${line}" | jq -r -c -M .snapshot_path) - kopia restore "${SNAPSHOT_ID}" --skip-existing ${SNAPSHOT_PATH} + kopia restore --parallel=$(nproc) "${SNAPSHOT_ID}" --skip-existing ${SNAPSHOT_PATH} LOCAL_BACKUP_DIR="$(dirname ${SNAPSHOT_PATH})/${BACKUP_NAME}" rm -rf "${LOCAL_BACKUP_DIR}" - find "${SNAPSHOT_PATH}" -type f -name checksums.txt | while read CHECKSUMS_FILE; do - "${CUR_DIR}/checksum_parser.sh" "${CHECKSUMS_FILE}" "download" "${SNAPSHOT_PATH}" - done + find "${SNAPSHOT_PATH}" -type f -name checksums.txt | parallel -j $(nproc) "${CUR_DIR}/checksum_parser.sh" {} "download" "${SNAPSHOT_PATH}" # need separately `rm` cause hash file can contains multiple the same files in different parts find ${SNAPSHOT_PATH} -maxdepth 1 -type f -regex '.*/[a-z0-9]\{32\}.*' | while read HASH_FILE; do rm "${SNAPSHOT_PATH}/${HASH_FILE}" diff --git a/test/integration/kopia/upload.sh b/test/integration/kopia/upload.sh index 1189d86e..e7f6b8d1 100755 --- a/test/integration/kopia/upload.sh +++ b/test/integration/kopia/upload.sh @@ -15,14 +15,12 @@ for dir in $(echo "${LOCAL_PATHS}"); do UPLOAD_DIR="$(dirname 
"${dir}")/latest" rm -rf "${UPLOAD_DIR}" cp -rl "${dir}" "${UPLOAD_DIR}" - find "${UPLOAD_DIR}" -type f -name checksums.txt | while read CHECKSUMS_FILE; do - "${CUR_DIR}/checksum_parser.sh" "${CHECKSUMS_FILE}" "upload" "${UPLOAD_DIR}" - done + find "${UPLOAD_DIR}" -type f -name checksums.txt | parallel -j $(nproc) "${CUR_DIR}/checksum_parser.sh" {} "upload" "${UPLOAD_DIR}" SNAPSHOT_SOURCES="${UPLOAD_DIR} ${SNAPSHOT_SOURCES}" fi done -kopia snapshot create $DIFF_FROM_REMOTE_CMD --fail-fast --tags="backup_name:${BACKUP_NAME}" $SNAPSHOT_SOURCES +kopia snapshot create $DIFF_FROM_REMOTE_CMD --parallel=$(nproc) --fail-fast --tags="backup_name:${BACKUP_NAME}" $SNAPSHOT_SOURCES for dir in $(echo "${LOCAL_PATHS}"); do if [[ -d "${dir}" ]]; then