From afd37f057fa719f4636d13107a42b71d44312620 Mon Sep 17 00:00:00 2001
From: a-thomas-22
Date: Mon, 16 Oct 2023 11:57:11 -0400
Subject: [PATCH 1/3] add days to retain fix

---
 postgres-appliance/scripts/postgres_backup.sh | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/postgres-appliance/scripts/postgres_backup.sh b/postgres-appliance/scripts/postgres_backup.sh
index 3216ae4ed..017953890 100755
--- a/postgres-appliance/scripts/postgres_backup.sh
+++ b/postgres-appliance/scripts/postgres_backup.sh
@@ -9,23 +9,15 @@ function log
 
 log "I was called as: $0 $*"
 
-
 readonly PGDATA=$1
-DAYS_TO_RETAIN=$BACKUP_NUM_TO_RETAIN
 
-IN_RECOVERY=$(psql -tXqAc "select pg_catalog.pg_is_in_recovery()")
-readonly IN_RECOVERY
-if [[ $IN_RECOVERY == "f" ]]; then
-    [[ "$WALG_BACKUP_FROM_REPLICA" == "true" ]] && log "Cluster is not in recovery, not running backup" && exit 0
-elif [[ $IN_RECOVERY == "t" ]]; then
-    [[ "$WALG_BACKUP_FROM_REPLICA" != "true" ]] && log "Cluster is in recovery, not running backup" && exit 0
-else
-    log "ERROR: Recovery state unknown: $IN_RECOVERY" && exit 1
+# Check if DAYS_TO_RETAIN is set externally
+if [[ -z $DAYS_TO_RETAIN ]]; then
+    DAYS_TO_RETAIN=$BACKUP_NUM_TO_RETAIN
+    # leave at least 2 days base backups before creating a new one
+    [[ "$DAYS_TO_RETAIN" -lt 2 ]] && DAYS_TO_RETAIN=2
 fi
 
-# leave at least 2 days base backups before creating a new one
-[[ "$DAYS_TO_RETAIN" -lt 2 ]] && DAYS_TO_RETAIN=2
-
 if [[ "$USE_WALG_BACKUP" == "true" ]]; then
     readonly WAL_E="wal-g"
     [[ -z $WALG_BACKUP_COMPRESSION_METHOD ]] || export WALG_COMPRESSION_METHOD=$WALG_BACKUP_COMPRESSION_METHOD

From 58350968390a08cf595f2c1c861c79867776c1d5 Mon Sep 17 00:00:00 2001
From: a-thomas-22
Date: Mon, 16 Oct 2023 13:39:14 -0400
Subject: [PATCH 2/3] logic changes and additional logging

---
 postgres-appliance/scripts/postgres_backup.sh | 59 ++++++++++++-------
 1 file changed, 39 insertions(+), 20 deletions(-)

diff --git a/postgres-appliance/scripts/postgres_backup.sh b/postgres-appliance/scripts/postgres_backup.sh
index 017953890..62efcdc18 100755
--- a/postgres-appliance/scripts/postgres_backup.sh
+++ b/postgres-appliance/scripts/postgres_backup.sh
@@ -1,64 +1,83 @@
 #!/bin/bash
 
-function log
-{
+# Log function to capture timestamped logs
+function log {
     echo "$(date "+%Y-%m-%d %H:%M:%S.%3N") - $0 - $*"
 }
 
-[[ -z $1 ]] && echo "Usage: $0 PGDATA" && exit 1
-
+# Check if the script is provided with the PGDATA argument
+[[ -z $1 ]] && log "Error: PGDATA is missing!" && echo "Usage: $0 PGDATA" && exit 1
 log "I was called as: $0 $*"
-
 readonly PGDATA=$1
 
-# Check if DAYS_TO_RETAIN is set externally
+# Ensure DAYS_TO_RETAIN is set, either externally or from BACKUP_NUM_TO_RETAIN
 if [[ -z $DAYS_TO_RETAIN ]]; then
     DAYS_TO_RETAIN=$BACKUP_NUM_TO_RETAIN
-    # leave at least 2 days base backups before creating a new one
+    log "DAYS_TO_RETAIN was not set. Using BACKUP_NUM_TO_RETAIN value: $DAYS_TO_RETAIN"
+
+    # Make sure there are at least 2 days of base backups before creating a new one
     [[ "$DAYS_TO_RETAIN" -lt 2 ]] && DAYS_TO_RETAIN=2
+    log "Ensuring DAYS_TO_RETAIN is at least 2. Current value: $DAYS_TO_RETAIN"
 fi
 
+# Decide whether to use wal-g or wal-e for backup based on USE_WALG_BACKUP flag
 if [[ "$USE_WALG_BACKUP" == "true" ]]; then
     readonly WAL_E="wal-g"
+    log "Using wal-g for backup."
+
+    # Optionally set compression method for wal-g if provided
     [[ -z $WALG_BACKUP_COMPRESSION_METHOD ]] || export WALG_COMPRESSION_METHOD=$WALG_BACKUP_COMPRESSION_METHOD
     export PGHOST=/var/run/postgresql
 else
     readonly WAL_E="wal-e"
-
-    # Ensure we don't have more workes than CPU's
-    POOL_SIZE=$(grep -c ^processor /proc/cpuinfo 2>/dev/null || 1)
+    log "Using wal-e for backup."
+
+    # Determine pool size based on CPU count, but cap it at 4 to avoid excessive parallelism
+    POOL_SIZE=$(grep -c ^processor /proc/cpuinfo 2>/dev/null || echo 1)
     [ "$POOL_SIZE" -gt 4 ] && POOL_SIZE=4
     POOL_SIZE=(--pool-size "$POOL_SIZE")
+    log "POOL_SIZE set to $POOL_SIZE"
 fi
 
-BEFORE=""
-LEFT=0
-
+# Initialization
+BEFORE=""  # Backup candidate for deletion
+LEFT=0     # Counter for backups that will remain
 NOW=$(date +%s -u)
 readonly NOW
+
+log "Listing existing backups..."
+# Loop through the existing backups and check if they qualify for deletion
 while read -r name last_modified rest; do
     last_modified=$(date +%s -ud "$last_modified")
+
+    # If a backup's age exceeds DAYS_TO_RETAIN, consider it for deletion
     if [ $(((NOW-last_modified)/86400)) -ge $DAYS_TO_RETAIN ]; then
+        log "Backup $name is old enough for deletion."
         if [ -z "$BEFORE" ] || [ "$last_modified" -gt "$BEFORE_TIME" ]; then
             BEFORE_TIME=$last_modified
             BEFORE=$name
         fi
     else
-        # count how many backups will remain after we remove everything up to certain date
+        # Otherwise, increment the counter for backups that will remain
         ((LEFT=LEFT+1))
     fi
 done < <($WAL_E backup-list 2> /dev/null | sed '0,/^name\s*\(last_\)\?modified\s*/d')
 
-# we want keep at least N backups even if the number of days exceeded
-if [ -n "$BEFORE" ] && [ $LEFT -ge $DAYS_TO_RETAIN ]; then
+log "Total backups to retain: $LEFT. Target for deletion is: $BEFORE"
+
+# Ensure a certain number of backups remain, even if their age exceeds DAYS_TO_RETAIN
+if [ -n "$BEFORE" ] && [ $LEFT -ge $BACKUP_NUM_TO_RETAIN ]; then
+    log "Deleting backups before $BEFORE..."
+    # Use appropriate deletion command based on whether wal-g or wal-e is being used
     if [[ "$USE_WALG_BACKUP" == "true" ]]; then
-        $WAL_E delete before FIND_FULL "$BEFORE" --confirm
+        $WAL_E delete retain $LEFT --confirm
     else
         $WAL_E delete --confirm before "$BEFORE"
     fi
+else
+    log "No backups were deleted."
 fi
 
-# push a new base backup
-log "producing a new backup"
-# We reduce the priority of the backup for CPU consumption
+# Push a new base backup with reduced CPU priority
+log "Producing a new backup..."
 exec nice -n 5 $WAL_E backup-push "$PGDATA" "${POOL_SIZE[@]}"

From 361b9d103252227ee7f8efce30fd0d6c89b6f39f Mon Sep 17 00:00:00 2001
From: a-thomas-22
Date: Fri, 27 Oct 2023 11:52:45 -0500
Subject: [PATCH 3/3] readd in recovery check

---
Reviewer note (this section is not part of the commit message):
The recovery-state check is now closed with `fi`. As originally posted, this
patch added the `if ... elif ... else` chain without a terminating `fi`, so
the resulting script could not be parsed by bash: every statement after the
check was swallowed into the `else` branch and the file ended with an
unterminated `if`. The diffstat and hunk header below reflect the extra line.
The blob hashes on the `index` line are carried over from the original patch
and have not been recomputed.

 postgres-appliance/scripts/postgres_backup.sh | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/postgres-appliance/scripts/postgres_backup.sh b/postgres-appliance/scripts/postgres_backup.sh
index 62efcdc18..c575f036a 100755
--- a/postgres-appliance/scripts/postgres_backup.sh
+++ b/postgres-appliance/scripts/postgres_backup.sh
@@ -10,6 +10,16 @@ function log {
 log "I was called as: $0 $*"
 readonly PGDATA=$1
 
+IN_RECOVERY=$(psql -tXqAc "select pg_catalog.pg_is_in_recovery()")
+readonly IN_RECOVERY
+if [[ $IN_RECOVERY == "f" ]]; then
+    [[ "$WALG_BACKUP_FROM_REPLICA" == "true" ]] && log "Cluster is not in recovery, not running backup" && exit 0
+elif [[ $IN_RECOVERY == "t" ]]; then
+    [[ "$WALG_BACKUP_FROM_REPLICA" != "true" ]] && log "Cluster is in recovery, not running backup" && exit 0
+else
+    log "ERROR: Recovery state unknown: $IN_RECOVERY" && exit 1
+fi
+
 # Ensure DAYS_TO_RETAIN is set, either externally or from BACKUP_NUM_TO_RETAIN
 if [[ -z $DAYS_TO_RETAIN ]]; then
     DAYS_TO_RETAIN=$BACKUP_NUM_TO_RETAIN