Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved Handling of DAYS_TO_RETAIN in Backup Script #938

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 43 additions & 23 deletions postgres-appliance/scripts/postgres_backup.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
#!/bin/bash

function log
{
# Write one log line to stdout in the form
#   "<date time with milliseconds> - <script path> - <message>"
# All arguments are joined into a single message.
log() {
    local stamp
    stamp=$(date "+%Y-%m-%d %H:%M:%S.%3N")
    echo "$stamp - $0 - $*"
}

[[ -z $1 ]] && echo "Usage: $0 PGDATA" && exit 1

# Require the PGDATA directory as the first positional argument.
# An explicit `if` is used instead of an `&&` chain so that the script still
# exits with status 1 even when `log` or `echo` itself fails (for example
# because stdout is closed); with the chain, such a failure would skip the
# `exit` and the script would carry on with an empty PGDATA.
if [[ -z $1 ]]; then
    log "Error: PGDATA is missing!"
    echo "Usage: $0 PGDATA"
    exit 1
fi
# Record the exact invocation for later troubleshooting
log "I was called as: $0 $*"


readonly PGDATA=$1
DAYS_TO_RETAIN=$BACKUP_NUM_TO_RETAIN

IN_RECOVERY=$(psql -tXqAc "select pg_catalog.pg_is_in_recovery()")
readonly IN_RECOVERY
Expand All @@ -21,52 +18,75 @@ elif [[ $IN_RECOVERY == "t" ]]; then
[[ "$WALG_BACKUP_FROM_REPLICA" != "true" ]] && log "Cluster is in recovery, not running backup" && exit 0
else
log "ERROR: Recovery state unknown: $IN_RECOVERY" && exit 1
fi

# leave at least 2 days base backups before creating a new one
[[ "$DAYS_TO_RETAIN" -lt 2 ]] && DAYS_TO_RETAIN=2
# Ensure DAYS_TO_RETAIN is set, either externally or from BACKUP_NUM_TO_RETAIN
if [[ -z $DAYS_TO_RETAIN ]]; then
    DAYS_TO_RETAIN=$BACKUP_NUM_TO_RETAIN
    log "DAYS_TO_RETAIN was not set. Using BACKUP_NUM_TO_RETAIN value: $DAYS_TO_RETAIN"
fi

# Make sure there are at least 2 days of base backups before creating a new one.
# The lower bound is applied outside the branch above so that it also covers a
# DAYS_TO_RETAIN value supplied from the environment, not only the fallback.
# An empty value compares as 0 here and is therefore raised to 2 as well.
[[ "$DAYS_TO_RETAIN" -lt 2 ]] && DAYS_TO_RETAIN=2
log "Ensuring DAYS_TO_RETAIN is at least 2. Current value: $DAYS_TO_RETAIN"

# Decide whether to use wal-g or wal-e for backup based on USE_WALG_BACKUP flag
if [[ "$USE_WALG_BACKUP" == "true" ]]; then
readonly WAL_E="wal-g"
log "Using wal-g for backup."

# Optionally set compression method for wal-g if provided
[[ -z $WALG_BACKUP_COMPRESSION_METHOD ]] || export WALG_COMPRESSION_METHOD=$WALG_BACKUP_COMPRESSION_METHOD
export PGHOST=/var/run/postgresql
else
readonly WAL_E="wal-e"

# Ensure we don't have more workers than CPUs
POOL_SIZE=$(grep -c ^processor /proc/cpuinfo 2>/dev/null || 1)
log "Using wal-e for backup."

# Determine pool size based on CPU count, but cap it at 4 to avoid excessive parallelism
POOL_SIZE=$(grep -c ^processor /proc/cpuinfo 2>/dev/null)
# grep -c prints "0" (and returns non-zero) when nothing matches and prints
# nothing when the file cannot be read; fall back to a single worker unless a
# positive integer was obtained
[[ "$POOL_SIZE" =~ ^[1-9][0-9]*$ ]] || POOL_SIZE=1
[ "$POOL_SIZE" -gt 4 ] && POOL_SIZE=4
# From here on POOL_SIZE is an array holding ready-to-use command-line arguments
POOL_SIZE=(--pool-size "$POOL_SIZE")
# A plain "$POOL_SIZE" would expand to the first array element only ("--pool-size"),
# so the whole array is printed instead
log "POOL_SIZE set to ${POOL_SIZE[*]}"
fi

BEFORE=""
LEFT=0

# Starting state for the retention scan
LEFT=0       # number of backups that will remain
BEFORE=""    # backup candidate for deletion (none selected yet)
# Current time in seconds since the epoch (UTC); fixed for the rest of the run
NOW="$(date -u +%s)"
readonly NOW

log "Listing existing backups..."
# Loop through the existing backups and pick the deletion boundary.
# Each input line is split into: name, last_modified and rest (rest is not used).
# Result: BEFORE / BEFORE_TIME hold the name / timestamp of the most recent backup
# that is at least DAYS_TO_RETAIN days old; LEFT counts the backups younger than that.
while read -r name last_modified rest; do
# Convert the listed modification time to seconds since the epoch (UTC)
last_modified=$(date +%s -ud "$last_modified")

# Age is measured in whole days (integer division); a backup whose age is
# greater than or equal to DAYS_TO_RETAIN is considered for deletion
if [ $(((NOW-last_modified)/86400)) -ge $DAYS_TO_RETAIN ]; then
log "Backup $name is old enough for deletion."
# Remember only the most recent of the old-enough backups
if [ -z "$BEFORE" ] || [ "$last_modified" -gt "$BEFORE_TIME" ]; then
BEFORE_TIME=$last_modified
BEFORE=$name
fi
else
# Otherwise, count how many backups will remain after we remove
# everything up to a certain date
((LEFT=LEFT+1))
fi
# Input is the backup-list output of $WAL_E with its stderr discarded; sed drops
# everything up to and including the "name ... modified" header line
done < <($WAL_E backup-list 2> /dev/null | sed '0,/^name\s*\(last_\)\?modified\s*/d')

# we want to keep at least N backups even if the number of days is exceeded
if [ -n "$BEFORE" ] && [ $LEFT -ge $DAYS_TO_RETAIN ]; then
log "Total backups to retain: $LEFT. Target for deletion is: $BEFORE"

# Ensure a certain number of backups remain, even if their age exceeds DAYS_TO_RETAIN:
# deletion happens only when a candidate was found (BEFORE is non-empty) and the
# number of backups that would remain (LEFT) is at least BACKUP_NUM_TO_RETAIN.
# NOTE(review): LEFT and BACKUP_NUM_TO_RETAIN are expanded unquoted; with an empty
# BACKUP_NUM_TO_RETAIN the numeric test errors out and the else branch is taken.
if [ -n "$BEFORE" ] && [ $LEFT -ge $BACKUP_NUM_TO_RETAIN ]; then
log "Deleting backups before $BEFORE..."
# Use appropriate deletion command based on whether wal-g or wal-e is being used
if [[ "$USE_WALG_BACKUP" == "true" ]]; then
# NOTE(review): two wal-g delete commands appear back to back here
# ("before FIND_FULL" and "retain"); as written both are executed - confirm
# which one is intended.
$WAL_E delete before FIND_FULL "$BEFORE" --confirm
$WAL_E delete retain $LEFT --confirm
else
$WAL_E delete --confirm before "$BEFORE"
fi
else
# Either no backup is old enough or too few backups would remain
log "No backups were deleted."
fi

# push a new base backup
log "producing a new backup"
# We reduce the priority of the backup for CPU consumption
# Push a new base backup with reduced CPU priority (niceness 5).
# `exec` replaces this shell with the backup process, so nothing placed after
# this line would run and the script's exit status is that of the command.
# "${POOL_SIZE[@]}" passes the --pool-size arguments prepared for wal-e and
# expands to no arguments at all when POOL_SIZE was never set.
log "Producing a new backup..."
exec nice -n 5 $WAL_E backup-push "$PGDATA" "${POOL_SIZE[@]}"