Skip to content

Commit

Permalink
Merge pull request #106 from ministryofjustice/TM-652-investigate-and…
Browse files Browse the repository at this point in the history
…-implement-an-automated-backup-process-for-opensearch-data

feat: add opensearch-backup job
  • Loading branch information
andrewmooreio authored Nov 19, 2024
2 parents 166211d + 4bee540 commit 50afc3d
Show file tree
Hide file tree
Showing 5 changed files with 240 additions and 0 deletions.
61 changes: 61 additions & 0 deletions .github/workflows/opensearch-backup.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
---
# Backs up OpenSearch for each environment by installing the Helm chart in
# jobs/opensearch-backup and waiting for the Job it creates to complete.
name: OpenSearch Backup

on:
  schedule:
    - cron: '0 1 * * *' # Run at 1am UTC daily
  workflow_dispatch:
    inputs:
      environment:
        description: 'Environment to backup'
        required: true
        type: choice
        options:
          - poc
          - dev
          - test
          - stage
          - preprod

jobs:
  backup:
    name: Backup OpenSearch
    runs-on: ubuntu-latest

    strategy:
      # A failed backup in one environment must not cancel the others.
      fail-fast: false
      matrix:
        # Manual runs back up only the chosen environment; scheduled runs back
        # up the fixed list below. `matrix.exclude` cannot hold an expression
        # over matrix values, so the list itself is computed instead.
        # preprod is intentionally absent from the scheduled list and is only
        # reachable through a manual dispatch.
        environment: >-
          ${{ fromJSON(github.event_name == 'workflow_dispatch'
          && format('["{0}"]', github.event.inputs.environment)
          || '["poc", "dev", "test", "stage"]') }}

    environment: ${{ matrix.environment }}-preapproved

    steps:
      - name: Checkout code
        # NOTE(review): the exact pinned version was obfuscated in the copy
        # this was restored from; confirm the intended pin.
        uses: actions/checkout@v4
      - name: Configure kubectl
        # Secrets are passed through the environment rather than interpolated
        # into the script text, so their content is never parsed as shell.
        run: |
          echo "${KUBE_CERT}" > ca.crt
          kubectl config set-cluster "${KUBE_CLUSTER}" --certificate-authority=./ca.crt --server="https://${KUBE_CLUSTER}"
          kubectl config set-credentials deploy-user --token="${KUBE_TOKEN}"
          kubectl config set-context "${KUBE_CLUSTER}" --cluster="${KUBE_CLUSTER}" --user=deploy-user --namespace="${KUBE_NAMESPACE}"
          kubectl config use-context "${KUBE_CLUSTER}"
        env:
          KUBE_CERT: ${{ secrets.KUBE_CERT }}
          KUBE_TOKEN: ${{ secrets.KUBE_TOKEN }}
          KUBE_NAMESPACE: ${{ secrets.KUBE_NAMESPACE }}
          KUBE_CLUSTER: ${{ secrets.KUBE_CLUSTER }}
      - name: Backup OpenSearch
        working-directory: jobs/opensearch-backup
        run: |
          set -xeo pipefail
          helm install opensearch-backup . \
            --set opensearch.environment="${ENVIRONMENT}"
          # Wait on the Job by name (metadata.name in templates/job.yaml); a
          # label selector only works if the Job carries that label.
          kubectl wait job/opensearch-backup --for=condition=complete --timeout=1h
        env:
          ENVIRONMENT: ${{ matrix.environment }}
      - name: Cleanup
        # Always remove the release: a leftover release makes the next
        # `helm install opensearch-backup` fail because the name is in use.
        if: always()
        run: helm uninstall opensearch-backup --ignore-not-found
5 changes: 5 additions & 0 deletions jobs/opensearch-backup/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Helm chart metadata for the OpenSearch backup job.
apiVersion: v2
type: application
name: delius-alfresco-opensearch-backup
description: 'A Helm chart for backing up AWS OpenSearch indices'
# Chart version (not the version of any packaged application).
version: '0.0.1'
100 changes: 100 additions & 0 deletions jobs/opensearch-backup/templates/configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
---
# ConfigMap holding the backup script that the opensearch-backup Job mounts
# and runs with /bin/sh.
apiVersion: v1
kind: ConfigMap
metadata:
  name: opensearch-backup-script
data:
  backup.sh: |
    #!/bin/sh
    # Creates a timestamped snapshot of $INDICES in the snapshot repository
    # $SNAPSHOT_REPOSITORY, registering the S3-backed repository first when it
    # does not exist, then polls until the snapshot reaches a terminal state.
    #
    # Required env: OPENSEARCH_ENDPOINT, S3_BUCKET_NAME, SNAPSHOT_REPOSITORY,
    #               INDICES, REGION
    # Required only when the repository must be created: SNAPSHOT_ROLE_ARN
    # Optional env: SNAPSHOT_PREFIX (default "backup"), ENVIRONMENT (default
    #               "default"), SNAPSHOT_TIMEOUT_SECONDS (default 3300)
    set -e
    echo "Starting OpenSearch backup process..."
    # Validate required environment variables
    if [ -z "$OPENSEARCH_ENDPOINT" ] || [ -z "$S3_BUCKET_NAME" ] || [ -z "$SNAPSHOT_REPOSITORY" ] || [ -z "$INDICES" ] || [ -z "$REGION" ]; then
      echo "Error: Required environment variables are not set"
      echo "Required variables: OPENSEARCH_ENDPOINT, S3_BUCKET_NAME, SNAPSHOT_REPOSITORY, INDICES, REGION"
      exit 1
    fi
    # Bound the polling loop so a snapshot that never reaches a terminal state
    # fails the Job instead of running forever.
    SNAPSHOT_TIMEOUT_SECONDS="${SNAPSHOT_TIMEOUT_SECONDS:-3300}"
    POLL_INTERVAL_SECONDS=10
    # Set timestamp for snapshot name
    TIMESTAMP=$(date +%Y%m%d-%H%M%S)
    SNAPSHOT_NAME="${SNAPSHOT_PREFIX:-backup}-${ENVIRONMENT:-default}-${TIMESTAMP}"
    echo "Creating snapshot: $SNAPSHOT_NAME"
    echo "Repository: $SNAPSHOT_REPOSITORY"
    echo "Indices to backup: $INDICES"
    echo "---"
    # Check if repository exists. `|| true` keeps the status curl printed
    # (000 on connection failure) so the case below can report it.
    REPO_CHECK=$(curl -s -o /dev/null -w "%{http_code}" "$OPENSEARCH_ENDPOINT/_snapshot/$SNAPSHOT_REPOSITORY") || true
    case "$REPO_CHECK" in
      200)
        echo "Repository already exists"
        echo "---"
        ;;
      404)
        # The role ARN is only needed to register a new repository.
        if [ -z "$SNAPSHOT_ROLE_ARN" ]; then
          echo "Error: SNAPSHOT_ROLE_ARN must be set to create the snapshot repository"
          exit 1
        fi
        echo "Repository does not exist. Creating snapshot repository..."
        RESPONSE=$(curl -s -XPUT "$OPENSEARCH_ENDPOINT/_snapshot/$SNAPSHOT_REPOSITORY" -H 'Content-Type: application/json' -d "{
          \"type\": \"s3\",
          \"settings\": {
            \"bucket\": \"$S3_BUCKET_NAME\",
            \"region\": \"$REGION\",
            \"role_arn\": \"$SNAPSHOT_ROLE_ARN\"
          }
        }")
        if echo "$RESPONSE" | grep -q '"acknowledged":true'; then
          echo "Repository created successfully"
        else
          echo "Failed to create repository: $RESPONSE"
          exit 1
        fi
        echo "---"
        ;;
      *)
        # Anything else (auth failure, server error, no connection) must not
        # be mistaken for "repository exists".
        echo "Error: unexpected HTTP status $REPO_CHECK while checking repository $SNAPSHOT_REPOSITORY"
        exit 1
        ;;
    esac
    # Create the snapshot
    echo "Creating snapshot..."
    RESPONSE=$(curl -s -XPUT "$OPENSEARCH_ENDPOINT/_snapshot/$SNAPSHOT_REPOSITORY/$SNAPSHOT_NAME" -H 'Content-Type: application/json' -d "{
      \"indices\": \"$INDICES\",
      \"include_global_state\": false
    }")
    if ! echo "$RESPONSE" | grep -q '"accepted":true'; then
      echo "Failed to create snapshot: $RESPONSE"
      exit 1
    fi
    # Monitor snapshot progress
    echo "Monitoring snapshot progress..."
    ELAPSED=0
    while true; do
      CURRENT_TIME=$(date "+%Y-%m-%d %H:%M:%S")
      # A transient request failure is treated as "state unknown" and retried
      # until the timeout below.
      SNAPSHOT_STATUS=$(curl -s "$OPENSEARCH_ENDPOINT/_snapshot/$SNAPSHOT_REPOSITORY/$SNAPSHOT_NAME/_status") || SNAPSHOT_STATUS=""
      # Take only the first "state" field so STATE is always a single word.
      STATE=$(echo "$SNAPSHOT_STATUS" | grep -o '"state":"[^"]*"' | head -n 1 | cut -d'"' -f4)
      case "$STATE" in
        SUCCESS)
          echo "[$CURRENT_TIME] Snapshot completed successfully"
          break
          ;;
        FAILED|PARTIAL|ABORTED|INCOMPATIBLE)
          # Every non-success terminal state fails the backup.
          echo "[$CURRENT_TIME] Snapshot failed (Status: $STATE)"
          exit 1
          ;;
        *)
          if [ "$ELAPSED" -ge "$SNAPSHOT_TIMEOUT_SECONDS" ]; then
            echo "[$CURRENT_TIME] Timed out after ${ELAPSED}s waiting for snapshot (last status: ${STATE:-unknown})"
            exit 1
          fi
          echo "[$CURRENT_TIME] Snapshot in progress... (Status: $STATE)"
          sleep "$POLL_INTERVAL_SECONDS"
          ELAPSED=$((ELAPSED + POLL_INTERVAL_SECONDS))
          ;;
      esac
    done
    echo "---"
    # Get final snapshot details
    echo "Snapshot Details:"
    SNAPSHOT_INFO=$(curl -s "$OPENSEARCH_ENDPOINT/_snapshot/$SNAPSHOT_REPOSITORY/$SNAPSHOT_NAME")
    # Parse and display relevant information.
    # Count the quoted names inside the "indices" array; an empty or missing
    # array yields 0.
    INDICES_COUNT=$(echo "$SNAPSHOT_INFO" | grep -o '"indices":\[[^]]*\]' | head -n 1 | sed 's/^"indices"://' | grep -o '"[^"]*"' | wc -l)
    # Arithmetic expansion strips any padding wc may add around the number.
    INDICES_COUNT=$((INDICES_COUNT + 0))
    START_TIME=$(echo "$SNAPSHOT_INFO" | grep -o '"start_time":"[^"]*"' | cut -d'"' -f4)
    echo "- Name: $SNAPSHOT_NAME"
    echo "- Start Time: $START_TIME"
    echo "- Indices Backed Up: $INDICES_COUNT"
    echo "---"
    echo "Backup process completed successfully"
61 changes: 61 additions & 0 deletions jobs/opensearch-backup/templates/job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
---
# Job that runs /scripts/backup.sh (mounted from the opensearch-backup-script
# ConfigMap) once, under the per-environment service account.
apiVersion: batch/v1
kind: Job
metadata:
  name: opensearch-backup
  labels:
    # Allows selecting this Job with `-l name-prefix=opensearch-backup`.
    name-prefix: opensearch-backup
spec:
  template:
    spec:
      serviceAccountName: "{{ $.Values.opensearch.serviceAccountPrefix }}-{{ $.Values.opensearch.environment }}"
      containers:
        - name: opensearch-backup
          image: ghcr.io/ministryofjustice/hmpps-delius-alfresco-utils:latest
          command:
            - /bin/sh
            - /scripts/backup.sh
          env:
            # Secret names/keys are piped through `quote` so an empty or
            # special-looking value still renders as a YAML string.
            - name: OPENSEARCH_ENDPOINT
              valueFrom:
                secretKeyRef:
                  name: {{ $.Values.opensearch.endpointSecretName | quote }}
                  key: {{ $.Values.opensearch.endpointSecretKey | quote }}
            - name: S3_BUCKET_NAME
              valueFrom:
                secretKeyRef:
                  name: {{ $.Values.opensearch.s3BucketSecretName | quote }}
                  key: {{ $.Values.opensearch.s3BucketNameKey | quote }}
            - name: SNAPSHOT_ROLE_ARN
              valueFrom:
                secretKeyRef:
                  name: {{ $.Values.opensearch.endpointSecretName | quote }}
                  key: {{ $.Values.opensearch.snapshotRoleArnKey | quote }}
            - name: SNAPSHOT_REPOSITORY
              value: "{{ $.Values.opensearch.repository }}"
            - name: INDICES
              value: "{{ $.Values.opensearch.indices }}"
            - name: SNAPSHOT_PREFIX
              value: "{{ $.Values.opensearch.snapshotPrefix }}"
            - name: ENVIRONMENT
              value: "{{ $.Values.opensearch.environment }}"
            - name: REGION
              value: "{{ $.Values.opensearch.region }}"
          volumeMounts:
            - name: script
              mountPath: /scripts
          securityContext:
            allowPrivilegeEscalation: false
            privileged: false
            readOnlyRootFilesystem: false
            runAsNonRoot: true
            runAsUser: 999
            capabilities:
              drop:
                - ALL
            seccompProfile:
              type: RuntimeDefault
      volumes:
        - name: script
          configMap:
            name: opensearch-backup-script
            # Octal file mode; must stay an unquoted integer for Kubernetes.
            defaultMode: 0755
      restartPolicy: Never
13 changes: 13 additions & 0 deletions jobs/opensearch-backup/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
---
# Default values for the opensearch-backup chart.
opensearch:
  # Target environment; also the suffix of the Job's service account name.
  # Default empty, will be set by GitHub Action
  environment: ''
  region: 'eu-west-2'

  # Snapshot settings passed to the backup script as environment variables.
  repository: 'daily-backups'
  indices: 'alfresco'
  snapshotPrefix: 'backup'

  # Service account used by the Job: "<serviceAccountPrefix>-<environment>".
  serviceAccountPrefix: 'hmpps-migration'

  # Secret holding the OpenSearch endpoint and the snapshot role ARN.
  endpointSecretName: 'opensearch-output'
  endpointSecretKey: 'PROXY_URL'
  snapshotRoleArnKey: 'SNAPSHOT_ROLE_ARN'

  # Secret holding the name of the S3 bucket that stores the snapshots.
  s3BucketSecretName: 's3-opensearch-snapshots-bucket-output'
  s3BucketNameKey: 'BUCKET_NAME'

0 comments on commit 50afc3d

Please sign in to comment.