✨ Update DB Refresh workflow to include Slack notifications (#76)
* ✨ Update DB Refresh workflow to include Slack notifications

* 🐛 poc now uses opensearch

* Update data-refresh.yaml

* --ignore-not-found for helm uninstall

* Happy Helming

* Update data-refresh.yaml

* 🐛 Correct service account names for refresh jobs

* fix: Add curl package and fix Slack message

* typo

* ✨ create image and update job

Update job.yaml

Update job.yaml

Update job.yaml

Update job.yaml

Update job.yaml

Update job.yaml

Update job.yaml

Update job.yaml

Update Dockerfile

Update job.yaml

Update Dockerfile

Update Dockerfile

Update job.yaml

absolute dirs

Update job.yaml

Update job.yaml

Update job.yaml

Update job.yaml

Update Dockerfile

non root support

Update job.yaml

Update data-refresh.yaml

up

Update build-push-db-utils.yml

rename

Update build-push-db-utils.yml

Update build-push-db-utils.yml

no need to install jq

no password

update jobs for refresh

* Update job.yaml

* Update job.yaml

* Update job.yaml

* Update job.yaml

* Update job.yaml

* Update job.yaml

* Update job.yaml

* Update job.yaml

* t

* Update job.yaml

* Update job.yaml

* Update job.yaml

* Update job.yaml

* Update job.yaml

* typo

* Update job.yaml

* Update job.yaml

* Update data-refresh.yaml

---------

Co-authored-by: George Taylor <[email protected]>
andrewmooreio and georgepstaylor authored Jul 29, 2024
1 parent 9a8bce9 commit f4d16d4
Showing 6 changed files with 145 additions and 60 deletions.
42 changes: 42 additions & 0 deletions .github/workflows/build-push-db-utils.yml
@@ -0,0 +1,42 @@
name: Build and push image

on:
  push:
  workflow_dispatch:

env:
  IMAGE_NAME: hmpps-delius-alfresco-db-utils

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: checkout code
        uses: actions/checkout@v4
      - name: Log in to the Container registry
        uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81
        with:
          images: ${{ env.IMAGE_NAME }}
      - name: Build and push Docker image
        if: github.ref == 'refs/heads/main'
        uses: docker/build-push-action@5176d81f87c23d6fc96624dfdbcd9f3830bbe445
        with:
          context: ./tools/db-utils/
          push: true
          tags: ghcr.io/${{ github.repository_owner }}/${{ steps.meta.outputs.tags }}, ghcr.io/${{ github.repository_owner }}/${{ env.IMAGE_NAME }}:latest
          labels: ${{ steps.meta.outputs.labels }}
      - name: Build and push Docker image
        if: github.ref != 'refs/heads/main'
        uses: docker/build-push-action@5176d81f87c23d6fc96624dfdbcd9f3830bbe445
        with:
          context: ./tools/db-utils/
          push: true
          tags: ghcr.io/${{ github.repository_owner }}/${{ steps.meta.outputs.tags }}, ghcr.io/${{ github.repository_owner }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}-${{ github.run_id }}
          labels: ${{ steps.meta.outputs.labels }}
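
Note: pushes to main are tagged latest, while other branches get a branch-name plus run-id tag. As a rough sketch (assuming the repository owner resolves to ministryofjustice, as referenced in the job template further down), the resulting image could be pulled with:

# Hypothetical pull of the image produced by this workflow; the tag follows the pattern above.
docker pull ghcr.io/ministryofjustice/hmpps-delius-alfresco-db-utils:latest
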
101 changes: 50 additions & 51 deletions .github/workflows/data-refresh.yaml
@@ -41,12 +41,12 @@ jobs:
      - name: Stop ${{ github.event.inputs.destination_env }} Environment
        run: |
          kubectl scale deployment alfresco-content-services-alfresco-cs-repository --replicas=0
          kubectl scale deployment alfresco-content-services-alfresco-cs-share --replicas=0
          kubectl scale deployment alfresco-content-services-alfresco-search-solr --replicas=0
          kubectl scale deployment alfresco-content-services-alfresco-cs-share --replicas=0
          kubectl scale deployment alfresco-content-services-alfresco-search-enterprise-liveindexing --replicas=0
  refresh-db:
    name: Refresh DB
    runs-on: [self-hosted, Linux, management-infrastructure]
    runs-on: ubuntu-22.04
    environment:
      name: ${{ github.event.inputs.source_env }}
    needs: stop-destination-environment
@@ -59,11 +59,10 @@ jobs:
          version: 'v1.26.0' # default is latest stable
        id: kubectl_install

      - name: Install Helm
        uses: azure/[email protected]
      - uses: azure/[email protected]
        with:
          version: 'v3.9.0'
        id: helm_install
          version: 'v3.15.3' # default is latest (stable)
        id: install

      - name: Configure kubectl
        run: |
@@ -76,20 +75,20 @@
          KUBE_NAMESPACE: ${{ secrets.KUBE_NAMESPACE }}
          KUBE_CLUSTER: ${{ secrets.KUBE_CLUSTER }}

      - name: Uninstall DB Refresh chart
        run: helm uninstall refresh-db --ignore-not-found

      - name: DB Refresh
        working-directory: jobs/refresh-db
        run: |
          helm install refresh-db . \
            --set sourceEnvironment=${{ github.event.inputs.source_env }} \
            --set destinationEnvironment=${{ github.event.inputs.destination_env }}
          kubectl wait job refresh-db --for=condition=complete --timeout 10h
      - name: Uninstall DB Refresh chart
        run: helm uninstall refresh-db
            --set destinationEnvironment=${{ github.event.inputs.destination_env }} \
            --set slackWebhookUrl=${{ secrets.SLACK_WEBHOOK_URL }}
  refresh-s3:
    name: Refresh S3
    runs-on: [self-hosted, Linux, management-infrastructure]
    runs-on: ubuntu-22.04
    environment:
      name: ${{ github.event.inputs.source_env }}
    needs: stop-destination-environment
@@ -102,11 +101,10 @@ jobs:
          version: 'v1.26.0' # default is latest stable
        id: kubectl_install

      - name: Install Helm
        uses: azure/[email protected]
      - uses: azure/[email protected]
        with:
          version: 'v3.9.0'
        id: helm_install
          version: 'v3.15.3' # default is latest (stable)
        id: install

      - name: Configure kubectl
        run: |
@@ -118,6 +116,9 @@
        env:
          KUBE_NAMESPACE: ${{ secrets.KUBE_NAMESPACE }}
          KUBE_CLUSTER: ${{ secrets.KUBE_CLUSTER }}

      - name: Uninstall S3 Refresh chart
        run: helm uninstall refresh-s3 --ignore-not-found

      - name: S3 Refresh
        working-directory: jobs/refresh-s3
@@ -139,35 +140,35 @@ jobs:
          kubectl wait jobs -l name-prefix=refresh-s3 --for=condition=complete --timeout 10h
      - name: Uninstall S3 Refresh chart
        run: helm uninstall refresh-s3

  wipe-solr-data:
    name: Wipe Solr Data
    runs-on: ubuntu-22.04
    environment:
      name: ${{ github.event.inputs.destination_env }}
    needs: stop-destination-environment
    steps:
      - uses: actions/[email protected]

      - name: Configure kubectl
        run: |
          echo "${{ secrets.KUBE_CERT }}" > ca.crt
          kubectl config set-cluster ${KUBE_CLUSTER} --certificate-authority=./ca.crt --server=https://${KUBE_CLUSTER}
          kubectl config set-credentials deploy-user --token=${{ secrets.KUBE_TOKEN }}
          kubectl config set-context ${KUBE_CLUSTER} --cluster=${KUBE_CLUSTER} --user=deploy-user --namespace=${KUBE_NAMESPACE}
          kubectl config use-context ${KUBE_CLUSTER}
        env:
          KUBE_NAMESPACE: ${{ secrets.KUBE_NAMESPACE }}
          KUBE_CLUSTER: ${{ secrets.KUBE_CLUSTER }}

      - name: Start Solr Data Wipe Job
        run: |
          kubectl apply -f jobs/wipe-solr-data.yaml
          kubectl wait --timeout 10m --for=condition=complete job/wipe-solr-data
      - name: Delete Refresh Job
        run: kubectl delete job wipe-solr-data
        run: helm uninstall refresh-s3 --ignore-not-found

  # wipe-solr-data:
  #   name: Wipe Solr Data
  #   runs-on: ubuntu-22.04
  #   environment:
  #     name: ${{ github.event.inputs.destination_env }}
  #   needs: stop-destination-environment
  #   steps:
  #     - uses: actions/[email protected]

  #     - name: Configure kubectl
  #       run: |
  #         echo "${{ secrets.KUBE_CERT }}" > ca.crt
  #         kubectl config set-cluster ${KUBE_CLUSTER} --certificate-authority=./ca.crt --server=https://${KUBE_CLUSTER}
  #         kubectl config set-credentials deploy-user --token=${{ secrets.KUBE_TOKEN }}
  #         kubectl config set-context ${KUBE_CLUSTER} --cluster=${KUBE_CLUSTER} --user=deploy-user --namespace=${KUBE_NAMESPACE}
  #         kubectl config use-context ${KUBE_CLUSTER}
  #       env:
  #         KUBE_NAMESPACE: ${{ secrets.KUBE_NAMESPACE }}
  #         KUBE_CLUSTER: ${{ secrets.KUBE_CLUSTER }}

  #     - name: Start Solr Data Wipe Job
  #       run: |
  #         kubectl apply -f jobs/wipe-solr-data.yaml
  #         kubectl wait --timeout 10m --for=condition=complete job/wipe-solr-data

  #     - name: Delete Refresh Job
  #       run: kubectl delete job wipe-solr-data

  start-destination-environment:
    name: Start ${{ github.event.inputs.destination_env }} Environment
@@ -177,7 +178,7 @@
    needs:
      - refresh-db
      - refresh-s3
      - wipe-solr-data
      # - wipe-solr-data
    steps:
      - name: Configure kubectl
        run: |
@@ -190,12 +191,10 @@
          KUBE_NAMESPACE: ${{ secrets.KUBE_NAMESPACE }}
          KUBE_CLUSTER: ${{ secrets.KUBE_CLUSTER }}

      - name: Stop ${{ github.event.inputs.destination_env }} Environment
      - name: Start ${{ github.event.inputs.destination_env }} Environment
        run: |
          apt update && apt install -y jq
          HELM_VALUES=$(helm get values alfresco-content-services -o json)
          kubectl scale deployment alfresco-content-services-alfresco-cs-repository --replicas=$(echo $HELM_VALUES | jq '.repository.replicaCount')
          kubectl scale deployment alfresco-content-services-alfresco-cs-share --replicas=$(echo $HELM_VALUES | jq '.share.replicaCount')
          kubectl scale deployment alfresco-content-services-alfresco-search-solr --replicas=1
          kubectl scale deployment alfresco-content-services-alfresco-search-enterprise-liveindexing --replicas=1
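
The reworked start step reads replica counts back out of the stored Helm values rather than hard-coding them. A minimal standalone sketch of that pattern, assuming the release name alfresco-content-services and the value paths used above:

#!/bin/bash
# Sketch: scale deployments back up using the replica counts stored in the Helm release values.
set -euo pipefail
HELM_VALUES=$(helm get values alfresco-content-services -o json)
REPO_REPLICAS=$(echo "$HELM_VALUES" | jq '.repository.replicaCount')
SHARE_REPLICAS=$(echo "$HELM_VALUES" | jq '.share.replicaCount')
kubectl scale deployment alfresco-content-services-alfresco-cs-repository --replicas="$REPO_REPLICAS"
kubectl scale deployment alfresco-content-services-alfresco-cs-share --replicas="$SHARE_REPLICAS"
kubectl scale deployment alfresco-content-services-alfresco-search-enterprise-liveindexing --replicas=1
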
2 changes: 1 addition & 1 deletion jobs/refresh-db/Chart.yaml
@@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 0.1
version: 0.0.1
version: 0.0.2
description: Job to refresh DB data
name: refresh-db
43 changes: 37 additions & 6 deletions jobs/refresh-db/templates/job.yaml
@@ -7,14 +7,37 @@ data:
  entrypoint.sh: |-
    #!/bin/bash
    set -e

    trap 'send_slack_notification $?' EXIT

    function send_slack_notification() {
      STATUS=$1
      if [ "$STATUS" -eq 0 ]; then
        JSON_PAYLOAD=$(jq -n --arg text "Refresh DB (${SRC_ENV} to ${DST_ENV}) job succeeded" '{text: $text}')
      else
        ERROR_MSG=$(tail -n 10 ~/error.log) || ERROR_MSG="Unknown error"
        JSON_PAYLOAD=$(jq -n --arg text "Refresh DB (${SRC_ENV} to ${DST_ENV}) job failed with error: $ERROR_MSG" '{text: $text}')
      fi
      curl -X POST -H 'Content-type: application/json' --data "$JSON_PAYLOAD" $SLACK_WEBHOOK_URL
    }

    echo "${SRC_DB_HOST}:5432:${SRC_DB_NAME}:${SRC_DB_USER}:${SRC_DB_PASS}" > ~/.pgpass
    echo "${DST_DB_HOST}:5432:${DST_DB_NAME}:${DST_DB_USER}:${DST_DB_PASS}" >> ~/.pgpass
    cat ~/.pgpass
    chmod 0600 ~/.pgpass
    chown job:job ~/.pgpass
    set -x

    pg_dump --jobs=4 --host="$SRC_DB_HOST" --username="$SRC_DB_USER" --dbname="$SRC_DB_NAME" --no-owner --no-privileges --verbose --format=directory --file=/tmp/db-dump
    pg_restore --jobs=4 --host="$DST_DB_HOST" --username="$DST_DB_USER" --dbname="$DST_DB_NAME" --clean --if-exists --no-owner --no-privileges --verbose /tmp/db-dump
    rm -rv /tmp/db-dump ~/.pgpass

    # Dump the source database
    pg_dump --jobs=4 --host="$SRC_DB_HOST" --username="$SRC_DB_USER" --dbname="$SRC_DB_NAME" --no-owner --no-privileges --verbose --format=directory --file=/home/job/db-dump 2> >(tee ~/error.log >&2)

    psql --host="$DST_DB_HOST" --username="$DST_DB_USER" --dbname="$DST_DB_NAME" -c "drop schema if exists public cascade;" 2> >(tee ~/error.log >&2)
    psql --host="$DST_DB_HOST" --username="$DST_DB_USER" --dbname="$DST_DB_NAME" -c "create schema public;" 2> >(tee ~/error.log >&2)

    # Restore the source database dump to the destination database
    pg_restore --jobs=4 --host="$DST_DB_HOST" --username="$DST_DB_USER" --dbname="$DST_DB_NAME" --no-owner --no-privileges --verbose /home/job/db-dump 2> >(tee ~/error.log >&2)

    rm -rv /home/job/db-dump ~/.pgpass
---
apiVersion: batch/v1
kind: Job
@@ -25,7 +48,7 @@ spec:
    spec:
      containers:
        - name: refresh-db
          image: postgres:14
          image: ghcr.io/ministryofjustice/hmpps-delius-alfresco-db-utils:NIT-1403-alfresco-move-away-from-long-running-github-workflows-10114657186
          imagePullPolicy: IfNotPresent
          resources:
            limits:
@@ -34,6 +57,8 @@ spec:
          command:
            - /bin/entrypoint.sh
          env:
            - name: HOME
              value: "/home/job"
            - name: SRC_DB_NAME
              valueFrom:
                secretKeyRef:
@@ -74,6 +99,12 @@ spec:
              secretKeyRef:
                name: rds-instance-output-{{ .Values.destinationEnvironment }}
                key: RDS_INSTANCE_ADDRESS
            - name: SLACK_WEBHOOK_URL
              value: "{{ .Values.slackWebhookUrl }}"
            - name: SRC_ENV
              value: "{{ .Values.sourceEnvironment }}"
            - name: DST_ENV
              value: "{{ .Values.destinationEnvironment }}"
          volumeMounts:
            - name: refresh-db-script
              mountPath: /bin/entrypoint.sh
@@ -90,8 +121,8 @@ spec:
                - ALL
            seccompProfile:
              type: RuntimeDefault
      serviceAccount: hmpps-migration-development
      serviceAccountName: hmpps-migration-development
      serviceAccount: hmpps-migration-dev
      serviceAccountName: hmpps-migration-dev
      restartPolicy: Never
      volumes:
        - name: refresh-db-script
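
The Slack notification added to the entrypoint script hinges on an EXIT trap that reports the script's final status. A simplified standalone sketch of the same pattern (assuming SLACK_WEBHOOK_URL is exported and curl and jq are installed; the function name here is illustrative):

#!/bin/bash
set -e

# Post a success or failure message to the Slack webhook when the script exits.
notify() {
  local status=$1
  if [ "$status" -eq 0 ]; then
    payload=$(jq -n --arg text "refresh job succeeded" '{text: $text}')
  else
    payload=$(jq -n --arg text "refresh job failed with exit code $status" '{text: $text}')
  fi
  curl -X POST -H 'Content-type: application/json' --data "$payload" "$SLACK_WEBHOOK_URL"
}
trap 'notify $?' EXIT

# ... the real job steps (pg_dump, psql, pg_restore) would run here ...
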
4 changes: 2 additions & 2 deletions jobs/refresh-s3/templates/job.yaml
@@ -66,8 +66,8 @@ spec:
                - ALL
            seccompProfile:
              type: RuntimeDefault
      serviceAccount: hmpps-migration-development
      serviceAccountName: hmpps-migration-development
      serviceAccount: hmpps-migration-dev
      serviceAccountName: hmpps-migration-dev
      restartPolicy: OnFailure
      volumes:
        - name: refresh-s3-script
13 changes: 13 additions & 0 deletions tools/db-utils/Dockerfile
@@ -0,0 +1,13 @@
FROM debian:bookworm-slim

# Install psql client
RUN apt-get update && apt-get install -y postgresql-client
# Install curl
RUN apt-get install -y curl
# Install jq
RUN apt-get install -y jq

# Create a non-root user and set the home directory
RUN useradd -u 999 -ms /bin/bash job
USER job
WORKDIR /home/job
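
To sanity-check the image locally before the workflow pushes it, something along these lines should work (the local tag is an arbitrary choice):

# Build the db-utils image from its build context and confirm the bundled tools are present.
docker build -t db-utils:local tools/db-utils/
docker run --rm db-utils:local psql --version
docker run --rm db-utils:local bash -c 'curl --version && jq --version'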
