Merge pull request #42 from ministryofjustice/NIT-1169-alfresco-test-refresh-process-with-larger-data-set

NIT-1169 data refresh process improvements
pete-j-g authored Apr 18, 2024
2 parents 00ff314 + c0117ad commit 851f70c
Showing 11 changed files with 280 additions and 21 deletions.
45 changes: 33 additions & 12 deletions .github/workflows/data-refresh.yaml
@@ -64,13 +64,16 @@ jobs:
          KUBE_NAMESPACE: ${{ secrets.KUBE_NAMESPACE }}
          KUBE_CLUSTER: ${{ secrets.KUBE_CLUSTER }}

      - name: Start DB Refresh Job
      - name: DB Refresh
        working-directory: jobs/refresh-db
        run: |
          kubectl apply -f jobs/refresh-db.yaml
          kubectl wait --timeout 10m --for=condition=complete job/refresh-db
          helm install refresh-db . \
            --set sourceEnvironment=${{ github.event.inputs.source_env }} \
            --set destinationEnvironment=${{ github.event.inputs.destination_env }}
          kubectl wait job refresh-db --for=condition=complete --timeout 10h
      - name: Delete DB Refresh Job
        run: kubectl delete job refresh-db
      - name: Uninstall DB Refresh chart
        run: helm uninstall refresh-db
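Note: the DB refresh step now installs a Helm chart from jobs/refresh-db instead of applying a static manifest, passes the source and destination environments in as chart values, and raises the job timeout from 10 minutes to 10 hours for the larger data set. A rough sketch of the equivalent manual invocation (the environment names here are illustrative, not taken from the workflow):

    cd jobs/refresh-db
    helm install refresh-db . \
      --set sourceEnvironment=stage \
      --set destinationEnvironment=poc
    kubectl wait job refresh-db --for=condition=complete --timeout 10h
    helm uninstall refresh-db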

  refresh-s3:
    name: Refresh S3
@@ -92,13 +95,27 @@ jobs:
          KUBE_NAMESPACE: ${{ secrets.KUBE_NAMESPACE }}
          KUBE_CLUSTER: ${{ secrets.KUBE_CLUSTER }}

      - name: Start S3 Refresh Job
      - name: S3 Refresh
        working-directory: jobs/refresh-s3
        run: |
          kubectl apply -f jobs/refresh-s3.yaml
          kubectl wait --timeout 10m --for=condition=complete job/refresh-s3
          set -xeo pipefail
      - name: Delete S3 Refresh Job
        run: kubectl delete job refresh-s3
          SERVICE_POD_DEPLOYMENT=$(kubectl get deployment -l app=service-pod -o jsonpath="{.items[0].metadata.name}")
          SERVICE_POD_NAME=$(kubectl get pod -l app=$SERVICE_POD_DEPLOYMENT -o jsonpath="{.items[0].metadata.name}")
          SRC_BUCKET=$(kubectl get secrets s3-bucket-output -o jsonpath='{.data.BUCKET_NAME}' | base64 -d)
          DIRS=$(kubectl exec $SERVICE_POD_NAME -- aws s3 ls $SRC_BUCKET | grep -v contentstore | awk -F ' ' '{print $2}' | tr -d '/' | tr '\n' ',')
          helm install refresh-s3 . \
            --set sourceEnvironment=${{ github.event.inputs.source_env }} \
            --set destinationEnvironment=${{ github.event.inputs.destination_env }} \
            --set directories="{${DIRS:0:-1}}"
          kubectl wait jobs -l name-prefix=refresh-s3 --for=condition=complete --timeout 10h
      - name: Uninstall S3 Refresh chart
        run: helm uninstall refresh-s3
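Note: the S3 refresh step now discovers the top-level prefixes in the source bucket by listing it from the service pod, drops the contentstore prefix and trailing slashes, and joins the rest into a comma-separated list that is passed to the chart as a Helm array, producing one Job per directory. Illustratively, if the bucket listing contained 2023/, 2024/ and contentstore/, DIRS would end up as "2023,2024," and the final flag would expand to:

    --set directories="{2023,2024}"   # ${DIRS:0:-1} trims the trailing comma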

  wipe-solr-data:
    name: Wipe Solr Data
@@ -151,6 +168,10 @@ jobs:

      - name: Stop ${{ github.event.inputs.destination_env }} Environment
        run: |
          kubectl scale deployment alfresco-content-services-alfresco-cs-repository --replicas=1
          kubectl scale deployment alfresco-content-services-alfresco-cs-share --replicas=1
          apt update && apt install -y jq
          HELM_VALUES=$(helm get values alfresco-content-services -o json)
          kubectl scale deployment alfresco-content-services-alfresco-cs-repository --replicas=$(echo $HELM_VALUES | jq '.repository.replicaCount')
          kubectl scale deployment alfresco-content-services-alfresco-cs-share --replicas=$(echo $HELM_VALUES | jq '.share.replicaCount')
          kubectl scale deployment alfresco-content-services-alfresco-search-solr --replicas=1
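Note: instead of scaling the repository and share deployments to a fixed single replica, the workflow now installs jq on the fly and reads the configured replica counts out of the alfresco-content-services release values, so the destination environment comes back at its configured size. A minimal sketch of the lookup, assuming the counts live at .repository.replicaCount and .share.replicaCount as the workflow expects:

    helm get values alfresco-content-services -o json | jq '.repository.replicaCount'
    helm get values alfresco-content-services -o json | jq '.share.replicaCount'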
5 changes: 5 additions & 0 deletions jobs/migrate-db/Chart.yaml
@@ -0,0 +1,5 @@
apiVersion: v2
appVersion: 0.1
version: 0.0.1
description: A quickly thrown together Helm chart for deploying a job to migrate DB data
name: migrate-db
102 changes: 102 additions & 0 deletions jobs/migrate-db/templates/job.yaml
@@ -0,0 +1,102 @@
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: migrate-db-script
data:
  entrypoint.sh: |-
    #!/bin/bash
    set -e
    echo "${SRC_DB_HOST}:5432:${SRC_DB_NAME}:${SRC_DB_USER}:${SRC_DB_PASS}" > ~/.pgpass
    echo "${DST_DB_HOST}:5432:${DST_DB_NAME}:${DST_DB_USER}:${DST_DB_PASS}" >> ~/.pgpass
    chmod 0600 ~/.pgpass
    set -x
    pg_dump --jobs=4 --host="$SRC_DB_HOST" --username="$SRC_DB_USER" --dbname="$SRC_DB_NAME" --no-owner --no-privileges --verbose --format=directory --file=/tmp/db-dump
    pg_restore --jobs=4 --host="$DST_DB_HOST" --username="$DST_DB_USER" --dbname="$DST_DB_NAME" --clean --if-exists --no-owner --no-privileges --verbose /tmp/db-dump
    rm -rv /tmp/db-dump ~/.pgpass
---
apiVersion: batch/v1
kind: Job
metadata:
  name: migrate-db
spec:
  template:
    spec:
      containers:
        - name: migrate-db
          image: postgres:14
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: 4
              memory: 2Gi
          command:
            - /bin/entrypoint.sh
          env:
            - name: SRC_DB_NAME
              valueFrom:
                secretKeyRef:
                  name: legacy-rds-instance
                  key: DATABASE_NAME
            - name: SRC_DB_USER
              valueFrom:
                secretKeyRef:
                  name: legacy-rds-instance
                  key: DATABASE_USERNAME
            - name: SRC_DB_PASS
              valueFrom:
                secretKeyRef:
                  name: legacy-rds-instance
                  key: DATABASE_PASSWORD
            - name: SRC_DB_HOST
              valueFrom:
                secretKeyRef:
                  name: legacy-rds-instance
                  key: RDS_INSTANCE_ADDRESS
            - name: DST_DB_NAME
              valueFrom:
                secretKeyRef:
                  name: rds-instance-output
                  key: DATABASE_NAME
            - name: DST_DB_USER
              valueFrom:
                secretKeyRef:
                  name: rds-instance-output
                  key: DATABASE_USERNAME
            - name: DST_DB_PASS
              valueFrom:
                secretKeyRef:
                  name: rds-instance-output
                  key: DATABASE_PASSWORD
            - name: DST_DB_HOST
              valueFrom:
                secretKeyRef:
                  name: rds-instance-output
                  key: RDS_INSTANCE_ADDRESS
          volumeMounts:
            - name: migrate-db-script
              mountPath: /bin/entrypoint.sh
              readOnly: true
              subPath: entrypoint.sh
          securityContext:
            allowPrivilegeEscalation: false
            privileged: false
            readOnlyRootFilesystem: false
            runAsNonRoot: true
            runAsUser: 999
            capabilities:
              drop:
                - ALL
            seccompProfile:
              type: RuntimeDefault
      serviceAccount: hmpps-migration-{{ .Values.environment }}
      serviceAccountName: hmpps-migration-{{ .Values.environment }}
      restartPolicy: Never
      volumes:
        - name: migrate-db-script
          configMap:
            name: migrate-db-script
            defaultMode: 0755
  backoffLimit: 0
...
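Note: the migrate-db job dumps the source database in PostgreSQL's parallel directory format and restores it into the destination with --clean --if-exists, so existing objects are dropped before being recreated; credentials for both sides are written to ~/.pgpass, which lets pg_dump and pg_restore run without password prompts. For reference, each ~/.pgpass line follows the standard colon-separated layout (the values below are placeholders, not real endpoints):

    # hostname:port:database:username:password
    source-db.example.internal:5432:alfresco:alfresco:********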
1 change: 1 addition & 0 deletions jobs/migrate-db/values_stage.yaml
@@ -0,0 +1 @@
environment: stage
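Note: with environment: stage, the templated service account references render as hmpps-migration-stage. A quick way to check the rendering locally (a sketch, assuming Helm 3 and that it is run from the repository root):

    helm template migrate-db jobs/migrate-db -f jobs/migrate-db/values_stage.yaml | grep serviceAccountName
    # serviceAccountName: hmpps-migration-stage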
5 changes: 5 additions & 0 deletions jobs/migrate-s3/Chart.yaml
@@ -0,0 +1,5 @@
apiVersion: v2
appVersion: 0.1
version: 0.0.1
description: A quickly thrown together Helm chart for deploying a job to migrate S3 data
name: migrate-s3
72 changes: 72 additions & 0 deletions jobs/migrate-s3/templates/job.yaml
@@ -0,0 +1,72 @@
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: migrate-s3-script
data:
  entrypoint.sh: |-
    #!/bin/sh
    set -xe
    aws configure set default.s3.max_concurrent_requests 2000
    aws configure set default.s3.use_accelerate_endpoint true
    aws s3 sync s3://$SRC_BUCKET/$DIR s3://$DST_BUCKET/$DIR --delete --only-show-errors
    echo sync of $DIR directory completed
{{- range .Values.dirs }}
---
apiVersion: batch/v1
kind: Job
metadata:
  name: migrate-s3-{{ . | toString | replace "/" "-" }}
spec:
  template:
    spec:
      containers:
        - name: migrate-s3
          image: 754256621582.dkr.ecr.eu-west-2.amazonaws.com/webops/cloud-platform-service-pod:c5f69b4624b956248001fa7c173c89a0556a457e
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: 4
              memory: 8Gi
          command:
            - /bin/entrypoint.sh
          env:
            - name: SRC_BUCKET
              value: {{ $.Values.srcBucket }}
            - name: DST_BUCKET
              valueFrom:
                secretKeyRef:
                  name: s3-bucket-output
                  key: BUCKET_NAME
            - name: DIR
              value: {{ . | quote }}
          volumeMounts:
            - name: migrate-s3-script
              mountPath: /bin/entrypoint.sh
              readOnly: true
              subPath: entrypoint.sh
          securityContext:
            allowPrivilegeEscalation: false
            privileged: false
            readOnlyRootFilesystem: false
            runAsNonRoot: true
            runAsUser: 1001
            capabilities:
              drop:
                - ALL
            seccompProfile:
              type: RuntimeDefault
      serviceAccount: hmpps-migration-{{ $.Values.environment }}
      serviceAccountName: hmpps-migration-{{ $.Values.environment }}
      restartPolicy: OnFailure
      volumes:
        - name: migrate-s3-script
          configMap:
            name: migrate-s3-script
            defaultMode: 0755
  backoffLimit: 10
{{- end }}
...
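Note: the {{- range .Values.dirs }} loop stamps out one Job per entry in dirs, replacing any / in the directory name so the rendered name is a valid Kubernetes resource name. Illustratively, the stage values below would yield Jobs such as:

    migrate-s3-2019
    migrate-s3-contentstore-2019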
13 changes: 13 additions & 0 deletions jobs/migrate-s3/values_stage.yaml
@@ -0,0 +1,13 @@
environment: stage
srcBucket: tf-eu-west-2-hmpps-delius-stage-alfresco-storage-s3bucket
dirs:
- 2019
- 2020
- 2021
- 2022
- 2023
- 2024
- contentstore/2016
- contentstore/2017
- contentstore/2018
- contentstore/2019
5 changes: 5 additions & 0 deletions jobs/refresh-db/Chart.yaml
@@ -0,0 +1,5 @@
apiVersion: v2
appVersion: 0.1
version: 0.0.1
description: Job to refresh DB data
name: refresh-db
18 changes: 14 additions & 4 deletions jobs/refresh-db.yaml → jobs/refresh-db/templates/job.yaml
@@ -27,6 +27,10 @@ spec:
        - name: refresh-db
          image: postgres:14
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: 4
              memory: 2Gi
          command:
            - /bin/entrypoint.sh
          env:
@@ -53,22 +57,22 @@ spec:
            - name: DST_DB_NAME
              valueFrom:
                secretKeyRef:
                  name: rds-instance-output-poc
                  name: rds-instance-output-{{ .Values.destinationEnvironment }}
                  key: DATABASE_NAME
            - name: DST_DB_USER
              valueFrom:
                secretKeyRef:
                  name: rds-instance-output-poc
                  name: rds-instance-output-{{ .Values.destinationEnvironment }}
                  key: DATABASE_USERNAME
            - name: DST_DB_PASS
              valueFrom:
                secretKeyRef:
                  name: rds-instance-output-poc
                  name: rds-instance-output-{{ .Values.destinationEnvironment }}
                  key: DATABASE_PASSWORD
            - name: DST_DB_HOST
              valueFrom:
                secretKeyRef:
                  name: rds-instance-output-poc
                  name: rds-instance-output-{{ .Values.destinationEnvironment }}
                  key: RDS_INSTANCE_ADDRESS
          volumeMounts:
            - name: refresh-db-script
@@ -79,7 +83,13 @@ spec:
            allowPrivilegeEscalation: false
            privileged: false
            readOnlyRootFilesystem: false
            runAsNonRoot: true
            runAsUser: 999
            capabilities:
              drop:
                - ALL
            seccompProfile:
              type: RuntimeDefault
      serviceAccount: hmpps-migration-development
      serviceAccountName: hmpps-migration-development
      restartPolicy: Never
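Note: the destination secret references in the refresh-db job are no longer hard-coded to the poc environment; they are templated on destinationEnvironment, so each destination picks up its own secret. For example (the generalised naming is inferred from the existing poc value):

    # with --set destinationEnvironment=poc the secretKeyRef renders as:
    #   name: rds-instance-output-poc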
5 changes: 5 additions & 0 deletions jobs/refresh-s3/Chart.yaml
@@ -0,0 +1,5 @@
apiVersion: v2
appVersion: 0.1
version: 0.0.1
description: Jobs to refresh S3 data
name: refresh-s3
