diff --git a/.github/workflows/actions/database-backup/action.yml b/.github/workflows/actions/database-backup/action.yml
new file mode 100644
index 00000000..cb72004b
--- /dev/null
+++ b/.github/workflows/actions/database-backup/action.yml
@@ -0,0 +1,165 @@
+name: Backup AKS Database
+description: backs up AKS database to Azure Storage
+
+inputs:
+  environment:
+    description: "The name of the environment"
+    required: true
+  azure_credentials:
+    description: "JSON object containing a service principal that can read from Azure Key Vault"
+    required: true
+
+outputs:
+  backup_artifact:
+    description: "The backup artifact name"
+    value: ${{ inputs.environment }}-backup
+
+runs:
+  using: composite
+
+  steps:
+    - uses: actions/checkout@v4
+      id: Checkout
+
+    # Read the per-environment settings from the Terraform variables file and
+    # publish them as environment variables and step outputs. Each value is
+    # validated from a shell variable because entries appended to GITHUB_ENV
+    # only become visible in later steps, not in the step that writes them.
+    - name: Set KV environment variables
+      id: set_kv_env_vars
+      shell: bash
+      env:
+        ENVIRONMENT_NAME: ${{ inputs.environment }}
+      run: |
+        tf_vars_file="terraform/aks/workspace_variables/${ENVIRONMENT_NAME}_aks.tfvars.json"
+
+        if [ ! -f "$tf_vars_file" ]; then
+          echo "::error ::Terraform variables file not found: $tf_vars_file"
+          exit 1
+        fi
+
+        for key in inf_vault_name namespace cluster app_environment; do
+          # "// empty" turns a missing or null key into an empty string
+          # instead of the literal text "null".
+          value="$(jq -r --arg key "$key" '.[$key] // empty' "$tf_vars_file")"
+          if [ -z "$value" ]; then
+            echo "::error ::Failed to extract $key from $tf_vars_file"
+            exit 1
+          fi
+          echo "${key^^}=${value}" >> "$GITHUB_ENV"
+          echo "${key^^}=${value}" >> "$GITHUB_OUTPUT"
+        done
+
+    - uses: Azure/login@v1
+      with:
+        creds: ${{ inputs.azure_credentials }}
+
+    - name: Fetch slack web hook
+      uses: azure/CLI@v1
+      id: slack-web-hook
+      with:
+        inlineScript: |
+          SECRET_VALUE=$(az keyvault secret show --name "SLACK-WEBHOOK" --vault-name "${{ env.INF_VAULT_NAME }}" --query "value" -o tsv)
+          echo "::add-mask::$SECRET_VALUE"
+          echo "SLACK-WEBHOOK=$SECRET_VALUE" >> $GITHUB_OUTPUT
+
+    - name: Install kubectl
+      uses: azure/setup-kubectl@v3
+      with:
+        version: "v1.26.1"
+
+    # Map the environment onto its AKS cluster and application name, and fail
+    # the run when the environment is not one of the known names.
+    - name: Set cluster variables
+      shell: bash
+      env:
+        ENVIRONMENT_NAME: ${{ inputs.environment }}
+      run: |
+        test_cluster_rg=s189t01-tsc-ts-rg
+        test_cluster_name=s189t01-tsc-test-aks
+        production_cluster_rg=s189p01-tsc-pd-rg
+        production_cluster_name=s189p01-tsc-production-aks
+
+        case "${ENVIRONMENT_NAME}" in
+          development | test | preproduction)
+            cluster_rg=$test_cluster_rg
+            cluster_name=$test_cluster_name
+            ;;
+          production)
+            cluster_rg=$production_cluster_rg
+            cluster_name=$production_cluster_name
+            ;;
+          *)
+            echo "::error ::Unknown environment: ${ENVIRONMENT_NAME}"
+            exit 1
+            ;;
+        esac
+
+        {
+          echo "cluster_rg=$cluster_rg"
+          echo "cluster_name=$cluster_name"
+          echo "app_name=find-a-lost-trn-${ENVIRONMENT_NAME}"
+        } >> "$GITHUB_ENV"
+
+    - name: K8 setup
+      shell: bash
+      run: |
+        az aks get-credentials -g ${{ env.cluster_rg }} -n ${{ env.cluster_name }}
+        make bin/konduit.sh
+
+    - name: Setup postgres client
+      uses: DFE-Digital/github-actions/install-postgres-client@master
+
+    # Exported through GITHUB_ENV so the backup, upload and cleanup steps all
+    # see the same file name.
+    - name: Set environment variable
+      shell: bash
+      env:
+        ENVIRONMENT_NAME: ${{ inputs.environment }}
+      run: |
+        echo "BACKUP_FILE_NAME=faltrn_${ENVIRONMENT_NAME}_$(date +"%F")" >> "$GITHUB_ENV"
+
+    - name: Backup ${{ inputs.environment }} DB
+      shell: bash
+      run: |
+        bin/konduit.sh "${app_name}" -- pg_dump -E utf8 --clean --if-exists --no-owner --verbose --no-password -f "${BACKUP_FILE_NAME}.sql"
+        tar -cvzf "${BACKUP_FILE_NAME}.tar.gz" "${BACKUP_FILE_NAME}.sql"
+
+    - name: Set Connection String
+      shell: bash
+      run: |
+        STORAGE_CONN_STR="$(az keyvault secret show --name FALTRN-BACKUP-STORAGE-CONNECTION-STRING-AKS --vault-name "${INF_VAULT_NAME}" | jq -r .value)"
+        echo "::add-mask::$STORAGE_CONN_STR"
+        echo "STORAGE_CONN_STR=$STORAGE_CONN_STR" >> "$GITHUB_ENV"
+
+    - name: Upload Backup to Azure Storage
+      shell: bash
+      run: |
+        az storage blob upload --container-name database-backup \
+          --file "${BACKUP_FILE_NAME}.tar.gz" --name "${BACKUP_FILE_NAME}.tar.gz" --overwrite \
+          --connection-string "${STORAGE_CONN_STR}"
+        rm "${BACKUP_FILE_NAME}.tar.gz"
+
+    - name: Disk cleanup
+      shell: bash
+      run: |
+        sudo rm -rf /usr/local/lib/android || true
+        sudo rm -rf /usr/share/dotnet || true
+        sudo rm -rf /opt/ghc || true
+
+    - name: Remove backup file
+      shell: bash
+      run: |
+        rm "${BACKUP_FILE_NAME}.sql"
+
+    # Notify Slack when any earlier step failed; the webhook is the output of the slack-web-hook step.
+    - name: Check for Failure
+      if: ${{ failure() }}
+      uses: rtCamp/action-slack-notify@master
+      env:
+        SLACK_USERNAME: CI Deployment
+        SLACK_TITLE: Database backup failure
+        SLACK_MESSAGE: ${{ inputs.environment }} database backup job failed
+        SLACK_WEBHOOK: ${{ steps.slack-web-hook.outputs.SLACK-WEBHOOK }}
+        SLACK_COLOR: failure
+        SLACK_FOOTER: Sent from backup job in database-backup workflow
diff --git a/.github/workflows/aks-db-backup.yml b/.github/workflows/aks-db-backup.yml
new file mode 100644
index 00000000..517a0836
--- /dev/null
+++ b/.github/workflows/aks-db-backup.yml
@@ -0,0 +1,30 @@
+name: Backup AKS Database
+
+on:
+  workflow_dispatch:
+    inputs:
+      environment:
+        description: Environment
+        type: choice
+        options:
+          - development
+          - test
+          - preproduction
+          - production
+  schedule: # 01:00 UTC
+    - cron: "0 1 * * *"
+
+jobs:
+  backup:
+    name: Backup AKS Database
+    runs-on: ubuntu-latest
+    steps:
+      # A local action can only be resolved once the repository is checked out.
+      - uses: actions/checkout@v4
+
+      - uses: ./.github/workflows/actions/database-backup
+        id: aks_db_backup
+        with:
+          azure_credentials: ${{ secrets.AZURE_CREDENTIALS }}
+          # Scheduled runs carry no inputs, so fall back to production.
+          environment: ${{ inputs.environment || 'production' }}
diff --git a/.github/workflows/database-backup.yml b/.github/workflows/database-backup.yml
deleted file mode 100644
index 4c728993..00000000
--- a/.github/workflows/database-backup.yml
+++ /dev/null
@@ -1,79 +0,0 @@
-name: Backup Database to Azure Storage
-
-on:
-  workflow_dispatch:
-  schedule: # 01:00 UTC
-    - cron: "0 1 * * *"
-
-jobs:
-  backup:
-    name: Backup PaaS Database ( Production )
-    runs-on: ubuntu-latest
-    environment:
-      name: production
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-
-      - uses: Azure/login@v1
-        with:
-          creds: ${{ secrets.azure_credentials }}
-
-      - name: Set environment variables
-        shell: bash
-        run: |
-          tf_vars_file=terraform/paas/workspace_variables/production.tfvars.json
-          echo "KEY_VAULT_NAME=$(jq -r '.key_vault_name' ${tf_vars_file})" >> $GITHUB_ENV
-          echo "PAAS_SPACE=$(jq -r '.paas_space' ${tf_vars_file})" >> $GITHUB_ENV
-
-      - uses: Azure/get-keyvault-secrets@v1
-        id: get_secrets
-        with:
-          keyvault: ${{ env.KEY_VAULT_NAME }}
-          secrets: "BACKUP-STORAGE-CONNECTION-STRING,PAAS-USER,PAAS-PASSWORD"
-
-      - uses: DfE-Digital/keyvault-yaml-secret@v1
-        id: keyvault-yaml-secret
-        with:
-          keyvault: ${{ env.KEY_VAULT_NAME }}
-          secret: MONITORING
-          key: SLACK_WEBHOOK
-        env:
-          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
-
-      - name: Setup cf cli
-        uses: DFE-Digital/github-actions/setup-cf-cli@master
-        with:
-          CF_USERNAME: ${{ steps.get_secrets.outputs.PAAS-USER }}
-          CF_PASSWORD: ${{ steps.get_secrets.outputs.PAAS-PASSWORD }}
-          CF_SPACE_NAME: ${{ env.PAAS_SPACE }}
-          INSTALL_CONDUIT: true
-
-      - name: Setup postgres client
-        uses: DFE-Digital/github-actions/install-postgres-client@master
-
-      - name: Set environment variable
-        run: echo "BACKUP_FILE_NAME=find-a-lost-trn-production-pg-svc-$(date +"%F-%H")" >> $GITHUB_ENV
-
-      - name: Backup Production DB
-        run: |
-          cf conduit find-a-lost-trn-production-pg-svc -- pg_dump -E utf8 --clean --if-exists --no-owner --verbose --no-password -f ${BACKUP_FILE_NAME}.sql
-          tar -cvzf ${BACKUP_FILE_NAME}.tar.gz ${BACKUP_FILE_NAME}.sql
-
-      - name: Upload Backup to Azure Storage
-        run: |
-          az storage blob upload --container-name find-a-lost-trn \
-          --file ${BACKUP_FILE_NAME}.tar.gz --name ${BACKUP_FILE_NAME}.tar.gz \
-          --connection-string '${{ steps.get_secrets.outputs.BACKUP-STORAGE-CONNECTION-STRING }}' \
-          --overwrite true
-
-      - name: Notify Slack channel on job failure
-        if: failure()
-        uses: rtCamp/action-slack-notify@v2
-        env:
-          SLACK_USERNAME: CI Deployment
-          SLACK_TITLE: Database backup failure
-          SLACK_MESSAGE: Production database backup job failed
-          SLACK_WEBHOOK: ${{ steps.keyvault-yaml-secret.outputs.SLACK_WEBHOOK }}
-          SLACK_COLOR: failure
-          SLACK_FOOTER: Sent from backup job in database-backup workflow
diff --git a/terraform/aks/databases.tf b/terraform/aks/databases.tf
index 632d2867..81317c77 100644
--- a/terraform/aks/databases.tf
+++ b/terraform/aks/databases.tf
@@ -14,6 +14,8 @@ module "postgres" {
   azure_enable_monitoring = var.enable_monitoring
   azure_extensions        = ["plpgsql", "citext", "uuid-ossp"]
   server_version          = "14"
+
+  azure_enable_backup_storage = var.azure_enable_backup_storage
 }
 
 module "redis" {
diff --git a/terraform/aks/variables.tf b/terraform/aks/variables.tf
index 99aed108..dfda9371 100644
--- a/terraform/aks/variables.tf
+++ b/terraform/aks/variables.tf
@@ -113,6 +113,12 @@ variable "inf_vault_name" {
   description = "infrastructure kv name"
 }
 
+variable "azure_enable_backup_storage" {
+  type        = bool
+  default     = false
+  description = "enable Azure backup storage for the postgres database"
+}
+
 variable "review_url_db_name" {
   default     = null
   description = "the name of the secret storing review db url"
diff --git a/terraform/aks/workspace_variables/production_aks.tfvars.json b/terraform/aks/workspace_variables/production_aks.tfvars.json
index ad692aa5..eccff6b0 100644
--- a/terraform/aks/workspace_variables/production_aks.tfvars.json
+++ b/terraform/aks/workspace_variables/production_aks.tfvars.json
@@ -22,5 +22,6 @@
   "inf_vault_name": "s189p01-faltrn-pd-inf-kv",
   "key_vault_resource_group": "s189p01-faltrn-pd-rg",
   "worker_replicas": 2,
-  "replicas": 2
+  "replicas": 2,
+  "azure_enable_backup_storage": true
 }