Oracle: iterative_call_for_database_backup #1472
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Name shown for this workflow in the Actions UI.
name: 'Oracle: Backup'
# Title of an individual run. Manual (workflow_dispatch) runs are labelled with the chosen
# environment, period and host; every other trigger uses the fixed "iterative_call_for" label.
run-name: >-
  Oracle: ${{ github.event_name == 'workflow_dispatch' && format('{0}_{1}_{2}', github.event.inputs.TargetEnvironment, github.event.inputs.Period, github.event.inputs.TargetHost) || 'iterative_call_for' }}_database_backup
# Triggers:
#   workflow_dispatch   - manual run with the full set of options
#   workflow_call       - invoked from another workflow with the three mandatory values
#   repository_dispatch - re-entry event reporting the outcome of a backup
on:
  workflow_dispatch:
    inputs:
      TargetEnvironment:
        description: 'Target environment'
        required: true
        type: choice
        options:
          - 'delius-core-dev'
          - 'delius-core-test'
          - 'delius-core-training'
          - 'delius-core-stage'
          - 'delius-core-preprod'
          - 'delius-core-prod'
          - 'delius-mis-dev'
          - 'delius-mis-stage'
          - 'delius-mis-preprod'
          - 'delius-mis-prod'
      TargetHost:
        description: 'Backup target host'
        required: true
        type: choice
        options:
          - 'delius_primarydb'
          - 'mis_primarydb'
          - 'boe_primarydb'
          - 'dsd_primarydb'
          - 'delius_standbydb1'
          - 'mis_standbydb1'
          - 'boe_standbydb1'
          - 'dsd_standbydb1'
          - 'delius_standbydb2'
      Period:
        description: 'Is this a daily or weekly backup?'
        required: true
        type: choice
        options:
          - 'daily'
          - 'weekly'
      # "yes"/"no" are deliberately quoted strings, not YAML booleans.
      BackupStandbyOnPrimaryFailure:
        description: 'Retry the backup on the standby if primary backup fails'
        type: choice
        default: 'yes'
        options:
          - 'yes'
          - 'no'
      FixAbsentChunks:
        description: 'Validate and fix absent S3 missing backup chunks'
        type: choice
        default: 'yes'
        options:
          - 'yes'
          - 'no'
      ParametersJSON:
        description: 'JSON parameters for backup job (overrides any other specified inputs)'
        type: string
        required: false
      EnableTrace:
        description: 'Enable RMAN trace for debugging'
        required: false
        type: choice
        default: 'no'
        options:
          - 'yes'
          - 'no'
      # The first (empty) option means no verbosity flag is passed on.
      VerboseOutput:
        description: 'Verbose Output level'
        type: choice
        options:
          - ''
          - '-v'
          - '-vv'
          - '-vvv'
          - '-vvvv'
      SourceCodeVersion:
        description: 'Source version for the hmpps-delius-operation-automation. Enter a pull request, branch, commit ID, tag, or reference.'
        type: string
        default: 'main'
      SourceConfigVersion:
        description: 'Source version for the modernisation-platform-configuration-management. Enter a pull request, branch, commit ID, tag, or reference.'
        type: string
        default: 'main'
  workflow_call:
    inputs:
      TargetEnvironment:
        description: 'Target environment'
        required: true
        type: string
      TargetHost:
        description: 'Backup target host'
        required: true
        type: string
      Period:
        description: 'Is this a daily or weekly backup?'
        required: true
        type: string
  repository_dispatch:
    types:
      - 'oracle-db-backup-success'
      - 'oracle-db-backup-failure'
# Token permissions for this workflow: read access to the repository contents and to
# packages (the docker image), plus the ability to request an OIDC token.
permissions:
  id-token: write  # required for requesting the JWT
  contents: read
  packages: read
jobs: | |
# First job: works out which RMAN target to act on and what stage the backup is at,
# and publishes the results as job outputs.
build_rman_target_name:
  runs-on: ubuntu-latest
  outputs:
    RmanTarget: ${{ steps.preparermantargetname.outputs.RmanTarget }}
    # Published with a "-preapproved" suffix appended to the environment name.
    TargetEnvironment: ${{ steps.preparermantargetname.outputs.TargetEnvironment }}-preapproved
    BackupStatus: ${{ steps.preparermantargetname.outputs.BackupStatus }}
    BackupStandbyOnPrimaryFailure: ${{ steps.preparermantargetname.outputs.BackupStandbyOnPrimaryFailure }}
    SourceCodeVersion: ${{ steps.preparermantargetname.outputs.SourceCodeVersion }}
    SourceConfigVersion: ${{ steps.preparermantargetname.outputs.SourceConfigVersion }}
    Inputs: ${{ steps.preparermantargetname.outputs.Inputs }}
    SlackChannel: ${{ steps.prepare-slack-channel-name.outputs.SlackChannel }}
    IsRetryJob: ${{ steps.preparermantargetname.outputs.IsRetryJob }}
    # The preparation step writes EnableTrace to its step outputs; expose it at job level
    # as well so that the value is not dropped at the job boundary.
    EnableTrace: ${{ steps.preparermantargetname.outputs.EnableTrace }}
  steps:
- name: Prepare Rman Target
  id: preparermantargetname
  # Event data is passed to the script through environment variables instead of being
  # interpolated into the shell source, so quotes or shell metacharacters in free-text
  # inputs and JSON payloads cannot break (or inject commands into) the script.
  env:
    EVENT_NAME: ${{ github.event_name }}
    EVENT_ACTION: ${{ github.event.action }}
    PARAMETERS_JSON: ${{ github.event.inputs.ParametersJSON }}
    CLIENT_PAYLOAD: ${{ toJson(github.event.client_payload) }}
    DISPATCH_TARGET_HOST: ${{ github.event.inputs.TargetHost }}
    DISPATCH_TARGET_ENVIRONMENT: ${{ github.event.inputs.TargetEnvironment }}
    DISPATCH_PERIOD: ${{ github.event.inputs.Period }}
    DISPATCH_BACKUP_STANDBY_ON_PRIMARY_FAILURE: ${{ github.event.inputs.BackupStandbyOnPrimaryFailure }}
    DISPATCH_FIX_ABSENT_CHUNKS: ${{ github.event.inputs.FixAbsentChunks }}
    DISPATCH_ENABLE_TRACE: ${{ github.event.inputs.EnableTrace }}
    DISPATCH_VERBOSE_OUTPUT: ${{ github.event.inputs.VerboseOutput }}
    DISPATCH_SOURCE_CODE_VERSION: ${{ github.event.inputs.SourceCodeVersion }}
    DISPATCH_SOURCE_CONFIG_VERSION: ${{ github.event.inputs.SourceConfigVersion }}
    CALL_TARGET_HOST: ${{ inputs.TargetHost }}
    CALL_TARGET_ENVIRONMENT: ${{ inputs.TargetEnvironment }}
    CALL_PERIOD: ${{ inputs.Period }}
  run: |
    # Determine the inventory name for the RMAN target on which the backup should be run.
    #   $1 - TargetEnvironment (e.g. delius-core-dev)
    #   $2 - TargetHost        (e.g. delius_primarydb)
    # The result is stored in the global variable RmanTarget.
    function set_rman_target()
    {
      # NOTE(review): the offered environment names "delius-core-preprod" and
      # "delius-mis-preprod" contain neither "pre-prod" nor "-prod", so they are only
      # affected by the final hyphen-to-underscore rule. Confirm that this matches the
      # inventory group names before changing the expression.
      RmanTarget="environment_name_$(echo "$1" | sed 's/dev/development_dev/;s/test/test_test/;s/training/test_training/;s/stage/preproduction_stage/;s/pre-prod/preproduction_pre_prod/;s/-prod/_production_prod/;s/-/_/g')_$2"
    }

    if [[ "$EVENT_NAME" != "repository_dispatch" ]] && [[ -z "$PARAMETERS_JSON" ]]; then
      # Fresh backup request: take the parameters from the workflow inputs.
      if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
        TargetHost="$DISPATCH_TARGET_HOST"
        TargetEnvironment="$DISPATCH_TARGET_ENVIRONMENT"
        Period="$DISPATCH_PERIOD"
        BackupStandbyOnPrimaryFailure="$DISPATCH_BACKUP_STANDBY_ON_PRIMARY_FAILURE"
        FixAbsentChunks="$DISPATCH_FIX_ABSENT_CHUNKS"
        EnableTrace="$DISPATCH_ENABLE_TRACE"
        VerboseOutput="$DISPATCH_VERBOSE_OUTPUT"
        SourceCodeVersion="$DISPATCH_SOURCE_CODE_VERSION"
        SourceConfigVersion="$DISPATCH_SOURCE_CONFIG_VERSION"
      elif [[ "$EVENT_NAME" == "schedule" ]]; then
        TargetHost="$CALL_TARGET_HOST"
        TargetEnvironment="$CALL_TARGET_ENVIRONMENT"
        Period="$CALL_PERIOD"
        BackupStandbyOnPrimaryFailure="yes"
      fi
      BackupStandbyOnPrimaryFailure="${BackupStandbyOnPrimaryFailure:-yes}"
      set_rman_target "$TargetEnvironment" "$TargetHost"
      # Build a JSON document capturing all the inputs which have been supplied.
      # The TargetHost is the type of the host, e.g. delius_primarydb.
      # The RmanTarget is the full group name including both the environment name and the
      # database type, and may be updated during the course of the backup if it is retried
      # on a standby database.
      # Note that we deliberately exclude the EnableTrace attribute from the JSON since
      # GitHub currently only supports 10 elements and this was deemed the least important.
      # This means that we cannot activate RMAN trace on retry backups (only on the initial
      # backup). However, retry backups are expected to be rare, so the requirement for
      # tracing them is very limited.
      # jq performs the JSON escaping, so unusual characters in a value cannot produce
      # a malformed document.
      Inputs=$(jq -n \
        --arg TargetEnvironment "$TargetEnvironment" \
        --arg TargetHost "$TargetHost" \
        --arg RmanTarget "$RmanTarget" \
        --arg Period "$Period" \
        --arg BackupStandbyOnPrimaryFailure "$BackupStandbyOnPrimaryFailure" \
        --arg FixAbsentChunks "${FixAbsentChunks:-yes}" \
        --arg VerboseOutput "$VerboseOutput" \
        --arg SourceCodeVersion "$SourceCodeVersion" \
        --arg SourceConfigVersion "$SourceConfigVersion" \
        '{
          TargetEnvironment: $TargetEnvironment,
          TargetHost: $TargetHost,
          RmanTarget: $RmanTarget,
          Period: $Period,
          BackupStandbyOnPrimaryFailure: $BackupStandbyOnPrimaryFailure,
          FixAbsentChunks: $FixAbsentChunks,
          VerboseOutput: $VerboseOutput,
          SourceCodeVersion: $SourceCodeVersion,
          SourceConfigVersion: $SourceConfigVersion,
          Phase: "Initialising Backup"
        }')
    else
      # We come here if we have either:
      # (1) Responded to a Repository Dispatch event, or
      # (2) Supplied ParametersJSON as an input
      #     (ParametersJSON is only intended for test purposes; we can supply JSON from
      #      the command line to simulate it being passed as the payload of a
      #      Repository Dispatch event)
      # In this case, the incoming JSON Inputs override any other command line parameters.
      if [[ "$EVENT_NAME" == "repository_dispatch" ]]; then
        echo "Responding to Repository Dispatch event"
        Inputs=$(echo "$CLIENT_PAYLOAD" | jq -r '.')
        Action="$EVENT_ACTION"
      else
        echo "Overriding command line with supplied JSON inputs"
        Inputs="$PARAMETERS_JSON"
        Action=$(echo "$Inputs" | jq -r '.BackupAction // "unknown"')
        if [[ "$Action" == "unknown" ]]; then
          echo "Action is not specified in JSON inputs"
          exit 6
        fi
      fi

      if [[ "$Action" == "oracle-db-backup-success" ]]; then
        BackupStatus="success"
        # If the backup has been successful then we validate the presence of all
        # of the backup chunks in the S3 backup bucket.
        Inputs=$(echo "$Inputs" | jq '.Phase = "Validating Chunks"')
      else
        echo "Backup has failed"
        # If the backup has failed then we decide what to do next. If
        # BackupStandbyOnPrimaryFailure has the value of "yes" then we attempt to retry
        # the backup on the next host, i.e.:
        #   1. If the backup was running on the 1st Standby then retry on the 2nd Standby.
        #   2. If the backup was running on the Primary then retry on the 1st Standby.
        # We do not check whether the Standby databases exist at this point, since that
        # requires checking out the inventory. We simply decide which host we would use
        # next assuming that it exists; the subsequent job fails if it does not.
        BackupStatus="failed"
        RetryOnStandby=$(echo "$Inputs" | jq -r '.BackupStandbyOnPrimaryFailure // "yes"')
        echo "BackupOnStandby: $RetryOnStandby"
        if [[ "$RetryOnStandby" == "yes" ]]; then
          if [[ $(echo "$Inputs" | jq -r '.RmanTarget') =~ standbydb2 ]]; then
            echo "No Further Retry Targets Available"
            BackupStatus="fatal"
          else
            Inputs=$(echo "$Inputs" | jq '.Phase = "Retrying Failed Backup on Standby"')
            echo "Selecting Retry Target"
            # Move to the next host in sequence. The order matters: standbydb1 must be
            # promoted to standbydb2 before primarydb is rewritten to standbydb1.
            Inputs=$(echo "$Inputs" | jq '.RmanTarget |= gsub("standbydb1"; "standbydb2")')
            Inputs=$(echo "$Inputs" | jq '.RmanTarget |= gsub("primarydb"; "standbydb1")')
            # The IsRetryJob parameter is simply used for informational purposes to
            # indicate a job which was attempted on the originally selected host but had
            # to be retried on another host due to a failure.
            Inputs=$(echo "$Inputs" | jq '.IsRetryJob = "yes"')
          fi
        fi
      fi

      # Recover the working values from the JSON. "// empty" turns a missing key into an
      # empty string (plain "jq -r" would print the literal text "null").
      TargetEnvironment=$(echo "$Inputs" | jq -r '.TargetEnvironment // empty')
      TargetHost=$(echo "$Inputs" | jq -r '.TargetHost // empty')
      Period=$(echo "$Inputs" | jq -r '.Period // empty')
      IsRetryJob=$(echo "$Inputs" | jq -r '.IsRetryJob // "no"')
      BackupStandbyOnPrimaryFailure=$(echo "$Inputs" | jq -r '.BackupStandbyOnPrimaryFailure // "yes"')
      SourceCodeVersion=$(echo "$Inputs" | jq -r '.SourceCodeVersion // empty')
      SourceConfigVersion=$(echo "$Inputs" | jq -r '.SourceConfigVersion // empty')

      # The following 2 parameters are mandatory; abort if they are missing from JSON inputs.
      if [[ -z "$TargetEnvironment" ]]; then
        echo "Target Environment is not specified in JSON inputs"
        exit 3
      fi
      if [[ -z "$Period" ]]; then
        echo "Backup Period (daily/weekly) is not specified in JSON inputs"
        exit 4
      fi

      RmanTarget=$(echo "$Inputs" | jq -r '.RmanTarget // empty')
      if [[ -z "$RmanTarget" ]]; then
        # We calculate the inventory name for the RMAN target by combining the environment
        # name and desired host name. This is also added back into the Inputs JSON.
        set_rman_target "$TargetEnvironment" "$TargetHost"
        Inputs=$(echo "$Inputs" | jq --arg RMANTARGET "$RmanTarget" '.RmanTarget = $RMANTARGET')
      else
        echo "$RmanTarget"
      fi
    fi

    echo "RmanTarget: $RmanTarget"
    echo "Inputs: $Inputs"
    # The Inputs document is published as a single-line JSON-encoded string.
    EncodedInputs=$(echo "$Inputs" | jq @json)
    echo "Inputs=$EncodedInputs"
    {
      echo "RmanTarget=$RmanTarget"
      echo "TargetEnvironment=$TargetEnvironment"
      echo "BackupStatus=$BackupStatus"
      echo "BackupStandbyOnPrimaryFailure=$BackupStandbyOnPrimaryFailure"
      echo "Inputs=$EncodedInputs"
      echo "EnableTrace=${EnableTrace:-no}"
      echo "SourceCodeVersion=$SourceCodeVersion"
      echo "SourceConfigVersion=$SourceConfigVersion"
      echo "IsRetryJob=$IsRetryJob"
    } >> "$GITHUB_OUTPUT"
- name: Prepare Slack Channel Name
  id: prepare-slack-channel-name
  # Use the environment resolved by the previous step. The workflow_dispatch inputs are
  # only populated for manual runs, so relying on them would route scheduled and
  # repository_dispatch runs for production to the non-production channel.
  env:
    RESOLVED_TARGET_ENVIRONMENT: ${{ steps.preparermantargetname.outputs.TargetEnvironment }}
  run: |
    # Only delius-core-prod reports to the production alerts channel.
    if [[ "$RESOLVED_TARGET_ENVIRONMENT" == "delius-core-prod" ]]; then
      echo "SlackChannel=delius-aws-oracle-prod-alerts" >> "$GITHUB_OUTPUT"
    else
      echo "SlackChannel=delius-aws-oracle-dev-alerts" >> "$GITHUB_OUTPUT"
    fi
# Second job: runs inside the operational-automation container and either starts the
# backup, validates a completed backup, or records a failure, depending on BackupStatus.
oracle-backup:
  name: oracle-backup
  needs: build_rman_target_name
  environment: ${{needs.build_rman_target_name.outputs.TargetEnvironment}}
  runs-on: ubuntu-latest
  container:
    image: ghcr.io/ministryofjustice/hmpps-delius-operational-automation:0.78.0
  timeout-minutes: 1440
  env:
    # Ansible entry points and locations used by the steps below.
    backup_command: ansible-playbook operations/playbooks/oracle_backup/backup.yml
    validate_command: ansible-playbook operations/playbooks/oracle_backup/validate.yml
    inventory: inventory/ansible
    ansible_config: operations/playbooks/ansible.cfg
    # Values handed over from the build_rman_target_name job.
    RmanTarget: "${{needs.build_rman_target_name.outputs.RmanTarget}}"
    TargetEnvironment: "${{needs.build_rman_target_name.outputs.TargetEnvironment}}"
    SSMParameter: "/oracle-backups/${{needs.build_rman_target_name.outputs.RmanTarget}}"
    BackupStatus: "${{needs.build_rman_target_name.outputs.BackupStatus}}"
    BackupStandbyOnPrimaryFailure: "${{needs.build_rman_target_name.outputs.BackupStandbyOnPrimaryFailure}}"
    SourceCodeVersion: "${{needs.build_rman_target_name.outputs.SourceCodeVersion}}"
    SourceConfigVersion: "${{needs.build_rman_target_name.outputs.SourceConfigVersion}}"
    Inputs: "${{needs.build_rman_target_name.outputs.Inputs}}"
    SlackChannel: ${{ needs.build_rman_target_name.outputs.SlackChannel }}
    IsRetryJob: "${{needs.build_rman_target_name.outputs.IsRetryJob }}"
    # The backup step passes $EnableTrace to Ansible, so it has to be present in the job
    # environment. It evaluates to an empty string when the needed job does not publish it.
    EnableTrace: "${{ needs.build_rman_target_name.outputs.EnableTrace }}"
  continue-on-error: false
  steps:
# Sparse checkout of the backup playbooks and Ansible configuration from this
# repository into ./operations, at the requested source version.
- name: Checkout hmpps-delius-operation-automation
  uses: actions/checkout@v4
  with:
    ref: ${{ env.SourceCodeVersion }}
    path: operations
    fetch-depth: 0
    sparse-checkout-cone-mode: false
    sparse-checkout: |
      playbooks/oracle_backup
      playbooks/ansible.cfg
# Sparse checkout of the Ansible hosts and group_vars from the configuration
# management repository into ./inventory, at the requested configuration version.
- name: Checkout Ansible Inventory From modernisation-platform-configuration-management
  uses: actions/checkout@v4
  with:
    repository: ministryofjustice/modernisation-platform-configuration-management
    ref: ${{ env.SourceConfigVersion }}
    path: inventory
    fetch-depth: 0
    sparse-checkout-cone-mode: false
    sparse-checkout: |
      ansible/hosts
      ansible/group_vars
# Sparse checkout of the two shared Ansible roles from the configuration management
# repository into ./roles, at the requested configuration version.
- name: Checkout Ansible Required Roles From modernisation-platform-configuration-management
  uses: actions/checkout@v4
  with:
    repository: ministryofjustice/modernisation-platform-configuration-management
    ref: ${{ env.SourceConfigVersion }}
    path: roles
    fetch-depth: 0
    sparse-checkout-cone-mode: false
    sparse-checkout: |
      ansible/roles/secretsmanager-passwords
      ansible/roles/get-modernisation-platform-facts
# Assume the CI/CD role in the target AWS account using the workflow's OIDC token.
# The session is requested for 21600 seconds in eu-west-2.
- name: Configure AWS Credentials
  id: login-aws
  uses: aws-actions/configure-aws-credentials@v4
  with:
    aws-region: 'eu-west-2'
    role-to-assume: 'arn:aws:iam::${{ vars.AWS_ACCOUNT_ID }}:role/modernisation-platform-oidc-cicd'
    role-session-name: 'hmpps-delius-operational-automation-${{ github.run_number }}'
    role-duration-seconds: 21600
- name: Get Slack Token
  id: get-slack-token
  run: |
    # The OEM Account ID which is associated with this Delius account is held in the SSM Parameter Store.
    # The parameter value is itself JSON: strip the outer string encoding, then pick the
    # value of the first entry whose key contains "hmpps-oem-".
    OEM_ACCOUNT_ID=$(aws ssm get-parameter --name account_ids --region eu-west-2 --with-decryption --output json | \
      jq '.Parameter.Value' | tr -d '\' | sed 's/^\"//' | sed 's/\"$//' | \
      jq -r 'to_entries | map(select(.key | contains("hmpps-oem-"))) | first' | jq -r '.value' )
    # We can then access the Slack Token from the AWS Secrets for that OEM Account
    SECRET_ARN="arn:aws:secretsmanager:eu-west-2:${OEM_ACCOUNT_ID}:secret:/oracle/database/EMREP/shared-passwords"
    SECRET_VALUE=$(aws secretsmanager get-secret-value --secret-id "${SECRET_ARN}" --query SecretString --output json)
    SLACK_TOKEN=$(echo ${SECRET_VALUE} | jq -r | jq -r 'to_entries[] | select(.key=="slack_token").value')
    # A value fetched at run time is not known to the runner as a secret, so register it
    # for masking before it is stored in a step output that later steps expand.
    if [ -n "${SLACK_TOKEN}" ]; then
      echo "::add-mask::${SLACK_TOKEN}"
    fi
    echo "slack_token=${SLACK_TOKEN}" >> $GITHUB_OUTPUT
- name: Check And Set Backup Runtime
  id: check-and-set-backup-runtime
  shell: bash
  run: |
    # Set .Phase in the JSON-encoded Inputs string.
    #   $1 - new phase text
    # The updated value replaces the shell variable Inputs and is also written to
    # GITHUB_ENV so that later steps (in particular the failure-recording step, which
    # inspects the phase) see it rather than the value the job started with.
    function set_phase()
    {
      # jq "=" assigns the field; "==" would only compare and yield true/false.
      Inputs=$(echo "$Inputs" | jq -r '.' | jq --arg PHASE "$1" '.Phase = $PHASE' | jq @json)
      printf 'Inputs=%s\n' "$Inputs" >> "$GITHUB_ENV"
    }

    if [[ "$BackupStatus" == "fatal" ]]; then
      echo "All Possible Backup Retry Targets Exhausted"
      set_phase "Failed And No More Retry Targets Remain"
      aws ssm put-parameter --region ${AWS_REGION} --name "$SSMParameter" --type String --overwrite --value "$(echo "$Inputs" | jq -r '.')"
      exit 11
    fi

    # The SSM Parameter store is used to record information about a backup run. This is
    # mostly done simply for informational purposes and is not used to marshal calls to
    # steps in the backup (those are done instead by repository_dispatch events).
    # However, the ONE case where we DO act on the contents of the parameter store is to
    # abort a backup if there is already a backup running on the same host. This is
    # detected if the previous backup is not at the Inactive, Done or Failed phase.
    # (Ignore errors fetching the parameter - this happens if the backup has not been run
    #  before: we just default this to "Inactive")
    RUNTIME=$(aws ssm get-parameter --region ${AWS_REGION} --name "$SSMParameter" --query "Parameter.Value" --output text) || true
    if echo $RUNTIME | jq -r 2>/dev/null | jq 'if type == "object" then "VALID_JSON_OBJECT" else "not" end' | grep -q VALID_JSON_OBJECT; then
      PHASE=$(echo ${RUNTIME:-"{}"} | jq -r '.Phase // "Inactive"')
    else
      echo "Invalid JSON in $SSMParameter - resetting to Inactive"
      SSMParameterValue="{\"Phase\":\"Inactive\"}"
      aws ssm put-parameter --region ${AWS_REGION} --name "$SSMParameter" --type String --overwrite --value "$SSMParameterValue"
      PHASE="Inactive"
    fi

    # Initialize the Backup Status for this RMAN Backup Target
    if [[ "${{ github.event_name }}" != "repository_dispatch" ]]; then
      # If this is a scheduled or manually started job then check that it does not
      # conflict with one which is already in progress.
      # This check is done by looking at the current phase of the backup as written to
      # the SSM parameter store.
      if [[ $PHASE != "Inactive" ]] && [[ $PHASE != "Done" ]] && [[ ! $PHASE =~ Failed ]]; then
        # If we have detected what looks like a backup running concurrently, also check
        # the last time that the SSM parameter was updated. If it was over 23 hours ago
        # (82800 seconds) then we can assume that the previous backup has failed in such
        # a way that it was unable to update the SSM parameter first, so we assume it is
        # no longer running and start the new backup anyway.
        # NOTE(review): the arithmetic below assumes LastModifiedDate is printed as epoch
        # seconds; confirm the timestamp format of the AWS CLI in the container image.
        LAST_UPDATED=$(aws ssm get-parameter --region ${AWS_REGION} --name "$SSMParameter" --query "Parameter.LastModifiedDate" --output text | cut -d. -f1)
        CURRENT_EPOCH=$(date +%s)
        if [[ $((CURRENT_EPOCH-LAST_UPDATED)) -le 82800 ]]; then
          echo "Backup is already active on this host. Multiple concurrent runs are not allowed."
          # The SSM parameter is deliberately left alone here: it belongs to the run
          # which is already in progress.
          set_phase "Concurrent Backup Attempt"
          exit 1
        fi
      fi
    fi

    aws ssm put-parameter --region ${AWS_REGION} --name "$SSMParameter" --type String --overwrite \
      --value "$(echo "$Inputs" | jq -r '.')" \
      --description "Runtime phase and status details of RMAN backups running on the $RmanTarget host"
- name: Start Ansible Backup On Selected Host
  if: env.BackupStatus != 'success'
  id: backup
  continue-on-error: false
  shell: bash
  run: |
    # This step starts running the backup by calling the Ansible backup play
    # with the appropriate inputs.
    # This play is intended to run asynchronously, so once it calls the rman_backup.sh shell
    # script, it does not wait for the script to complete. This is because:
    # (1) Some backups can exceed the maximum 6 hour limit for a job on a hosted GitHub Runner
    # (2) GitHub Runners are billed all the time they are active, even if they are not doing
    #     anything. Running the backup asynchronously allows the runner to complete as soon
    #     as the backup starts and therefore saves on hosting costs.
    # The backup script, rman_backup.sh, is itself responsible for raising a
    # repository_dispatch event to re-enter this workflow so steps after the backup can
    # continue, whether the backup has succeeded or failed.

    # Record a terminal failure phase and stop the step.
    #   $1 - phase text to store in .Phase
    #   $2 - exit code for the step
    # The updated Inputs are written both to the SSM parameter and to GITHUB_ENV, so that
    # later steps work from the recorded phase rather than the one the job started with.
    function abort_with_phase()
    {
      Inputs=$(echo "$Inputs" | jq -r '.' | jq --arg PHASE "$1" '.Phase = $PHASE' | jq @json)
      printf 'Inputs=%s\n' "$Inputs" >> "$GITHUB_ENV"
      aws ssm put-parameter --region ${AWS_REGION} --name "$SSMParameter" --type String --overwrite --value "$(echo "$Inputs" | jq -r '.')"
      exit "$2"
    }

    echo "Period: "$(echo "$Inputs" | jq -r | jq -r '.Period' )

    # The following section determines what happens if the previous attempt to run the
    # backup has failed:
    # If BackupStandbyOnPrimaryFailure is set to "yes" and a Standby database exists, then
    # the backup may be retried on the Standby. Otherwise the job aborts with a failure
    # since there is nowhere else where the backup may be attempted.
    echo "BackupStatus: $BackupStatus"
    echo "BackupStandbyOnPrimaryFailure: $BackupStandbyOnPrimaryFailure"
    echo "RmanTarget: $RmanTarget"
    if [[ "$BackupStatus" == "failed" ]]; then
      echo "The Backup Failed"
    fi

    # Informational only: report which standby (if any) is targeted and whether its
    # group_vars file is present in the checked-out inventory.
    if [[ "$RmanTarget" =~ "standbydb1" ]]; then
      echo "Target is 1st Standby"
      echo "Standby Config File is ${inventory}/group_vars/${RmanTarget/primarydb/standbydb1}.yml"
      if [[ ! -e ${inventory}/group_vars/${RmanTarget/primarydb/standbydb1}.yml ]]; then
        echo "Standby config file does not exist"
      fi
    fi
    if [[ "$RmanTarget" =~ "standbydb2" ]]; then
      echo "Target is 2nd Standby"
      echo "Standby Config File is ${inventory}/group_vars/${RmanTarget/primarydb/standbydb2}.yml"
      if [[ ! -e ${inventory}/group_vars/${RmanTarget/primarydb/standbydb2}.yml ]]; then
        echo "Standby config file does not exist"
      fi
    fi

    if [[ "$BackupStatus" == "failed" ]]; then
      if [[ "$BackupStandbyOnPrimaryFailure" == "no" ]]; then
        echo "Backup Failed And No Retry Allowed"
        abort_with_phase "Failed And No Retry Allowed" 10
      else
        if [[ "$RmanTarget" =~ "standbydb1" ]] && [[ ! -e ${inventory}/group_vars/${RmanTarget/primarydb/standbydb1}.yml ]]; then
          echo "Failed And No Standby Exists"
          abort_with_phase "Failed And No Standby Exists" 20
        fi
        if [[ "$RmanTarget" =~ "standbydb2" ]] && [[ ! -e ${inventory}/group_vars/${RmanTarget/primarydb/standbydb2}.yml ]]; then
          echo "Failed on First Standby And No Second Standby Exists"
          abort_with_phase "Failed on First Standby And No Second Standby Exists" 30
        fi
      fi
    fi

    export ANSIBLE_CONFIG=$ansible_config
    # A symlink is used to access the Backup Role as if it were a standalone Ansible Role
    ln -s $PWD/roles/ansible/roles $PWD/operations/playbooks/oracle_backup/roles
    # EnableTrace falls back to "no" (the default of the workflow input) when the
    # variable is not present in this job's environment, instead of passing an empty value.
    $backup_command -i $inventory \
      -e ansible_aws_ssm_bucket_name=${{ vars.ANSIBLE_AWS_SSM_BUCKET_NAME }} \
      -e rman_target=$RmanTarget \
      -e daily_weekly=$(echo "$Inputs" | jq -r | jq -r '.Period' ) \
      -e enable_trace=${EnableTrace:-no} \
      -e ssm_parameter=$SSMParameter \
      -e repository_dispatch="${{ github.repository }}" \
      -e json_inputs="$Inputs" \
      $(echo "$Inputs" | jq -r | jq -r '.VerboseOutput // ""')
# Tell the Slack channel that a failed backup is being retried on another host.
- name: Report Backup Retry Attempt
  id: slack-retry
  if: env.BackupStatus == 'failed'
  # NOTE(review): this action reference looks as if it was altered by e-mail address
  # obfuscation when the file was copied; restore the real "owner/action@version" value.
  uses: slackapi/[email protected]
  env:
    SLACK_BOT_TOKEN: ${{ steps.get-slack-token.outputs.slack_token }}
  with:
    channel-id: ${{ env.SlackChannel }}
    payload: |
      {"username":"Retrying Failed Backup on Standby","icon_emoji":"spinning-circle-of-death","text":"Retrying backup on: ${{ env.RmanTarget }}"}
# Successful backups are only announced when they had to be retried after an earlier
# failure; routine successes are not reported, to keep the channel free of clutter.
- name: Report Backup Retry Attempt Success
  id: slack-retry-success
  if: env.BackupStatus == 'success' && env.IsRetryJob == 'yes'
  # NOTE(review): this action reference looks as if it was altered by e-mail address
  # obfuscation when the file was copied; restore the real "owner/action@version" value.
  uses: slackapi/[email protected]
  env:
    SLACK_BOT_TOKEN: ${{ steps.get-slack-token.outputs.slack_token }}
  with:
    channel-id: ${{ env.SlackChannel }}
    payload: |
      {"username":"Retrying Failed Backup on Standby Successful","icon_emoji":"github-tick","text":"Retrying Failed Backup on Standby Successful for: ${{ env.RmanTarget }}"}
- name: Run Ansible Validate And Fix Absent Chunks
  if: env.BackupStatus == 'success'
  run: |
    # This step is only reached through a repository_dispatch event reporting a successful
    # backup. A second Ansible play then checks that every backup chunk piece is present
    # in the S3 bucket. It is a precaution following earlier incidents in which the backup
    # metadata did not match the available chunks. That has not been seen on more recent
    # versions of OSBWS and the bug is believed to be fixed; however, as Oracle was unable
    # to identify the root cause, the check stays active just in case.
    # Unlike the backup play, this play is not asynchronous, so it is not expected to run
    # into time limits.
    export ANSIBLE_CONFIG=$ansible_config
    # Expose the checked-out roles to the playbook as its local roles directory.
    ln -s $PWD/roles/ansible/roles $PWD/operations/playbooks/oracle_backup/roles
    $validate_command -i $inventory \
      -e ansible_aws_ssm_bucket_name=${{ vars.ANSIBLE_AWS_SSM_BUCKET_NAME }} \
      -e rman_target=$RmanTarget \
      -e fix_absent_chunks=$(echo "$Inputs" | jq -r | jq -r '.FixAbsentChunks // "yes"') \
      $(echo "$Inputs" | jq -r | jq -r '.VerboseOutput // ""')
- name: Record Job Completed in SSM Parameter Store
  if: env.BackupStatus == 'success'
  run: |
    # Mark the run as finished: set the phase to "Done" and store the resulting
    # document in the SSM parameter for this RMAN target.
    CompletedInputs=$(echo $Inputs | jq -r | jq '.Phase = "Done"' | jq @json)
    aws ssm put-parameter --region ${AWS_REGION} --name "$SSMParameter" --type String --overwrite \
      --value "$(echo $CompletedInputs | jq -r)"
- name: Record Job Failure in SSM Parameter Store
  if: failure()
  shell: bash
  run: |
    # Append a failure marker to the current phase.
    FailedInputs=$(echo $Inputs | jq -r | jq '.Phase += " -- Failed"' | jq @json)
    FailedPhase=$(echo $FailedInputs | jq -r | jq '.Phase')
    # A failure caused by a concurrent backup attempt must not update the SSM parameter,
    # because that would overwrite the value belonging to the original run.
    if [[ ! $FailedPhase =~ Concurrent ]]; then
      aws ssm put-parameter --region ${AWS_REGION} --name "$SSMParameter" --type String --overwrite \
        --value "$(echo $FailedInputs | jq -r)"
    fi
# Post a failure notice to Slack whenever an earlier step of this job has failed. The
# message links to the workflow run and names the job, repository and RMAN target.
- name: Slack Failure Notification
  id: slack-failure
  if: failure()
  # NOTE(review): this action reference looks as if it was altered by e-mail address
  # obfuscation when the file was copied; restore the real "owner/action@version" value.
  uses: slackapi/[email protected]
  env:
    SLACK_BOT_TOKEN: ${{ steps.get-slack-token.outputs.slack_token }}
  with:
    channel-id: ${{ env.SlackChannel }}
    payload: |
      {"username":"Failed Backup","icon_emoji":"red_circle","text":"Failed Backup","blocks":[{"type": "section","text": {"type": "mrkdwn","text": "Failed Backup:"}},{"type": "section","fields":[{"type": "mrkdwn","text": "*Workflow:*\n<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ github.workflow }}>"},{"type": "mrkdwn","text": "*Job:*\n${{ github.job }}"},{"type": "mrkdwn","text": "*Repo:*\n${{ github.repository }}"},{"type": "mrkdwn","text": "*Rman Target:*\n${{ env.RmanTarget }}"}]}]}