Skip to content

Commit

Permalink
[ci] add cloudwatch metrics for scheduled workflow failures (#2966)
Browse files Browse the repository at this point in the history
  • Loading branch information
siddvenk authored Jan 26, 2024
1 parent 9a52d3d commit e3c6238
Show file tree
Hide file tree
Showing 11 changed files with 98 additions and 1 deletion.
7 changes: 7 additions & 0 deletions .github/workflows/codeql-analysis-java.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,10 @@ jobs:

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2

# Runs after `analyze` whatever its outcome (`if: always()`) and calls the
# shared publish-job-success workflow with this workflow's metric name.
publish-success-metric:
  if: always()
  needs:
    - analyze
  uses: ./.github/workflows/publish-job-success.yml
  with:
    metric-name: DJL-CodeQL-Failure
9 changes: 8 additions & 1 deletion .github/workflows/docker_publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,4 +56,11 @@ jobs:
context: .
file: docker/spark/Dockerfile
build-args: DJL_VERSION=${DJL_VERSION}
tags: deepjavalibrary/djl-spark:${{ env.DJL_VERSION }}-cpu
tags: deepjavalibrary/djl-spark:${{ env.DJL_VERSION }}-cpu

# Runs after `publish` whatever its outcome (`if: always()`) and calls the
# shared publish-job-success workflow with this workflow's metric name.
publish-success-metric:
  if: always()
  needs:
    - publish
  uses: ./.github/workflows/publish-job-success.yml
  with:
    metric-name: DJL-SparkDockerPublish-Failure
7 changes: 7 additions & 0 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,10 @@ jobs:
run: |
aws s3 sync ../site s3://djl-ai/documentation/nightly --delete
aws cloudfront create-invalidation --distribution-id E733IIDCG0G5U --paths "/*"
# Runs after `documentation` whatever its outcome (`if: always()`) and calls
# the shared publish-job-success workflow with this workflow's metric name.
publish-success-metric:
  if: always()
  needs:
    - documentation
  uses: ./.github/workflows/publish-job-success.yml
  with:
    metric-name: DJL-DocumentationPublish-Failure
7 changes: 7 additions & 0 deletions .github/workflows/native_jni_s3_paddle.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,10 @@ jobs:
PADDLE_VERSION="$(cat gradle.properties | awk -F '=' '/paddlepaddle_version/ {print $2}')"
aws s3 sync jnilib s3://djl-ai/publish/paddlepaddle-${PADDLE_VERSION}/jnilib
aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/paddlepaddle-${PADDLE_VERSION}/jnilib*"
# Runs after `publish` whatever its outcome (`if: always()`) and calls the
# shared publish-job-success workflow with this workflow's metric name.
publish-success-metric:
  if: always()
  needs:
    - publish
  uses: ./.github/workflows/publish-job-success.yml
  with:
    metric-name: DJL-NativeJNIPaddleS3Publish-Failure
7 changes: 7 additions & 0 deletions .github/workflows/native_jni_s3_pytorch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -306,3 +306,10 @@ jobs:
cd /home/ubuntu/djl_benchmark_script/scripts
instance_id=${{ needs.create-aarch64-runner.outputs.aarch64_instance_id }}
./stop_instance.sh $instance_id
# Waits for every JNI build job plus `stop-runners`, runs whatever their
# outcome (`if: always()`), and calls the shared publish-job-success workflow
# with this workflow's metric name.
publish-success-metric:
  if: always()
  needs:
    - build-pytorch-jni-macos
    - build-pytorch-jni-linux
    - build-pytorch-jni-precxx11
    - build-pytorch-jni-windows
    - build-pytorch-jni-arm64-macos
    - stop-runners
  uses: ./.github/workflows/publish-job-success.yml
  with:
    metric-name: DJL-NativeJNIPytorchS3Publish-Failure
7 changes: 7 additions & 0 deletions .github/workflows/native_jni_s3_pytorch_android.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,10 @@ jobs:
PYTORCH_VERSION=${PYTORCH_VERSION:-$(cat gradle.properties | awk -F '=' '/pytorch_version/ {print $2}')}
aws s3 sync engines/pytorch/pytorch-native/jnilib s3://djl-ai/publish/pytorch/${PYTORCH_VERSION}/jnilib
aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/pytorch/${PYTORCH_VERSION}/jnilib*"
# Runs after `build-pytorch-jni-android` whatever its outcome (`if: always()`)
# and calls the shared publish-job-success workflow with this workflow's
# metric name.
publish-success-metric:
  if: always()
  needs:
    - build-pytorch-jni-android
  uses: ./.github/workflows/publish-job-success.yml
  with:
    metric-name: DJL-NativeJNIPytorchAndroidS3Publish-Failure
7 changes: 7 additions & 0 deletions .github/workflows/nightly_android.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,10 @@ jobs:
emulator-options: -no-snapshot-save -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim -camera-back none
disable-animations: true
script: cd android/core && ./gradlew cAT

# Runs after `build` whatever its outcome (`if: always()`) and calls the
# shared publish-job-success workflow with this workflow's metric name.
publish-success-metric:
  if: always()
  needs:
    - build
  uses: ./.github/workflows/publish-job-success.yml
  with:
    metric-name: DJL-AndroidIntegrationTests-Failure
7 changes: 7 additions & 0 deletions .github/workflows/nightly_publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -255,3 +255,10 @@ jobs:
cd /home/ubuntu/djl_benchmark_script/scripts
instance_id=${{ needs.create-runners.outputs.gpu_1_instance_id }}
./stop_instance.sh $instance_id
# Waits for `publish` and `stop-runners`, runs whatever their outcome
# (`if: always()`), and calls the shared publish-job-success workflow with
# this workflow's metric name.
publish-success-metric:
  if: always()
  needs:
    - publish
    - stop-runners
  uses: ./.github/workflows/publish-job-success.yml
  with:
    metric-name: DJL-NightlyIntegrationTestsPublish-Failure
27 changes: 27 additions & 0 deletions .github/workflows/publish-job-success.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Reusable workflow that publishes a failure metric to CloudWatch
# (namespace `GithubCI`): value 0 when the reported status is "success",
# 1 otherwise. Only scheduled runs publish a metric.
name: Publish Job Success Metric to CloudWatch

on:
  workflow_call:
    inputs:
      metric-name:
        description: "The name of the job to publish a metric for"
        type: string
        required: true
      job-status:
        # Optional, so existing callers keep working unchanged. Callers that
        # want the metric to reflect their own jobs should pass a result such
        # as `needs.<job>.result`.
        description: >-
          Result of the calling workflow's job(s). When omitted, the status
          of this workflow's own job is used.
        type: string
        required: false
        default: ""

jobs:
  publish-job-success-to-cloudwatch:
    if: ${{ github.event_name == 'schedule' }}
    runs-on: [ self-hosted, scheduler ]
    steps:
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v2
        with:
          aws-region: us-west-2
      - name: Publish Job Success Metric
        env:
          # Passed through the environment rather than interpolated into the
          # script, so the values cannot be re-parsed as shell syntax.
          METRIC_NAME: ${{ inputs.metric-name }}
          # NOTE(review): `job.status` is the status of THIS job, not of the
          # caller's jobs, so the fallback is "success" whenever this step is
          # reached. Pass `job-status` from the caller to report real failures.
          JOB_STATUS: ${{ inputs.job-status || job.status }}
        run: |
          # The default shell on non-Windows runners is `bash -e`, where a bare
          # failing `[[ ... ]]` aborts the script before the metric is sent.
          # An `if` keeps the non-success path alive.
          if [[ "$JOB_STATUS" == "success" ]]; then
            failedBuild=0
          else
            failedBuild=1
          fi
          aws cloudwatch put-metric-data --namespace GithubCI \
            --metric-name "$METRIC_NAME" \
            --value "$failedBuild" \
            --unit Count
7 changes: 7 additions & 0 deletions .github/workflows/publish_android_packages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,10 @@ jobs:
ORG_GRADLE_PROJECT_signingPassword: ${{ secrets.ORG_GRADLE_PROJECT_signingPassword }}
ORG_GRADLE_PROJECT_ossrhUsername: ${{ secrets.ORG_GRADLE_PROJECT_ossrhUsername }}
ORG_GRADLE_PROJECT_ossrhPassword: ${{ secrets.ORG_GRADLE_PROJECT_ossrhPassword }}

# Runs after `release-android` whatever its outcome (`if: always()`) and calls
# the shared publish-job-success workflow with this workflow's metric name.
publish-success-metric:
  needs: [ release-android ]
  if: always()
  uses: ./.github/workflows/publish-job-success.yml
  with:
    # Stray trailing "i" removed so the name follows the `DJL-<Workflow>-Failure`
    # pattern used by the other workflows.
    metric-name: DJL-AndroidPublish-Failure
7 changes: 7 additions & 0 deletions .github/workflows/serving_publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,10 @@ jobs:
ORG_GRADLE_PROJECT_ossrhUsername: ${{ secrets.ORG_GRADLE_PROJECT_ossrhUsername }}
ORG_GRADLE_PROJECT_ossrhPassword: ${{ secrets.ORG_GRADLE_PROJECT_ossrhPassword }}
DJL_STAGING: ${{ github.event.inputs.repo-id }}

# Runs after `publish` whatever its outcome (`if: always()`) and calls the
# shared publish-job-success workflow with this workflow's metric name.
publish-success-metric:
  if: always()
  needs:
    - publish
  uses: ./.github/workflows/publish-job-success.yml
  with:
    metric-name: DJL-DJLServingPublish-Failure

0 comments on commit e3c6238

Please sign in to comment.