Skip to content

Workload Checks Suite #172

Workload Checks Suite

Workload Checks Suite #172

# Workload Checks Suite
#
# Runs Vector Workload Checks.
#
# Runs on:
# - scheduled UTC midnight Tues-Sat
# - on demand by a PR comment matching either of:
# - '/ci-run-regression'
# - '/ci-run-all'
# (the comment issuer must be a member of the Vector GH team)
#
# This workflow runs the collection of our workload checks, using the latest Vector nightly image,
# which depends on when the workflow is invoked.
#
# The goal is to establish a baseline of check results for a variety of cases
# and visualize trends for important Vector use cases.
name: Workload Checks Suite
on:
workflow_call:
workflow_dispatch:
schedule:
# At midnight UTC Tue-Sat
- cron: '0 0 * * 2-6'
env:
SINGLE_MACHINE_PERFORMANCE_API: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_API }}
jobs:
compute-metadata:
name: Compute metadata
runs-on: ubuntu-latest
timeout-minutes: 5
outputs:
replicas: ${{ steps.experimental-meta.outputs.REPLICAS }}
warmup-seconds: ${{ steps.experimental-meta.outputs.WARMUP_SECONDS }}
total-samples: ${{ steps.experimental-meta.outputs.TOTAL_SAMPLES }}
smp-version: ${{ steps.experimental-meta.outputs.SMP_CRATE_VERSION }}
lading-version: ${{ steps.experimental-meta.outputs.LADING_VERSION }}
target-sha: ${{ steps.git-metadata.outputs.TARGET_SHA }}
steps:
- uses: actions/checkout@v3
- name: Get git metadata
id: git-metadata
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
export TARGET_SHA=$(git rev-parse HEAD)
echo "TARGET_SHA=${TARGET_SHA}" >> $GITHUB_OUTPUT
echo "target sha is: ${TARGET_SHA}"
if [ "${TARGET_SHA}" = "" ] ; then
echo "TARGET_SHA not found, exiting."
exit 1
fi
- name: Setup experimental metadata
id: experimental-meta
run: |
export WARMUP_SECONDS="45"
export REPLICAS="10"
export TOTAL_SAMPLES="600"
export SMP_CRATE_VERSION="0.11.0"
export LADING_VERSION="0.19.1"
echo "warmup seconds: ${WARMUP_SECONDS}"
echo "replicas: ${REPLICAS}"
echo "total samples: ${TOTAL_SAMPLES}"
echo "smp crate version: ${SMP_CRATE_VERSION}"
echo "lading version: ${LADING_VERSION}"
echo "WARMUP_SECONDS=${WARMUP_SECONDS}" >> $GITHUB_OUTPUT
echo "REPLICAS=${REPLICAS}" >> $GITHUB_OUTPUT
echo "TOTAL_SAMPLES=${TOTAL_SAMPLES}" >> $GITHUB_OUTPUT
echo "SMP_CRATE_VERSION=${SMP_CRATE_VERSION}" >> $GITHUB_OUTPUT
echo "LADING_VERSION=${LADING_VERSION}" >> $GITHUB_OUTPUT
submit-job:
name: Submit workload checks job
runs-on: ubuntu-latest
timeout-minutes: 90
needs:
- compute-metadata
steps:
- name: Check status, in-progress
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh api \
--method POST \
-H "Accept: application/vnd.github+json" \
/repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \
-f state='pending' \
-f description='Experiments submitted to the Workload Checks cluster.' \
-f context='Workload Checks Suite / submission' \
-f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
- uses: actions/checkout@v3
- name: Configure AWS Credentials
uses: aws-actions/[email protected]
with:
aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }}
aws-region: us-west-2
- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v2
- name: Download SMP binary
run: |
aws s3 cp s3://smp-cli-releases/v${{ needs.compute-metadata.outputs.smp-version }}/x86_64-unknown-linux-gnu/smp ${{ runner.temp }}/bin/smp
- name: Submit job
env:
RUST_LOG: info
run: |
CURRENT_DATE=$(date --utc '+%Y_%m_%d')
RUST_LOG_DEBUG="debug,aws_config::profile::credentials=error"
chmod +x ${{ runner.temp }}/bin/smp
RUST_BACKTRACE=1 RUST_LOG="${RUST_LOG_DEBUG}" ${{ runner.temp }}/bin/smp \
--team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} \
job submit-workload \
--lading-version ${{ needs.compute-metadata.outputs.lading-version }} \
--total-samples ${{ needs.compute-metadata.outputs.total-samples }} \
--warmup-seconds ${{ needs.compute-metadata.outputs.warmup-seconds }} \
--replicas ${{ needs.compute-metadata.outputs.replicas }} \
--target-image timberio/vector:nightly-debian \
--target-sha ${{ needs.compute-metadata.outputs.target-sha }} \
--target-config-dir ${{ github.workspace }}/workload-checks \
--target-name vector \
--target-command "/usr/bin/vector" \
--target-environment-variables "DD_HOSTNAME=smp-workload-checks,DD_DD_URL=http://127.0.0.1:9092,DD_API_KEY=00000001" \
--tags smp_status=nightly,client_team="vector",tag_date="${CURRENT_DATE}" \
--submission-metadata ${{ runner.temp }}/submission-metadata
- uses: actions/upload-artifact@v3
with:
name: vector-submission-metadata
path: ${{ runner.temp }}/submission-metadata
- name: Await job
timeout-minutes: 120
env:
RUST_LOG: info
run: |
chmod +x ${{ runner.temp }}/bin/smp
${{ runner.temp }}/bin/smp --team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} \
job status \
--wait \
--wait-delay-seconds 60 \
--wait-timeout-minutes 90 \
--submission-metadata ${{ runner.temp }}/submission-metadata
- name: Handle cancellation if necessary
if: ${{ cancelled() }}
env:
RUST_LOG: info
run: |
chmod +x ${{ runner.temp }}/bin/smp
${{ runner.temp }}/bin/smp --team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} job cancel \
--submission-metadata ${{ runner.temp }}/submission-metadata
- name: Check status, cancelled
if: ${{ cancelled() }}
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh api \
--method POST \
-H "Accept: application/vnd.github+json" \
/repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \
-f state='failure' \
-f description='Experiments submitted to the Workload Checks cluster cancelled.' \
-f context='Workload Checks Suite / submission' \
-f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
- name: Check status, success
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh api \
--method POST \
-H "Accept: application/vnd.github+json" \
/repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \
-f state='success' \
-f description='Experiments submitted to the Workload Checks cluster successfully.' \
-f context='Workload Checks Suite / submission' \
-f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
- name: Check status, failure
if: ${{ failure() }}
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh api \
--method POST \
-H "Accept: application/vnd.github+json" \
/repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \
-f state='success' \
-f description='Experiments submitted to the Workload Checks Suite failed.' \
-f context='Workload Checks Suite / submission' \
-f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}