# soak-testing.yml
# Prerequisites:
# - An AWS account with the required IAM permissions
# - A `gh-pages` branch in this repository
# - The AWS account must contain the CloudWatch Logs log group named by
#   LOG_GROUP_NAME below
name: Soak Testing
on:
workflow_dispatch:
inputs:
target_commit_sha:
description: 'The commit SHA on this repo to use for the Soak Tests.'
required: true
test_duration_minutes:
description: 'The duration of the Soak Tests in minutes.'
required: true
default: 300
schedule:
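    # Runs daily at 15:00 UTC.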
- cron: '0 15 * * *'
env:
  # NOTE: The configuration of `APP_PROCESS_EXECUTABLE_NAME` is repo-dependent
APP_PROCESS_EXECUTABLE_NAME: python3
AWS_DEFAULT_REGION: us-east-1
DEFAULT_TEST_DURATION_MINUTES: 300
HOSTMETRICS_INTERVAL_SECS: 600
CPU_LOAD_THRESHOLD: 75
TOTAL_MEMORY_THRESHOLD: 2684354560 # 2.5 GiB
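  # (2.5 GiB = 2.5 * 1024^3 bytes = 2684354560 bytes)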
MAX_BENCHMARKS_TO_KEEP: 100
LISTEN_ADDRESS_PORT: 8080
  # TODO: We might be able to adapt the "Soak Tests" into "Overhead Tests",
  # i.e. monitor the Sample App's performance while the Load Generator drives
  # a high TPS over a shorter testing period. For example:
  # https://github.com/aws-observability/aws-otel-collector/blob/main/docs/performance_model.md
  # THROUGHPUT_PER_SECOND: TBD?
jobs:
test_apps_and_publish_results:
name: Soak Performance Test - (${{ matrix.app-platform }}, ${{ matrix.instrumentation-type }})
runs-on: ubuntu-latest
permissions:
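      # contents: write -> push snapshots and benchmark data to `gh-pages`
      # id-token: write -> OIDC token for aws-actions/configure-aws-credentials
      # issues: write   -> open issues automatically when the tests fail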
contents: write
id-token: write
issues: write
strategy:
fail-fast: false
matrix:
app-platform: [ flask ]
instrumentation-type: [ auto, manual, none ]
env:
      # NOTE: The configuration of `APP_PATH` is repo-dependent
      APP_PATH: integration-test-apps/${{ matrix.instrumentation-type }}-instrumentation/${{ matrix.app-platform }}
LOGS_NAMESPACE: ${{ github.repository }}/soak-tests-${{ matrix.app-platform }}-${{ matrix.instrumentation-type }}
steps:
# MARK: - GitHub Workflow Event Type Specific Values
- name: Use INPUT as commit SHA
if: ${{ github.event_name == 'workflow_dispatch' }}
run: |
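          # `tee --append $GITHUB_ENV` both prints the value to the step log
          # and exports it to all later steps via the GitHub Actions env file.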
echo "TARGET_SHA=${{ github.event.inputs.target_commit_sha }}" | tee --append $GITHUB_ENV;
- name: Use LATEST as commit SHA
if: ${{ github.event_name != 'workflow_dispatch' }}
run: |
echo "TARGET_SHA=${{ github.sha }}" | tee --append $GITHUB_ENV;
- name: Configure Performance Test Duration
run: |
echo "TEST_DURATION_MINUTES=${{ github.event.inputs.test_duration_minutes || env.DEFAULT_TEST_DURATION_MINUTES }}" | tee --append $GITHUB_ENV;
- name: Clone This Repo @ ${{ env.TARGET_SHA }}
uses: actions/checkout@v3
with:
ref: ${{ env.TARGET_SHA }}
# MARK: - Instrumentation-Type Specific Values
      # NOTE: The configuration of `APP_PROCESS_COMMAND_LINE_DIMENSION_VALUE`
      # is repo-dependent
- name: Configure Auto Instrumentation-Type Specific Values
if: ${{ matrix.instrumentation-type == 'auto' }}
run: |
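          # Multiline values are written to $GITHUB_ENV with the documented
          # "NAME<<DELIMITER ... DELIMITER" heredoc-style syntax.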
echo 'APP_PROCESS_COMMAND_LINE_DIMENSION_VALUE<<EOF' >> $GITHUB_ENV
echo '/usr/local/bin/python3 application.py' | tee --append $GITHUB_ENV;
echo 'EOF' >> $GITHUB_ENV
- name: Configure Manual Instrumentation-Type Specific Values
if: ${{ matrix.instrumentation-type == 'manual' ||
matrix.instrumentation-type == 'none' }}
run: |
echo 'APP_PROCESS_COMMAND_LINE_DIMENSION_VALUE<<EOF' >> $GITHUB_ENV
echo '/usr/local/bin/python3 /app/application.py' | tee --append $GITHUB_ENV;
echo 'EOF' >> $GITHUB_ENV
      # FIXME: This checks out the latest commit SHA of the upstream Core repo
      # for the manual-instrumentation app. The SHA should be provided as a
      # workflow input to cover the edge case where we want to run against a
      # specific Core repo change.
      # FIXME: We might also consider making this configurable for the
      # auto-instrumentation app.
- name: Clone OpenTelemetry Core Repo
uses: actions/checkout@v3
if: ${{ matrix.instrumentation-type == 'manual' }}
with:
repository: open-telemetry/opentelemetry-python
path: ${{ env.APP_PATH }}/opentelemetry-python-core
      # FIXME: This checks out the latest commit SHA of the upstream Contrib
      # repo for the manual-instrumentation app. The SHA should be provided as
      # a workflow input to cover the edge case where we want to run against a
      # specific Contrib repo change.
- name: Clone OpenTelemetry Contrib Repo
uses: actions/checkout@v3
if: ${{ matrix.instrumentation-type == 'manual' }}
with:
repository: open-telemetry/opentelemetry-python-contrib
path: ${{ env.APP_PATH }}/opentelemetry-python-contrib
# MARK: - Uniquely identify this Sample App environment
- name: Create unique combination using matrix + commit parameters
run: |
echo "MATRIX_COMMIT_COMBO=${{ matrix.app-platform }}-${{ matrix.instrumentation-type }}-${{ env.TARGET_SHA }}" | tee --append $GITHUB_ENV;
# MARK: - Run Performance Tests
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1
with:
role-to-assume: ${{ secrets.AWS_ASSUME_ROLE_ARN }}
role-duration-seconds: 21600 # 6 Hours
aws-region: ${{ env.AWS_DEFAULT_REGION }}
- name: Configure Performance Test environment variables
run: |
echo "NUM_OF_CPUS=$(nproc --all)" | tee --append $GITHUB_ENV;
- name: Run All Docker Containers - Sample App + OTel Collector + Load Generator + Alarm Poller
id: check-failure-during-performance-tests
continue-on-error: true
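        # continue-on-error lets the job keep running so later steps can read
        # this step's outcome (success/failure) and report on it.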
working-directory: .github/docker-performance-tests
env:
INSTANCE_ID: ${{ github.run_id }}-${{ github.run_number }}
LOG_GROUP_NAME: otel-sdk-performance-tests
# Also uses:
# AWS_ACCESS_KEY_ID
# AWS_SECRET_ACCESS_KEY
# AWS_SESSION_TOKEN
# APP_PATH
# TARGET_SHA
# LISTEN_ADDRESS_PORT
# LOGS_NAMESPACE
# APP_PROCESS_COMMAND_LINE_DIMENSION_VALUE
# APP_PROCESS_EXECUTABLE_NAME
# HOSTMETRICS_INTERVAL_SECS
# TEST_DURATION_MINUTES
# NUM_OF_CPUS
# CPU_LOAD_THRESHOLD
# TOTAL_MEMORY_THRESHOLD
# AWS_DEFAULT_REGION
# MATRIX_COMMIT_COMBO
# GITHUB_RUN_ID
run: |-
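          # `docker-compose up` runs in the foreground and only returns once
          # the containers stop, i.e. after TEST_DURATION_MINUTES has elapsed.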
docker-compose up --build;
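          # Recover the alarms-poller container's exit code via `docker
          # inspect`; non-zero is treated as a failure during the soak test
          # (e.g. a CPU or memory threshold alarm fired).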
RUN_TESTS_EXIT_CODE=$(
docker inspect $(
docker ps --quiet --all --filter "name=docker-performance-tests_alarms-poller"
) --format="{{.State.ExitCode}}"
);
echo "RUN_TESTS_EXIT_CODE=$RUN_TESTS_EXIT_CODE" | tee --append $GITHUB_ENV;
exit $RUN_TESTS_EXIT_CODE;
- name: Fail early if Soak Tests failed to start
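        # An empty RUN_TESTS_EXIT_CODE means the previous step aborted before
        # it could record the poller's exit code; 1 means the poller failed.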
if: ${{ env.RUN_TESTS_EXIT_CODE == '' || env.RUN_TESTS_EXIT_CODE == 1 }}
run: exit 1;
# MARK: - Report on Performance Test Results
- name: Install script dependencies
run: pip install boto3
- name: Get a snapshot of metrics and commit them to the repository
run: |
python3 .github/scripts/performance-tests/produce_metric_widget_images.py \
--logs-namespace ${{ env.LOGS_NAMESPACE }} \
--metrics-period ${{ env.HOSTMETRICS_INTERVAL_SECS }} \
--num-of-cpus ${{ env.NUM_OF_CPUS }} \
--app-process-command-line-dimension-value "${{ env.APP_PROCESS_COMMAND_LINE_DIMENSION_VALUE }}" \
--target-sha ${{ env.TARGET_SHA }} \
--github-run-id ${GITHUB_RUN_ID} \
--test-duration-minutes ${{ env.TEST_DURATION_MINUTES }} \
--cpu-load-threshold ${{ env.CPU_LOAD_THRESHOLD }} \
--total-memory-threshold ${{ env.TOTAL_MEMORY_THRESHOLD }} \
--app-platform ${{ matrix.app-platform }} \
--instrumentation-type ${{ matrix.instrumentation-type }} \
--max-benchmarks-to-keep ${{ env.MAX_BENCHMARKS_TO_KEEP }} \
--github-repository ${GITHUB_REPOSITORY}
echo "::warning::Checkout Snapshots at this link: https://github.com/${GITHUB_REPOSITORY}/blob/gh-pages/soak-tests/snapshots/commits/${{ env.TARGET_SHA }}/runs/${GITHUB_RUN_ID}/${{ matrix.app-platform }}";
git config user.email "41898282+github-actions[bot]@users.noreply.github.com";
git config user.name "GitHub Actions";
git fetch;
git checkout gh-pages;
git add soak-tests/snapshots/commits;
git commit -m "Soak Test Snapshots from ${{ env.TARGET_SHA }} - ${GITHUB_RUN_ID}";
git push;
git checkout main;
- name: Prepare Performance Test results as JSON output
run: python3 .github/scripts/performance-tests/get-metric-data/produce_performance_test_results.py
--logs-namespace ${{ env.LOGS_NAMESPACE }}
--metrics-period ${{ env.HOSTMETRICS_INTERVAL_SECS }}
--num-of-cpus ${{ env.NUM_OF_CPUS }}
--app-process-command-line-dimension-value "${{ env.APP_PROCESS_COMMAND_LINE_DIMENSION_VALUE }}"
--target-sha ${{ env.TARGET_SHA }}
--github-run-id ${GITHUB_RUN_ID}
--test-duration-minutes ${{ env.TEST_DURATION_MINUTES }}
- name: Do we already have Performance Test graph results for this commit?
continue-on-error: true
id: check-already-have-performance-results
run: |
git checkout gh-pages;
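          # data.js is published by github-action-benchmark; its assumed shape:
          # window.BENCHMARK_DATA = { "entries": { "<chart name>":
          #   [ { "commit": { "id": "<sha>", ... }, ... } ] } }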
HAS_RESULTS_ALREADY=$(
sed 's/window.BENCHMARK_DATA = //' soak-tests/per-commit-overall-results/data.js |
jq "
.entries |
.\"Soak Test Results - sample-app-${{ matrix.app-platform }}-${{ matrix.instrumentation-type }}\" |
any(.commit.id == \"${{ env.TARGET_SHA }}\")
" || echo false
);
git checkout main;
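          # Use the shell's exit status as a boolean: this step "fails" when
          # no results exist for this commit yet, and the auto-push condition
          # below reads that via
          # steps.check-already-have-performance-results.outcome.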
[[ $HAS_RESULTS_ALREADY == true ]]
- name: Graph and Report Performance Test Averages result
uses: benchmark-action/github-action-benchmark@v1
continue-on-error: true
id: check-failure-after-performance-tests
with:
name: Soak Test Results - sample-app-${{ matrix.app-platform }}-${{ matrix.instrumentation-type }}
tool: customSmallerIsBetter
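        # output.json is assumed to be written by the "Prepare Performance
        # Test results as JSON output" step in customSmallerIsBetter format.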
output-file-path: output.json
github-token: ${{ secrets.GITHUB_TOKEN }}
max-items-in-chart: ${{ env.MAX_BENCHMARKS_TO_KEEP }}
alert-threshold: 175%
# Does not work as expected, see:
# https://github.com/open-telemetry/opentelemetry-python/pull/1478
# comment-always: true
fail-on-alert: true
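        # Only push new benchmark data on scheduled runs from main, and only
        # when the previous step found no existing results for this commit.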
auto-push: ${{ github.event_name == 'schedule' &&
steps.check-already-have-performance-results.outcome == 'failure' &&
github.ref == 'refs/heads/main' }}
gh-pages-branch: gh-pages
benchmark-data-dir-path: soak-tests/per-commit-overall-results
- name: Publish Issue if failed DURING Performance Tests
uses: JasonEtco/create-an-issue@v2
if: ${{ github.event_name == 'schedule' &&
steps.check-failure-during-performance-tests.outcome == 'failure' }}
env:
APP_PLATFORM: ${{ matrix.app-platform }}
INSTRUMENTATION_TYPE: ${{ matrix.instrumentation-type }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
filename: .github/auto-issue-templates/failure-during-soak_tests.md
update_existing: true
- name: Publish Issue if failed AFTER Performance Tests
uses: JasonEtco/create-an-issue@v2
if: ${{ github.event_name == 'schedule' &&
steps.check-failure-after-performance-tests.outcome == 'failure' }}
env:
APP_PLATFORM: ${{ matrix.app-platform }}
INSTRUMENTATION_TYPE: ${{ matrix.instrumentation-type }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
filename: .github/auto-issue-templates/failure-after-soak_tests.md
update_existing: true
- name: Check for Performance Degradation either DURING or AFTER Performance Tests
if: ${{ steps.check-failure-during-performance-tests.outcome == 'failure' ||
steps.check-failure-after-performance-tests.outcome == 'failure' }}
run: >-
echo 'Performance Tests failed, see the logs above for details';
exit 1;