# connector-performance-command.yml
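# Reusable workflow that builds a connector and its matching performance
# harness, runs the harness against a chosen dataset on a self-hosted EC2
# runner inside a KIND cluster, and reports the results back to the
# triggering comment (and optionally to Datadog).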
name: Connector Performance Harness
on:
workflow_call:
inputs:
connector:
type: string
required: true
dataset:
type: string
required: true
repo:
description: "Repo to check out code from. Defaults to the main airbyte repo. Set this when building connectors from forked repos."
type: string
required: false
default: "airbytehq/airbyte"
gitref:
description: "The git ref to check out from the specified repository."
type: string
required: false
default: master
uuid:
description: "Custom UUID of workflow run. Used because GitHub dispatches endpoint does not return workflow run id."
type: string
required: false
stream-number:
description: "Number of streams to use for destination performance measurement."
type: string
required: false
default: "1"
sync-mode:
description: "Sync mode to use for destination performance measurement."
required: false
type: string
default: "full_refresh"
report-to-datadog:
description: "Whether to report the performance test results to Datadog."
required: false
type: string
default: "true"
workflow_dispatch:
inputs:
connector:
description: "Airbyte Connector"
type: choice
required: true
options:
- connectors/source-postgres
- connectors/source-mysql
- connectors/source-mongodb-v2
- connectors/destination-snowflake
default: "connectors/source-postgres"
repo:
description: "Repo to check out code from. Defaults to the main airbyte repo. Set this when building connectors from forked repos."
required: false
default: "airbytehq/airbyte"
gitref:
description: "The git ref to check out from the specified repository."
required: false
default: master
comment-id:
description: "The comment-id of the slash command. Used to update the comment with the status."
required: false
uuid:
description: "Custom UUID of workflow run. Used because GitHub dispatches endpoint does not return workflow run id."
required: false
dataset:
description: "Name of dataset to use for performance measurement. Currently supports 1m, 10m, 20m."
required: false
default: "1m"
stream-number:
description: "Number of streams to use for destination performance measurement."
required: false
default: "1"
sync-mode:
description: "Sync mode to use for destination performance measurement."
required: false
type: choice
options:
- full_refresh
- incremental
default: "full_refresh"
report-to-datadog:
description: "Whether to report the performance test results to Datadog."
required: false
default: "false"
pr:
description: "PR Number (Unused)"
type: number
required: false
jobs:
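  # No-op job: embedding the caller-supplied UUID in the step name lets
  # external dispatchers locate this run, since the GitHub dispatch endpoint
  # does not return a workflow run id.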
uuid:
name: "Custom UUID of workflow run"
timeout-minutes: 10
runs-on: ubuntu-latest
steps:
- name: UUID ${{ inputs.uuid }}
run: true
start-test-runner:
name: Start Build EC2 Runner
needs: uuid
timeout-minutes: 10
runs-on: ubuntu-latest
outputs:
label: ${{ steps.start-ec2-runner.outputs.label }}
ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
steps:
- name: Checkout Airbyte
uses: actions/checkout@v3
with:
repository: ${{ inputs.repo }}
ref: ${{ inputs.gitref }}
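      # Pick a build-runner PAT that still has API quota; the script is
      # expected to export the chosen token as PAT in the job env, which the
      # runner start/stop steps read via ${{ env.PAT }}.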
- name: Check PAT rate limits
run: |
./tools/bin/find_non_rate_limited_PAT \
${{ secrets.GH_PAT_BUILD_RUNNER_OSS }} \
${{ secrets.GH_PAT_BUILD_RUNNER_BACKUP }}
- name: Start AWS Runner
id: start-ec2-runner
uses: ./.github/actions/start-aws-runner
with:
aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
github-token: ${{ env.PAT }}
performance-test:
timeout-minutes: 240
needs: start-test-runner
runs-on: ${{ needs.start-test-runner.outputs.label }}
steps:
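      # Acknowledge the slash command by posting usage hints and a link to
      # this run on the triggering comment.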
- name: Link comment to workflow run
if: inputs.comment-id
uses: peter-evans/create-or-update-comment@v1
with:
comment-id: ${{ inputs.comment-id }}
body: |
            #### Note: The following `dataset=` values are supported: `1m`<sub>(default)</sub>, `10m`, `20m`,
            `bottleneck_stream1`, `bottleneck_stream_randomseed`. For destinations only: you can also use `stream-numbers=N`
            to simulate N parallel streams. Additionally, `sync-mode=incremental` is supported for destinations.
            For example: `dataset=1m stream-numbers=2 sync-mode=incremental`
> :runner: ${{inputs.connector}} https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}.
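      # Validate the connector input before it is interpolated into shell
      # commands below; only names matching connectors/<name> (or a bare
      # connector name) pass.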
- name: Search for valid connector name format
id: regex
uses: AsasInnab/regex-action@v1
with:
regex_pattern: "^(connectors/)?[a-zA-Z0-9-_]+$"
regex_flags: "i" # required to be set for this plugin
search_string: ${{ inputs.connector }}
- name: Validate input workflow format
if: steps.regex.outputs.first_match != inputs.connector
run: echo "The connector provided has an invalid format!" && exit 1
- name: Filter supported connectors
if: "${{ inputs.connector != 'connectors/source-postgres' &&
inputs.connector != 'connectors/source-mysql' &&
inputs.connector != 'connectors/destination-snowflake' &&
inputs.connector != 'connectors/source-mongodb-v2' }}"
run: echo "Only connectors/source-postgres, source-mysql, source-mongodb-v2 and destination-snowflake currently supported by harness" && exit 1
- name: Checkout Airbyte
uses: actions/checkout@v3
with:
repository: ${{ inputs.repo }}
ref: ${{ inputs.gitref }}
fetch-depth: 0 # This is to fetch the main branch in case we are running on a different branch.
- name: Install Java
uses: actions/setup-java@v3
with:
distribution: "zulu"
java-version: "21"
- name: Install Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
- name: Install CI scripts
run: |
pip install pipx
pipx ensurepath
pipx install airbyte-ci/connectors/ci_credentials
pipx install airbyte-ci/connectors/connector_ops
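      # Derive the harness type from the connector name, e.g.
      # connectors/source-postgres -> source-harness and
      # connectors/destination-snowflake -> destination-harness.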
- name: Source or Destination harness
id: which-harness
run: |
the_harness="$(echo ${{inputs.connector}} | sed 's/.*\///; s/-.*//')"-harness
echo "harness_type=$the_harness" >> "$GITHUB_OUTPUT"
- name: Write harness credentials
run: |
export PATH="$PATH:/root/.local/bin"
ci_credentials connectors-performance/$HARNESS_TYPE write-to-storage
connector_name=$(echo ${{ inputs.connector }} | sed 's,.*/,,')
ci_credentials connectors-performance/$connector_name write-to-storage
env:
GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
HARNESS_TYPE: ${{ steps.which-harness.outputs.harness_type }}
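      # Build the harness and connector with Gradle (tests and checks
      # skipped); the airbyte/<name>:dev images they produce are loaded into
      # KIND below.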
      - name: Build harness
shell: bash
run: |
echo "Building... ${{ steps.which-harness.outputs.harness_type }}" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
./gradlew :airbyte-integrations:connectors-performance:$HARNESS_TYPE:build -x check
env:
HARNESS_TYPE: ${{ steps.which-harness.outputs.harness_type }}
      - name: Build connector
shell: bash
run: |
echo "Building... ${{inputs.connector}}" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY # this is a blank line
connector_name=$(echo ${{ inputs.connector }} | sed 's,.*/,,')
echo "Running ./gradlew :airbyte-integrations:connectors:$connector_name:build -x check"
./gradlew :airbyte-integrations:connectors:$connector_name:build -x check
env:
GCP_GSM_CREDENTIALS: ${{ secrets.GCP_GSM_CREDENTIALS }}
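      # Spin up a Kubernetes-in-Docker cluster using the config that matches
      # the harness type.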
- name: KIND Kubernetes Cluster Setup
uses: helm/[email protected]
with:
config: "./tools/bin/${{ steps.which-harness.outputs.harness_type }}-kind-cluster-config.yaml"
- name: Run harness
id: run-harness
shell: bash
env:
CONN: ${{ inputs.connector }}
DS: ${{ inputs.dataset }}
STREAM_NUMBER: ${{ inputs.stream-number }}
SYNC_MODE: ${{ inputs.sync-mode }}
REPORT_TO_DATADOG: ${{ inputs.report-to-datadog }}
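          # The harness prints its summary wrapped in an Airbyte LOG message;
          # PREFIX and SUFFIX are stripped from the final log line below so
          # only the plain results are captured.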
PREFIX: '{"type":"LOG","log":{"level":"INFO","message":"INFO i.a.i.p.PerformanceTest(runTest):165'
SUFFIX: '"}}'
HARNESS_TYPE: ${{ steps.which-harness.outputs.harness_type }}
DD_API_KEY: ${{ secrets.DD_API_KEY }}
run: |
kubectl apply -f ./tools/bin/admin-service-account.yaml
connector_name=$(echo $CONN | cut -d / -f 2)
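          # Make the locally built connector and harness images available to
          # the KIND cluster nodes.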
kind load docker-image airbyte/$connector_name:dev --name chart-testing
kind load docker-image airbyte/$HARNESS_TYPE:dev --name chart-testing
          # envsubst requires variables to be exported or set in this step's env field.
export CONNECTOR_IMAGE_NAME=${CONN/connectors/airbyte}:dev
export DATASET=$DS
export HARNESS=$HARNESS_TYPE
envsubst < ./tools/bin/run-harness-process.yaml | kubectl create -f -
echo "harness is ${{ steps.which-harness.outputs.harness_type }}"
POD=$(kubectl get pod -l app=performance-harness -o jsonpath="{.items[0].metadata.name}")
kubectl wait --for=condition=Ready --timeout=20s "pod/$POD"
kubectl logs --follow $POD
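          # Capture the harness's final log line as a multiline step output,
          # using a random delimiter to safely terminate the heredoc.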
EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
echo "RUN_RESULT<<$EOF" >> $GITHUB_OUTPUT
kubectl logs --tail=1 $POD | while read line ; do line=${line#"$PREFIX"}; line=${line%"$SUFFIX"}; echo $line >> $GITHUB_OUTPUT ; done
echo "$EOF" >> $GITHUB_OUTPUT
- name: Link comment to workflow run
if: inputs.comment-id
uses: peter-evans/create-or-update-comment@v2
with:
reactions: "+1"
comment-id: ${{ inputs.comment-id }}
body: |
## Performance test Result:
```
${{ steps.run-harness.outputs.RUN_RESULT }}
```
# need to add credentials here
# In case of self-hosted EC2 errors, remove this block.
stop-test-runner:
name: Stop Build EC2 Runner
timeout-minutes: 10
needs:
- start-test-runner # required to get output from the start-runner job
- performance-test # required to wait when the main job is done
- uuid
runs-on: ubuntu-latest
if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.SELF_RUNNER_AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.SELF_RUNNER_AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-2
- name: Checkout Airbyte
uses: actions/checkout@v3
- name: Check PAT rate limits
run: |
./tools/bin/find_non_rate_limited_PAT \
${{ secrets.GH_PAT_BUILD_RUNNER_OSS }} \
${{ secrets.GH_PAT_BUILD_RUNNER_BACKUP }}
- name: Stop EC2 runner
uses: supertopher/[email protected]
with:
mode: stop
github-token: ${{ env.PAT }}
label: ${{ needs.start-test-runner.outputs.label }}
ec2-instance-id: ${{ needs.start-test-runner.outputs.ec2-instance-id }}