-
Notifications
You must be signed in to change notification settings - Fork 0
/
.gitlab-ci.yml
335 lines (294 loc) · 8.68 KB
/
.gitlab-ci.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
# ======
# Globals
# ======
# Pipeline-wide variables shared by every job in this file.
variables:
  # Ref name of the running pipeline; compared against the Snowflake database
  # names in the clone template's `except` rules.
  BRANCH_NAME: "$CI_COMMIT_REF_NAME"
  # Puts orchestration/, extract/ and extract/shared_modules/ ahead of any
  # pre-existing PYTHONPATH entries.
  PYTHONPATH: "$CI_PROJECT_DIR/orchestration/:$CI_PROJECT_DIR/extract/:$CI_PROJECT_DIR/extract/shared_modules/:$PYTHONPATH"
# ======
# CI Stages
# ======
# Stage list; order matters. Trailing comments name the included file that
# the stage is annotated with in this configuration.
stages:
  - ❄️ Snowflake
  - 🚂 Extract  # extract/extract-ci.yml
  - ⚙️ dbt Run  # transform/snowflake-dbt/snowflake-dbt-ci.yml
  - 🛠 dbt Misc  # transform/snowflake-dbt/snowflake-dbt-ci.yml
  - 📚 dbt Docs
  - 🐍 Python
  - 🛑 Snowflake Stop
  - triage
  - triage run
# Additional CI definitions pulled in from elsewhere in this repository.
include:
  - 'extract/extract-ci.yml'
  - 'transform/snowflake-dbt/snowflake-dbt-ci.yml'
# ======
# Snowflake Database Clones
# ======
# Template for cloning databases in Snowflake for use in MRs
.snowflake_clone_template: &snowflake_clone_template
  # Hidden base job: manual, merge-request-only, with orchestration/ on PATH
  # so scripts can call manage_snowflake.py by bare name.
  image: registry.gitlab.com/gitlab-data/data-image/data-image:v0.0.13
  tags:
    - analytics
  when: manual
  before_script:
    - export PATH="$CI_PROJECT_DIR/orchestration/:$PATH"
  only:
    refs:
      - merge_requests
    # Variable-presence checks on the Snowflake settings.
    variables:
      - $SNOWFLAKE_SYSADMIN_ROLE
      - $SNOWFLAKE_LOAD_WAREHOUSE
      - $SNOWFLAKE_LOAD_DATABASE  # make sure the guard works
      - $SNOWFLAKE_PREP_DATABASE  # make sure the guard works
      - $SNOWFLAKE_PROD_DATABASE  # make sure the guard works
  except:
    refs:
      - master
    # Skip when the branch name equals one of the configured database names,
    # or when $TEST_PIPELINE is set.
    variables:
      - $BRANCH_NAME == $SNOWFLAKE_LOAD_DATABASE
      - $BRANCH_NAME == $SNOWFLAKE_PREP_DATABASE
      - $BRANCH_NAME == $SNOWFLAKE_PROD_DATABASE
      - $TEST_PIPELINE
# Hidden base for jobs that create a clone: extends the clone template and
# attaches a per-ref review environment whose stop action is `clone_stop`.
.snowflake_start_clone: &snowflake_start_clone
  <<: *snowflake_clone_template
  stage: ❄️ Snowflake
  variables:
    GIT_STRATEGY: clone
  environment:
    name: review/$CI_COMMIT_REF_NAME
    on_stop: clone_stop
# Clone Jobs
📈clone_prod:
  <<: *snowflake_start_clone
  # Replaces the `when: manual` inherited from the clone template.
  when: always
  # Runs manage_clones for prod, then prep, both with --empty.
  script:
    - manage_snowflake.py manage_clones --database prod --empty
    - manage_snowflake.py manage_clones --database prep --empty
# Manual job: runs manage_clones with --force for prod, then prep
# (no --empty flag, unlike 📈clone_prod).
📈❗️clone_prod_real:
  <<: *snowflake_start_clone
  script:
    - manage_snowflake.py manage_clones --force --database prod
    - manage_snowflake.py manage_clones --force --database prep
# Manual job: runs manage_clones for the prep database, limited to the schema
# named by $SCHEMA_NAME (not set in this file; must be supplied to the job).
📈⚙clone_prep_specific_schema:
  <<: *snowflake_start_clone
  script:
    - manage_snowflake.py manage_clones --database prep --schema $SCHEMA_NAME
# Manual job: runs manage_clones for the prod database, limited to the schema
# named by $SCHEMA_NAME (not set in this file; must be supplied to the job).
📈⚙clone_prod_specific_schema:
  <<: *snowflake_start_clone
  script:
    - manage_snowflake.py manage_clones --database prod --schema $SCHEMA_NAME
# Manual job: runs manage_clones for the whole raw database, passing
# --include_stages.
🥩clone_raw_full:
  <<: *snowflake_start_clone
  script:
    - manage_snowflake.py manage_clones --database raw --include_stages
# Manual job: runs manage_clones for the `sheetload` schema of raw.
🥩📜clone_raw_sheetload:
  <<: *snowflake_start_clone
  script:
    - manage_snowflake.py manage_clones --database raw --schema sheetload
# Manual job: runs manage_clones for the `tap_postgres` schema of raw.
🥩🛢clone_raw_postgres_pipeline:
  <<: *snowflake_start_clone
  script:
    - manage_snowflake.py manage_clones --database raw --schema tap_postgres
# Manual job: runs manage_clones for the raw schema named by $SCHEMA_NAME
# (not set in this file; must be supplied to the job), with --include_stages.
🥩⚙clone_raw_specific_schema:
  <<: *snowflake_start_clone
  script:
    - manage_snowflake.py manage_clones --database raw --schema $SCHEMA_NAME --include_stages
# Manual job: runs manage_clones with --force for prep, prod and raw in turn
# (prep/prod with --empty, raw with --include_stages).
👥force_clone_both:
  <<: *snowflake_start_clone
  script:
    - manage_snowflake.py manage_clones --force --database prep --empty
    - manage_snowflake.py manage_clones --force --database prod --empty
    - manage_snowflake.py manage_clones --force --database raw --include_stages
# Stop action for the review/$CI_COMMIT_REF_NAME environment (referenced by
# `on_stop` in .snowflake_start_clone); runs manage_snowflake.py delete_clones.
clone_stop:
  <<: *snowflake_clone_template
  stage: 🛑 Snowflake Stop
  variables:
    # The runner does not check out the repository; the script clones it.
    GIT_STRATEGY: none
  environment:
    name: review/$CI_COMMIT_REF_NAME
    action: stop
  script:
    - git clone $CI_REPOSITORY_URL
    # NOTE(review): assumes the clone above lands in a directory named
    # `analytics` - confirm against the repository name.
    - analytics/orchestration/manage_snowflake.py delete_clones
# ======
# Python Code Checks
# ======
# Hidden base for the Python lint jobs: non-blocking, and limited to merge
# request pipelines that touch at least one *.py file.
.python_check: &python_check
  stage: 🐍 Python
  image: registry.gitlab.com/gitlab-data/data-image/ci-python-image:v1.0.0
  tags:
    - analytics
  allow_failure: true
  only:
    refs:
      - merge_requests
    changes:
      - '**/*.py'
  before_script:
    # Collects the *.py paths that differ between this commit and the merge
    # request's target branch; consumed by the jobs that lint $CHANGED_FILES.
    - CHANGED_FILES=$(git diff --name-only ${CI_COMMIT_SHA} origin/${CI_MERGE_REQUEST_TARGET_BRANCH_NAME} | grep '\.py'$)
# Hidden base for Python test jobs: same triggers as the lint base (merge
# requests touching *.py, non-blocking) but on the data-image container and
# without the CHANGED_FILES before_script.
.python_testing: &python_testing
  stage: 🐍 Python
  image: registry.gitlab.com/gitlab-data/data-image/data-image:v0.0.20
  tags:
    - analytics
  allow_failure: true
  only:
    refs:
      - merge_requests
    changes:
      - '**/*.py'
# Formatting check over the whole checkout (not just changed files);
# --check reports without rewriting anything.
⚫python_black:
  <<: *python_check
  script:
    - black --check .
# flake8 over the files collected in $CHANGED_FILES by the base job's
# before_script, with the listed codes ignored.
🌽python_flake8:
  <<: *python_check
  script:
    - echo "Changed files are $CHANGED_FILES"
    - flake8 $CHANGED_FILES --ignore=E203,E501,W503,W605
# Static type check of the extract/ tree only.
✏️python_mypy:
  <<: *python_check
  script:
    - mypy extract/ --ignore-missing-imports
# pylint over the files collected in $CHANGED_FILES by the base job's
# before_script, with the listed checks disabled.
🗒️python_pylint:
  <<: *python_check
  script:
    - echo "Changed files are $CHANGED_FILES"
    - pylint $CHANGED_FILES --disable=line-too-long,E0401,E0611,W1203,W1202
# xenon complexity gate over the whole checkout, using the thresholds on the
# command line; `-i` is given transform and shared_modules.
🤔python_complexity:
  <<: *python_check
  script:
    - xenon --max-absolute B --max-modules B --max-average A . -i transform,shared_modules
# vulture over the files collected in $CHANGED_FILES by the base job's
# before_script, with --min-confidence 100.
🦅python_vulture:
  <<: *python_check
  script:
    - echo "Changed files are $CHANGED_FILES"
    - vulture $CHANGED_FILES --min-confidence 100
# Runs the pytest suite and publishes the JUnit report to GitLab.
✅python_pytest:
  <<: *python_testing
  script:
    # Writes report.xml, which the `reports:junit` entry below picks up.
    - python -m pytest -vv -x --junitxml=report.xml
  artifacts:
    reports:
      junit: ${CI_PROJECT_DIR}/report.xml
    # NOTE(review): nothing visible in this job produces a .coverage.* file;
    # confirm it is generated by pytest configuration elsewhere.
    paths:
      - ${CI_PROJECT_DIR}/.coverage.${CI_JOB_ID}
    expire_in: 1 day
    when: on_success
# ======
# Snowflake Permissions Validator
# ======
# Hidden base for validating the Snowflake permissions spec: non-blocking,
# and limited to merge request pipelines that modify roles.yml.
.yaml_validate: &yaml_validate
  stage: 🐍 Python
  image: registry.gitlab.com/gitlab-data/data-image/data-image:v0.0.13
  tags:
    - analytics
  only:
    changes:
      - "permissions/snowflake/roles.yml"
    refs:
      # The keyword is the plural `merge_requests`. The singular form is not
      # a keyword and is matched as a ref name, so the job would never be
      # selected for merge request pipelines.
      - merge_requests
  allow_failure: true
# Parses roles.yml with yaml.safe_load and prints the result; the job fails
# if the file is not loadable YAML.
📁yaml_validation:
  <<: *yaml_validate
  script:
    - python -c 'import yaml, sys; print(yaml.safe_load(sys.stdin))' < permissions/snowflake/roles.yml
# Hidden base for the permifrost jobs: non-blocking, merge request pipelines
# only, on the permifrost image.
.permifrost: &permifrost
  stage: 🐍 Python
  image: registry.gitlab.com/gitlab-data/permifrost:v0.13.1
  tags:
    - analytics
  allow_failure: true
  only:
    refs:
      - merge_requests
# Manual job: `permifrost run` against roles.yml with --diff and --dry.
🧊⚙permifrost_run:
  <<: *permifrost
  when: manual
  script:
    - permifrost run permissions/snowflake/roles.yml --diff --dry
# Manual job: `permifrost spec-test` against roles.yml.
🧊permifrost_spec_test:
  <<: *permifrost
  when: manual
  script:
    - permifrost spec-test permissions/snowflake/roles.yml
# ======
# dbt docs to GitLab Pages
# ======
# Hidden base job that builds the dbt docs site into public/ and keeps it as
# a one-week artifact.
.pages_job_template: &pages_job_template
  stage: 📚 dbt Docs
  image: registry.gitlab.com/gitlab-data/data-image/dbt-image:v1.0.2
  tags:
    - analytics
  variables:
    SNOWFLAKE_ROLE: $SNOWFLAKE_TRANSFORM_ROLE
    SNOWFLAKE_WAREHOUSE: $SNOWFLAKE_TRANSFORM_WAREHOUSE
  before_script:
    - export PATH="$CI_PROJECT_DIR/orchestration/:$PATH"
  script:
    - export SNOWFLAKE_SNAPSHOT_DATABASE="SNOWFLAKE"
    - echo $SNOWFLAKE_SNAPSHOT_DATABASE
    # --- SSH setup: install the base64-encoded private key from CI variables.
    - mkdir -p ~/.ssh
    - touch ~/.ssh/id_rsa
    - chmod 700 ~/.ssh
    # Decode the key from base64 into place.
    - echo "$GIT_DATA_TESTS_SSH_PRIVATE_KEY" | base64 --decode > ~/.ssh/id_rsa
    # Make the key read-only.
    - chmod 0400 ~/.ssh/id_rsa
    # Turns off strict host key checking for every host (`Host *`); it does
    # not add any specific host to known_hosts.
    - echo -e "Host *\n\tStrictHostKeyChecking no\n\n" > ~/.ssh/config
    # --- dbt docs build, using docs-packages.yml in place of packages.yml.
    - cd $CI_PROJECT_DIR/transform/snowflake-dbt/
    - rm packages.yml
    - mv docs-packages.yml packages.yml
    - dbt deps --profiles-dir profile --target prod
    - dbt docs generate --profiles-dir profile --target prod
    - mkdir -p $CI_PROJECT_DIR/public/
    - python $CI_PROJECT_DIR/orchestration/upload_dbt_file_to_snowflake.py manifest
    - cd target
    # Remove row counts: every "Row Count" value in catalog.json becomes -1.0.
    # NOTE(review): the `.` before the trailing 0 is an unescaped regex
    # wildcard, so it matches any character, not only a decimal point.
    - |
      sed -i 's/"Row Count", "value": [0-9]*.0/"Row Count", "value": -1.0/g' catalog.json
    # --- Assemble the site: generated JSON + graph, then the static assets.
    - cp *.json graph.gpickle $CI_PROJECT_DIR/public/
    - cd $CI_PROJECT_DIR/transform/snowflake-dbt/docs/
    - cp index.html gitlab.css $CI_PROJECT_DIR/public/
  artifacts:
    name: "dbt Docs Files"
    expire_in: 1 week
    paths:
      - public
# Run the script to generate the dbt docs and stand them up in gitlab pages
pages:
  <<: *pages_job_template
  # Restricted to master, to changes matching transform/snowflake-dbt/*, and
  # to pipelines where $DEPLOY_DBT_PAGES is set.
  only:
    refs:
      - master
    changes:
      - 'transform/snowflake-dbt/*'
    variables:
      - $DEPLOY_DBT_PAGES
# Manual job (never in scheduled pipelines): prints the gitlab-triage help
# text, then runs gitlab-triage against this project with --dry-run.
dry-run:triage:
  stage: triage
  image: ruby:2.4
  when: manual
  except:
    - schedules
  script:
    - gem install gitlab-triage
    - gitlab-triage --help
    - gitlab-triage --dry-run --token $ANALYTICS_API_TOKEN --source projects --source-id $CI_PROJECT_PATH
# Manual job on master (never in scheduled pipelines): runs gitlab-triage
# against this project without --dry-run.
policy:run-triage:
  stage: triage run
  image: ruby:2.4
  when: manual
  only:
    refs:
      - master
  except:
    - schedules
  script:
    - gem install gitlab-triage
    - gitlab-triage --token $ANALYTICS_API_TOKEN --source projects --source-id $CI_PROJECT_PATH
# Non-manual gitlab-triage run, gated on $RUN_GITLAB_TRIAGE being set.
# NOTE(review): presumably that variable is defined on a pipeline schedule -
# confirm in the project's schedule settings.
schedule:run-triage:
  stage: triage run
  image: ruby:2.4
  only:
    variables:
      - $RUN_GITLAB_TRIAGE
  script:
    - gem install gitlab-triage
    - gitlab-triage --token $ANALYTICS_API_TOKEN --source projects --source-id $CI_PROJECT_PATH