# azure-pipelines-docs-integration.yml
# (from a fork of great-expectations/great_expectations)
# CI trigger: only branches whose name matches the pre_pr_docs-* pattern are
# included.
trigger:
  branches:
    include:
    # Quoted so the trailing glob character is never interpreted by YAML tooling.
    - 'pre_pr_docs-*'
# Container resources. The test_docs job refers to each of these by name in its
# `services:` mapping.
resources:
  containers:
  # PostgreSQL 11 with a `test_ci` database and trust authentication.
  - container: postgres
    image: postgres:11
    ports:
    - '5432:5432'
    env:
      POSTGRES_DB: "test_ci"
      POSTGRES_HOST_AUTH_METHOD: "trust"
  # MySQL 8.0.20 with an empty root password and a `test_ci` database.
  - container: mysql
    image: mysql:8.0.20
    ports:
    - '3306:3306'
    env:
      MYSQL_ALLOW_EMPTY_PASSWORD: "yes"
      MYSQL_DATABASE: test_ci
  # SQL Server 2019 (Developer edition).
  - container: mssql
    image: mcr.microsoft.com/mssql/server:2019-latest
    env:
      # Quoted: a bare Y is a boolean under YAML 1.1 parsers, but a string is
      # what must reach the container.
      ACCEPT_EULA: 'Y'
      # NOTE(review): the same literal password is passed to sqlcmd in the
      # "Create MSSQL database test_ci" step of the test_docs job; keep the two
      # in sync. Quoted because the value contains YAML indicator characters.
      MSSQL_SA_PASSWORD: 'ReallyStrongPwd1234%^&*'
      MSSQL_DB: test_ci
      MSSQL_PID: Developer
    ports:
    - '1433:1433'
  # Trino 400; mapped as 8088:8080 (not 8080:8080).
  - container: trino
    image: trinodb/trino:400
    ports:
    - '8088:8080'
# Pipeline-level variables.
variables:
  # Runtime expressions comparing Build.SourceBranch with the main / develop
  # refs. isMain is read by the job conditions in later stages.
  isMain: $[eq(variables['Build.SourceBranch'], 'refs/heads/main')]
  isDevelop: $[eq(variables['Build.SourceBranch'], 'refs/heads/develop')]
  # Usage-statistics URL; points at the qa.stats host.
  GE_USAGE_STATISTICS_URL: "https://qa.stats.greatexpectations.io/great_expectations/v1/usage_statistics"
stages:
# Stage scope_check: determines what changed so later stages can decide whether
# to run. Its step outputs are read by later job conditions as
# stageDependencies.scope_check.changes.outputs['<stepName>.<variable>'].
- stage: scope_check
  pool:
    # NOTE(review): the other stages in this file use 'ubuntu-latest'; confirm
    # that pinning this stage to ubuntu-20.04 is still intended and that the
    # image is still offered.
    vmImage: 'ubuntu-20.04'
  jobs:
  - job: changes
    steps:
    # Matches changed files against the [DocsChanged] rule set below; later
    # conditions read the result as CheckDocsChanges.DocsChanged.
    - task: ChangedFiles@1
      name: CheckDocsChanges
      inputs:
        verbose: true
        rules: |
          [DocsChanged]
          docs/**
          tests/integration/docusaurus/**
          tests/integration/fixtures/**
          tests/test_sets/**
    # Later conditions read CheckDocsDependenciesChanges.DocsDependenciesChanged;
    # presumably this script sets it -- verify against the script itself.
    - bash: ./scripts/check_for_docs_deps_changes
      name: CheckDocsDependenciesChanges
# Stage custom_checks: static checks over the docs tree. Each job runs when the
# scope_check stage reported docs changes or when the build is on main.
- stage: custom_checks
  dependsOn: scope_check
  pool:
    vmImage: 'ubuntu-latest'
  jobs:
  - job: link_checker
    # Folded scalar (>-): the two lines join with a single space, giving the
    # same one-line expression as before.
    condition: >-
      or(eq(stageDependencies.scope_check.changes.outputs['CheckDocsChanges.DocsChanged'], true),
      eq(variables.isMain, true))
    steps:
    # Runs the link checker over docs/; external links are skipped.
    - bash: python scripts/docs_link_checker.py -p docs -r docs -s docs --skip-external
      name: LinkChecker
  - job: docs_snippet_checker
    condition: >-
      or(eq(stageDependencies.scope_check.changes.outputs['CheckDocsChanges.DocsChanged'], true),
      eq(variables.isMain, true))
    steps:
    # Installs the JS dependencies, then runs the docs snippet validator.
    - script: |
        yarn install
        python scripts/validate_docs_snippets.py
      name: DocsSnippetChecker
# Stage docusaurus_tests: runs the docs integration test script against the
# database service containers and the cloud backends configured below.
- stage: docusaurus_tests
  dependsOn: scope_check
  pool:
    vmImage: 'ubuntu-latest'
  jobs:
  - job: test_docs
    timeoutInMinutes: 120
    # Runs when docs dependencies changed, docs changed, or the build is on
    # main. Folded scalar (>-): the lines join with single spaces, giving the
    # same one-line expression as before.
    condition: >-
      or(eq(stageDependencies.scope_check.changes.outputs['CheckDocsDependenciesChanges.DocsDependenciesChanged'], true),
      eq(stageDependencies.scope_check.changes.outputs['CheckDocsChanges.DocsChanged'], true),
      eq(variables.isMain, true))
    variables:
      # Quoted so the version stays a string.
      python.version: '3.8'
    # Each value names a container declared under resources.containers.
    services:
      postgres: postgres
      mysql: mysql
      mssql: mssql
      trino: trino
    steps:
    - task: UsePythonVersion@0
      inputs:
        versionSpec: '$(python.version)'
      displayName: 'Use Python $(python.version)'
    # pip is pinned to an exact version.
    - bash: python -m pip install --upgrade pip==20.2.4
      displayName: 'Update pip'
    - script: |
        pip install --requirement requirements-dev.txt --constraint constraints-dev.txt .
      displayName: 'Install dependencies'
    # Downloads the GCP service-account key; later steps read its location via
    # $(gcp_authkey.secureFilePath).
    - task: DownloadSecureFile@1
      name: gcp_authkey
      displayName: 'Download GCS Credentials'
      inputs:
        secureFile: 'superconductive-service-acct_ge-oss-ci-cd.json'
        retryCount: '2'
    - script: |
        # install google cloud storage dependency
        pip install google-cloud-bigquery-storage
        # this is recommended by the Google documentation for CI/CD https://cloud.google.com/sdk/docs/install#other_installation_options
        curl -sS https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-370.0.0-linux-x86_64.tar.gz > ./google-cloud-sdk-370.0.0-linux-x86_64.tar.gz && tar -xf ./google-cloud-sdk-370.0.0-linux-x86_64.tar.gz
        ./google-cloud-sdk/install.sh --usage-reporting=False --path-update=True --quiet --install-python=False
        # creating new named configuration
        ./google-cloud-sdk/bin/gcloud config configurations create ge-oss-ci-cd-configurations
        # setting account config using project and service account info
        ./google-cloud-sdk/bin/gcloud config set account account-for-azure-tests --project=$(GE_TEST_GCP_PROJECT) --access-token-file=$(gcp_authkey.secureFilePath)
        # Pass the configured Cloud SDK authentication to gsutil.
        ./google-cloud-sdk/bin/gcloud config set pass_credentials_to_gsutil True
        # Authorize access to Google Cloud with a service account
        ./google-cloud-sdk/bin/gcloud auth activate-service-account --key-file=$(gcp_authkey.secureFilePath)
      displayName: 'Install and setup Google Cloud SDK'
    # The password literal must match MSSQL_SA_PASSWORD of the mssql container
    # resource.
    - script: |
        sqlcmd -U sa -P "ReallyStrongPwd1234%^&*" -Q "CREATE DATABASE test_ci;" -o create_db_output.txt
      displayName: 'Create MSSQL database test_ci'
    - script: |
        pip install pytest pytest-azurepipelines git+https://github.com/awslabs/aws-glue-libs.git
        pytest -v --docs-tests -m integration --mysql --bigquery --mssql --spark --postgresql --trino --aws --azure tests/integration/test_script_runner.py
      displayName: 'pytest'
      # Pipeline variables mapped explicitly into the environment of the
      # pytest step.
      env:
        # Snowflake credentials
        # NOTE(review): the pytest command above passes no --snowflake or
        # --redshift flag; confirm these two groups are still needed.
        SNOWFLAKE_ACCOUNT: $(SNOWFLAKE_ACCOUNT)
        SNOWFLAKE_USER: $(SNOWFLAKE_USER)
        SNOWFLAKE_PW: $(SNOWFLAKE_PW)
        SNOWFLAKE_DATABASE: $(SNOWFLAKE_DATABASE)
        SNOWFLAKE_SCHEMA: $(SNOWFLAKE_SCHEMA)
        SNOWFLAKE_WAREHOUSE: $(SNOWFLAKE_WAREHOUSE)
        SNOWFLAKE_ROLE: $(SNOWFLAKE_ROLE)
        # Redshift credentials
        REDSHIFT_USERNAME: $(REDSHIFT_USERNAME)
        REDSHIFT_PASSWORD: $(REDSHIFT_PASSWORD)
        REDSHIFT_HOST: $(REDSHIFT_HOST)
        REDSHIFT_PORT: $(REDSHIFT_PORT)
        REDSHIFT_DATABASE: $(REDSHIFT_DATABASE)
        REDSHIFT_SSLMODE: $(REDSHIFT_SSLMODE)
        # AWS credentials
        AWS_ACCESS_KEY_ID: $(AWS_ACCESS_KEY_ID)
        AWS_SECRET_ACCESS_KEY: $(AWS_SECRET_ACCESS_KEY)
        AWS_DEFAULT_REGION: $(AWS_DEFAULT_REGION)
        ATHENA_DB_NAME: $(ATHENA_DB_NAME)
        ATHENA_STAGING_S3: $(ATHENA_STAGING_S3)
        ATHENA_DATA_BUCKET: $(ATHENA_DATA_BUCKET)
        ATHENA_TEN_TRIPS_DB_NAME: $(ATHENA_TEN_TRIPS_DB_NAME)
        # GCP credentials (key file path comes from the gcp_authkey step)
        GOOGLE_APPLICATION_CREDENTIALS: $(gcp_authkey.secureFilePath)
        GE_TEST_GCP_PROJECT: $(GE_TEST_GCP_PROJECT)
        GE_TEST_BIGQUERY_DATASET: $(GE_TEST_BIGQUERY_DATASET)
        # Azure credentials
        AZURE_CREDENTIAL: $(AZURE_CREDENTIAL)
        AZURE_ACCESS_KEY: $(AZURE_ACCESS_KEY)