-
Notifications
You must be signed in to change notification settings - Fork 0
/
airbods.yaml
451 lines (425 loc) · 12.2 KB
/
airbods.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
---
# Airbods deployment script
# Ansible playbook
# This playbook defines tasks to deploy the application stack.
# It installs several services and configures them.
#
# Table of contents:
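# * Install miscellaneous packages
# * Install PostgreSQL database
# * Install Redis database
# * Install RabbitMQ message broker
# * Install Apache Airflow (scheduler and workers)
# * Install pgAdmin
# * Install NGINX (reverse proxy for pgAdmin)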
# Runs against every host in the inventory, with privilege escalation.
- name: Install miscellaneous packages
  hosts: all
  become: true
  pre_tasks:
    # Refresh the APT package index, then install the listed utilities.
    - name: Install utilities
      apt:
        name:
          - ca-certificates
          - curl
          - wget
          - ncdu
          - htop
        update_cache: true
# Sets up PostgreSQL 12 on the "database" hosts: packages, server and
# authentication configuration, TLS certificate/key, and the databases and
# roles for Airbods and Apache Airflow.
- name: Install PostgreSQL database
  hosts: database
  become: true
  pre_tasks:
    - name: Install postgresql package
      apt:
        name:
          - postgresql-12
          - python3-pip
    - name: Create Airbods system user
      user:
        name: airbods
        system: true
    # Python library that will allow us to use Ansible PostgreSQL modules
    # Pre-requisite for Ansible PostgreSQL tasks
    - name: Install psycopg2
      pip:
        name:
          - psycopg2-binary
  tasks:
    - name: Install PostgreSQL auth config
      copy:
        src: pg_hba.conf
        dest: /etc/postgresql/12/main/pg_hba.conf
      notify:
        - Restart PostgreSQL
    # Operating-system account; the password is read from a local secrets
    # file on the controller and hashed before being handed to the module.
    - name: Create postgres user
      user:
        name: postgres
        password: "{{ lookup('file', 'secrets/postgres.txt') | password_hash('sha512') }}"
    - name: Install PostgreSQL configuration
      copy:
        src: postgresql.conf
        dest: /etc/postgresql/12/main/postgresql.conf
      notify:
        - Restart PostgreSQL
    # postgresql.conf ssl_cert_file
    # A replaced certificate triggers the restart handler, like the
    # configuration files above.
    - name: Install certificate
      copy:
        src: airbods_my_domain_com_cert.cer
        dest: /var/lib/postgresql/12/main/server.crt
        owner: postgres
        group: postgres
      notify:
        - Restart PostgreSQL
    # postgresql.conf ssl_key_file
    - name: Install private key
      copy:
        src: secrets/airbods_my_domain_com.key
        dest: /var/lib/postgresql/12/main/server.key
        owner: postgres
        group: postgres
        # Quoted so the mode is always read as an octal string
        mode: "0600"
      notify:
        - Restart PostgreSQL
    - name: Create postgres home directory
      file:
        path: /home/postgres
        state: directory
        owner: postgres
        group: postgres
        mode: "0755"
    # postgresql.conf ssl_passphrase_command
    - name: Install SSL key password
      # Don't show password
      no_log: true
      copy:
        content: "{{ lookup('file', 'secrets/database.txt') }}"
        dest: /home/postgres/ssl_key.txt
        owner: postgres
        group: postgres
        mode: "0600"
    # The database tasks below need a running server.
    - name: Start PostgreSQL service
      service:
        name: postgresql
        state: started
    - name: Create Airbods database user
      become_user: postgres
      postgresql_user:
        user: airbods
        password: "{{ lookup('file', 'secrets/database.txt') }}"
    # If this doesn't work, run:
    # sudo -u postgres psql -c "\password airbods"
    - name: Create Airbods database
      become_user: postgres
      postgresql_db:
        name: airbods
        owner: airbods
    # Runs every SQL file matched by database/*.sql against the airbods
    # database. The matched paths are sorted so the scripts always run in
    # the same (lexicographic) order, whatever order the glob returns.
    - name: Create Airbods database structure
      become_user: airbods
      postgresql_query:
        db: airbods
        query: "{{ lookup('file', item) }}"
        login_user: airbods
        login_password: "{{ lookup('file', 'secrets/database.txt') }}"
      loop: "{{ query('fileglob', 'database/*.sql') | sort }}"
    # Role without login rights that is granted SELECT on the tables of
    # the public schema (next task).
    - name: Create researcher database role
      become_user: postgres
      postgresql_user:
        db: airbods
        user: researcher
        role_attr_flags: NOLOGIN
    - name: Set researcher permissions
      become_user: postgres
      postgresql_privs:
        role: researcher
        db: airbods
        schema: public
        privs: SELECT
        type: table
        objs: ALL_IN_SCHEMA
    # Apache Airflow credentials
    - name: Create airflow database user
      become_user: postgres
      postgresql_user:
        user: airflow
        password: "{{ lookup('file', 'secrets/airflow.txt') }}"
    - name: Create airflow database
      become_user: postgres
      postgresql_db:
        name: airflow
        owner: airflow
  handlers:
    - name: Restart PostgreSQL
      service:
        name: postgresql
        state: restarted
# Installs Redis on the "scheduler" hosts and rewrites its "bind" setting.
- name: Install Redis database
  hosts: scheduler
  become: true
  tasks:
    - name: Install Redis package
      apt:
        name: redis
    # Listen on localhost and network interface (see ifconfig)
    # Every line of redis.conf that starts with "bind" is replaced.
    # Set redis_bind_addresses (e.g. in the inventory) to override the
    # address list; when it is undefined the previous fixed value is used.
    - name: Redis listen externally
      replace:
        path: /etc/redis/redis.conf
        regexp: '^bind.*'
        replace: "bind {{ redis_bind_addresses | default('127.0.0.1 172.30.16.201') }}"
      notify:
        - Restart Redis
  handlers:
    - name: Restart Redis
      service:
        name: redis
        state: restarted
# Why You Should use Celery with RabbitMQ
# https://www.section.io/engineering-education/why-you-should-use-celery-with-rabbitmq/
#
# Installs RabbitMQ 3.x on the "scheduler" hosts, creates the "airflow"
# vhost and user plus an "admin" management user, and removes the "guest"
# user. All rabbitmq_* tasks address the node named in the "node" variable.
- name: Install RabbitMQ message broker
  hosts: scheduler
  become: true
  vars:
    node: rabbit@localhost
  tasks:
    - name: Install RabbitMQ package
      apt:
        name: rabbitmq-server=3.*
    # https://stackoverflow.com/a/45475646
    - name: RabbitMQ set HOSTNAME
      lineinfile:
        path: /etc/rabbitmq/rabbitmq-env.conf
        regexp: '^HOSTNAME='
        line: HOSTNAME=localhost
      notify:
        - Restart RabbitMQ
    # Run the pending restart now rather than at the end of the play, so
    # the HOSTNAME change is in effect before the tasks below contact
    # "{{ node }}".
    # NOTE(review): assumes the broker only picks up rabbitmq-env.conf on
    # restart - confirm.
    - name: Apply RabbitMQ HOSTNAME change
      meta: flush_handlers
    - name: Start RabbitMQ service
      service:
        name: rabbitmq-server
        state: started
    # new_only keeps any other already-enabled plugins enabled, matching
    # what "rabbitmq-plugins enable rabbitmq_management" does.
    - name: Enable RabbitMQ management console
      rabbitmq_plugin:
        names: rabbitmq_management
        state: enabled
        new_only: true
    - name: Create RabbitMQ vhost
      rabbitmq_vhost:
        name: airflow
        node: "{{ node }}"
    # This task seems to be problematic with older versions of Ansible
    # https://github.com/ansible-collections/community.rabbitmq/issues/52
    - name: Create RabbitMQ user
      rabbitmq_user:
        node: "{{ node }}"
        user: airflow
        password: "{{ lookup('file', 'secrets/rabbitmq.txt') }}"
        permissions:
          - vhost: airflow
            read_priv: '.*'
            write_priv: '.*'
            configure_priv: '.*'
    - name: Create RabbitMQ management user
      rabbitmq_user:
        node: "{{ node }}"
        user: admin
        password: "{{ lookup('file', 'secrets/rabbitmq_admin.txt') }}"
        tags: administrator
        # assign full access control
        permissions:
          - vhost: /
            configure_priv: '.*'
            read_priv: '.*'
            write_priv: '.*'
    - name: Delete RabbitMQ guest user
      rabbitmq_user:
        node: "{{ node }}"
        user: guest
        state: absent
  handlers:
    - name: Restart RabbitMQ
      systemd:
        name: rabbitmq-server
        state: restarted
# Applies the "airflow" role to the "scheduler" hosts with the scheduler,
# webserver and flower services selected, then prepares the Airflow
# metadata database, the admin login and the "datacake" pool.
# airflow_bin_path is not defined in this play.
# NOTE(review): presumably it is provided by the airflow role - confirm.
- name: Install Airflow scheduler
  hosts: scheduler
  become: true
  roles:
    - airflow
  vars:
    service:
      - scheduler
      - webserver
      - flower
  tasks:
    - name: Initialise Airflow database
      become_user: airflow
      # "initdb is also idempotent, so this can be run as often as you
      # choose to, without needing to worry about the database changing."
      # https://stackoverflow.com/a/59560731
      command:
        cmd: "{{ airflow_bin_path }}/airflow db init"
    - name: Update database schema
      become_user: airflow
      command:
        cmd: "{{ airflow_bin_path }}/airflow db upgrade"
    - name: Create Airflow admin user
      # Don't show password
      no_log: true
      become_user: airflow
      command:
        argv:
          - "{{ airflow_bin_path }}/airflow"
          - "users"
          - "create"
          - "--username"
          - "airflow"
          - "--password"
          - "{{ lookup('file', 'secrets/airflow.txt') }}"
          - "--firstname"
          - "admin"
          - "--lastname"
          - "user"
          - "--role"
          - "Admin"
          - "--email"
          # The --email flag previously had no value after it. Set
          # airflow_admin_email to the real address; the default below is
          # only a placeholder.
          - "{{ airflow_admin_email | default('airflow@example.com') }}"
    - name: Create Datacake pool
      # https://airflow.apache.org/docs/apache-airflow/stable/concepts/pools.html
      become_user: airflow
      command:
        # https://airflow.apache.org/docs/apache-airflow/stable/cli-and-env-variables-ref.html#pools
        # Arguments are: pool name, number of slots, description.
        cmd: "{{ airflow_bin_path }}/airflow pools set datacake 8 Datacake"
# Applies the "airflow" role to the "workers" hosts with only the worker
# service selected, then creates the raw data directory.
- name: Install Airflow worker
  hosts: workers
  become: true
  vars:
    service:
      - worker
  roles:
    - airflow
  tasks:
    # Directory owned by the airflow user and group.
    - name: Create raw data directory
      file:
        state: directory
        path: /mnt/airbods/raw_data
        mode: "0755"
        owner: airflow
        group: airflow
# pgAdmin (PostgreSQL database administration browser application)
# Installs pgAdmin 4 on the "database" hosts and runs it under a Gunicorn
# systemd service instead of Apache HTTPD.
- name: Install pgAdmin
  # pgAdmin docs
  # https://www.pgadmin.org/docs/pgadmin4/latest
  hosts: database
  become: true
  tasks:
    # pgAdmin 4 (APT) download and installation instructions
    # https://www.pgadmin.org/download/pgadmin-4-apt/
    - name: Install pgAdmin APT public key
      apt_key:
        url: "https://www.pgadmin.org/static/packages_pgadmin_org.pub"
    - name: Install pgAdmin APT respository
      apt_repository:
        # Use lsb_release -cs to get the Ubuntu version
        repo: "deb https://ftp.postgresql.org/pub/pgadmin/pgadmin4/apt/focal pgadmin4 main"
    - name: Install pgAdmin
      apt:
        update_cache: true
        name:
          # Server mode (web app) without Apache HTTPD
          - pgadmin4
          # https://docs.gunicorn.org/en/latest/install.html#ubuntu
          - gunicorn
    # We don't need Apache HTTPD
    - name: Stop Apache2
      service:
        name: apache2
        state: stopped
        enabled: false
    # Configure Gunicorn
    - name: Create Gunicorn config directory
      file:
        state: directory
        path: /etc/gunicorn.d/pgadmin4
    - name: Grant gunicorn priviledged ports
      # No shell features are needed, so the binary is executed directly.
      command:
        # Grant privileges to the Python interpreter used by Gunicorn
        # sudo getcap /usr/bin/python3.8
        cmd: "setcap cap_net_bind_service=+eip /usr/bin/python3.8"
    - name: Install Gunicorn service
      copy:
        src: gunicorn/gunicorn.service
        dest: /etc/systemd/system/gunicorn.service
      notify:
        - Restart Gunicorn
    - name: Install Gunicorn configuration file
      copy:
        src: gunicorn/gunicorn.conf.py
        dest: /etc/gunicorn.d/pgadmin4/gunicorn.conf.py
      notify:
        - Restart Gunicorn
    # A changed environment file restarts the service, like the unit and
    # configuration files above.
    - name: Install Gunicorn environment variables
      copy:
        src: gunicorn/gunicorn.env
        dest: /etc/gunicorn.d/pgadmin4/gunicorn.env
      notify:
        - Restart Gunicorn
    - name: Create pgAdmin role
      become_user: postgres
      postgresql_user:
        user: pgadmin
        password: "{{ lookup('file', 'secrets/pgadmin.txt') }}"
        role_attr_flags: LOGIN,CREATEROLE
    # daemon_reload is an option of the systemd module, so that module is
    # used explicitly here and in the handler.
    - name: Start Gunicorn service
      systemd:
        name: gunicorn
        state: started
        enabled: true
        daemon_reload: true
  handlers:
    - name: Restart Gunicorn
      systemd:
        name: gunicorn
        state: restarted
        daemon_reload: true
# NGINX is used as a reverse proxy to handle requests to the PgAdmin app
# https://www.pgadmin.org/docs/pgadmin4/6.5/server_deployment.html#nginx-configuration-with-gunicorn
# https://docs.gunicorn.org/en/stable/deploy.html
#
# Installs NGINX on the "database" hosts, enables the Gunicorn site in place
# of the default site, and installs the SSL certificate and private key.
# Every task that changes what NGINX serves notifies the restart handler.
- name: Install NGINX
  # pgAdmin docs
  # https://www.pgadmin.org/docs/pgadmin4/latest
  hosts: database
  become: true
  vars:
    site_path: /etc/nginx/sites-available/gunicorn.conf
  tasks:
    - name: Install NGINX
      apt:
        name: nginx
    - name: Install NGINX site configuration
      copy:
        src: gunicorn/gunicorn.conf
        dest: "{{ site_path }}"
      notify:
        - Restart NGINX
    # Symlink from sites-enabled to the file installed above.
    - name: Enable NGINX site
      file:
        state: link
        src: "{{ site_path }}"
        path: /etc/nginx/sites-enabled/gunicorn.conf
      notify:
        - Restart NGINX
    - name: Disable default site
      file:
        state: absent
        path: /etc/nginx/sites-enabled/default
      notify:
        - Restart NGINX
    # Encryption
    - name: Make www-data dir
      file:
        state: directory
        path: /home/www-data
        owner: www-data
        group: www-data
        mode: "0700"
    - name: Install SSL certificate
      copy:
        src: airbods_my_domain_com_cert.cer
        dest: /home/www-data/airbods_my_domain_com_cert.cer
      notify:
        - Restart NGINX
    - name: Install SSL private key
      copy:
        src: secrets/airbods_my_domain_com.key
        dest: /home/www-data/airbods_my_domain_com.key
        mode: "0600"
        owner: www-data
        group: ssl-cert
      notify:
        - Restart NGINX
  handlers:
    # Restarts the nginx service. The handler previously restarted
    # gunicorn, so NGINX never picked up changes made by this play.
    - name: Restart NGINX
      service:
        name: nginx
        state: restarted
...