refactor(airflow): airflow system load balancer using microservices architecture #193

Draft · wants to merge 6 commits into base: main
5 changes: 3 additions & 2 deletions .env.tpl
@@ -3,8 +3,9 @@ _AIRFLOW_WWW_USER_CREATE=${_AIRFLOW_WWW_USER_CREATE:-True}
_AIRFLOW_WWW_USER_USERNAME=${_AIRFLOW_WWW_USER_USERNAME}
_AIRFLOW_WWW_USER_PASSWORD=${_AIRFLOW_WWW_USER_PASSWORD}
_AIRFLOW_WWW_USER_EMAIL=${_AIRFLOW_WWW_USER_EMAIL}
_AIRFLOW_WWW_USER_FIRST_NAME=${_AIRFLOW_WWW_USER_FIRST_NAME}
_AIRFLOW_WWW_USER_LAST_NAME=${_AIRFLOW_WWW_USER_LAST_NAME}
_AIRFLOW_WWW_USER_FIRSTNAME=${_AIRFLOW_WWW_USER_FIRSTNAME}
_AIRFLOW_WWW_USER_LASTNAME=${_AIRFLOW_WWW_USER_LASTNAME}
_AIRFLOW_WWW_USER_ROLE=${_AIRFLOW_WWW_USER_ROLE}
_PIP_ADDITIONAL_REQUIREMENTS=${_PIP_ADDITIONAL_REQUIREMENTS}
AIRFLOW__API__AUTH_BACKENDS=${AIRFLOW__API__AUTH_BACKENDS:-"airflow.api.auth.backend.basic_auth"}
AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION=${AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION}
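For context (not stated in the diff itself): Airflow's stock image entrypoint reads `_AIRFLOW_WWW_USER_FIRSTNAME`, `_AIRFLOW_WWW_USER_LASTNAME`, and `_AIRFLOW_WWW_USER_ROLE` when it bootstraps the initial web user, which is presumably why the template keys drop the extra underscore and gain a role entry. A minimal sketch of how these values are forwarded into a service environment, mirroring the `x-airflow-common` block further down in this PR:

```yaml
# Sketch only: the renamed bootstrap variables forwarded from the project .env
# (via compose variable substitution) into a service's environment.
services:
  airflow_startup:
    environment:
      _AIRFLOW_WWW_USER_CREATE: 'true'
      _AIRFLOW_WWW_USER_FIRSTNAME: ${_AIRFLOW_WWW_USER_FIRSTNAME}
      _AIRFLOW_WWW_USER_LASTNAME: ${_AIRFLOW_WWW_USER_LASTNAME}
      _AIRFLOW_WWW_USER_ROLE: ${_AIRFLOW_WWW_USER_ROLE}
```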
3 changes: 1 addition & 2 deletions containers/airflow/Dockerfile
@@ -110,5 +110,4 @@ RUN sudo mkdir -p /opt/scripts /sources /opt/airflow \
&& sudo touch /var/log/colombia_fetch.log \
&& sudo chown -R airflow /var/log/*

ENTRYPOINT [ "/opt/entrypoint.sh" ]
CMD /opt/scripts/startup.sh
ENTRYPOINT [ "/entrypoint.sh" ]
230 changes: 167 additions & 63 deletions containers/compose-base.yaml
@@ -1,5 +1,47 @@
version: '3.4'

x-airflow-common:
&airflow-common
build:
context: ..
dockerfile: containers/airflow/Dockerfile
args:
UID: ${HOST_UID}
GID: ${HOST_GID}
environment:
&airflow-common-env
AIRFLOW_HOME: /opt/airflow
AIRFLOW__SCHEDULER__ENABLE_HEALTH_CHECK: 'true'
HOST_UID: ${HOST_UID}
HOST_GID: ${HOST_GID}
_AIRFLOW_DB_UPGRADE: ${_AIRFLOW_DB_UPGRADE}
_AIRFLOW_WWW_USER_CREATE: ${_AIRFLOW_WWW_USER_CREATE}
_AIRFLOW_WWW_USER_EMAIL: ${_AIRFLOW_WWW_USER_EMAIL}
_AIRFLOW_WWW_USER_FIRSTNAME: ${_AIRFLOW_WWW_USER_FIRSTNAME}
_AIRFLOW_WWW_USER_LASTNAME: ${_AIRFLOW_WWW_USER_LASTNAME}
_AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD}
_AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME}
_AIRFLOW_WWW_USER_ROLE: ${_AIRFLOW_WWW_USER_ROLE}
AIRFLOW_FILES_PATH_DIR_HOST: ${AIRFLOW_FILES_PATH_DIR_HOST}
AIRFLOW_PORT: ${AIRFLOW_PORT}
AIRFLOW__API__AUTH_BACKENDS: ${AIRFLOW__API__AUTH_BACKENDS}
AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION: ${AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION}
AIRFLOW__CORE__FERNET_KEY: ${AIRFLOW__CORE__FERNET_KEY}
AIRFLOW__DATABASE__SQL_ALCHEMY_CONN: ${AIRFLOW__DATABASE__SQL_ALCHEMY_CONN}
AIRFLOW__WEBSERVER__SECRET_KEY: ${AIRFLOW__WEBSERVER__SECRET_KEY}
volumes:
- ${AIRFLOW_FILES_PATH_DIR_HOST}/dags:${AIRFLOW_HOME}/dags
- ${AIRFLOW_FILES_PATH_DIR_HOST}/logs:${AIRFLOW_HOME}/logs
- ${AIRFLOW_FILES_PATH_DIR_HOST}/plugins:${AIRFLOW_HOME}/plugins
user: "${HOST_GID}:0"
depends_on:
&airflow-common-depends-on
redis:
condition: service_healthy
postgres:
condition: service_healthy


services:
superset:
platform: linux/amd64
@@ -22,71 +64,14 @@ services:
interval: 60s
timeout: 10s
retries: 5
start_period: 40s # requires docker-compose 3.4
start_period: 40s
volumes:
- ..:/opt/EpiGraphHub
- ${SUPERSET_DB_PATH_DIR_HOST}:/opt/data/superset/
ports:
- ${SUPERSET_PORT}:8088
depends_on:
- redis
- flower

airflow:
platform: linux/amd64
hostname: airflow
restart: unless-stopped
env_file:
- ../.env
build:
context: ..
dockerfile: containers/airflow/Dockerfile
args:
# note: if you want to use a specific UID and GID, ensure to add it to
# the .env file
- UID=${HOST_UID:-1000}
- GID=${HOST_GID:-1000}
environment:
AIRFLOW_UID: "${HOST_UID:-1000}"
user: "epigraphhub:epigraphhub"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
interval: 30s
timeout: 10s
retries: 10
start_period: 40s # requires docker-compose 3.4
volumes:
- ..:/opt/EpiGraphHub
- $HOME/.config:/home/epigraphhub/.config
- ${SUPERSET_DB_PATH_DIR_HOST}:/opt/data/superset/
- ${AIRFLOW_FILES_PATH_DIR_HOST}/logs:/opt/airflow/logs
- ${AIRFLOW_FILES_PATH_DIR_HOST}/plugins:/opt/airflow/plugins
- ./airflow/dags:/opt/airflow/dags
- ~/.config:/home/epigraphhub/.config
ports:
- ${AIRFLOW_PORT}:8080
depends_on:
- redis
- flower
- minio

redis:
platform: linux/amd64
image: redis:alpine
hostname: redis
restart: unless-stopped
# environment:
# DISABLE_COMMANDS: "FLUSHDB,FLUSHALL,CONFIG"
# ALLOW_EMPTY_PASSWORD: "no"
# REDIS_PASSWORD: "${REDIS_PASSWORD}"
healthcheck:
test: ["CMD", "redis-cli","ping"]
interval: 60s
timeout: 5s
retries: 5
start_period: 30s # requires docker-compose 3.4
volumes:
- redis:/data

postgres:
platform: linux/amd64
@@ -120,6 +105,24 @@ services:
retries: 10
command: -p ${POSTGRES_PORT}

redis:
platform: linux/amd64
image: redis:alpine
hostname: redis
restart: unless-stopped
# environment:
# DISABLE_COMMANDS: "FLUSHDB,FLUSHALL,CONFIG"
# ALLOW_EMPTY_PASSWORD: "no"
# REDIS_PASSWORD: "${REDIS_PASSWORD}"
healthcheck:
test: ["CMD", "redis-cli","ping"]
interval: 60s
timeout: 5s
retries: 5
start_period: 30s
volumes:
- redis:/data

flower:
platform: linux/amd64
image: mher/flower:0.9.7
@@ -132,8 +135,7 @@ services:
"flower",
"--broker=redis://redis:6379/0",
"--port=8888",
"--basic_auth=admin:${FLOWER_PASSWORD}"
]
"--basic_auth=admin:${FLOWER_PASSWORD}"]
ports:
- ${FLOWER_PORT}:8888
healthcheck:
@@ -153,8 +155,7 @@ services:
"server",
"/data",
"--console-address",
":9090"
]
":9090"]
ports:
- ${MINIO_PORT_1}:9000
- ${MINIO_PORT_2}:9090
@@ -165,8 +166,111 @@ services:
interval: 60s
timeout: 10s
retries: 5
start_period: 40s # requires docker-compose 3.4
start_period: 40s

airflow_webserver:
<<: *airflow-common
command: webserver
image: eph_airflow
hostname: webserver
container_name: airflow_webserver
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:${AIRFLOW_PORT}/health"]
interval: 30s
timeout: 10s
retries: 10
start_period: 40s
ports:
- ${AIRFLOW_PORT}:8080
depends_on:
<<: *airflow-common-depends-on
airflow_startup:
condition: service_completed_successfully

airflow_scheduler:
<<: *airflow-common
command: scheduler
image: eph_airflow
hostname: scheduler
container_name: eph_airflow_scheduler
healthcheck:
test: ["CMD", "curl", "--fail", "http://localhost:8974/health"]
interval: 30s
timeout: 10s
retries: 5
start_period: 30s
restart: always
depends_on:
<<: *airflow-common-depends-on
airflow_startup:
condition: service_completed_successfully

airflow_worker:
<<: *airflow-common
command: celery worker
image: eph_airflow
hostname: worker
container_name: eph_airflow_worker
healthcheck:
test:
- "CMD-SHELL"
- 'celery --app airflow.executors.celery_executor.app inspect ping -d "celery@$${HOSTNAME}"'
interval: 30s
timeout: 10s
retries: 5
start_period: 30s
environment:
<<: *airflow-common-env
# Required to handle warm shutdown of the celery workers properly
# See https://airflow.apache.org/docs/docker-stack/entrypoint.html#signal-propagation
DUMB_INIT_SETSID: "0"
restart: always
depends_on:
<<: *airflow-common-depends-on
airflow_startup:
condition: service_completed_successfully

airflow_triggerer:
<<: *airflow-common
command: triggerer
image: eph_airflow
hostname: triggerer
container_name: eph_airflow_triggerer
healthcheck:
test: ["CMD-SHELL", 'airflow jobs check --job-type TriggererJob --hostname "$${HOSTNAME}"']
interval: 30s
timeout: 10s
retries: 5
start_period: 30s
restart: always
depends_on:
<<: *airflow-common-depends-on
airflow_startup:
condition: service_completed_successfully

airflow_startup:
<<: *airflow-common
entrypoint: /bin/bash
image: eph_airflow
container_name: eph_airflow_startup
# yamllint disable rule:line-length
command:
- -c
- |
mkdir -p /sources/logs /sources/dags /sources/plugins
chown -R "${HOST_UID}:0" /sources/{logs,dags,plugins}
exec /entrypoint airflow version
# yamllint enable rule:line-length
environment:
<<: *airflow-common-env
_AIRFLOW_DB_UPGRADE: 'true'
_AIRFLOW_WWW_USER_CREATE: 'true'
_AIRFLOW_WWW_USER_USERNAME: ${_AIRFLOW_WWW_USER_USERNAME}
_AIRFLOW_WWW_USER_PASSWORD: ${_AIRFLOW_WWW_USER_PASSWORD}
user: "0:0"
volumes:
- ${AIRFLOW_FILES_PATH_DIR_HOST}:/sources

volumes:
redis:
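A note on the `x-airflow-common` pattern that carries most of this refactor: the `&airflow-common` anchor plus the `<<:` merge key let every Airflow role (webserver, scheduler, worker, triggerer, startup) reuse one build/environment/volumes block and override only what differs. A reduced sketch of the same mechanism (values abbreviated from the file above):

```yaml
# Reduced sketch of the anchor/merge-key layout used in compose-base.yaml.
x-airflow-common: &airflow-common        # shared block, not a service itself
  build:
    context: ..
    dockerfile: containers/airflow/Dockerfile
  environment: &airflow-common-env
    AIRFLOW_HOME: /opt/airflow

services:
  airflow_webserver:
    <<: *airflow-common                  # inherits build/environment/volumes
    command: webserver
  airflow_worker:
    <<: *airflow-common
    command: celery worker
    environment:
      <<: *airflow-common-env            # re-merge the shared env, then extend it
      DUMB_INIT_SETSID: "0"
```

Running `docker compose config` renders the merged result, which is a quick way to confirm that each service ends up with the shared settings plus its own overrides.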
6 changes: 0 additions & 6 deletions containers/compose-dev.yaml
@@ -6,9 +6,3 @@ services:
- postgres
- redis
- flower

airflow:
depends_on:
- postgres
- redis
- flower
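The dev override drops its `airflow:` entry because that monolithic service no longer exists in `compose-base.yaml`. Any dev-specific tweaks would now be declared per role instead; a hypothetical sketch (not part of this PR), mirroring the `./airflow/dags` bind mount the removed monolithic service used:

```yaml
# Hypothetical dev-only override for the split services (illustrative, not in this PR).
services:
  airflow_webserver:
    volumes:
      - ./airflow/dags:/opt/airflow/dags   # live-edit DAGs during development
  airflow_scheduler:
    volumes:
      - ./airflow/dags:/opt/airflow/dags
```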