diff --git a/.containers-sugar.yaml b/.containers-sugar.yaml deleted file mode 100644 index dfe90fc..0000000 --- a/.containers-sugar.yaml +++ /dev/null @@ -1,18 +0,0 @@ -version: 1.9.0 -compose-app: docker-compose -env-file: .env - -service-groups: - - name: airflow - project-name: egh-airflow - compose-path: - - containers/compose-airflow.yaml - env-file: containers/airflow/.env - services: - default: webserver,scheduler,worker,triggerer - available: - - name: webserver - - name: scheduler - - name: worker - - name: triggerer - - name: airflow-cli diff --git a/.env.tpl b/.env.tpl index 23c6945..14fe9ef 100644 --- a/.env.tpl +++ b/.env.tpl @@ -11,7 +11,7 @@ AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_CREATION=${AIRFLOW__CORE__DAGS_ARE_PAUSED_AT_C AIRFLOW__CORE__FERNET_KEY=${AIRFLOW__CORE__FERNET_KEY} AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=${AIRFLOW__DATABASE__SQL_ALCHEMY_CONN} AIRFLOW__WEBSERVER__SECRET_KEY=${AIRFLOW__WEBSERVER__SECRET_KEY} -AIRFLOW__SMTP__SMTP_HOST=${AIRFLOW__SMTP__SMTP_HOST"} +AIRFLOW__SMTP__SMTP_HOST=${AIRFLOW__SMTP__SMTP_HOST} AIRFLOW__SMTP__SMTP_STARTTLS=${AIRFLOW__SMTP__SMTP_STARTTLS:-False} AIRFLOW__SMTP__SMTP_SSL=${AIRFLOW__SMTP__SMTP_SSL:-False} AIRFLOW__SMTP__SMTP_USER=${AIRFLOW__SMTP__SMTP_USER} diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 7472749..b4e6196 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -67,8 +67,28 @@ env: SUPERSET_DB_DATABASE: postgres jobs: - main: + check-branch: + runs-on: ubuntu-latest + + timeout-minutes: 2 + concurrency: + group: ci-check-branch-${{ github.ref }} + cancel-in-progress: true + + steps: + - uses: actions/checkout@v4 + if: ${{ github.event_name == 'pull_request' }} + with: + fetch-depth: 0 + + - name: Check if the PR's branch is updated + if: ${{ github.event_name == 'pull_request' }} + uses: osl-incubator/gh-check-pr-is-updated@1.0.0 + with: + remote_branch: origin/main + pr_sha: ${{ github.event.pull_request.head.sha }} + main: runs-on: ubuntu-latest timeout-minutes: 55 defaults: @@ -79,7 +99,7 @@ jobs: cancel-in-progress: true steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Create environment variables file run: | @@ -90,19 +110,17 @@ jobs: envsubst < .env.tpl > .env mkdir /home/runner/work/psql_data - - name: setup - run: | - make prepare-host - - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: miniconda-version: "latest" - mamba-version: "*" environment-file: conda/base.yaml channels: conda-forge,r,nodefaults activate-environment: epigraphhub - use-mamba: true - miniforge-variant: Mambaforge + auto-update-conda: true + conda-solver: libmamba + + - name: setup + run: makim host.prepare - name: configure epigraphhub credentials run: | @@ -110,19 +128,19 @@ jobs: --db-uri "${POSTGRES_EPIGRAPH_USER}:${POSTGRES_EPIGRAPH_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_EPIGRAPH_DB}" - name: configure aws s3 credentials to minio - run: make dev-create-s3-credential + run: makim aws.create-s3-credential - name: build docker containers - run: make containers-build + run: sugar build - name: start services run: | - make containers-start-services SERVICES="superset airflow postgres" + sugar ext restart --options -d - name: wait for the services are properly working run: | - docker ps - make containers-wait-all + sugar ps + makim containers.wait-all - name: run epigraphhub tests run: | @@ -146,7 +164,7 @@ jobs: - name: Generate logs if: ${{ failure() }} run: | - make containers-logs ARGS="--tail 1000" SERVICES="" > /tmp/containers-services.log + sugar logs --options --tail 1000 > /tmp/containers-services.log - name: Archive log artifacts uses: actions/upload-artifact@v3 diff --git a/.makim.yaml b/.makim.yaml new file mode 100644 index 0000000..bd32150 --- /dev/null +++ b/.makim.yaml @@ -0,0 +1,126 @@ +version: 1.0 +env-file: .env +shell: bash + +groups: + host: + tasks: + prepare: + help: Prepare host environment + run: bash scripts/prepare-host.sh + + containers: + tasks: + pull: + help: Pull container images + run: | + set -e + docker-compose --env-file=.env --project-name egh-${ENV} --file containers/compose-base.yaml --file containers/compose-${ENV}.yaml pull ${SERVICES} + + build: + help: Build containers + run: | + set -e + docker-compose --env-file=.env --project-name egh-${ENV} --file containers/compose-base.yaml --file containers/compose-${ENV}.yaml build ${SERVICES} + + start: + help: Start containers + run: docker-compose --env-file=.env --project-name egh-${ENV} --file containers/compose-base.yaml --file containers/compose-${ENV}.yaml up --remove-orphans -d ${SERVICES} + + stop: + help: Stop containers + run: docker-compose --env-file=.env --project-name egh-${ENV} --file containers/compose-base.yaml --file containers/compose-${ENV}.yaml stop ${ARGS} ${SERVICES} + + remove: + help: Remove containers + run: docker-compose --env-file=.env --project-name egh-${ENV} --file containers/compose-base.yaml --file containers/compose-${ENV}.yaml rm ${ARGS} ${SERVICES} + + restart: + help: Restart containers + dependencies: + - task: containers.stop + - task: containers.start + + logs: + help: Show container logs + run: docker-compose --env-file=.env --project-name egh-${ENV} --file containers/compose-base.yaml --file containers/compose-${ENV}.yaml logs ${ARGS} ${SERVICES} + + logs-follow: + help: Follow container logs + run: docker-compose --env-file=.env --project-name egh-${ENV} --file containers/compose-base.yaml --file containers/compose-${ENV}.yaml logs --follow ${ARGS} ${SERVICES} + + exec: + help: Execute command in a running container + run: docker-compose --env-file=.env --project-name egh-${ENV} --file containers/compose-base.yaml --file containers/compose-${ENV}.yaml exec ${ARGS} ${SERVICE} ${CMD} + + console: + help: Open console in a container + run: docker-compose --env-file=.env --project-name egh-${ENV} --file containers/compose-base.yaml --file containers/compose-${ENV}.yaml exec ${ARGS} ${SERVICE} bash + + down: + help: Take down containers and volumes + run: docker-compose --env-file=.env --project-name egh-${ENV} --file containers/compose-base.yaml --file containers/compose-${ENV}.yaml down --volumes --remove-orphans + + reset-storage: + help: Reset container storage + run: rm -rf ~/.local/share/containers/ + + wait: + help: Wait until container is healthy + args: + service: + help: the name of the service + type: str + required: true + timeout: + help: the time (in seconds) for waiting the service + type: int + default: 90 + required: false + run: | + timeout ${{ args.timeout }} ./containers/healthcheck.sh ${{ args.service }} + + wait-all: + help: Wait until container is healthy + args: + timeout: + help: the time (in seconds) for waiting the service + type: int + default: 90 + required: false + dependencies: + - task: containers.wait + args: + timeout: ${{ args.timeout }} + service: postgres + - task: containers.wait + args: + timeout: ${{ args.timeout }} + service: minio + - task: containers.wait + args: + timeout: ${{ args.timeout }} + service: redis + - task: containers.wait + args: + timeout: ${{ args.timeout }} + service: flower + - task: containers.wait + args: + timeout: ${{ args.timeout }} + service: superset + + aws: + tasks: + create-s3-credential: + help: Create S3 credentials for development + run: ./scripts/dev/create-s3-credential.sh + + conda: + tasks: + lock: + help: Generate lock files for Conda environments + run: | + cd conda + rm -f conda-*.lock + conda-lock --conda `which mamba` -f prod.yaml -p osx-64 -p linux-64 --kind explicit diff --git a/.sugar.yaml b/.sugar.yaml new file mode 100644 index 0000000..3c3f2d7 --- /dev/null +++ b/.sugar.yaml @@ -0,0 +1,53 @@ +version: 1.0 +compose-app: docker compose +env-file: .env + +defaults: + group: {{ env.ENV }} + +groups: + airflow: + project-name: egh-airflow + compose-path: + - containers/compose-airflow.yaml + env-file: containers/airflow/.env + services: + default: webserver,scheduler,worker,triggerer + available: + - name: webserver + - name: scheduler + - name: worker + - name: triggerer + - name: airflow-cli + + dev: + project-name: egh-dev + env-file: .env + compose-path: + - containers/compose-base.yaml + - containers/compose-dev.yaml + services: + default: superset,flower,minio,postgres + available: + - name: superset + # - name: airflow + - name: postgres + - name: minio + - name: redis + - name: flower + + prod: + project-name: egh-prod + env-file: .env + compose-path: + - containers/compose-base.yaml + - containers/compose-prod.yaml + services: + default: superset,flower,minio,postgres + available: + - name: superset + # - name: airflow + - name: postgres + - name: minio + - name: redis + - name: flower diff --git a/conda/base.yaml b/conda/base.yaml index e29e01a..90e350b 100644 --- a/conda/base.yaml +++ b/conda/base.yaml @@ -11,7 +11,8 @@ dependencies: - webdriver-manager - pip - pip: - - containers-sugar + - containers-sugar==1.11.4 + - makim==1.15.2 - compose-go - - epigraphhub + - epigraphhub>=2.1.1 - "selenium<=4.0" diff --git a/conda/pip.txt b/conda/pip.txt index d7a4e65..6475026 100644 --- a/conda/pip.txt +++ b/conda/pip.txt @@ -3,3 +3,5 @@ pangres>=4.1 sodapy shillelagh sqlalchemy-sybase +# issue: ImportError: cannot import name 'url_quote' from 'werkzeug.urls' +Werkzeug>=2.2,<3.0 diff --git a/containers/airflow/Dockerfile b/containers/airflow/Dockerfile old mode 100644 new mode 100755 index 96068ff..d8fd64a --- a/containers/airflow/Dockerfile +++ b/containers/airflow/Dockerfile @@ -57,15 +57,6 @@ RUN curl https://www.python.org/ftp/python/3.10.8/Python-3.10.8.tgz -o /tmp/Pyth && echo "alias python3.10=/opt/py310/bin/python3.10" >> /home/airflow/.bashrc \ && rm -rf /tmp/Python-3.10* -RUN curl https://www.python.org/ftp/python/3.11.6/Python-3.11.6.tgz -o /tmp/Python-3.11.6.tgz \ - && tar -zxvf /tmp/Python-3.11.6.tgz -C /tmp \ - && cd /tmp/Python-3.11.6 \ - && ./configure --prefix=/opt/py311 --enable-optimizations \ - && make install \ - && chown -R airflow /opt/py311 \ - && echo "alias python3.11=/opt/py311/bin/python3.11" >> /home/airflow/.bashrc \ - && rm -rf /tmp/Python-3.11* - COPY --chown=airflow containers/airflow/config/airflow.cfg ${AIRFLOW_HOME}/airflow.cfg COPY --chown=airflow containers/airflow/scripts/*.sh ${AIRFLOW_HOME}/scripts/ COPY --chown=airflow containers/airflow/scripts/entrypoint.sh /opt/entrypoint.sh @@ -81,21 +72,18 @@ ARG POSTGRES_EPIGRAPH_DB ENV DB_USER "${POSTGRES_EPIGRAPH_USER}:${POSTGRES_EPIGRAPH_PASSWORD}" ENV DB_URI "${DB_USER}@${POSTGRES_EPIGRAPH_HOST}:${POSTGRES_EPIGRAPH_PORT}/${POSTGRES_EPIGRAPH_DB}" +RUN python -c "assert '${DB_URI}' != ':@:/', 'DBI_URI ${DB_URI} is incorrect.'" + RUN /usr/local/bin/python -m virtualenv /opt/envs/py310 --python="/opt/py310/bin/python3.10" \ && sed -i "s/include-system-site-packages = false/include-system-site-packages = true/" /opt/envs/py310/pyvenv.cfg \ && source /opt/envs/py310/bin/activate \ && pip install "cython<3.0.0" \ && pip install --no-build-isolation "pyyaml<6.0" \ && pip install -r /opt/envs/epigraphhub.txt \ - && epigraphhub-config --name "epigraphhub" --db-uri "${DB_URI}" - -RUN /usr/local/bin/python -m virtualenv /opt/envs/py311 --python="/opt/py311/bin/python3.11" \ - && sed -i "s/include-system-site-packages = false/include-system-site-packages = true/" /opt/envs/py311/pyvenv.cfg \ - && source /opt/envs/py311/bin/activate \ - && pip install "cython<3.0.0" \ - && pip install --no-build-isolation "pyyaml<6.0" \ && pip install -r /opt/envs/pysus.txt +RUN epigraphhub-config --name "epigraphhub" --db-uri "${DB_URI}" + WORKDIR ${AIRFLOW_HOME} # ref: https://stackoverflow.com/questions/44331836/apt-get-install-tzdata-noninteractive diff --git a/containers/compose-base.yaml b/containers/compose-base.yaml index 975f0b7..f48cc47 100644 --- a/containers/compose-base.yaml +++ b/containers/compose-base.yaml @@ -46,9 +46,15 @@ services: # the .env file - UID=${HOST_UID:-1000} - GID=${HOST_GID:-1000} + - AIRFLOW_UID=${HOST_UID:-1000} + - POSTGRES_EPIGRAPH_HOST=${POSTGRES_EPIGRAPH_HOST} + - POSTGRES_EPIGRAPH_PORT=${POSTGRES_EPIGRAPH_PORT} + - POSTGRES_EPIGRAPH_USER=${POSTGRES_EPIGRAPH_USER} + - POSTGRES_EPIGRAPH_PASSWORD=${POSTGRES_EPIGRAPH_PASSWORD} + - POSTGRES_EPIGRAPH_DB=${POSTGRES_EPIGRAPH_DB} environment: AIRFLOW_UID: "${HOST_UID:-1000}" - user: "epigraphhub:epigraphhub" + user: "airflow" healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8080/health"] interval: 30s diff --git a/containers/healthcheck.sh b/containers/healthcheck.sh index 9fc6a28..a43353e 100755 --- a/containers/healthcheck.sh +++ b/containers/healthcheck.sh @@ -8,7 +8,7 @@ if [ -f ${PROJECT_DIR}/.env ]; then fi export CONTAINER_NAME=${1:-"superset"} -export CONTAINER_NAME="egh-${ENV:-dev}_${CONTAINER_NAME}_1" +export CONTAINER_NAME="egh-${ENV:-dev}-${CONTAINER_NAME}-1" echo "[II] Checking ${CONTAINER_NAME} ..." diff --git a/containers/superset/superset.sh b/containers/superset/superset.sh index 4e5bd95..98e3885 100755 --- a/containers/superset/superset.sh +++ b/containers/superset/superset.sh @@ -1,12 +1,14 @@ #!/usr/bin/env bash -set -ex +set -x export FLASK_APP=superset # Initialize the database superset db upgrade +set -e + if [[ $ENV = "dev" ]]; then echo "Running in development mode ..." superset fab create-admin \ diff --git a/containers/superset/superset_config.py b/containers/superset/superset_config.py index 5816b97..ad50e63 100644 --- a/containers/superset/superset_config.py +++ b/containers/superset/superset_config.py @@ -10,6 +10,9 @@ TALISMAN_ENABLED = None TALISMAN_CONFIG = {} +# github.com/apache/superset/discussions/24435#discussioncomment-6282883 +SESSION_PROTECTION = "basic" + SESSION_COOKIE_SAMESITE = "Lax" # Flask-WTF flag for CSRF WTF_CSRF_ENABLED = os.getenv("ENV") == "prod" @@ -110,7 +113,7 @@ class CeleryConfig: DB_USER = os.getenv("SUPERSET_DB_USER") DB_PASS = os.getenv("SUPERSET_DB_PASSWORD") DB_DATABASE = os.getenv("SUPERSET_DB_DATABASE") -else: +else: DB_USER = "dev_admin" DB_PASS = "admin" DB_DATABASE = "dev_superset"