From bb3128b7c1bc7560c10ae1530978976906f25475 Mon Sep 17 00:00:00 2001 From: George Hickman Date: Mon, 6 Nov 2023 13:39:08 +0000 Subject: [PATCH 01/10] Explicitly find the metrics package The setuptools backend works with a single directly without configuration. However when we add more than one directory, which we're about to do, it needs to be configured to know where to find the code we want to package. --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index baeacc7b..546fcf37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,3 +75,7 @@ extend-ignore = [ [tool.ruff.isort] lines-after-imports = 2 + + +[tool.setuptools.packages.find] +include = ["metrics*"] From e4e6a8a851efdcebc55a2df2d5d7863a7636c82d Mon Sep 17 00:00:00 2001 From: George Hickman Date: Mon, 6 Nov 2023 13:40:55 +0000 Subject: [PATCH 02/10] Remove the project script for this tool We're not using this entrypoint, and it was incorrectly added with the add_job name, after copy/pasting from job-runner. --- pyproject.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 546fcf37..833155f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,9 +22,6 @@ dependencies = [ ] dynamic = ["version"] -[project.scripts] -add_job = "metrics.cli:cli" - [project.urls] Home = "https://bennett.ox.ac.uk" Source = "https://github.com/opensafely-core/metrics/" From 636f46f7a95d17d5179051241f6e62c47855ef18 Mon Sep 17 00:00:00 2001 From: George Hickman Date: Mon, 6 Nov 2023 13:41:39 +0000 Subject: [PATCH 03/10] Use a more specific name for the timescaledb URL We're using specific names for all the other configuration env vars, and have multiple databases involved with this project, so lets make it clear which database this URL is for. --- metrics/cli.py | 6 +++--- metrics/timescaledb/writer.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/metrics/cli.py b/metrics/cli.py index 92e98d84..c3ba4ce5 100644 --- a/metrics/cli.py +++ b/metrics/cli.py @@ -7,15 +7,15 @@ @click.group() @click.option("--debug", default=False, is_flag=True) -@click.option("--database-url", required=True, envvar="DATABASE_URL") +@click.option("--timescaledb-url", required=True, envvar="TIMESCALEDB_URL") @click.pass_context -def cli(ctx, debug, database_url): +def cli(ctx, debug, timescaledb_url): ctx.ensure_object(dict) setup_logging(debug) ctx.obj["DEBUG"] = debug - ctx.obj["DATABASE_URL"] = database_url + ctx.obj["TIMESCALEDB_URL"] = timescaledb_url cli.add_command(github) diff --git a/metrics/timescaledb/writer.py b/metrics/timescaledb/writer.py index cf2233e4..0eb2e707 100644 --- a/metrics/timescaledb/writer.py +++ b/metrics/timescaledb/writer.py @@ -9,7 +9,7 @@ log = structlog.get_logger() -DATABASE_URL = os.environ["DATABASE_URL"] +TIMESCALEDB_URL = os.environ["TIMESCALEDB_URL"] def ensure_table(name): @@ -25,7 +25,7 @@ def ensure_table(name): def run(sql, *args): - with psycopg.connect(DATABASE_URL) as conn: + with psycopg.connect(TIMESCALEDB_URL) as conn: cursor = conn.cursor() return cursor.execute(sql, *args) From 79ffabd91e84cc84931e31d5376bdcd3d12b84fa Mon Sep 17 00:00:00 2001 From: George Hickman Date: Mon, 6 Nov 2023 13:43:04 +0000 Subject: [PATCH 04/10] Configure the user/pass/db name for timescaledb So we can rely on it in local configuration. --- docker-compose.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index bffd2090..d7580a4f 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -30,7 +30,9 @@ services: timescaledb: image: timescale/timescaledb-ha:pg14-latest environment: - POSTGRES_PASSWORD: password + POSTGRES_DB: metrics + POSTGRES_PASSWORD: pass + POSTGRES_USER: user ports: - 5433:5432 volumes: From 4ae894a1745ddaa0069a3d1220298be83c50a69b Mon Sep 17 00:00:00 2001 From: George Hickman Date: Mon, 6 Nov 2023 13:44:20 +0000 Subject: [PATCH 05/10] Rename just run to just grafana We have multiple things which need running in this project, so lets name them clearly. --- justfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/justfile b/justfile index 370f78a7..a63ba4f6 100644 --- a/justfile +++ b/justfile @@ -119,9 +119,9 @@ fix: devenv $BIN/ruff --fix . -# Run the dev project -run: devenv - docker-compose up +# Run the grafana stack +grafana: + docker-compose up grafana metrics *args: devenv From ff5a156f79d1cb3aaacb30e9c042e3b2b7e81426 Mon Sep 17 00:00:00 2001 From: George Hickman Date: Mon, 6 Nov 2023 13:45:12 +0000 Subject: [PATCH 06/10] Wrap the metrics CLI in docker ready for deployment --- .dockerignore | 14 +++ .github/workflows/main.yml | 39 +++++++++ docker-compose.yaml | 49 +++++++++++ docker/Dockerfile | 160 ++++++++++++++++++++++++++++++++++ docker/build-dependencies.txt | 4 + docker/dependencies.txt | 7 ++ dotenv-sample | 17 ++++ justfile | 29 ++++++ 8 files changed, 319 insertions(+) create mode 100644 .dockerignore create mode 100644 docker/Dockerfile create mode 100644 docker/build-dependencies.txt create mode 100644 docker/dependencies.txt create mode 100644 dotenv-sample diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..ecf9ad20 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,14 @@ +.git/ + +**/*~ +**/.#* +**/*# +**/htmlcov +**/__pycache__ +**/*.pyc +**/.python-version +**/.env +**/.venv +**/venv +**/.coverage +**/*.egg-info/ diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 048dc7f0..d8dd5ce7 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -30,12 +30,51 @@ jobs: run: | just test + lint-dockerfile: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - uses: hadolint/hadolint-action@54c9adbab1582c2ef04b2016b760714a4bfde3cf # v3.1.0 + with: + dockerfile: docker/Dockerfile + + docker-test-and-build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - uses: "opensafely-core/setup-action@v1" + with: + install-just: true + + - name: Build docker image for both prod and dev + run: | + just docker-build prod + just docker-build dev + + - name: Run smoke test on prod + run: | + just docker-run prod python -m metrics + + - name: Save docker image + run: | + docker save metrics | gzip > /tmp/metrics.tar.gz + + - name: Upload docker image + uses: actions/upload-artifact@v3 + with: + name: metrics-image + path: /tmp/metrics.tar.gz + required-checks: if: always() needs: - check - test + - docker-test-and-build + - lint-dockerfile runs-on: Ubuntu-latest diff --git a/docker-compose.yaml b/docker-compose.yaml index d7580a4f..eb4dc613 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -38,6 +38,55 @@ services: volumes: - timescaledb:/home/postgres/pgdata/data + metrics-prod: + # image name, both locally and public + image: metrics + build: + dockerfile: docker/Dockerfile + # the prod stage in the Dockerfile + target: metrics-prod + # should speed up the build in CI, where we have a cold cache + cache_from: # should speed up the build in CI, where we have a cold cache + - ghcr.io/opensafely-core/base-docker + - ghcr.io/ebmdatalab/metrics + args: + # this makes the image work for later cache_from: usage + - BUILDKIT_INLINE_CACHE=1 + # env vars should be supplied by just + - BUILD_DATE + - GITREF + # use dockers builitin PID daemon + init: true + environment: + - GITHUB_TOKEN=dummy + - SLACK_SIGNING_SECRET=dummy + - SLACK_TECH_SUPPORT_CHANNEL_ID=dummy + - SLACK_TOKEN=dummy + - TIMESCALEDB_URL=dummy + + # main development service + metrics-dev: + extends: + service: metrics-prod + image: metrics-dev + container_name: metrics-dev + # running as a specific uid/gid allows files written to mounted volumes by + # the docker container's default user to match the host user's uid/gid, for + # convienience. + user: ${DEV_USERID:-1000}:${DEV_GROUPID:-1000} + build: + # the dev stage in the Dockerfile + target: metrics-dev + # pass the uid/gid as build arg + args: + - DEV_USERID=${DEV_USERID:-1000} + - DEV_GROUPID=${DEV_GROUPID:-1000} + volumes: + # mount our current code + - .:/app + env_file: + - .env + volumes: postgres: grafana: diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 00000000..f7b0eb80 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,160 @@ +# syntax=docker/dockerfile:1.2 +################################################# +# +# Create base image with python installed. +# +# DL3007 ignored because base-docker we specifically always want to build on +# the latest base image, by design. +# +# hadolint ignore=DL3007 +FROM ghcr.io/opensafely-core/base-docker:22.04 as base-python + +# we are going to use an apt cache on the host, so disable the default debian +# docker clean up that deletes that cache on every apt install +RUN rm -f /etc/apt/apt.conf.d/docker-clean + +# ensure fully working base python3.11 installation using deadsnakes ppa +# see: https://gist.github.com/tiran/2dec9e03c6f901814f6d1e8dad09528e +# use space efficient utility from base image +RUN --mount=type=cache,target=/var/cache/apt \ + echo "deb http://ppa.launchpad.net/deadsnakes/ppa/ubuntu jammy main" > /etc/apt/sources.list.d/deadsnakes-ppa.list &&\ + /usr/lib/apt/apt-helper download-file 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xf23c5a6cf475977595c89f51ba6932366a755776' /etc/apt/trusted.gpg.d/deadsnakes.asc + +# install any additional system dependencies +COPY docker/dependencies.txt /tmp/dependencies.txt +RUN --mount=type=cache,target=/var/cache/apt \ + /root/docker-apt-install.sh /tmp/dependencies.txt + + +################################################## +# +# Build image +# +# Ok, now we have local base image with python and our system dependencies on. +# We'll use this as the base for our builder image, where we'll build and +# install any python packages needed. +# +# We use a separate, disposable build image to avoid carrying the build +# dependencies into the production image. +FROM base-python as builder + +# Install any system build dependencies +COPY docker/build-dependencies.txt /tmp/build-dependencies.txt +RUN --mount=type=cache,target=/var/cache/apt \ + /root/docker-apt-install.sh /tmp/build-dependencies.txt + +# Install everything in venv for isolation from system python libraries +RUN python3.11 -m venv /opt/venv +ENV VIRTUAL_ENV=/opt/venv/ PATH="/opt/venv/bin:$PATH" + +# The cache mount means a) /root/.cache is not in the image, and b) it's preserved +# between docker builds locally, for faster dev rebuild. +COPY requirements.prod.txt /tmp/requirements.prod.txt + +# DL3042: using cache mount instead +# DL3013: we always want latest pip/setuptools/wheel, at least for now +# hadolint ignore=DL3042,DL3013 +RUN --mount=type=cache,target=/root/.cache \ + /opt/venv/bin/python -m pip install -U pip setuptools wheel && \ + /opt/venv/bin/python -m pip install --no-deps --require-hashes --requirement /tmp/requirements.prod.txt + + +################################################## +# +# Base project image +# +# Ok, we've built everything we need, build an image with all dependencies but +# no code. +# +# Not including the code at this stage has two benefits: +# +# 1) this image only rebuilds when the handfull of files needed to build metrics-base +# changes. If we do `COPY . /app` now, this will rebuild when *any* file changes. +# +# 2) Ensures we *have* to mount the volume for dev image, as there's no embedded +# version of the code. Otherwise, we could end up accidentally using the +# version of the code included when the prod image was built. +FROM base-python as metrics-base + +# Create a non-root metrics user to run the app as +RUN useradd --create-home --user-group metrics + +# copy venv over from builder image. These will have root:root ownership, but +# are readable by all. +COPY --from=builder /opt/venv /opt/venv + +# Ensure we're using the venv by default +ENV VIRTUAL_ENV=/opt/venv/ PATH="/opt/venv/bin:$PATH" + +RUN mkdir /app +WORKDIR /app + +# We set command rather than entrypoint, to make it easier to run different +# things from the cli +CMD ["/opt/venv/bin/python", "-m", "metrics"] + +# This may not be necessary, but it probably doesn't hurt +ENV PYTHONPATH=/app + +# switch to running as the user +USER metrics + + +################################################## +# +# Production image +# +# Copy code in, add proper metadata +FROM metrics-base as metrics-prod + +# Adjust this metadata to fit project. Note that the base-docker image does set +# some basic metadata. +LABEL org.opencontainers.image.title="metrics" \ + org.opencontainers.image.description="Bennett Institute internal metrics tranformation tool" \ + org.opencontainers.image.source="https://github.com/ebmdatalab/metrics" + +# copy application code +COPY metrics /app/metrics + +# finally, tag with build information. These will change regularly, therefore +# we do them as the last action. +ARG BUILD_DATE=unknown +LABEL org.opencontainers.image.created=$BUILD_DATE +ARG GITREF=unknown +LABEL org.opencontainers.image.revision=$GITREF + + + +################################################## +# +# Dev image +# +# Now we build a dev image from our metrics-dev image. This is basically +# installing dev dependencies and matching local UID/GID. It is expected that +# the current code will be mounted in /app when this is run +# +FROM metrics-base as metrics-dev + +# switch back to root to run the install of dev requirements.txt +USER root + +# install development requirements +COPY requirements.dev.txt /tmp/requirements.dev.txt +# using cache mount instead +# hadolint ignore=DL3042 +RUN --mount=type=cache,target=/root/.cache \ + python -m pip install --requirement /tmp/requirements.dev.txt + +# in dev, ensure metrics uid matches host user id +ARG DEV_USERID=1000 +ARG DEV_GROUPID=1000 +RUN usermod -u $DEV_USERID metrics +# Modify metrics only if group id does not already exist. We run dev +# containers with an explicit group id anyway, so file permissions on the host +# will be correct, and we do not actually rely on named metrics group access to +# anything. +RUN grep -q ":$DEV_GROUPID:" /etc/group || groupmod -g $DEV_GROUPID metrics + + +# switch back to metrics +USER metrics diff --git a/docker/build-dependencies.txt b/docker/build-dependencies.txt new file mode 100644 index 00000000..09ec91a5 --- /dev/null +++ b/docker/build-dependencies.txt @@ -0,0 +1,4 @@ +# list ubuntu packges needed to build dependencies, one per line +build-essential +libpq-dev +python3.11-dev diff --git a/docker/dependencies.txt b/docker/dependencies.txt new file mode 100644 index 00000000..0802b439 --- /dev/null +++ b/docker/dependencies.txt @@ -0,0 +1,7 @@ +# list ubuntu packages needed in production, one per line +git +postgresql-client +python3.11 +python3.11-distutils +python3.11-venv +tzdata diff --git a/dotenv-sample b/dotenv-sample new file mode 100644 index 00000000..098d9e6e --- /dev/null +++ b/dotenv-sample @@ -0,0 +1,17 @@ +# The DSN for access the timescaledb database +TIMESCALEDB_URL=postgres://user:pass@localhost:5433/metrics + +# API token for pulling data from Github +GITHUB_TOKEN= + +# Slack API access credentials. +# The slack app used for this will need the following OAuth scopes: +# * channels:history +# * groups:history +# * im:history +# * npim:history +SLACK_SIGNING_SECRET= +SLACK_TOKEN= + +# Slack channel ID for tech-support-channel +SLACK_TECH_SUPPORT_CHANNEL_ID=C0270Q313H7 diff --git a/justfile b/justfile index a63ba4f6..ce18b654 100644 --- a/justfile +++ b/justfile @@ -1,3 +1,6 @@ +# Load .env files by default +set dotenv-load := true + export VIRTUAL_ENV := env_var_or_default("VIRTUAL_ENV", ".venv") export BIN := VIRTUAL_ENV + if os_family() == "unix" { "/bin" } else { "/Scripts" } @@ -5,6 +8,9 @@ export PIP := BIN + if os_family() == "unix" { "/python -m pip" } else { "/pytho export DEFAULT_PYTHON := if os_family() == "unix" { "python3.11" } else { "python" } +export DEV_USERID := `id -u` +export DEV_GROUPID := `id -g` + # list available commands default: @@ -126,3 +132,26 @@ grafana: metrics *args: devenv python -m metrics {{ args }} + + +docker-build env="dev": + #!/usr/bin/env bash + set -euo pipefail + + test -z "${SKIP_BUILD:-}" || { echo "SKIP_BUILD set"; exit 0; } + + # ensure env file exists + test -f .env || cp dotenv-sample .env + + # set build args for prod builds + export BUILD_DATE=$(date -u +'%y-%m-%dT%H:%M:%SZ') + export GITREF=$(git rev-parse --short HEAD) + + # build the thing + docker-compose build --pull metrics-{{ env }} + + +# run command in dev|prod container +docker-run env="dev" *args="bash": + {{ just_executable() }} docker-build {{ env }} + docker-compose run --rm metrics-{{ env }} {{ args }} From 1863a0042ca68827abe1768179d04628f0f173a7 Mon Sep 17 00:00:00 2001 From: George Hickman Date: Mon, 6 Nov 2023 14:55:00 +0000 Subject: [PATCH 07/10] Deploy metrics to dokku3 --- .github/workflows/main.yml | 55 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d8dd5ce7..b10ac97a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,6 +1,12 @@ --- name: CI +env: + IMAGE_NAME: metrics + PUBLIC_IMAGE_NAME: ghcr.io/ebmdatalab/metrics + REGISTRY: ghcr.io + SSH_AUTH_SOCK: /tmp/agent.sock + on: push: @@ -67,6 +73,55 @@ jobs: name: metrics-image path: /tmp/metrics.tar.gz + deploy: + needs: + - check + - test + - docker-test-and-build + - lint-dockerfile + + runs-on: ubuntu-latest + + permissions: + contents: read + packages: write + + if: github.ref == 'refs/heads/main' + + concurrency: deploy-production + + steps: + - uses: actions/checkout@v4 + - uses: "opensafely-core/setup-action@v1" + with: + install-just: true + + - name: Download docker image + uses: actions/download-artifact@v3 + with: + name: metrics-image + path: /tmp/image + + - name: Import docker image + run: gunzip -c /tmp/image/metrics.tar.gz | docker load + + - name: Test image we imported from previous job works + run: | + SKIP_BUILD=1 just docker-run prod python -m metrics + + - name: Publish image + run: | + echo ${{ secrets.GITHUB_TOKEN }} | docker login "$REGISTRY" -u ${{ github.actor }} --password-stdin + docker tag "$IMAGE_NAME" "$PUBLIC_IMAGE_NAME":latest + docker push "$PUBLIC_IMAGE_NAME":latest + + - name: Deploy image + run: | + ssh-agent -a "$SSH_AUTH_SOCK" > /dev/null + ssh-add - <<< "${{ secrets.DOKKU3_DEPLOY_SSH_KEY }}" + SHA=$(docker inspect --format='{{index .RepoDigests 0}}' "$PUBLIC_IMAGE_NAME":latest) + ssh -o "UserKnownHostsFile=/dev/null" -o "StrictHostKeyChecking=no" dokku@dokku3.ebmdatalab.net git:from-image metrics "$SHA" + required-checks: if: always() From 718afd121fcf36867352f28d16c931e5b82b7eef Mon Sep 17 00:00:00 2001 From: George Hickman Date: Tue, 7 Nov 2023 09:13:52 +0000 Subject: [PATCH 08/10] Remove unused requirements.prod.in We switched to using pyproject.toml for this project in fc2050f but this file was missed as part of that. Nothing is configured to use it so it's just a plain deletion. --- requirements.prod.in | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 requirements.prod.in diff --git a/requirements.prod.in b/requirements.prod.in deleted file mode 100644 index 06b80caa..00000000 --- a/requirements.prod.in +++ /dev/null @@ -1,4 +0,0 @@ -# Main prod requirements - -# To generate requirements file, run: -# pip-compile --generate-hashes --output-file=requirements.prod.txt requirements.prod.in From 0df8ce8f4713af21397676d461194b72fb41b961 Mon Sep 17 00:00:00 2001 From: George Hickman Date: Tue, 7 Nov 2023 10:04:09 +0000 Subject: [PATCH 09/10] Use the python linked to the current env to invoke the CLI --- justfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/justfile b/justfile index ce18b654..51bf85fa 100644 --- a/justfile +++ b/justfile @@ -131,7 +131,7 @@ grafana: metrics *args: devenv - python -m metrics {{ args }} + $BIN/python -m metrics {{ args }} docker-build env="dev": From ff06b1a7712fb0dc6bea835d69d9fd3628b54d62 Mon Sep 17 00:00:00 2001 From: George Hickman Date: Tue, 7 Nov 2023 10:06:04 +0000 Subject: [PATCH 10/10] Use docker compose over docker-compose The former invokes the docker plugin instead of the standalone tool, allowing us to target compose v2 more consistently. --- justfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/justfile b/justfile index 51bf85fa..42f062c1 100644 --- a/justfile +++ b/justfile @@ -127,7 +127,7 @@ fix: devenv # Run the grafana stack grafana: - docker-compose up grafana + docker compose up grafana metrics *args: devenv @@ -148,10 +148,10 @@ docker-build env="dev": export GITREF=$(git rev-parse --short HEAD) # build the thing - docker-compose build --pull metrics-{{ env }} + docker compose build --pull metrics-{{ env }} # run command in dev|prod container docker-run env="dev" *args="bash": {{ just_executable() }} docker-build {{ env }} - docker-compose run --rm metrics-{{ env }} {{ args }} + docker compose run --rm metrics-{{ env }} {{ args }}