diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 048dc7f0..d778d545 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -30,12 +30,56 @@ jobs: run: | just test + lint-dockerfile: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - uses: hadolint/hadolint-action@54c9adbab1582c2ef04b2016b760714a4bfde3cf # v3.1.0 + with: + dockerfile: docker/Dockerfile + + docker-test: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - uses: "opensafely-core/setup-action@v1" + with: + install-just: true + + - name: Build docker image for both prod and dev + run: | + just docker-build prod + just docker-build dev + + - name: Run unit tests on docker dev image + run: | + # build docker and run test + just docker-test + + - name: Run smoke test on prod + run: | + just docker-run prod python -m metrics + + - name: Save docker image + run: | + docker save metrics | gzip > /tmp/metrics.tar.gz + + - name: Upload docker image + uses: actions/upload-artifact@v4 + with: + name: metrics-image + path: /tmp/metrics.tar.gz + required-checks: if: always() needs: - check - test + - docker-test + - lint-dockerfile runs-on: Ubuntu-latest diff --git a/docker-compose.yaml b/docker-compose.yaml index d7580a4f..974ca902 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -38,6 +38,65 @@ services: volumes: - timescaledb:/home/postgres/pgdata/data + prod: + # image name, both locally and publicly + image: metrics + build: + dockerfile: docker/Dockerfile + # the prod stage in the Dockerfile + target: metrics-prod + # should speed up the build in CI, where we have a cold cache + cache_from: + - ghcr.io/opensafely-core/base-docker + - ghcr.io/ebmdatalab/metrics + args: + # this makes the image work for later cache_from: usage + - BUILDKIT_INLINE_CACHE=1 + # env vars should be supplied by just + - BUILD_DATE + - GITREF + # use docker's builtin init process as PID 1 +
init: true + + # main development service + dev: + extends: + service: prod + image: metrics-dev + container_name: metrics-dev + # running as a specific uid/gid allows files written to mounted volumes by + # the docker container's default user to match the host user's uid/gid, for + # convenience. + user: ${DEV_USERID:-1000}:${DEV_GROUPID:-1000} + build: + # the dev stage in the Dockerfile + target: metrics-dev + # pass the uid/gid as build args + args: + - DEV_USERID=${DEV_USERID:-1000} + - DEV_GROUPID=${DEV_GROUPID:-1000} + # Some tricks are needed here to be able to test the BindMountVolumeAPI + # when running inside docker, as we need the volumes to be mountable by the + # host docker. Our pytest fixtures create the directories in /tmp, so we + # provide a host mounted /tmp to the container, so we can access it from + # the host as well. NOTE(review): only .:/app is mounted below; confirm this comment still applies. + volumes: + # mount our current code + - .:/app + env_file: + - .env + + # test runner service - uses dev-image with a different command + test: + extends: + service: dev + container_name: metrics-test + # override command + command: > + bash -c "/opt/venv/bin/coverage run --module pytest + && (/opt/venv/bin/coverage report || /opt/venv/bin/coverage html)" + + volumes: postgres: grafana: diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 00000000..5468c5c0 --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,168 @@ +# syntax=docker/dockerfile:1.2 +################################################# +# +# Create base image with python installed. +# +# DL3007 ignored because we specifically always want to build on the latest +# base-docker image, by design.
+# +# hadolint ignore=DL3007 +FROM ghcr.io/opensafely-core/base-docker:22.04 as base-python + +# we are going to use an apt cache on the host, so disable the default debian +# docker clean up that deletes that cache on every apt install +RUN rm -f /etc/apt/apt.conf.d/docker-clean + +# ensure fully working base python3.11 installation using deadsnakes ppa +# see: https://gist.github.com/tiran/2dec9e03c6f901814f6d1e8dad09528e +# use the space-efficient utility from the base image +RUN --mount=type=cache,target=/var/cache/apt \ + echo "deb http://ppa.launchpad.net/deadsnakes/ppa/ubuntu jammy main" > /etc/apt/sources.list.d/deadsnakes-ppa.list &&\ + /usr/lib/apt/apt-helper download-file 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xf23c5a6cf475977595c89f51ba6932366a755776' /etc/apt/trusted.gpg.d/deadsnakes.asc + +# install any additional system dependencies +COPY docker/dependencies.txt /tmp/dependencies.txt +RUN --mount=type=cache,target=/var/cache/apt \ + /root/docker-apt-install.sh /tmp/dependencies.txt + + +################################################## +# +# Build image +# +# Ok, now we have a local base image with python and our system dependencies +# installed. We'll use this as the base for our builder image, where we'll build +# and install any python packages needed. +# +# We use a separate, disposable build image to avoid carrying the build +# dependencies into the production image. +FROM base-python as builder + +# Install any system build dependencies +COPY docker/build-dependencies.txt /tmp/build-dependencies.txt +RUN --mount=type=cache,target=/var/cache/apt \ + /root/docker-apt-install.sh /tmp/build-dependencies.txt + +# Install everything in a venv for isolation from system python libraries +RUN python3.11 -m venv /opt/venv +ENV VIRTUAL_ENV=/opt/venv/ PATH="/opt/venv/bin:$PATH" + +# The cache mount means a) /root/.cache is not in the image, and b) it's preserved +# between docker builds locally, for faster dev rebuilds.
+COPY requirements.prod.txt /tmp/requirements.prod.txt + +# DL3042: using cache mount instead +# DL3013: we always want latest pip/setuptools/wheel, at least for now +# hadolint ignore=DL3042,DL3013 +RUN --mount=type=cache,target=/root/.cache \ + /opt/venv/bin/python -m pip install -U pip setuptools wheel && \ + /opt/venv/bin/python -m pip install --no-deps --require-hashes --requirement /tmp/requirements.prod.txt + + +################################################## +# +# Base project image +# +# Ok, we've built everything we need, so now build an image with all +# dependencies but no code. +# +# Not including the code at this stage has two benefits: +# +# 1) this image only rebuilds when the handful of files needed to build metrics-base +# change. If we do `COPY . /app` now, this will rebuild when *any* file changes. +# +# 2) Ensures we *have* to mount the volume for the dev image, as there's no embedded +# version of the code. Otherwise, we could end up accidentally using the +# version of the code included when the prod image was built. +FROM base-python as metrics-base + +# Create a non-root metrics user to run the app as +RUN useradd --create-home --user-group metrics + +# copy venv over from builder image. Its files will have root:root ownership, but +# are readable by all. +COPY --from=builder /opt/venv /opt/venv + +# Ensure we're using the venv by default +ENV VIRTUAL_ENV=/opt/venv/ PATH="/opt/venv/bin:$PATH" + +RUN mkdir /app +WORKDIR /app + +# We set command rather than entrypoint, to make it easier to run different +# things from the cli +CMD ["/opt/venv/bin/python", "-m", "metrics"] + +# This may not be necessary, but it probably doesn't hurt +ENV PYTHONPATH=/app + +# We are not ready to do this step yet +# switch to running as the user +# USER metrics + + +################################################## +# +# Production image +# +# Copy code in, add proper metadata +FROM metrics-base as metrics-prod + +# Adjust this metadata to fit the project.
Note that the base-docker image does set +# some basic metadata. +LABEL org.opencontainers.image.title="metrics" \ + org.opencontainers.image.description="Bennett Institute internal metrics transformation tool" \ + org.opencontainers.image.source="https://github.com/ebmdatalab/metrics" + +# copy application code +COPY . /app + +# finally, tag with build information. These will change regularly, therefore +# we do them as the last action. +ARG BUILD_DATE=unknown +LABEL org.opencontainers.image.created=$BUILD_DATE +ARG GITREF=unknown +LABEL org.opencontainers.image.revision=$GITREF + + + +################################################## +# +# Dev image +# +# Now we build a dev image from our metrics-base image. This is basically +# installing dev dependencies and matching local UID/GID. It is expected that +# the current code will be mounted in /app when this is run +# +FROM metrics-base as metrics-dev + +# switch back to root to run the install of dev requirements.txt +#USER root + +# TODO: it's possible python dev dependencies might need some additional build packages installed? + +# install development requirements +COPY requirements.dev.txt /tmp/requirements.dev.txt +# DL3042: using cache mount instead +# hadolint ignore=DL3042 +RUN --mount=type=cache,target=/root/.cache \ + python -m pip install --requirement /tmp/requirements.dev.txt + +# modify container docker gid to match host +# FIXME: is this needed when we're not talking to docker locally? +# ARG DOCKER_HOST_GROUPID +# RUN groupmod -g $DOCKER_HOST_GROUPID docker + +# in dev, ensure metrics uid matches host user id +ARG DEV_USERID=1000 +ARG DEV_GROUPID=1000 +RUN usermod -u $DEV_USERID metrics +# Modify the metrics group only if the group id does not already exist. We run dev +# containers with an explicit group id anyway, so file permissions on the host +# will be correct, and we do not actually rely on named metrics group access to +# anything.
+RUN grep -q ":$DEV_GROUPID:" /etc/group || groupmod -g $DEV_GROUPID metrics + + +# switch back to metrics +#USER metrics diff --git a/docker/build-dependencies.txt b/docker/build-dependencies.txt new file mode 100644 index 00000000..09ec91a5 --- /dev/null +++ b/docker/build-dependencies.txt @@ -0,0 +1,4 @@ +# list ubuntu packages needed to build dependencies, one per line +build-essential +libpq-dev +python3.11-dev diff --git a/docker/dependencies.txt b/docker/dependencies.txt new file mode 100644 index 00000000..0802b439 --- /dev/null +++ b/docker/dependencies.txt @@ -0,0 +1,7 @@ +# list ubuntu packages needed in production, one per line +git +postgresql-client +python3.11 +python3.11-distutils +python3.11-venv +tzdata diff --git a/dotenv-sample b/dotenv-sample new file mode 100644 index 00000000..5dea78f4 --- /dev/null +++ b/dotenv-sample @@ -0,0 +1,19 @@ + + +# The DSN for accessing the timescaledb database +TIMESCALEDB_URL=postgres://user:pass@localhost:5433/metrics + +# API token for pulling data from GitHub +GITHUB_TOKEN= + +# Slack API access credentials.
+# The Slack app used for this will need the following OAuth scopes: +# * channels:history +# * groups:history +# * im:history +# * mpim:history +SLACK_SIGNING_SECRET= +SLACK_TOKEN= + +# Slack channel ID for tech-support-channel +SLACK_TECH_SUPPORT_CHANNEL_ID=C0270Q313H7 diff --git a/justfile b/justfile index a63ba4f6..4c044e55 100644 --- a/justfile +++ b/justfile @@ -1,3 +1,6 @@ +# Load .env files by default +set dotenv-load := true + export VIRTUAL_ENV := env_var_or_default("VIRTUAL_ENV", ".venv") export BIN := VIRTUAL_ENV + if os_family() == "unix" { "/bin" } else { "/Scripts" } @@ -5,6 +8,9 @@ export PIP := BIN + if os_family() == "unix" { "/python -m pip" } else { "/pytho export DEFAULT_PYTHON := if os_family() == "unix" { "python3.11" } else { "python" } +export DEV_USERID := `id -u` +export DEV_GROUPID := `id -g` + # list available commands default: @@ -126,3 +132,26 @@ grafana: metrics *args: devenv python -m metrics {{ args }} + + +docker-build env="dev": + #!/usr/bin/env bash + set -euo pipefail + + test -z "${SKIP_BUILD:-}" || { echo "SKIP_BUILD set"; exit 0; } + + # ensure env file exists + test -f .env || cp dotenv-sample .env + + # set build args for prod builds (RFC 3339 UTC timestamp, short git ref) + export BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ') + export GITREF=$(git rev-parse --short HEAD) + + # build the thing + docker-compose build --pull {{ env }} + + +# run command in dev|prod container +docker-run env="dev" *args="bash": + {{ just_executable() }} docker-build {{ env }} + docker-compose run --rm {{ env }} {{ args }}