Skip to content

Commit

Permalink
Wrap the metrics CLI in docker ready for deployment
Browse files Browse the repository at this point in the history
  • Loading branch information
ghickman committed Nov 6, 2023
1 parent c4be777 commit 4175696
Show file tree
Hide file tree
Showing 7 changed files with 330 additions and 0 deletions.
44 changes: 44 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,56 @@ jobs:
run: |
just test
# Lint docker/Dockerfile with hadolint.
lint-dockerfile:
  runs-on: ubuntu-latest

  steps:
    - uses: actions/checkout@v4
    # pinned to a full commit SHA; the trailing comment records the release tag
    - uses: hadolint/hadolint-action@54c9adbab1582c2ef04b2016b760714a4bfde3cf  # v3.1.0
      with:
        dockerfile: docker/Dockerfile

# Build the prod and dev docker images, run the tests and a smoke test inside
# them, then publish the prod image as a workflow artifact.
docker-test:
  runs-on: ubuntu-latest

  steps:
    - uses: actions/checkout@v4
    - uses: "opensafely-core/setup-action@v1"
      with:
        install-just: true

    - name: Build docker image for both prod and dev
      run: |
        just docker-build prod
        just docker-build dev
    # NOTE(review): no `docker-test` recipe is visible in this commit's
    # justfile changes - confirm it exists before relying on this step.
    - name: Run unit tests on docker dev image
      run: |
        # build docker and run test
        just docker-test
    - name: Run smoke test on prod
      run: |
        just docker-run prod python -m metrics
    - name: Save docker image
      # an explicit `shell: bash` runs the script with `-eo pipefail`, so a
      # failing `docker save` is not masked by gzip exiting successfully
      shell: bash
      run: |
        docker save metrics | gzip > /tmp/metrics.tar.gz
    - name: Upload docker image
      # v3 of this action is deprecated; anything that downloads this artifact
      # must use actions/download-artifact@v4 as well
      uses: actions/upload-artifact@v4
      with:
        name: metrics-image
        path: /tmp/metrics.tar.gz

required-checks:
if: always()

needs:
- check
- test
- docker-test
- lint-dockerfile

runs-on: Ubuntu-latest

Expand Down
59 changes: 59 additions & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,65 @@ services:
volumes:
- timescaledb:/home/postgres/pgdata/data

# production service - builds the metrics-prod stage of docker/Dockerfile
prod:
  # image name, both locally and public
  image: metrics
  build:
    dockerfile: docker/Dockerfile
    # the prod stage in the Dockerfile
    target: metrics-prod
    # should speed up the build in CI, where we have a cold cache
    cache_from:
      - ghcr.io/opensafely-core/base-docker
      - ghcr.io/ebmdatalab/metrics
    args:
      # this makes the image work for later cache_from: usage
      - BUILDKIT_INLINE_CACHE=1
      # env vars should be supplied by just
      - BUILD_DATE
      - GITREF
  # use docker's builtin PID 1 init process
  init: true

# main development service
dev:
  extends:
    service: prod
  image: metrics-dev
  container_name: metrics-dev
  # running as a specific uid/gid allows files written to mounted volumes by
  # the docker container's default user to match the host user's uid/gid, for
  # convenience.
  user: ${DEV_USERID:-1000}:${DEV_GROUPID:-1000}
  build:
    # the dev stage in the Dockerfile
    target: metrics-dev
    # pass the uid/gid as build arg
    args:
      - DEV_USERID=${DEV_USERID:-1000}
      - DEV_GROUPID=${DEV_GROUPID:-1000}
  # Some tricks are needed here to be able to test the BindMountVolumeAPI
  # when running inside docker, as we need the volumes to be mountable by the
  # host docker. Our pytest fixtures create the directories in /tmp, so we
  # provide a host mounted /tmp to the container, so we can access it from
  # the host as well.
  # NOTE(review): no /tmp mount is configured below and nothing in this file
  # refers to BindMountVolumeAPI - the paragraph above looks carried over
  # from another project; confirm and remove if so.
  volumes:
    # mount our current code
    - .:/app
  env_file:
    - .env

# test runner service - uses dev-image with a different command
test:
  extends:
    service: dev
  container_name: metrics-test
  # override command: run pytest under coverage, then print the coverage
  # report; the HTML report is only generated when `coverage report` exits
  # non-zero
  command: >
    bash -c "/opt/venv/bin/coverage run --module pytest
    && (/opt/venv/bin/coverage report || /opt/venv/bin/coverage html)"
volumes:
postgres:
grafana:
Expand Down
168 changes: 168 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
# syntax=docker/dockerfile:1.2
#################################################
#
# Create base image with python installed.
#
# DL3007 is ignored because, by design, we always want to build on the latest
# base-docker image.
# NOTE(review): the FROM line below pins the 22.04 tag rather than `latest`,
# so this ignore may no longer be needed - confirm.
#
# hadolint ignore=DL3007
FROM ghcr.io/opensafely-core/base-docker:22.04 as base-python

# we are going to use an apt cache on the host, so disable the default debian
# docker clean up that deletes that cache on every apt install
RUN rm -f /etc/apt/apt.conf.d/docker-clean

# ensure fully working base python3.11 installation using deadsnakes ppa
# see: https://gist.github.com/tiran/2dec9e03c6f901814f6d1e8dad09528e
# This step only registers the PPA source list and downloads its signing key;
# python3.11 itself is listed in docker/dependencies.txt and installed below.
# use space efficient utility from base image
RUN --mount=type=cache,target=/var/cache/apt \
echo "deb http://ppa.launchpad.net/deadsnakes/ppa/ubuntu jammy main" > /etc/apt/sources.list.d/deadsnakes-ppa.list &&\
/usr/lib/apt/apt-helper download-file 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xf23c5a6cf475977595c89f51ba6932366a755776' /etc/apt/trusted.gpg.d/deadsnakes.asc

# install any additional system dependencies
COPY docker/dependencies.txt /tmp/dependencies.txt
RUN --mount=type=cache,target=/var/cache/apt \
/root/docker-apt-install.sh /tmp/dependencies.txt


##################################################
#
# Build image
#
# Ok, now we have a local base image with python and our system dependencies
# on. We'll use this as the base for our builder image, where we'll build and
# install any python packages needed.
#
# We use a separate, disposable build image to avoid carrying the build
# dependencies into the production image.
FROM base-python as builder

# Install any system build dependencies
COPY docker/build-dependencies.txt /tmp/build-dependencies.txt
RUN --mount=type=cache,target=/var/cache/apt \
/root/docker-apt-install.sh /tmp/build-dependencies.txt

# Install everything in venv for isolation from system python libraries
RUN python3.11 -m venv /opt/venv
ENV VIRTUAL_ENV=/opt/venv/ PATH="/opt/venv/bin:$PATH"

# hash-pinned production requirements, installed into the venv below
COPY requirements.prod.txt /tmp/requirements.prod.txt

# The cache mount means a) /root/.cache is not in the image, and b) it's preserved
# between docker builds locally, for faster dev rebuild.
# DL3042: using cache mount instead
# DL3013: we always want latest pip/setuptools/wheel, at least for now
# hadolint ignore=DL3042,DL3013
RUN --mount=type=cache,target=/root/.cache \
/opt/venv/bin/python -m pip install -U pip setuptools wheel && \
/opt/venv/bin/python -m pip install --no-deps --require-hashes --requirement /tmp/requirements.prod.txt


##################################################
#
# Base project image
#
# Ok, we've built everything we need, build an image with all dependencies but
# no code.
#
# Not including the code at this stage has two benefits:
#
# 1) this image only rebuilds when the handful of files needed to build metrics-base
# changes. If we do `COPY . /app` now, this will rebuild when *any* file changes.
#
# 2) Ensures we *have* to mount the volume for dev image, as there's no embedded
# version of the code. Otherwise, we could end up accidentally using the
# version of the code included when the prod image was built.
FROM base-python as metrics-base

# Create a non-root metrics user to run the app as
RUN useradd --create-home --user-group metrics

# copy venv over from builder image. These will have root:root ownership, but
# are readable by all.
COPY --from=builder /opt/venv /opt/venv

# Ensure we're using the venv by default
ENV VIRTUAL_ENV=/opt/venv/ PATH="/opt/venv/bin:$PATH"

RUN mkdir /app
WORKDIR /app

# We set command rather than entrypoint, to make it easier to run different
# things from the cli
CMD ["/opt/venv/bin/python", "-m", "metrics"]

# This may not be necessary, but it probably doesn't hurt
ENV PYTHONPATH=/app

# We are not ready to do this step yet, so for now the image keeps running as
# the base image's default user.
# switch to running as the user
# USER metrics


##################################################
#
# Production image
#
# Copy code in, add proper metadata
FROM metrics-base as metrics-prod

# Adjust this metadata to fit project. Note that the base-docker image does set
# some basic metadata.
LABEL org.opencontainers.image.title="metrics" \
      org.opencontainers.image.description="Bennett Institute internal metrics transformation tool" \
      org.opencontainers.image.source="https://github.com/ebmdatalab/metrics"

# copy application code
# NOTE(review): this copies the whole build context; `just docker-build`
# creates a local .env before building, so confirm a .dockerignore keeps it
# (and .git) out of the production image.
COPY . /app

# finally, tag with build information. These will change regularly, therefore
# we do them as the last action.
ARG BUILD_DATE=unknown
LABEL org.opencontainers.image.created=$BUILD_DATE
ARG GITREF=unknown
LABEL org.opencontainers.image.revision=$GITREF



##################################################
#
# Dev image
#
# Now we build a dev image from our metrics-base image. This is basically
# installing dev dependencies and matching local UID/GID. It is expected that
# the current code will be mounted in /app when this is run
#
FROM metrics-base as metrics-dev

# switch back to root to run the install of dev requirements.txt
# (left commented out while metrics-base does not switch away from root)
#USER root

# TODO: it's possible python dev dependencies might need some additional build packages installed?

# install development requirements
COPY requirements.dev.txt /tmp/requirements.dev.txt
# using cache mount instead
# hadolint ignore=DL3042
RUN --mount=type=cache,target=/root/.cache \
python -m pip install --requirement /tmp/requirements.dev.txt

# modify container docker gid to match host
# FIXME: is this needed when we're not talking to docker locally?
# ARG DOCKER_HOST_GROUPID
# RUN groupmod -g $DOCKER_HOST_GROUPID docker

# in dev, ensure metrics uid matches host user id
# (docker-compose.yaml passes DEV_USERID/DEV_GROUPID as build args; both
# default to 1000 when not supplied)
ARG DEV_USERID=1000
ARG DEV_GROUPID=1000
RUN usermod -u $DEV_USERID metrics
# Modify metrics only if group id does not already exist. We run dev
# containers with an explicit group id anyway, so file permissions on the host
# will be correct, and we do not actually rely on named metrics group access to
# anything.
RUN grep -q ":$DEV_GROUPID:" /etc/group || groupmod -g $DEV_GROUPID metrics


# switch back to metrics
#USER metrics
4 changes: 4 additions & 0 deletions docker/build-dependencies.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# list ubuntu packages needed to build dependencies, one per line
build-essential
libpq-dev
python3.11-dev
7 changes: 7 additions & 0 deletions docker/dependencies.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# list ubuntu packages needed in production, one per line
git
postgresql-client
python3.11
python3.11-distutils
python3.11-venv
tzdata
19 changes: 19 additions & 0 deletions dotenv-sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@


# The DSN for accessing the timescaledb database
TIMESCALEDB_URL=postgres://user:pass@localhost:5433/metrics

# API token for pulling data from Github
GITHUB_TOKEN=

# Slack API access credentials.
# The slack app used for this will need the following OAuth scopes:
# * channels:history
# * groups:history
# * im:history
# * mpim:history
SLACK_SIGNING_SECRET=
SLACK_TOKEN=

# Slack channel ID for tech-support-channel
SLACK_TECH_SUPPORT_CHANNEL_ID=C0270Q313H7
29 changes: 29 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
# Load .env files by default
set dotenv-load := true

# use VIRTUAL_ENV from the environment if set, otherwise default to .venv
export VIRTUAL_ENV := env_var_or_default("VIRTUAL_ENV", ".venv")

# platform-specific locations of the virtualenv's bin directory and pip
export BIN := VIRTUAL_ENV + if os_family() == "unix" { "/bin" } else { "/Scripts" }
export PIP := BIN + if os_family() == "unix" { "/python -m pip" } else { "/python.exe -m pip" }

export DEFAULT_PYTHON := if os_family() == "unix" { "python3.11" } else { "python" }

# uid/gid of the invoking user, exported so docker-compose.yaml can build and
# run the dev container with matching ids
# NOTE(review): assumes an `id` command is available on every platform this
# justfile is used on, including the non-unix branch above - confirm.
export DEV_USERID := `id -u`
export DEV_GROUPID := `id -g`


# list available commands
default:
Expand Down Expand Up @@ -126,3 +132,26 @@ grafana:

# run the metrics CLI with the given arguments, after the devenv recipe
metrics *args: devenv
    python -m metrics {{ args }}


# build the docker image for the given compose service (dev|prod)
docker-build env="dev":
    #!/usr/bin/env bash
    set -euo pipefail
    # allow callers to skip the build entirely by setting SKIP_BUILD
    test -z "${SKIP_BUILD:-}" || { echo "SKIP_BUILD set"; exit 0; }

    # ensure env file exists
    test -f .env || cp dotenv-sample .env

    # set build args for prod builds
    # %Y (4-digit year) keeps BUILD_DATE a valid RFC 3339 timestamp, which is
    # what the org.opencontainers.image.created label expects
    export BUILD_DATE=$(date -u +'%Y-%m-%dT%H:%M:%SZ')
    export GITREF=$(git rev-parse --short HEAD)

    # build the thing
    docker-compose build --pull {{ env }}


# run command in dev|prod container
docker-run env="dev" *args="bash":
    # make sure the image is built first (a no-op when SKIP_BUILD is set)
    {{ just_executable() }} docker-build {{ env }}
    docker-compose run --rm {{ env }} {{ args }}

0 comments on commit 4175696

Please sign in to comment.