Skip to content

Commit

Permalink
Try simplifying Docker image builds
Browse files Browse the repository at this point in the history
Uses the approach from @victorlin's github-actions-docker-build [1]
template to create a Docker image for this project with GitHub Actions
and push the resulting image to the GitHub Container Registry (GHCR). This first attempt replaces
the more complicated previous Dockerfile with one that inherits from the
Nextstrain base image which has almost all of the dependencies we need
for this project installed now. Then, we install the few remaining
packages that aren't already in that base image including Python
packages and Java. Still to be added are Julia and TreeKnit.

[1] https://github.com/victorlin/github-actions-docker-build
  • Loading branch information
huddlej committed Jul 11, 2024
1 parent 8654f84 commit 0ee8fb0
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 116 deletions.
47 changes: 47 additions & 0 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Build the project's Docker image with the `build` script and publish it to
# the GitHub Container Registry (ghcr.io).
name: docker

on:
  # Rebuild only when a file that affects the image changes on master.
  push:
    branches:
      - master
    paths:
      - .github/workflows/docker.yml
      - build
      - Dockerfile

  # Allow manual runs as well.
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      # Lets GITHUB_TOKEN push packages (images) to ghcr.io.
      packages: write
    steps:

      - uses: actions/checkout@v4

      # Uncomment this if you are building for a non-native --platform
      # - uses: docker/setup-qemu-action@v3

      - uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - run: |
          # Image references must be lowercase, but the repository name keeps
          # the owner's original casing, so normalize it before building tags.
          # The runner-provided environment variables are used instead of
          # inline workflow expressions so nothing is templated into the script.
          repository="${GITHUB_REPOSITORY,,}"
          image_tag="ghcr.io/${repository}:${GITHUB_RUN_ID}"
          image_latest="ghcr.io/${repository}:latest"
          ./build \
            --tag "$image_tag" \
            --tag "$image_latest" \
            --push
          cat >"$GITHUB_STEP_SUMMARY" <<~~
          Image successfully published.
          Pull the image for local use:
          docker pull $image_tag
          ~~
136 changes: 20 additions & 116 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,119 +1,23 @@
# FROM condaforge/mambaforge:latest AS conda
# LABEL io.github.snakemake.containerized="true"
# LABEL io.github.snakemake.conda_env_hash="46e372c7772af8a6dc79f1d5ef885e12edf75fe9839d9b38ee010668be95888c"

# # Step 1: Retrieve conda environments

# # Conda environment:
# # source: cartography.yml
# # prefix: /conda-envs/4f5bdb38739416e451545e49f72c0b3d
# # name: cartography
# # channels:
# # - conda-forge
# # - bioconda
# # - defaults
# # dependencies:
# # - augur=13.1.2
# # - pip
# # - seaborn
# # - altair
# # - altair_saver
# # - dendropy
# # - python=3.7*
# # - scikit-learn
# # - umap-learn
# # - jsonschema
# # - jupyterlab
# # - matplotlib
# # - nodejs
# # - pandoc=2.14.1
# # - pandoc-crossref=0.3.12.0
# # - selenium
# # - python-chromedriver-binary
# # - reportlab
# # - wget
# # - snakemake
# # - snp-sites
# # - tabix
# # - pixy
# # - openjdk=11
# # - tsv-utils
# # - samtools
# # - joblib=1.1.0 # required for hdbscan to work
# # - hdbscan=0.8.28
# # - statsmodels
# # - pip:
# # - pathogen-embed==0.1.0
# # - statistics
# # - svglib
# # - tabulate
# RUN mkdir -p /conda-envs/4f5bdb38739416e451545e49f72c0b3d
# COPY cartography.yml /conda-envs/4f5bdb38739416e451545e49f72c0b3d/environment.yaml

# # Step 2: Generate conda environments

# RUN mamba env create --prefix /conda-envs/4f5bdb38739416e451545e49f72c0b3d --file /conda-envs/4f5bdb38739416e451545e49f72c0b3d/environment.yaml && \
# mamba clean --all -y

FROM julia

RUN apt-get update -y && \
apt-get install clang -y

RUN julia -e 'using Pkg; Pkg.add(["TreeKnit"]); Pkg.build("TreeKnit");'

ARG MINIFORGE_NAME=Miniforge3
ARG MINIFORGE_VERSION=23.1.0-4
ARG TARGETPLATFORM

ENV CONDA_DIR=/opt/conda
ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
ENV PATH=${CONDA_DIR}/bin:${PATH}

# 1. Install just enough for conda to work
# 2. Keep $HOME clean (no .wget-hsts file), since HSTS isn't useful in this context
# 3. Install miniforge from GitHub releases
# 4. Apply some cleanup tips from https://jcrist.github.io/conda-docker-tips.html
# Particularly, we remove pyc and a files. The default install has no js, we can skip that
# 5. Activate base by default when running as any *non-root* user as well
# Good security practice requires running most workloads as non-root
# This makes sure any non-root users created also have base activated
# for their interactive shells.
# 6. Activate base by default when running as root as well
# The root user is already created, so won't pick up changes to /etc/skel
RUN apt-get update > /dev/null && \
apt-get install --no-install-recommends --yes \
wget bzip2 ca-certificates \
git \
tini \
> /dev/null && \
# Start from a base image
FROM nextstrain/base:build-20240710T214955Z

# Install Python packages.
#
# `--no-cache-dir` stops pip from writing its download/wheel cache into this
# layer, so the whole ~/.cache directory no longer has to be deleted afterwards.
#
# NOTE(review): the comment below was attached to this step but does not
# describe the pip install; confirm whether ~/.cache needs any particular
# permissions in this image.
# Allow Snakemake to create subdirs in the user cache dir
# <https://github.com/nextstrain/ncov-ingest/pull/401>
RUN pip3 install --no-cache-dir \
    "altair[all]" \
    jupyterlab \
    notebook \
    seaborn \
    statsmodels

# Install Java.
RUN apt-get update && apt-get install -y --no-install-recommends \
default-jre && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
wget --no-hsts --quiet https://github.com/conda-forge/miniforge/releases/download/${MINIFORGE_VERSION}/${MINIFORGE_NAME}-${MINIFORGE_VERSION}-Linux-$(uname -m).sh -O /tmp/miniforge.sh && \
/bin/bash /tmp/miniforge.sh -b -p ${CONDA_DIR} && \
rm /tmp/miniforge.sh && \
conda clean --tarballs --index-cache --packages --yes && \
find ${CONDA_DIR} -follow -type f -name '*.a' -delete && \
find ${CONDA_DIR} -follow -type f -name '*.pyc' -delete && \
conda clean --force-pkgs-dirs --all --yes && \
echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> /etc/skel/.bashrc && \
echo ". ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate base" >> ~/.bashrc

ENTRYPOINT ["tini", "--"]
CMD [ "/bin/bash" ]

# Step 2: Generate conda environments

RUN conda upgrade conda

RUN conda install -c conda-forge mamba

COPY cartography.yml /cartography.yaml
rm -rf /var/lib/apt/lists/*

RUN mamba env update -f cartography.yaml && \
mamba clean --all -y

# RUN mamba env create --file cartography.yaml && \
# mamba clean --all -y
# Install Julia.

#RUN conda activate cartography
# Install TreeKnit.
20 changes: 20 additions & 0 deletions build
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
#
# Build the image.
#
# Usage: ./build [docker buildx build options...]
#
# All arguments are forwarded unchanged to `docker buildx build`, which is run
# with the current directory as the build context.
#
set -euo pipefail

# `buildx create` is necessary to use a driver that supports multi-platform
# images.
builder=custom-builder


if ! docker buildx inspect "$builder" &>/dev/null; then
    # Using a persistent builder allows for faster local development.
    # However, if this is changed and it was previously run on your machine,
    # you may need to remove the builder manually before running the script:
    #     docker buildx rm "custom-builder"
    docker buildx create --use --name "$builder" --driver docker-container --driver-opt network=host
fi

# Name the builder explicitly: `--use` above only runs when the builder is
# first created, so on later runs a different builder may be the selected one.
docker buildx build --builder "$builder" "$@" .
18 changes: 18 additions & 0 deletions clean
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/bin/bash
#
# Remove the persistent buildx builder used for image builds, printing its
# disk usage first. Does nothing (beyond a message) if the builder is absent.
#
set -euo pipefail

builder=custom-builder

echo "--> Deleting $builder (and its caches)"

# Nothing to clean up when the builder was never created.
if ! docker buildx inspect "$builder" &>/dev/null; then
    echo "skipped; $builder does not exist"
    exit 0
fi

# Show what is about to be freed, then drop the builder.
docker buildx du --builder "$builder"
docker buildx rm --builder "$builder"

0 comments on commit 0ee8fb0

Please sign in to comment.