Skip to content

Commit

Permalink
refactor(datascience): simplify stages
Browse files Browse the repository at this point in the history
  • Loading branch information
adamblake committed Jan 5, 2024
1 parent f7c95ae commit 7e25aa5
Showing 1 changed file with 16 additions and 58 deletions.
74 changes: 16 additions & 58 deletions datascience-notebook/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,28 +22,34 @@ ARG APT_LIB_DIR=/var/lib/apt
ARG SHARED_LIBS_LIST=/tmp/shared-libs.txt


# py-builder
# base
# -------------------------------------
FROM ${PARENT_IMAGE} as py-builder
FROM ${PARENT_IMAGE} as base

# Switch to root: the apt-get package installation below needs it.
USER root

# install cmdstan dependencies
# Re-declare the apt directory build args so they are usable in this stage
# (presumably they are defined with defaults before the first FROM — confirm).
ARG APT_CACHE_DIR APT_LIB_DIR
# Both apt directories are mounted as BuildKit cache mounts (sharing=locked),
# so their contents are kept on the build host rather than written to the
# image layer. The hadolint ignores below silence the "pin apt versions"
# (DL3008) and "delete apt lists" (DL3009) rules for this RUN.
# hadolint ignore=DL3008,DL3009
RUN --mount=type=cache,target="${APT_CACHE_DIR}",sharing=locked \
--mount=type=cache,target="${APT_LIB_DIR}",sharing=locked \
apt-get update && apt-get upgrade -y && apt-get -y install --no-install-recommends \
g++

# Leave root again before installing R packages
# (NB_UID is presumably the unprivileged notebook user set by the parent image — confirm).
USER ${NB_UID}

# install cmdstan
# Two steps: install the cmdstanr R package (with its dependencies) from the
# mc-stan.org package repository, then have cmdstanr install CmdStan itself.
RUN Rscript -e 'remotes::install_cran("cmdstanr", repos = "https://mc-stan.org/r-packages/", dependencies = TRUE)' \
&& Rscript -e 'cmdstanr::install_cmdstan()'


# py-builder
# -------------------------------------
FROM base as py-builder

RUN --mount=type=cache,target=${CONDA_DIR}/pkgs,uid=${NB_UID},sharing=locked \
mamba install --yes \
"gdal" \
"pkg-config" \
"proj" \
&& fix-permissions "${CONDA_DIR}" \
&& fix-permissions "/home/${NB_USER}"

USER ${NB_UID}
"gdal"

# make wheels for the Python packages
ARG PIP_CACHE_DIR PIP_WHEEL_DIR PY_REQUIREMENTS
Expand All @@ -53,59 +59,11 @@ RUN --mount=type=bind,source="${PY_REQUIREMENTS}",target=/tmp/requirements.txt \
&& pip3 wheel --cache-dir="${PIP_CACHE_DIR}" --wheel-dir="${PIP_WHEEL_DIR}" -r /tmp/requirements.txt


# r-builder
# -------------------------------------
FROM ${PARENT_IMAGE} as r-builder


# pre-shared-libs
# -------------------------------------
FROM ${BASE_IMAGE} as pre-shared-libs
ARG SHARED_LIBS_LIST
RUN find-shared-libs "${R_HOME}/library" > "${SHARED_LIBS_LIST}"


# new-shared-libs
# -------------------------------------
FROM r-builder as new-shared-libs
ARG SHARED_LIBS_LIST
COPY --from=pre-shared-libs "${SHARED_LIBS_LIST}" /tmp/pre-shared-libs.txt
RUN find-shared-libs "${R_HOME}/library" > /tmp/post-shared-libs.txt \
&& diff-shared-libs /tmp/pre-shared-libs.txt /tmp/post-shared-libs.txt > "${SHARED_LIBS_LIST}"


# final
# -------------------------------------
FROM ${BASE_IMAGE} as final
FROM base
LABEL maintainer="CourseKata <[email protected]>"

USER root

# install cmdstan dependencies
ARG APT_CACHE_DIR APT_LIB_DIR
# hadolint ignore=DL3008,DL3009
RUN --mount=type=cache,target="${APT_CACHE_DIR}",sharing=locked \
--mount=type=cache,target="${APT_LIB_DIR}",sharing=locked \
apt-get update && apt-get upgrade -y && apt-get -y install --no-install-recommends \
g++

# copy over R packages and shared libraries
ARG SHARED_LIBS_LIST
COPY --from=r-builder --chown=${NB_UID}:${NB_GID} "${R_HOME}/" "${R_HOME}"
RUN --mount=type=cache,from=r-builder,source=/,target=/mounted-r-builder \
--mount=type=bind,from=new-shared-libs,source="${SHARED_LIBS_LIST}",target="${SHARED_LIBS_LIST}" \
copy-shared-libs "${SHARED_LIBS_LIST}" /mounted-r-builder / \
# this is hard to copy over from the r-builder image
&& Rscript -e 'remotes::install_cran("V8", force = TRUE)' \
# install cmdstan
&& Rscript -e 'remotes::install_cran("cmdstanr", repos = "https://mc-stan.org/r-packages/", dependencies = TRUE)' \
&& Rscript -e 'cmdstanr::install_cmdstan()' \
# ensure user has full control of package and home dirs
&& fix-permissions "${CONDA_DIR}" \
&& fix-permissions "/home/${NB_USER}"

USER ${NB_UID}

# install Python packages from wheels
ARG PIP_CACHE_DIR PIP_WHEEL_DIR PY_REQUIREMENTS
RUN --mount=type=bind,source="${PY_REQUIREMENTS}",target=/tmp/requirements.txt \
Expand Down

0 comments on commit 7e25aa5

Please sign in to comment.