Skip to content

Commit

Permalink
Adjust Dockerfile & deployment to use micromamba
Browse files Browse the repository at this point in the history
* Use micromamba not conda in Dockerfile CMD, also use pip install --no-deps
* Use micromamba not conda in command passed to build container
* Use default mambauser rather than catalyst in docker container
* Remove --no-capture-output, which isn't supported by micromamba. Is this a problem?
* Remove uninterpolated vars in .env and more --no-capture-output
* Separate ETL and pytest commands.
* Stop trying to run tests in parallel. Sigh.
* Add google cloud sdk to conda environment.
* Install Google Cloud SDK from conda-forge.
* Add back in the making of required directories. Oops.
* Attempt to have micromamba run pass through output
  • Loading branch information
zaneselvans committed Oct 27, 2023
1 parent 9767c39 commit 4e5589f
Show file tree
Hide file tree
Showing 13 changed files with 4,189 additions and 20,114 deletions.
9 changes: 5 additions & 4 deletions .github/workflows/build-deploy-pudl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,12 @@ jobs:
gcloud compute instances update-container "$GCE_INSTANCE" \
--zone "$GCE_INSTANCE_ZONE" \
--container-image "docker.io/catalystcoop/pudl-etl:${{ env.GITHUB_REF }}" \
--container-command "conda" \
--container-command "micromamba" \
--container-arg="run" \
--container-arg="--no-capture-output" \
--container-arg="-p" \
--container-arg="/home/catalyst/env" \
--container-arg="--prefix" \
--container-arg="/home/mambauser/env" \
--container-arg="--attach" \
--container-arg='' \
--container-arg="bash" \
--container-arg="./docker/gcp_pudl_etl.sh" \
--container-env-file="./docker/.env" \
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/zenodo-cache-sync.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ env:
PUBLIC_ZENODO_CACHE_BUCKET: gs://zenodo-cache.catalyst.coop
GITHUB_REF: ${{ github.ref_name }} # This is changed to dev if running on a schedule
PUDL_OUTPUT: ~/pudl-work/output
PUDL_INPUT: ~/pudl-work/data/
PUDL_INPUT: ~/pudl-work/input/

jobs:
zenodo-cache-sync:
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ endif

# Regenerate the conda lockfile and render platform specific conda environments.
conda-lock:
rm -f environments/conda-lock.yml
rm -f environments/conda-*lock.yml
conda-lock \
--${mamba} \
--file=pyproject.toml \
Expand Down
16 changes: 8 additions & 8 deletions docker/.env
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
HOST_PUDL_IN=./pudl_in
HOST_PUDL_OUT=./pudl_out
CONTAINER_HOME=/home/catalyst
PUDL_INPUT=/home/catalyst/pudl_work/data
PUDL_OUTPUT=/home/catalyst/pudl_work/output
DAGSTER_HOME=/home/catalyst/pudl_work/dagster_home
CONDA_PREFIX=/home/catalyst/env
PUDL_SETTINGS_YML=/home/catalyst/src/pudl/package_data/settings/etl_full.yml
LOGFILE=/home/catalyst/pudl_work/output/pudl-etl.log
CONDA_RUN="conda run --no-capture-output --prefix /home/catalyst/env"
CONTAINER_HOME=/home/mambauser
PUDL_INPUT=/home/mambauser/pudl_work/input
PUDL_OUTPUT=/home/mambauser/pudl_work/output
DAGSTER_HOME=/home/mambauser/pudl_work/dagster_home
CONDA_PREFIX=/home/mambauser/env
PUDL_SETTINGS_YML=/home/mambauser/src/pudl/package_data/settings/etl_full.yml
LOGFILE=/home/mambauser/pudl_work/output/pudl-etl.log
CONDA_RUN="micromamba run --prefix /home/mambauser/env --attach ''"
GCS_CACHE=gs://zenodo-cache.catalyst.coop
7 changes: 5 additions & 2 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ ENV PUDL_INPUT=${CONTAINER_PUDL_WORKSPACE}/input
ENV PUDL_OUTPUT=${CONTAINER_PUDL_WORKSPACE}/output
ENV DAGSTER_HOME=${CONTAINER_PUDL_WORKSPACE}/dagster_home

# Create data input/output directories
RUN mkdir -p ${PUDL_INPUT} ${PUDL_OUTPUT} ${DAGSTER_HOME}

# Create a conda environment based on the specification in the repo
COPY environments/conda-lock.yml environments/conda-lock.yml
RUN micromamba create --prefix ${CONDA_PREFIX} --yes --category main dev docs test datasette --file environments/conda-lock.yml && \
Expand All @@ -46,9 +49,9 @@ ENV LD_LIBRARY_PATH=${CONDA_PREFIX}/lib
# We need information from .git to get version with setuptools_scm so we mount that
# directory without copying it into the image.
RUN --mount=type=bind,source=.git,target=${PUDL_REPO}/.git \
${CONDA_RUN} pip install --no-cache-dir --editable . && \
${CONDA_RUN} pip install --no-cache-dir --no-deps --editable . && \
# Run the PUDL setup script so we know where to read and write data
${CONDA_RUN} pudl_setup

# Run the unit tests:
CMD ["conda", "run", "--no-capture-output", "--prefix", "${CONDA_PREFIX}", "pytest", "test/unit"]
CMD ["micromamba", "run", "--prefix", "${CONDA_PREFIX}", "--attach", "''", "pytest", "test/unit"]
6 changes: 2 additions & 4 deletions docker/gcp_pudl_etl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,14 @@ function run_pudl_etl() {
--max-concurrent 6 \
--gcs-cache-path gs://internal-zenodo-cache.catalyst.coop \
$PUDL_SETTINGS_YML && \
# Run multiple pytest processes in the background and wait for them to exit
pytest \
--gcs-cache-path=gs://internal-zenodo-cache.catalyst.coop \
--etl-settings=$PUDL_SETTINGS_YML \
--live-dbs test/integration test/unit & \
--live-dbs test/integration test/unit && \
pytest \
--gcs-cache-path=gs://internal-zenodo-cache.catalyst.coop \
--etl-settings=$PUDL_SETTINGS_YML \
--live-dbs test/validate & \
wait
--live-dbs test/validate
}

function shutdown_vm() {
Expand Down
43 changes: 26 additions & 17 deletions environments/conda-linux-64.lock.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Generated by conda-lock.
# platform: linux-64
# input_hash: 7c8b2f7fe28cdfc4b83bb1b9bf64f5957d45794c915656c2c040ddb9df08ef23
# input_hash: 76d990a5280c67a86298a825022c4767c0b2833a5d8c84a1e7737006040a2986

channels:
- conda-forge
Expand Down Expand Up @@ -121,7 +121,7 @@ dependencies:
- libthrift=0.19.0=hb90f79a_1
- libtiff=4.6.0=ha9c0a0a_2
- libxslt=1.1.37=h0054252_1
- minizip=4.0.1=h0ab5242_5
- minizip=4.0.2=h0ab5242_0
- nodejs=20.8.1=h1990674_0
- nss=3.94=h1d7d5a4_0
- orc=1.9.0=h52d3b3c_2
Expand All @@ -139,6 +139,7 @@ dependencies:
- backcall=0.2.0=pyh9f0ad1d_0
- backoff=2.2.1=pyhd8ed1ab_0
- backports=1.0=pyhd8ed1ab_3
- backports.zoneinfo=0.2.1=py311h38be061_8
- blinker=1.6.3=pyhd8ed1ab_0
- brotli=1.1.0=hd590300_1
- brotli-python=1.1.0=py311hb755f60_1
Expand All @@ -156,7 +157,7 @@ dependencies:
- colorama=0.4.6=pyhd8ed1ab_0
- crashtest=0.4.1=pyhd8ed1ab_0
- cycler=0.12.1=pyhd8ed1ab_0
- dagster-pipes=1.5.4=pyhd8ed1ab_1
- dagster-pipes=1.5.5=pyhd8ed1ab_0
- dataclasses=0.8=pyhc8e2a94_3
- dbus=1.13.6=h5008d03_3
- debugpy=1.8.0=py311hb755f60_1
Expand All @@ -174,6 +175,7 @@ dependencies:
- freexl=2.0.0=h743c826_0
- frozenlist=1.4.0=py311h459d7ec_1
- fsspec=2023.9.2=pyh1a96a4e_0
- google-cloud-sdk=452.0.1=py311h38be061_0
- greenlet=3.0.1=py311hb755f60_0
- grpcio=1.57.0=py311ha6695c7_2
- hpack=4.0.0=pyh9f0ad1d_0
Expand Down Expand Up @@ -209,6 +211,7 @@ dependencies:
- more-itertools=10.1.0=pyhd8ed1ab_0
- msgpack-python=1.0.6=py311h9547e67_0
- multidict=6.0.4=py311h459d7ec_1
- multimethod=1.9.1=pyhd8ed1ab_0
- munch=4.0.0=pyhd8ed1ab_0
- munkres=1.1.4=pyh9f0ad1d_0
- mypy_extensions=1.0.0=pyha770c72_0
Expand Down Expand Up @@ -254,7 +257,7 @@ dependencies:
- rpds-py=0.10.6=py311h46250e7_0
- rtree=1.1.0=py311h3bb2b0f_0
- ruamel.yaml.clib=0.2.7=py311h459d7ec_2
- ruff=0.1.2=py311h7145743_0
- ruff=0.1.3=py311h7145743_0
- send2trash=1.8.2=pyh41d4057_0
- setuptools=68.2.2=pyhd8ed1ab_0
- shellingham=1.5.4=pyhd8ed1ab_0
Expand All @@ -263,6 +266,7 @@ dependencies:
- smmap=5.0.0=pyhd8ed1ab_0
- sniffio=1.3.0=pyhd8ed1ab_0
- snowballstemmer=2.2.0=pyhd8ed1ab_0
- sortedcontainers=2.4.0=pyhd8ed1ab_0
- soupsieve=2.5=pyhd8ed1ab_1
- sphinxcontrib-jsmath=1.0.1=pyhd8ed1ab_0
- stringcase=1.2.0=py_0
Expand Down Expand Up @@ -290,17 +294,18 @@ dependencies:
- websockets=10.4=py311hd4cff14_1
- wheel=0.41.2=pyhd8ed1ab_0
- widgetsnbextension=4.0.9=pyhd8ed1ab_0
- wrapt=1.15.0=py311h459d7ec_1
- xlrd=2.0.1=pyhd8ed1ab_3
- xlsxwriter=3.1.7=pyhd8ed1ab_0
- xorg-libxext=1.3.4=h0b41bf4_2
- xorg-libxrender=0.9.11=hd590300_0
- xyzservices=2023.10.0=pyhd8ed1ab_0
- xyzservices=2023.10.1=pyhd8ed1ab_0
- zipp=3.17.0=pyhd8ed1ab_0
- aiosignal=1.3.1=pyhd8ed1ab_0
- anyio=4.0.0=pyhd8ed1ab_0
- asgi-csrf=0.9=pyhd8ed1ab_0
- asgiref=3.7.2=pyhd8ed1ab_0
- asttokens=2.4.0=pyhd8ed1ab_0
- asttokens=2.4.1=pyhd8ed1ab_0
- async-lru=2.0.4=pyhd8ed1ab_0
- aws-c-auth=0.7.4=h1083cbe_2
- aws-c-mqtt=0.9.7=h55cd26b_0
Expand All @@ -327,6 +332,7 @@ dependencies:
- h2=4.1.0=pyhd8ed1ab_0
- hdf5=1.14.2=nompi_h4f84152_100
- html5lib=1.1=pyh9f0ad1d_0
- hypothesis=6.88.1=pyha770c72_0
- importlib-metadata=6.8.0=pyha770c72_0
- importlib_resources=6.1.0=pyhd8ed1ab_0
- isodate=0.6.1=pyhd8ed1ab_0
Expand Down Expand Up @@ -372,6 +378,7 @@ dependencies:
- tinycss2=1.2.1=pyhd8ed1ab_0
- tqdm=4.66.1=pyhd8ed1ab_0
- typing-extensions=4.8.0=hd8ed1ab_0
- typing_inspect=0.9.0=pyhd8ed1ab_0
- universal_pathlib=0.1.4=pyhd8ed1ab_0
- urllib3=1.26.18=pyhd8ed1ab_0
- watchdog=3.0.0=py311h38be061_1
Expand All @@ -383,7 +390,7 @@ dependencies:
- arrow=1.3.0=pyhd8ed1ab_0
- async-timeout=4.0.3=pyhd8ed1ab_0
- aws-c-s3=0.3.17=hfb4bb88_4
- botocore=1.31.70=pyhd8ed1ab_0
- botocore=1.31.72=pyhd8ed1ab_0
- branca=0.6.0=pyhd8ed1ab_0
- cmarkgfm=0.8.0=py311h459d7ec_3
- croniter=2.0.1=pyhd8ed1ab_0
Expand Down Expand Up @@ -426,11 +433,11 @@ dependencies:
- watchfiles=0.20.0=py311h46250e7_2
- wcwidth=0.2.8=pyhd8ed1ab_0
- aiohttp=3.8.6=py311h459d7ec_1
- alembic=1.12.0=pyhd8ed1ab_0
- arelle-release=2.16.3=pyhd8ed1ab_0
- alembic=1.12.1=pyhd8ed1ab_0
- arelle-release=2.17.0=pyhd8ed1ab_0
- argon2-cffi=23.1.0=pyhd8ed1ab_0
- aws-crt-cpp=0.24.2=ha28989d_2
- black=23.10.0=py311h38be061_0
- black=23.10.1=py311h38be061_0
- bottleneck=1.3.7=py311h1f0f07a_1
- cachecontrol=0.13.1=pyhd8ed1ab_0
- contourpy=1.1.1=py311h9547e67_1
Expand All @@ -451,7 +458,7 @@ dependencies:
- numba=0.57.1=py311h96b013e_0
- numexpr=2.8.7=py311h039bad6_104
- oauthlib=3.2.2=pyhd8ed1ab_0
- pandas=2.1.1=py311h320fe9a_1
- pandas=2.1.2=py311h320fe9a_0
- prompt-toolkit=3.0.39=pyha770c72_0
- pybtex-docutils=1.0.3=py311h38be061_1
- pyopenssl=23.2.0=pyhd8ed1ab_1
Expand All @@ -464,13 +471,14 @@ dependencies:
- secretstorage=3.3.3=py311h38be061_2
- shapely=2.0.2=py311he06c224_0
- stevedore=5.1.0=pyhd8ed1ab_0
- typeguard=4.1.5=pyhd8ed1ab_1
- typer=0.9.0=pyhd8ed1ab_0
- uvicorn-standard=0.23.2=h38be061_1
- virtualenv=20.24.6=pyhd8ed1ab_0
- aws-sdk-cpp=1.11.156=h314d761_4
- boto3=1.28.70=pyhd8ed1ab_0
- boto3=1.28.72=pyhd8ed1ab_0
- cachecontrol-with-filecache=0.13.1=pyhd8ed1ab_0
- dagster=1.5.4=pyhd8ed1ab_1
- dagster=1.5.5=pyhd8ed1ab_0
- datasette=0.64.4=pyhd8ed1ab_1
- doc8=1.1.1=pyhd8ed1ab_0
- email-validator=2.1.0.post1=pyhd8ed1ab_0
Expand All @@ -484,14 +492,15 @@ dependencies:
- keyring=24.2.0=py311h38be061_1
- matplotlib-base=3.8.0=py311h54ef318_2
- nbformat=5.9.2=pyhd8ed1ab_0
- pandera-core=0.17.2=pyhd8ed1ab_0
- pre-commit=3.5.0=pyha770c72_0
- prompt_toolkit=3.0.39=hd8ed1ab_0
- requests-oauthlib=1.3.1=pyhd8ed1ab_0
- scikit-learn=1.3.2=py311hc009520_1
- timezonefinder=6.2.0=py311h459d7ec_1
- catalystcoop.ferc_xbrl_extractor=1.1.1=pyhd8ed1ab_0
- conda-lock=2.4.1=pyhd8ed1ab_0
- dagster-graphql=1.5.4=pyhd8ed1ab_1
- catalystcoop.ferc_xbrl_extractor=1.2.1=pyhd8ed1ab_0
- conda-lock=2.4.2=pyhd8ed1ab_0
- dagster-graphql=1.5.5=pyhd8ed1ab_0
- fiona=1.9.5=py311hbac4ec9_0
- google-api-core=2.12.0=pyhd8ed1ab_0
- google-auth-oauthlib=1.1.0=pyhd8ed1ab_0
Expand All @@ -503,7 +512,7 @@ dependencies:
- recordlinkage=0.16=pyhd8ed1ab_0
- tabulator=1.53.5=pyhd8ed1ab_0
- twine=4.0.2=pyhd8ed1ab_0
- dagster-webserver=1.5.4=pyhd8ed1ab_1
- dagster-webserver=1.5.5=pyhd8ed1ab_0
- geopandas=0.14.0=pyhd8ed1ab_1
- google-cloud-core=2.3.3=pyhd8ed1ab_0
- ipdb=0.13.13=pyhd8ed1ab_0
Expand Down
Loading

0 comments on commit 4e5589f

Please sign in to comment.