From 87dab1102a7a82c9ef4387ab6889b89094e629e8 Mon Sep 17 00:00:00 2001 From: James Frost Date: Thu, 1 Aug 2024 13:10:33 +0100 Subject: [PATCH 1/2] Merge install_local_cset into build_conda The install_local_cset script was converted to bash as part of this. This means we only have a single job for environment setup, and test that the environment actually works as part of that. We also skip copying non-git hidden files when installing local CSET, as they can often include large things like conda environments that slow the script significantly. --- .../app/build_conda/bin/build_conda_env.sh | 142 +++++++++++------- .../app/build_conda/bin/install-local-cset.sh | 29 ++++ .../bin/install-local-cset.py | 46 ------ .../app/install_local_cset/rose-app.conf | 2 - cset-workflow/flow.cylc | 12 +- 5 files changed, 118 insertions(+), 113 deletions(-) create mode 100755 cset-workflow/app/build_conda/bin/install-local-cset.sh delete mode 100755 cset-workflow/app/install_local_cset/bin/install-local-cset.py delete mode 100644 cset-workflow/app/install_local_cset/rose-app.conf diff --git a/cset-workflow/app/build_conda/bin/build_conda_env.sh b/cset-workflow/app/build_conda/bin/build_conda_env.sh index c4430538c..a00026cdb 100755 --- a/cset-workflow/app/build_conda/bin/build_conda_env.sh +++ b/cset-workflow/app/build_conda/bin/build_conda_env.sh @@ -1,76 +1,106 @@ #! /bin/bash # This script builds the conda environment used by most subsequent tasks. set -euo pipefail -IFS="$(printf '\n\t')" # Use default location if CONDA_VENV_LOCATION is not specified. if [[ -z "$CONDA_VENV_LOCATION" ]]; then CONDA_VENV_LOCATION="${CYLC_WORKFLOW_SHARE_DIR}/cset_conda_env" fi -# Decide if the environment needs building. -if [[ "$CONDA_VENV_CREATE" == True ]]; then - true -elif [[ "$CONDA_VENV_CREATE" == False ]]; then - echo "Conda environment building disabled" - exit 0 -else - echo "Invalid value for CONDA_VENV_CREATE: $CONDA_VENV_CREATE" - exit 1 -fi -# Find environment definition file, abort if not found. -env_lock_file="${CYLC_WORKFLOW_RUN_DIR}/requirements/locks/py312-lock-linux-64.txt" -if [[ -f "$env_lock_file" ]]; then - echo "Using environment file $env_lock_file" -else - echo "Environment file $env_lock_file not found" - exit 1 -fi +should_build_conda_env() { + # Decide if the environment needs building. + if [[ "$CONDA_VENV_CREATE" == True ]] + then + true + elif [[ "$CONDA_VENV_CREATE" == False ]] + then + echo "Conda environment building disabled" + return 1 + else + >&2 echo "Invalid value for CONDA_VENV_CREATE: $CONDA_VENV_CREATE" + exit 1 + fi -if [[ -f "${CONDA_VENV_LOCATION}/cset_env_hash" ]]; then - if [[ "$(cat "${CONDA_VENV_LOCATION}/cset_env_hash")" == "$(sha256sum "$env_lock_file" | head -c 64)" ]] + # Find environment definition file, abort if not found. + env_lock_file="${CYLC_WORKFLOW_RUN_DIR}/requirements/locks/py312-lock-linux-64.txt" + if [[ -f "$env_lock_file" ]] then - echo "Conda environment already exist, no build required" - exit 0 + echo "Using environment file $env_lock_file" else - echo "Conda environment is out of date, building afresh" + >&2 echo "Environment file $env_lock_file not found" + exit 1 fi -else - echo "Conda environment does not exist, building afresh" -fi -# Source modules/paths required to build the environment. -if [[ $CSET_ENV_USE_MODULES == True ]]; then - if [[ $MODULES_LIST ]]; then - IFS_SAVE=$IFS - IFS=' ' - if [[ $MODULES_PURGE == True ]]; then - module purge + if [[ -f "${CONDA_VENV_LOCATION}/cset_env_hash" ]] + then + if [[ "$(cat "${CONDA_VENV_LOCATION}/cset_env_hash")" == "$(sha256sum "$env_lock_file" | head -c 64)" ]] + then + echo "Conda environment already exist, no build required" + return 1 + else + echo "Conda environment is out of date, building afresh" fi - for build_module in $MODULES_LIST; do - # Loads the same modules that the other tasks uses, although it only needs - # a module to make conda available. This is to simplify the logic. - module load "$build_module" - done - IFS="$IFS_SAVE" - echo "sourcing conda via modules:" - module list + else + echo "Conda environment does not exist, building afresh" fi -fi -if [[ -d "$CONDA_PATH" ]]; then - echo "Sourcing conda on path: ${CONDA_PATH}" -else - CONDA_PATH="" -fi -# Remove old conda environment. -rm -rf -- "$CONDA_VENV_LOCATION" + return 0 +} -# Build conda environment. -echo "Building conda with:" -echo "${CONDA_PATH}conda create -p $CONDA_VENV_LOCATION --file $env_lock_file --yes --force --quiet" -"${CONDA_PATH}conda" create -p "$CONDA_VENV_LOCATION" --file "$env_lock_file" --yes --force --quiet -# Create hash file for next run. -sha256sum "$env_lock_file" | head -c 64 > "${CONDA_VENV_LOCATION}/cset_env_hash" +build_conda_env() { + # Source modules/paths required to build the environment. + if [[ $CSET_ENV_USE_MODULES == True ]] + then + if [[ $MODULES_LIST ]] + then + if [[ $MODULES_PURGE == True ]] + then + module purge + fi + for build_module in $MODULES_LIST + do + # Loads the same modules that the other tasks uses, although it only needs + # a module to make conda available. This is to simplify the logic. + module load "$build_module" + done + echo "Sourcing conda via modules:" + module list + fi + fi + + if [[ -d "$CONDA_PATH" ]] + then + echo "Sourcing conda from: ${CONDA_PATH}" + else + CONDA_PATH="" + fi + + # Remove old conda environment. + echo "Removing conda environment with:" + echo "${CONDA_PATH}conda remove -p $CONDA_VENV_LOCATION --all --yes --quiet" + if ! "${CONDA_PATH}conda" remove -p "$CONDA_VENV_LOCATION" --all --yes --quiet + then + >&2 echo "Failed to conda remove old environment, trying to remove manually." + rm -rf -- "$CONDA_VENV_LOCATION" + fi + + # Build conda environment. + echo "Building conda with:" + echo "${CONDA_PATH}conda create -p $CONDA_VENV_LOCATION --file $env_lock_file --yes --force --quiet" + "${CONDA_PATH}conda" create -p "$CONDA_VENV_LOCATION" --file "$env_lock_file" --yes --force --quiet + + # Create hash file for next run. + sha256sum "$env_lock_file" | head -c 64 > "${CONDA_VENV_LOCATION}/cset_env_hash" +} + + +if should_build_conda_env +then + build_conda_env +fi + +# Install development version of CSET into the conda environment if needed, and +# validate CSET is installed. This needs to run inside the conda environment. +app_env_wrapper install-local-cset.sh diff --git a/cset-workflow/app/build_conda/bin/install-local-cset.sh b/cset-workflow/app/build_conda/bin/install-local-cset.sh new file mode 100755 index 000000000..c29ace6b6 --- /dev/null +++ b/cset-workflow/app/build_conda/bin/install-local-cset.sh @@ -0,0 +1,29 @@ +#! /bin/bash + +# Install development version of CSET into the conda environment if needed. + +set -euo pipefail + +if [[ $CSET_ENV_USE_LOCAL_CSET = "True" ]] +then + cset_install_path="$(mktemp -d)" + cset_source_path="${CSET_LOCAL_CSET_PATH}" + echo "Using local CSET from ${cset_source_path}" + + # Directly install wheel files, or copy source folder. + if [[ $cset_source_path == *.whl ]] + then + echo "Wheel file, installing directly." + cset_install_path="${cset_source_path}" + else + # Copy project to temporary location to avoid permissions issues. We + # don't want to copy all hidden files, as they can contain large conda + # environments, but we do want the .git directory. + cp -r "${cset_source_path}"/* "${cset_source_path}"/.git "${cset_install_path}" + fi + + # Build and install into python environment. + pip install --verbose --progress-bar off --no-deps -- "${cset_install_path}" +fi + +echo "Using CSET version: $(cset --version)" diff --git a/cset-workflow/app/install_local_cset/bin/install-local-cset.py b/cset-workflow/app/install_local_cset/bin/install-local-cset.py deleted file mode 100755 index bd0ad7085..000000000 --- a/cset-workflow/app/install_local_cset/bin/install-local-cset.py +++ /dev/null @@ -1,46 +0,0 @@ -#! /usr/bin/env python3 - -"""Install development version of CSET into the conda environment if needed.""" - -import logging -import os -import shutil -import subprocess -import sys -import tempfile - -logging.basicConfig( - level=os.getenv("LOGLEVEL", "INFO"), format="%(asctime)s %(levelname)s %(message)s" -) - -if os.getenv("CSET_ENV_USE_LOCAL_CSET") == "True": - with tempfile.TemporaryDirectory() as cset_install_path: - cset_source_path = os.path.expandvars( - os.path.expanduser(os.getenv("CSET_LOCAL_CSET_PATH")) - ) - logging.info("Using local CSET from %s", cset_source_path) - - # Directly install wheel files, or copy source folders. - if cset_source_path.endswith(".whl"): - logging.info("Wheel file, installing directly.") - cset_install_path = cset_source_path - else: - # Copy project to temporary location to avoid permissions issues. - shutil.copytree(cset_source_path, cset_install_path, dirs_exist_ok=True) - - # Build and install into python environment. - subprocess.run( - ( - "pip", - "install", - "--verbose", - "--progress-bar", - "off", - "--no-deps", - cset_install_path, - ), - check=True, - ) - -result = subprocess.run(("cset", "--version"), check=True, capture_output=True) -print(f"Using CSET version: {result.stdout.decode(sys.stdout.encoding)}") diff --git a/cset-workflow/app/install_local_cset/rose-app.conf b/cset-workflow/app/install_local_cset/rose-app.conf deleted file mode 100644 index e5af56971..000000000 --- a/cset-workflow/app/install_local_cset/rose-app.conf +++ /dev/null @@ -1,2 +0,0 @@ -[command] -default=app_env_wrapper install-local-cset.py diff --git a/cset-workflow/flow.cylc b/cset-workflow/flow.cylc index aff4cef2c..2fae0c1ff 100644 --- a/cset-workflow/flow.cylc +++ b/cset-workflow/flow.cylc @@ -15,9 +15,7 @@ URL = https://metoffice.github.io/CSET [[graph]] # Only runs on the first cycle. R1/^ = """ - build_conda => install_website_skeleton - build_conda => install_local_cset - install_website_skeleton & install_local_cset => FETCH_DATA + build_conda & install_website_skeleton => FETCH_DATA """ # Only runs on the final cycle. @@ -112,15 +110,11 @@ URL = https://metoffice.github.io/CSET platform = localhost [[build_conda]] - # Create the conda environment if it does not yet exist. + # Create the conda environment if it does not yet exist, possibly installing + # CSET from source. execution time limit = PT30M [[[environment]]] CONDA_VENV_CREATE = {{CONDA_VENV_CREATE}} - - [[install_local_cset]] - # Install CSET from source. - execution time limit = PT10M - [[[environment]]] CSET_ENV_USE_LOCAL_CSET = {{CSET_ENV_USE_LOCAL_CSET}} {% if CSET_ENV_USE_LOCAL_CSET %} CSET_LOCAL_CSET_PATH = {{CSET_LOCAL_CSET_PATH}} From 7c4347b515a6b98dc91a6ddfc4f8241f63b22e49 Mon Sep 17 00:00:00 2001 From: dasha-shchep <28752245+dasha-shchep@users.noreply.github.com> Date: Thu, 15 Aug 2024 16:49:17 +0100 Subject: [PATCH 2/2] Fix instance of `install_local_cset` in flow.cylc --- cset-workflow/flow.cylc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cset-workflow/flow.cylc b/cset-workflow/flow.cylc index 2fae0c1ff..3f9a866de 100644 --- a/cset-workflow/flow.cylc +++ b/cset-workflow/flow.cylc @@ -26,7 +26,7 @@ URL = https://metoffice.github.io/CSET # Runs every cycle to process the data in parallel. {{CSET_CYCLE_PERIOD}} = """ - install_website_skeleton[^] & install_local_cset[^] => + install_website_skeleton[^] & build_conda[^] => FETCH_DATA:succeed-all => PARALLEL:succeed-all => process_finish => housekeeping_raw