Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge install_local_cset into build_conda so environment is setup in a single cylc task #791

Merged
merged 2 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 86 additions & 56 deletions cset-workflow/app/build_conda/bin/build_conda_env.sh
Original file line number Diff line number Diff line change
@@ -1,76 +1,106 @@
#! /bin/bash
# This script builds the conda environment used by most subsequent tasks.
set -euo pipefail
IFS="$(printf '\n\t')"

# Use default location if CONDA_VENV_LOCATION is not specified. The ":-"
# expansion lets an unset variable be treated like an empty one; a bare
# "$CONDA_VENV_LOCATION" would abort the script here because of "set -u".
if [[ -z "${CONDA_VENV_LOCATION:-}" ]]; then
  CONDA_VENV_LOCATION="${CYLC_WORKFLOW_SHARE_DIR}/cset_conda_env"
fi

# Decide if the environment needs building.
if [[ "$CONDA_VENV_CREATE" == True ]]; then
true
elif [[ "$CONDA_VENV_CREATE" == False ]]; then
echo "Conda environment building disabled"
exit 0
else
echo "Invalid value for CONDA_VENV_CREATE: $CONDA_VENV_CREATE"
exit 1
fi

# Find environment definition file, abort if not found.
env_lock_file="${CYLC_WORKFLOW_RUN_DIR}/requirements/locks/py312-lock-linux-64.txt"
if [[ -f "$env_lock_file" ]]; then
echo "Using environment file $env_lock_file"
else
echo "Environment file $env_lock_file not found"
exit 1
fi
should_build_conda_env() {
# Decide if the environment needs building.
if [[ "$CONDA_VENV_CREATE" == True ]]
then
true
elif [[ "$CONDA_VENV_CREATE" == False ]]
then
echo "Conda environment building disabled"
return 1
else
>&2 echo "Invalid value for CONDA_VENV_CREATE: $CONDA_VENV_CREATE"
exit 1
fi

if [[ -f "${CONDA_VENV_LOCATION}/cset_env_hash" ]]; then
if [[ "$(cat "${CONDA_VENV_LOCATION}/cset_env_hash")" == "$(sha256sum "$env_lock_file" | head -c 64)" ]]
# Find environment definition file, abort if not found.
env_lock_file="${CYLC_WORKFLOW_RUN_DIR}/requirements/locks/py312-lock-linux-64.txt"
if [[ -f "$env_lock_file" ]]
then
echo "Conda environment already exist, no build required"
exit 0
echo "Using environment file $env_lock_file"
else
echo "Conda environment is out of date, building afresh"
>&2 echo "Environment file $env_lock_file not found"
exit 1
fi
else
echo "Conda environment does not exist, building afresh"
fi

# Source modules/paths required to build the environment.
if [[ $CSET_ENV_USE_MODULES == True ]]; then
if [[ $MODULES_LIST ]]; then
IFS_SAVE=$IFS
IFS=' '
if [[ $MODULES_PURGE == True ]]; then
module purge
if [[ -f "${CONDA_VENV_LOCATION}/cset_env_hash" ]]
then
if [[ "$(cat "${CONDA_VENV_LOCATION}/cset_env_hash")" == "$(sha256sum "$env_lock_file" | head -c 64)" ]]
then
echo "Conda environment already exist, no build required"
return 1
else
echo "Conda environment is out of date, building afresh"
fi
for build_module in $MODULES_LIST; do
# Loads the same modules that the other tasks use, although only a module
# that makes conda available is needed. This is to simplify the logic.
module load "$build_module"
done
IFS="$IFS_SAVE"
echo "sourcing conda via modules:"
module list
else
echo "Conda environment does not exist, building afresh"
fi
fi
if [[ -d "$CONDA_PATH" ]]; then
echo "Sourcing conda on path: ${CONDA_PATH}"
else
CONDA_PATH=""
fi

# Remove old conda environment.
rm -rf -- "$CONDA_VENV_LOCATION"
return 0
}

# Build conda environment.
echo "Building conda with:"
echo "${CONDA_PATH}conda create -p $CONDA_VENV_LOCATION --file $env_lock_file --yes --force --quiet"
"${CONDA_PATH}conda" create -p "$CONDA_VENV_LOCATION" --file "$env_lock_file" --yes --force --quiet

# Create hash file for next run.
sha256sum "$env_lock_file" | head -c 64 > "${CONDA_VENV_LOCATION}/cset_env_hash"
#######################################
# Remove any existing conda environment and build a fresh one from the lock
# file, then record the lock file's hash inside the new environment.
# Globals:
#   CSET_ENV_USE_MODULES (read)  "True" to load environment modules first.
#   MODULES_LIST (read)          Whitespace-separated module names to load.
#   MODULES_PURGE (read)         "True" to run `module purge` before loading.
#   CONDA_PATH (read, written)   Prefix put directly in front of "conda";
#                                reset to "" when it is not a directory.
#   CONDA_VENV_LOCATION (read)   Prefix directory of the environment.
#   env_lock_file (read)         Lock file passed to `conda create`; this
#                                function never assigns it, so it must be set
#                                before the call.
# Outputs:
#   Progress messages on stdout; a warning on stderr if `conda remove` fails.
# Returns:
#   0 on success. Under `set -e`, a failing module/conda/sha256sum command
#   aborts the script.
#######################################
build_conda_env() {
  local build_module
  local -a build_modules=()

  # Source modules/paths required to build the environment.
  if [[ $CSET_ENV_USE_MODULES == True ]]
  then
    if [[ $MODULES_LIST ]]
    then
      if [[ $MODULES_PURGE == True ]]
      then
        module purge
      fi
      # Split the list on whitespace explicitly. The script-wide IFS is
      # newline+tab, so an unquoted $MODULES_LIST would stay a single word and
      # a space-separated list would be handed to `module load` in one piece.
      # `read -d ''` reads to end of input and therefore returns non-zero even
      # when it succeeds, hence the `|| true`.
      IFS=$' \t\n' read -r -d '' -a build_modules <<<"$MODULES_LIST" || true
      for build_module in "${build_modules[@]}"
      do
        # Loads the same modules that the other tasks use, although only a
        # module that makes conda available is needed. This is to simplify the
        # logic.
        module load "$build_module"
      done
      echo "Sourcing conda via modules:"
      module list
    fi
  fi

  # CONDA_PATH is concatenated straight onto "conda" below; when it is not an
  # existing directory it is cleared so that conda is looked up on PATH.
  if [[ -d "$CONDA_PATH" ]]
  then
    echo "Sourcing conda from: ${CONDA_PATH}"
  else
    CONDA_PATH=""
  fi

  # Remove old conda environment.
  echo "Removing conda environment with:"
  echo "${CONDA_PATH}conda remove -p $CONDA_VENV_LOCATION --all --yes --quiet"
  if ! "${CONDA_PATH}conda" remove -p "$CONDA_VENV_LOCATION" --all --yes --quiet
  then
    >&2 echo "Failed to conda remove old environment, trying to remove manually."
    # ":?" refuses to run the recursive delete with an empty path.
    rm -rf -- "${CONDA_VENV_LOCATION:?}"
  fi

  # Build conda environment.
  echo "Building conda with:"
  echo "${CONDA_PATH}conda create -p $CONDA_VENV_LOCATION --file $env_lock_file --yes --force --quiet"
  "${CONDA_PATH}conda" create -p "$CONDA_VENV_LOCATION" --file "$env_lock_file" --yes --force --quiet

  # Create hash file for next run: the first 64 characters of sha256sum's
  # output are the hex digest of the lock file.
  sha256sum "$env_lock_file" | head -c 64 > "${CONDA_VENV_LOCATION}/cset_env_hash"
}


# (Re)build the environment only when should_build_conda_env returns success.
if should_build_conda_env; then
  build_conda_env
fi

# Run install-local-cset.sh through app_env_wrapper, because it has to execute
# inside the conda environment: it installs the development version of CSET
# when requested and checks that CSET is installed.
app_env_wrapper install-local-cset.sh
29 changes: 29 additions & 0 deletions cset-workflow/app/build_conda/bin/install-local-cset.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#! /bin/bash

# Install development version of CSET into the conda environment if needed,
# then check that the `cset` command runs.
#
# Environment:
#   CSET_ENV_USE_LOCAL_CSET  "True" to install CSET from CSET_LOCAL_CSET_PATH.
#   CSET_LOCAL_CSET_PATH     Wheel file (*.whl) or source directory; only read
#                            when CSET_ENV_USE_LOCAL_CSET is "True".

set -euo pipefail

if [[ $CSET_ENV_USE_LOCAL_CSET = "True" ]]
then
  cset_source_path="${CSET_LOCAL_CSET_PATH}"
  echo "Using local CSET from ${cset_source_path}"

  # Directly install wheel files, or copy source folder.
  if [[ $cset_source_path == *.whl ]]
  then
    echo "Wheel file, installing directly."
    cset_install_path="${cset_source_path}"
  else
    # Copy project to temporary location to avoid permissions issues. The
    # temporary directory is only created on this branch, and is removed again
    # however the script exits.
    cset_install_path="$(mktemp -d)"
    trap 'rm -rf -- "${cset_install_path:?}"' EXIT
    # We don't want to copy all hidden files, as they can contain large conda
    # environments, but we do want the .git directory.
    cp -r "${cset_source_path}"/* "${cset_source_path}"/.git "${cset_install_path}"
  fi

  # Build and install into python environment.
  pip install --verbose --progress-bar off --no-deps -- "${cset_install_path}"
fi

# Capture the version in an assignment first: a failing command substitution
# inside an argument to `echo` leaves echo's own (zero) exit status, so
# `set -e` would not stop the script when `cset` is missing or broken.
cset_version="$(cset --version)"
echo "Using CSET version: ${cset_version}"
46 changes: 0 additions & 46 deletions cset-workflow/app/install_local_cset/bin/install-local-cset.py

This file was deleted.

2 changes: 0 additions & 2 deletions cset-workflow/app/install_local_cset/rose-app.conf

This file was deleted.

14 changes: 4 additions & 10 deletions cset-workflow/flow.cylc
dasha-shchep marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@ URL = https://metoffice.github.io/CSET
[[graph]]
# Only runs on the first cycle.
R1/^ = """
build_conda => install_website_skeleton
build_conda => install_local_cset
install_website_skeleton & install_local_cset => FETCH_DATA
build_conda & install_website_skeleton => FETCH_DATA
"""

# Only runs on the final cycle.
Expand All @@ -28,7 +26,7 @@ URL = https://metoffice.github.io/CSET

# Runs every cycle to process the data in parallel.
{{CSET_CYCLE_PERIOD}} = """
install_website_skeleton[^] & install_local_cset[^] =>
install_website_skeleton[^] & build_conda[^] =>
FETCH_DATA:succeed-all => PARALLEL:succeed-all =>
process_finish => housekeeping_raw

Expand Down Expand Up @@ -112,15 +110,11 @@ URL = https://metoffice.github.io/CSET
platform = localhost

[[build_conda]]
# Create the conda environment if it does not yet exist.
# Create the conda environment if it does not yet exist, possibly installing
# CSET from source.
execution time limit = PT30M
[[[environment]]]
CONDA_VENV_CREATE = {{CONDA_VENV_CREATE}}

[[install_local_cset]]
# Install CSET from source.
execution time limit = PT10M
[[[environment]]]
CSET_ENV_USE_LOCAL_CSET = {{CSET_ENV_USE_LOCAL_CSET}}
{% if CSET_ENV_USE_LOCAL_CSET %}
CSET_LOCAL_CSET_PATH = {{CSET_LOCAL_CSET_PATH}}
Expand Down