swancustomenvironments: Remove swan middle layer
Because `venv-pack` (used by NXCALS) can only ship a single environment (the user's), packages installed in the middle layer are never shipped, which leads to discrepancies in the results. We therefore remove the middle layer and install SWAN's own packages directly into the user environment, ignoring any copies pinned in the user's requirements so that they stay updated and reliable.
At the same time, the user's requirements file must not be modified, so a temporary, filtered copy of requirements.txt is created and used for the installation.
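In practice (see the makenv.sh hunk below) that means keeping the user's requirements.txt pristine and installing from a filtered temporary copy. A minimal sketch of the approach, with variable names taken from the diff below:

```bash
# Filter the SWAN-managed packages out of a temporary copy of the user's
# requirements and install from that copy; the original file is untouched.
ORIGINAL_REQ_PATH="${TMP_REPO_PATH}/requirements.txt"
REQ_PATH=$(mktemp)
grep -v -E "sparkmonitor|sparkconnector|swanportallocator|requests|ipykernel" \
    "${ORIGINAL_REQ_PATH}" > "${REQ_PATH}"
uv pip install -r "${REQ_PATH}"   # SWAN-managed packages are installed separately
```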
rodrigo-sobral committed Dec 5, 2024
1 parent 9132703 commit 380b3c2
Showing 3 changed files with 28 additions and 38 deletions.
@@ -1,27 +1,10 @@
 #!/bin/bash
 
-# Create a middle layer for installing ipykernel, putting it apart from the user environment
-uv venv $SWAN_ENV --seed 2>&1
-source $SWAN_ENV/bin/activate
-uv pip install "ipykernel==${IPYKERNEL_VERSION}"
-SWAN_PACKAGES_PATH=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
-deactivate
 
-# If using NXCALS, we need to also install the Spark packages and their dependencies in the SWAN environment
 if [ -n "${USE_NXCALS}" ]; then
     SPARKCONNECTOR="sparkconnector==$(python -c 'import sparkconnector; print(sparkconnector.__version__)')"
     SPARKMONITOR="sparkmonitor==$(python -c 'import sparkmonitor; print(sparkmonitor.__version__)')"
     SPARKCONNECTOR_DEPENDENCIES="swanportallocator requests" # TODO: Remove swanportallocator and requests installation when the SparkConnector package gets properly updated
-
-    # Activate the SWAN environment for installing the Spark packages
-    source $SWAN_ENV/bin/activate
-    uv pip install ${SPARKMONITOR} ${SPARKCONNECTOR_DEPENDENCIES} 2>&1
-
-    # -------------- HACK SECTION --------------
-    # Install SPARKCONNECTOR_DEPENDENCIES separately, install SparkConnector without its dependencies and change the configuration file
-    # TODO: Remove this when the SparkConnector package gets properly updated
-    uv pip install ${SPARKCONNECTOR} --no-deps 2>&1
-    wget https://raw.githubusercontent.com/swan-cern/jupyter-extensions/refs/heads/swan-on-tn/SparkConnector/sparkconnector/configuration.py -O ${SWAN_PACKAGES_PATH}/sparkconnector/configuration.py 2>&1
 fi
 
 # Set up Acc-Py and create the environment
@@ -36,7 +19,7 @@ eval "${ACTIVATE_ENV_CMD}"
 # Install user-requested packages in the environment.
 # Use uv for better performance if environment is fully resolved;
 # Otherwise, use pip for resolution (more reliable long-term).
-_log "Installing packages from ${REQ_PATH}..."
+_log "Installing packages from ${ORIGINAL_REQ_PATH}..."
 if [ "${RESOLVED_REQ}" = true ]; then
     # Use the same pip configuration as the Acc-Py default pip
     ACCPY_PIP_CONF="-i $(pip config get global.index-url) --allow-insecure-host $(pip config get global.trusted-host)"
@@ -48,8 +31,19 @@ if [ $? -ne 0 ]; then
     exit 1
 fi
 
-# Inject middle layer packages into the user environment by adding a .pth file to
-# the environment site-packages that contains the path to the middle layer site-packages
-USER_PACKAGES_PATH=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
-echo ${SWAN_PACKAGES_PATH} > ${USER_PACKAGES_PATH}/$(basename $SWAN_ENV).pth
+if [ -n "${USE_NXCALS}" ]; then
+    # For NXCALS, install the Spark packages and their dependencies, using the appropriate tool for resolution
+    if [ "${RESOLVED_REQ}" = true ]; then
+        uv pip install ${ACCPY_PIP_CONF} ${SPARKMONITOR} ${SPARKCONNECTOR_DEPENDENCIES} 2>&1
+        uv pip install ${ACCPY_PIP_CONF} ${SPARKCONNECTOR} --no-deps 2>&1
+    else
+        pip install ${SPARKMONITOR} ${SPARKCONNECTOR_DEPENDENCIES} 2>&1
+        pip install ${SPARKCONNECTOR} --no-deps 2>&1
+    fi
+
+    # -------------- HACK SECTION --------------
+    # Install SparkConnector without its dependencies (they are installed separately above) and override its configuration file
+    # TODO: Remove this when the SparkConnector package gets properly updated
+    USER_PACKAGES_PATH=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
+    wget https://raw.githubusercontent.com/swan-cern/jupyter-extensions/refs/heads/swan-on-tn/SparkConnector/sparkconnector/configuration.py -O ${USER_PACKAGES_PATH}/sparkconnector/configuration.py 2>&1
+fi
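For context on what the deleted injection above did: Python's `site` module appends each existing path listed in a site-packages `.pth` file to `sys.path`, which is how the middle layer's packages became importable from the user environment. A minimal illustration of the mechanism, using a hypothetical stand-in directory:

```bash
# Each line of a *.pth file in site-packages is appended to sys.path at
# interpreter startup (stdlib 'site' module), provided the path exists.
SITE=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
mkdir -p /tmp/middle-layer-demo                      # hypothetical middle layer
echo "/tmp/middle-layer-demo" > "${SITE}/swan-demo.pth"
python3 -c 'import sys; print("/tmp/middle-layer-demo" in sys.path)'   # True
```

Because the pointed-to directory lives outside the user environment, this trick breaks as soon as the environment is packed and unpacked elsewhere, which is what motivates the removal.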
@@ -1,12 +1,5 @@
 #!/bin/bash
 
-# Create a middle layer for installing ipykernel, putting it apart from the user environment
-uv venv $SWAN_ENV --seed 2>&1
-source $SWAN_ENV/bin/activate
-uv pip install "ipykernel==${IPYKERNEL_VERSION}"
-SWAN_PACKAGES_PATH=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
-deactivate
-
 if [ "${RESOLVED_REQ}" = true ]; then
     uv venv ${ENV_PATH} --seed 2>&1
 else
@@ -21,7 +14,7 @@ eval "${ACTIVATE_ENV_CMD}"
 # Install user-requested packages in the environment.
 # Use uv for better performance if environment is fully resolved;
 # Otherwise, use pip for resolution (more reliable long-term).
-_log "Installing packages from ${REQ_PATH}..."
+_log "Installing packages from ${ORIGINAL_REQ_PATH}..."
 if [ "${RESOLVED_REQ}" = true ]; then
     uv pip install -r "${REQ_PATH}" ${IPYKERNEL} 2>&1
 else
@@ -30,8 +23,3 @@ fi
 if [ $? -ne 0 ]; then
     exit 1
 fi
-
-# Inject middle layer packages into the user environment by adding a .pth file to
-# the environment site-packages that contains the path to the middle layer site-packages
-USER_PACKAGES_PATH=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
-echo ${SWAN_PACKAGES_PATH} > ${USER_PACKAGES_PATH}/$(basename $SWAN_ENV).pth
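The packing step is why this injection had to go from both builder scripts: `venv-pack` archives a single environment prefix, so anything reachable only through the `.pth` pointer never makes it into the tarball. A sketch of the step as described in the commit message (output name assumed, flags per venv-pack's CLI):

```bash
# Pack only the user's environment; files living in the middle layer are not
# included, so imports that relied on the .pth pointer would break after unpack.
venv-pack -p "${ENV_PATH}" -o user_env.tar.gz
```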
14 changes: 11 additions & 3 deletions SwanCustomEnvironments/swancustomenvironments/scripts/makenv.sh
@@ -162,19 +162,27 @@ IPYKERNEL="ipykernel==$(python -c 'import ipykernel; print(ipykernel.__version__
 if [ -f "${TMP_REPO_PATH}/requirements.txt" ]; then
     # Fully resolved requirements (requirements.txt) take precedence
     RESOLVED_REQ=true
-    REQ_PATH="${TMP_REPO_PATH}/requirements.txt"
+    ORIGINAL_REQ_PATH="${TMP_REPO_PATH}/requirements.txt"
+    REQ_PATH=$(mktemp)
+    if [ -n "${USE_NXCALS}" ]; then
+        grep -v -E "sparkmonitor|sparkconnector|swanportallocator|requests|ipykernel" ${ORIGINAL_REQ_PATH} > ${REQ_PATH}
+    else
+        grep -v -E "ipykernel" ${ORIGINAL_REQ_PATH} > ${REQ_PATH}
+
+    fi
 elif [ -f "${TMP_REPO_PATH}/requirements.in" ]; then
     # If only requirements.in is present, proceed with high-level requirements
     RESOLVED_REQ=false
-    REQ_PATH="${TMP_REPO_PATH}/requirements.in"
+    ORIGINAL_REQ_PATH="${TMP_REPO_PATH}/requirements.in"
+    REQ_PATH="${ORIGINAL_REQ_PATH}"
 else
     # There are no requirements files (neither requirements.txt nor requirements.in) in the repository
     _log "ERROR: No requirements file found. You must provide a requirements.in or requirements.txt file." && exit 1
 fi
 
 # Check if the requirements file contains the nxcals package, if the user activated the nxcals option
 if [ -n "${USE_NXCALS}" ] && ! grep -q "nxcals" "${REQ_PATH}"; then
-    _log "ERROR: The NXCALS cluster was selected but the requirements file (${REQ_PATH}) does not contain the nxcals package." && exit 1
+    _log "ERROR: The NXCALS cluster was selected but the requirements file (${ORIGINAL_REQ_PATH}) does not contain the nxcals package." && exit 1
 fi
 
 _log "Creating environment ${ENV_NAME} using ${BUILDER}${BUILDER_VERSION:+ (${BUILDER_VERSION})}..."