Skip to content

Commit

Permalink
swancustomenvironments: If resolved, install requirements using uv
Browse files Browse the repository at this point in the history
When creating a custom environment, two types of requirements file
can now be provided:
1. High-level requirements, i.e. not fully resolved. These are provided by
a file called requirements.in.
2. Fully resolved requirements, i.e. all the necessary packages including
their dependencies and specific versions. These are provided by a file
called requirements.txt.

If (2) is present, the packages will be installed with uv for performance
reasons. Since a full resolution has already been done, it is safe to do
so (we don't depend on the resolution of uv). If (2) is not present but
(1) is, the packages will be installed with pip — likely slower, but with
pip's dependency resolution. This guarantees a more stable solution in the
medium to long term.
  • Loading branch information
rodrigo-sobral authored and etejedor committed Nov 28, 2024
1 parent 36d7172 commit 220b804
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 28 deletions.
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
#!/bin/bash

# NOTE(review): this span is a unified-diff capture with the +/- markers
# stripped, so removed (pre-commit) and added (post-commit) lines appear
# interleaved below; it is not a runnable script as-is.
# If using NXCALS, we need to install the Spark extensions and the nxcals package.
if [ -n "${INSTALL_NXCALS}" ]; then
# Added lines: seed a separate "middle layer" venv with uv, install ipykernel
# into it, record its site-packages path, then deactivate before touching the
# user environment.
# Create a middle layer for installing ipykernel, putting it apart from the user environment
uv venv $SWAN_ENV --seed 2>&1
source $SWAN_ENV/bin/activate
uv pip install "ipykernel==${IPYKERNEL_VERSION}"
SWAN_PACKAGES_PATH=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
deactivate

# If using NXCALS, we need to also install the Spark packages and their dependencies in the SWAN environment
if [ -n "${USE_NXCALS}" ]; then
# Pin the Spark extension versions to whatever is importable in the base image.
SPARKCONNECTOR="sparkconnector==$(python -c 'import sparkconnector; print(sparkconnector.__version__)')"
SPARKMONITOR="sparkmonitor==$(python -c 'import sparkmonitor; print(sparkmonitor.__version__)')"
NXCALS="nxcals"
SPARKCONNECTOR_DEPENDENCIES="swanportallocator requests" # TODO: Remove swanportallocator and requests installation when the SparkConnector package gets properly updated

# Removed lines (presumably — matches the commit message): the old code built
# the middle layer here with plain `python -m venv` instead of uv.
# Create a middle layer for installing Spark extensions, putting them apart from the user environment
SWAN_ENV="${HOME}/swan"
python -m venv ${SWAN_ENV} 2>&1
source ${SWAN_ENV}/bin/activate
SWAN_PACKAGES_PATH=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')

# Removed: pip-based install of sparkmonitor + connector dependencies.
pip install ${SPARKMONITOR} ${SPARKCONNECTOR_DEPENDENCIES} 2>&1
# Added: same install, now done with uv inside the reactivated SWAN env.
# Activate the SWAN environment for installing the Spark packages
source $SWAN_ENV/bin/activate
uv pip install ${SPARKMONITOR} ${SPARKCONNECTOR_DEPENDENCIES} 2>&1

# -------------- HACK SECTION --------------
# Install SPARKCONNECTOR_DEPENDENCIES separately, install SparkConnector without its dependencies and change the configuration file
# TODO: Remove this when the SparkConnector package gets properly updated
# Removed (pip) vs added (uv) variants of the no-deps SparkConnector install:
pip install ${SPARKCONNECTOR} --no-deps 2>&1
uv pip install ${SPARKCONNECTOR} --no-deps 2>&1
# Overwrite the connector's configuration file with the swan-on-tn branch copy.
wget https://raw.githubusercontent.com/swan-cern/jupyter-extensions/refs/heads/swan-on-tn/SparkConnector/sparkconnector/configuration.py -O ${SWAN_PACKAGES_PATH}/sparkconnector/configuration.py 2>&1
fi

Expand All @@ -31,16 +33,23 @@ _log "Setting up the environment..."
ACTIVATE_ENV_CMD="source ${ENV_PATH}/bin/activate"
eval "${ACTIVATE_ENV_CMD}"

# NOTE(review): diff capture — the old single pip line and the new
# RESOLVED_REQ branch both appear below; +/- markers were stripped.
# Install packages in the environment and the same ipykernel that the Jupyter server uses
# Install user-requested packages in the environment.
# Use uv for better performance if environment is fully resolved;
# Otherwise, use pip for resolution (more reliable long-term).
_log "Installing packages from ${REQ_PATH}..."
# Removed: one-shot pip install of requirements + ipykernel (+ optional nxcals).
pip install -r "${REQ_PATH}" "ipykernel==${IPYKERNEL_VERSION}" ${NXCALS} 2>&1
# Added: choose the installer based on whether requirements are fully resolved.
if [ "${RESOLVED_REQ}" = true ]; then
# Use the same pip configuration as the Acc-Py default pip
ACCPY_PIP_CONF="-i $(pip config get global.index-url) --allow-insecure-host $(pip config get global.trusted-host)"
uv pip install ${ACCPY_PIP_CONF} -r "${REQ_PATH}" 2>&1
else
pip install -r "${REQ_PATH}" 2>&1
fi
# Abort the build if the install step above failed.
if [ $? -ne 0 ]; then
exit 1
fi

# Inject middle layer packages into the user environment by adding a .pth file to
# the environment site-packages that contains the path to the middle layer site-packages
# Removed: the .pth injection used to be guarded by INSTALL_NXCALS...
if [ -n "${INSTALL_NXCALS}" ]; then
USER_PACKAGES_PATH=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
echo ${SWAN_PACKAGES_PATH} > ${USER_PACKAGES_PATH}/$(basename ${SWAN_ENV}).pth
fi
# ...Added: now it runs unconditionally.
USER_PACKAGES_PATH=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
echo ${SWAN_PACKAGES_PATH} > ${USER_PACKAGES_PATH}/$(basename $SWAN_ENV).pth

29 changes: 25 additions & 4 deletions SwanCustomEnvironments/swancustomenvironments/scripts/builders/venv.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,16 +1,37 @@
#!/bin/bash

# NOTE(review): diff capture of venv.sh — removed and added lines interleave
# below because the +/- markers were stripped from the diff.
# Removed: the user environment used to be created directly with python3 -m venv.
# Create the environment
python3 -m venv ${ENV_PATH} 2>&1
# Added: build a uv-seeded middle layer holding ipykernel, and remember its
# site-packages path for the .pth injection at the bottom of the script.
# Create a middle layer for installing ipykernel, putting it apart from the user environment
uv venv $SWAN_ENV --seed 2>&1
source $SWAN_ENV/bin/activate
uv pip install "ipykernel==${IPYKERNEL_VERSION}"
SWAN_PACKAGES_PATH=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
deactivate

# Added: pick the venv builder by resolution state (uv when fully resolved).
if [ "${RESOLVED_REQ}" = true ]; then
uv venv ${ENV_PATH} --seed 2>&1
else
python -m venv ${ENV_PATH} 2>&1
fi

# Activate the environment
_log "Setting up the environment..."
ACTIVATE_ENV_CMD="source ${ENV_PATH}/bin/activate"
eval "${ACTIVATE_ENV_CMD}"

# Install packages in the environment and the same ipykernel that the Jupyter server uses
# Install user-requested packages in the environment.
# Use uv for better performance if environment is fully resolved;
# Otherwise, use pip for resolution (more reliable long-term).
_log "Installing packages from ${REQ_PATH}..."
# Removed (pip + pinned ipykernel) vs added (uv/pip split on RESOLVED_REQ):
pip install -r "${REQ_PATH}" "ipykernel==${IPYKERNEL_VERSION}" 2>&1
if [ "${RESOLVED_REQ}" = true ]; then
uv pip install -r "${REQ_PATH}" 2>&1
else
pip install -r "${REQ_PATH}" 2>&1
fi
# Abort the build if the install step above failed.
if [ $? -ne 0 ]; then
exit 1
fi

# Inject middle layer packages into the user environment by adding a .pth file to
# the environment site-packages that contains the path to the middle layer site-packages
USER_PACKAGES_PATH=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])')
echo ${SWAN_PACKAGES_PATH} > ${USER_PACKAGES_PATH}/$(basename $SWAN_ENV).pth
26 changes: 19 additions & 7 deletions SwanCustomEnvironments/swancustomenvironments/scripts/makenv.sh
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ while [ $# -gt 0 ]; do
# NOTE(review): diff capture of the makenv.sh option parser — the --nxcals
# arm shows both the removed (INSTALL_NXCALS) and added (USE_NXCALS) flag names.
shift
;;
--nxcals)
INSTALL_NXCALS=true
USE_NXCALS=true
shift
;;
--help|-h)
Expand Down Expand Up @@ -157,13 +157,25 @@ fi
# Create and set up the environment

ENV_PATH="/home/$USER/${ENV_NAME}"
# NOTE(review): diff capture — removed lines (single REQ_PATH, hard failure on
# a missing requirements.txt) and added lines (SWAN_ENV path, RESOLVED_REQ
# detection) interleave below; +/- markers were stripped.
# Removed: requirements.txt was the only accepted input.
REQ_PATH="${TMP_REPO_PATH}/requirements.txt"
# Added: fixed location of the uv middle-layer env, plus the ipykernel version
# of the Jupyter server (pinned so the user kernel matches it).
SWAN_ENV="/home/$USER/swan"
IPYKERNEL_VERSION=$(python -c "import ipykernel; print(ipykernel.__version__)")

# Removed: unconditional error when requirements.txt was absent.
# Check if requirements.txt exists in the repository
if [ ! -f "${REQ_PATH}" ]; then
_log "ERROR: Requirements file not found (${REQ_PATH})."
exit 1
# Added: accept requirements.txt (fully resolved) or requirements.in (high-level).
if [ -f "${TMP_REPO_PATH}/requirements.txt" ]; then
# Fully resolved requirements (requirements.txt) take precedence
RESOLVED_REQ=true
REQ_PATH="${TMP_REPO_PATH}/requirements.txt"
elif [ -f "${TMP_REPO_PATH}/requirements.in" ]; then
# If only requirements.in is present, proceed with high-level requirements
RESOLVED_REQ=false
REQ_PATH="${TMP_REPO_PATH}/requirements.in"
else
# There are no requirements files (neither requirements.txt nor requirements.in) in the repository
_log "ERROR: No requirements file found. You must provide a requirements.in or requirements.txt file." && exit 1
fi

# Check if the requirements file contains the nxcals package, if the user activated the nxcals option
if [ -n "${USE_NXCALS}" ] && ! grep -q "nxcals" "${REQ_PATH}"; then
_log "ERROR: The NXCALS cluster was selected but the requirements file (${REQ_PATH}) does not contain the nxcals package." && exit 1
fi

_log "Creating environment ${ENV_NAME} using ${BUILDER}${BUILDER_VERSION:+ (${BUILDER_VERSION})}..."
Expand All @@ -182,7 +194,7 @@ ln -f -s ${KERNEL_JSON} /home/$USER/.local/share/jupyter/kernels/python3/kernel.

# For NXCALS, configure the environment kernel and terminal with some variables to
# ensure the connection with the cluster works properly.
# NOTE(review): diff capture — removed INSTALL_NXCALS guard vs added USE_NXCALS guard.
if [ -n "${INSTALL_NXCALS}" ]; then
if [ -n "${USE_NXCALS}" ]; then
# Kernel configuration
# - SPARK_HOME: needed to point to the SPARK installation provided by the nxcals package
# - PYSPARK_PYTHON: needed to point to the Python executable in the environment shipped to the cluster
Expand Down

0 comments on commit 220b804

Please sign in to comment.