From 380b3c2590dfad1010ad8d3cd43cfbe8357e236a Mon Sep 17 00:00:00 2001 From: RodrigoSobral2000 Date: Thu, 5 Dec 2024 15:25:45 +0100 Subject: [PATCH] swancustomenvironments: Remove swan middle layer Because `venv-pack` (used by NXCALS) can only ship a single environment (the user's), the packages installed in the middle layer won't be shipped, which leads to discrepancies in the results. For that reason, we remove this middle layer and ignore the extra packages installed by swan, ensuring they stay updated and reliable. However, the requirements file must not be modified, which means a temporary, modified copy of requirements.txt has to be created and used for the installation. --- .../scripts/builders/accpy.sh | 38 ++++++++----------- .../scripts/builders/venv.sh | 14 +------ .../swancustomenvironments/scripts/makenv.sh | 14 +++++-- 3 files changed, 28 insertions(+), 38 deletions(-) diff --git a/SwanCustomEnvironments/swancustomenvironments/scripts/builders/accpy.sh b/SwanCustomEnvironments/swancustomenvironments/scripts/builders/accpy.sh index 1da9c636..ff2c8df2 100644 --- a/SwanCustomEnvironments/swancustomenvironments/scripts/builders/accpy.sh +++ b/SwanCustomEnvironments/swancustomenvironments/scripts/builders/accpy.sh @@ -1,27 +1,10 @@ #!/bin/bash -# Create a middle layer for installing ipykernel, putting it apart from the user environment -uv venv $SWAN_ENV --seed 2>&1 -source $SWAN_ENV/bin/activate -uv pip install "ipykernel==${IPYKERNEL_VERSION}" -SWAN_PACKAGES_PATH=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])') -deactivate - # If using NXCALS, we need to also install the Spark packages and their dependencies in the SWAN environment if [ -n "${USE_NXCALS}" ]; then SPARKCONNECTOR="sparkconnector==$(python -c 'import sparkconnector; print(sparkconnector.__version__)')" SPARKMONITOR="sparkmonitor==$(python -c 'import sparkmonitor; print(sparkmonitor.__version__)')" SPARKCONNECTOR_DEPENDENCIES="swanportallocator requests" 
# TODO: Remove swanportallocator and requests installation when the SparkConnector package gets properly updated - - # Activate the SWAN environment for installing the Spark packages - source $SWAN_ENV/bin/activate - uv pip install ${SPARKMONITOR} ${SPARKCONNECTOR_DEPENDENCIES} 2>&1 - - # -------------- HACK SECTION -------------- - # Install SPARKCONNECTOR_DEPENDENCIES separately, install SparkConnector without its dependencies and change the configuration file - # TODO: Remove this when the SparkConnector package gets properly updated - uv pip install ${SPARKCONNECTOR} --no-deps 2>&1 - wget https://raw.githubusercontent.com/swan-cern/jupyter-extensions/refs/heads/swan-on-tn/SparkConnector/sparkconnector/configuration.py -O ${SWAN_PACKAGES_PATH}/sparkconnector/configuration.py 2>&1 fi # Set up Acc-Py and create the environment @@ -36,7 +19,7 @@ eval "${ACTIVATE_ENV_CMD}" # Install user-requested packages in the environment. # Use uv for better performance if environment is fully resolved; # Otherwise, use pip for resolution (more reliable long-term). -_log "Installing packages from ${REQ_PATH}..." +_log "Installing packages from ${ORIGINAL_REQ_PATH}..." if [ "${RESOLVED_REQ}" = true ]; then # Use the same pip configuration as the Acc-Py default pip ACCPY_PIP_CONF="-i $(pip config get global.index-url) --allow-insecure-host $(pip config get global.trusted-host)" @@ -48,8 +31,19 @@ if [ $? 
-ne 0 ]; then exit 1 fi -# Inject middle layer packages into the user environment by adding a .pth file to -# the environment site-packages that contains the path to the middle layer site-packages -USER_PACKAGES_PATH=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])') -echo ${SWAN_PACKAGES_PATH} > ${USER_PACKAGES_PATH}/$(basename $SWAN_ENV).pth +if [ -n "${USE_NXCALS}" ]; then + # For NXCALS, install the Spark packages and their dependencies, using the according tool for its resolution + if [ "${RESOLVED_REQ}" = true ]; then + uv pip install ${ACCPY_PIP_CONF} ${SPARKMONITOR} ${SPARKCONNECTOR_DEPENDENCIES} 2>&1 + uv pip install ${ACCPY_PIP_CONF} ${SPARKCONNECTOR} --no-deps 2>&1 + else + pip install ${SPARKMONITOR} ${SPARKCONNECTOR_DEPENDENCIES} 2>&1 + pip install ${SPARKCONNECTOR} --no-deps 2>&1 + fi + # -------------- HACK SECTION -------------- + # Install SPARKCONNECTOR_DEPENDENCIES separately, install SparkConnector without its dependencies and change the configuration file + # TODO: Remove this when the SparkConnector package gets properly updated + USER_PACKAGES_PATH=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])') + wget https://raw.githubusercontent.com/swan-cern/jupyter-extensions/refs/heads/swan-on-tn/SparkConnector/sparkconnector/configuration.py -O ${USER_PACKAGES_PATH}/sparkconnector/configuration.py 2>&1 +fi \ No newline at end of file diff --git a/SwanCustomEnvironments/swancustomenvironments/scripts/builders/venv.sh b/SwanCustomEnvironments/swancustomenvironments/scripts/builders/venv.sh index ed55abd0..6b63d572 100755 --- a/SwanCustomEnvironments/swancustomenvironments/scripts/builders/venv.sh +++ b/SwanCustomEnvironments/swancustomenvironments/scripts/builders/venv.sh @@ -1,12 +1,5 @@ #!/bin/bash -# Create a middle layer for installing ipykernel, putting it apart from the user environment -uv venv $SWAN_ENV --seed 2>&1 -source $SWAN_ENV/bin/activate -uv pip install "ipykernel==${IPYKERNEL_VERSION}" 
-SWAN_PACKAGES_PATH=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])') -deactivate - if [ "${RESOLVED_REQ}" = true ]; then uv venv ${ENV_PATH} --seed 2>&1 else @@ -21,7 +14,7 @@ eval "${ACTIVATE_ENV_CMD}" # Install user-requested packages in the environment. # Use uv for better performance if environment is fully resolved; # Otherwise, use pip for resolution (more reliable long-term). -_log "Installing packages from ${REQ_PATH}..." +_log "Installing packages from ${ORIGINAL_REQ_PATH}..." if [ "${RESOLVED_REQ}" = true ]; then uv pip install -r "${REQ_PATH}" ${IPYKERNEL} 2>&1 else @@ -30,8 +23,3 @@ fi if [ $? -ne 0 ]; then exit 1 fi - -# Inject middle layer packages into the user environment by adding a .pth file to -# the environment site-packages that contains the path to the middle layer site-packages -USER_PACKAGES_PATH=$(python3 -c 'import sysconfig; print(sysconfig.get_paths()["purelib"])') -echo ${SWAN_PACKAGES_PATH} > ${USER_PACKAGES_PATH}/$(basename $SWAN_ENV).pth diff --git a/SwanCustomEnvironments/swancustomenvironments/scripts/makenv.sh b/SwanCustomEnvironments/swancustomenvironments/scripts/makenv.sh index 38484a7f..1052fb8a 100755 --- a/SwanCustomEnvironments/swancustomenvironments/scripts/makenv.sh +++ b/SwanCustomEnvironments/swancustomenvironments/scripts/makenv.sh @@ -162,11 +162,19 @@ IPYKERNEL="ipykernel==$(python -c 'import ipykernel; print(ipykernel.__version__ if [ -f "${TMP_REPO_PATH}/requirements.txt" ]; then # Fully resolved requirements (requirements.txt) take precedence RESOLVED_REQ=true - REQ_PATH="${TMP_REPO_PATH}/requirements.txt" + ORIGINAL_REQ_PATH="${TMP_REPO_PATH}/requirements.txt" + REQ_PATH=$(mktemp) + if [ -n "${USE_NXCALS}" ]; then + grep -v -E "sparkmonitor|sparkconnector|swanportallocator|requests|ipykernel" ${ORIGINAL_REQ_PATH} > ${REQ_PATH} + else + grep -v -E "ipykernel" ${ORIGINAL_REQ_PATH} > ${REQ_PATH} + + fi elif [ -f "${TMP_REPO_PATH}/requirements.in" ]; then # If only requirements.in is present, 
proceed with high-level requirements RESOLVED_REQ=false - REQ_PATH="${TMP_REPO_PATH}/requirements.in" + ORIGINAL_REQ_PATH="${TMP_REPO_PATH}/requirements.in" + REQ_PATH="${ORIGINAL_REQ_PATH}" else # There are no requirements files (neither requirements.txt nor requirements.in) in the repository _log "ERROR: No requirements file found. You must provide a requirements.in or requirements.txt file." && exit 1 @@ -174,7 +182,7 @@ fi # Check if the requirements file contains the nxcals package, if the user activated the nxcals option if [ -n "${USE_NXCALS}" ] && ! grep -q "nxcals" "${REQ_PATH}"; then - _log "ERROR: The NXCALS cluster was selected but the requirements file (${REQ_PATH}) does not contain the nxcals package." && exit 1 + _log "ERROR: The NXCALS cluster was selected but the requirements file (${ORIGINAL_REQ_PATH}) does not contain the nxcals package." && exit 1 fi _log "Creating environment ${ENV_NAME} using ${BUILDER}${BUILDER_VERSION:+ (${BUILDER_VERSION})}..."