-
Notifications
You must be signed in to change notification settings - Fork 47
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Revert unintended update of configs/sites/aws-pcluster/README.md
- Loading branch information
Showing
2 changed files
with
124 additions
and
54 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,9 +4,9 @@ | |
|
||
### Base instance | ||
Choose a basic AMI from the Community AMIs tab that matches your desired OS and parallelcluster version. Select an instance type of the same family that you are planning to use for the head and the compute nodes, and enough storage for a swap file and a spack-stack installation. For example: | ||
- AMI ID: ami-07410779598773e7d (aws-parallelcluster-3.8.0-ubuntu-2204-lts-hvm-x86_64-202312160956 2023-12-16T10-00-45.861Z) | ||
- Instance hpc7a.96xlarge | ||
- Use 500GB of gp3 storage as / | ||
- AMI ID: ami-093dab62f7840644b | ||
- Instance hpc6a.48xlarge | ||
- Use 350GB of gp3 storage as / | ||
|
||
### Prerequisites | ||
1. As `root`: | ||
|
@@ -47,7 +47,8 @@ tar -xvf Lmod-8.7.tar.bz2 | |
cd Lmod-8.7 | ||
# Note the unusual prefix: Lmod automatically installs into PREFIX/lmod/X.Y | ||
./configure --prefix=/opt/ \ | ||
--with-lmodConfigDir=/opt/lmod/8.7/config 2>&1 | tee log.config | ||
--with-lmodConfigDir=/opt/lmod/8.7/config \ | ||
2>&1 | tee log.config | ||
make install 2>&1 | tee log.install | ||
ln -sf /opt/lmod/lmod/init/profile /etc/profile.d/z00_lmod.sh | ||
ln -sf /opt/lmod/lmod/init/cshrc /etc/profile.d/z00_lmod.csh | ||
|
@@ -56,19 +57,19 @@ ln -sf /opt/lmod/lmod/init/profile.fish /etc/profile.d/z00_lmod.fish | |
# Add custom module locations and fix existing modules | ||
# | ||
# intelmpi | ||
echo "conflict openmpi" >> /opt/intel/mpi/2021.9.0/modulefiles/intelmpi | ||
echo 'if { [ module-info mode load ] && ![ is-loaded libfabric-aws/1.19.0amzn4.0 ] } {' >> /opt/intel/mpi/2021.9.0/modulefiles/intelmpi | ||
echo ' module load libfabric-aws/1.19.0amzn4.0' >> /opt/intel/mpi/2021.9.0/modulefiles/intelmpi | ||
echo '}' >> /opt/intel/mpi/2021.9.0/modulefiles/intelmpi | ||
echo "conflict openmpi" >> /opt/intel/mpi/2021.6.0/modulefiles/intelmpi | ||
echo 'if { [ module-info mode load ] && ![ is-loaded libfabric-aws/1.16.0~amzn4.0 ] } {' >> /opt/intel/mpi/2021.6.0/modulefiles/intelmpi | ||
echo ' module load libfabric-aws/1.16.0~amzn4.0' >> /opt/intel/mpi/2021.6.0/modulefiles/intelmpi | ||
echo '}' >> /opt/intel/mpi/2021.6.0/modulefiles/intelmpi | ||
# openmpi | ||
echo "conflict intelmpi" >> /usr/share/modules/modulefiles/openmpi/4.1.6 | ||
echo 'if { [ module-info mode load ] && ![ is-loaded libfabric-aws/1.19.0amzn4.0 ] } {' >> /usr/share/modules/modulefiles/openmpi/4.1.6 | ||
echo ' module load libfabric-aws/1.19.0amzn4.0' >> /usr/share/modules/modulefiles/openmpi/4.1.6 | ||
echo '}' >> /usr/share/modules/modulefiles/openmpi/4.1.6 | ||
echo "conflict intelmpi" >> /usr/share/modules/modulefiles/openmpi/4.1.4 | ||
echo 'if { [ module-info mode load ] && ![ is-loaded libfabric-aws/1.16.0~amzn4.0 ] } {' >> /usr/share/modules/modulefiles/openmpi/4.1.4 | ||
echo ' module load libfabric-aws/1.16.0~amzn4.0' >> /usr/share/modules/modulefiles/openmpi/4.1.4 | ||
echo '}' >> /usr/share/modules/modulefiles/openmpi/4.1.4 | ||
# | ||
echo "module use /usr/share/modules/modulefiles" >> /etc/profile.d/z01_lmod.sh | ||
### NO NOT ANY MORE ### echo "module use /opt/intel/mpi/2021.9.0/modulefiles" >> /etc/profile.d/z01_lmod.sh | ||
### NO NOT ANY MORE ### echo "module use /home/ubuntu/jedi/modulefiles" >> /etc/profile.d/z01_lmod.sh | ||
echo "module use /opt/intel/mpi/2021.6.0/modulefiles" >> /etc/profile.d/z01_lmod.sh | ||
echo "module use /home/ubuntu/jedi/modulefiles" >> /etc/profile.d/z01_lmod.sh | ||
# | ||
# Log out completely, ssh back into the instance and check if lua modules work | ||
exit | ||
|
@@ -77,10 +78,10 @@ exit | |
ssh ... | ||
# Now user ubuntu | ||
module av | ||
module load libfabric-aws/1.19.0amzn4.0 | ||
module load openmpi/4.1.6 | ||
module load libfabric-aws/1.16.0~amzn4.0 | ||
module load openmpi/4.1.4 | ||
module list | ||
module unload openmpi/4.1.6 | ||
module unload openmpi/4.1.4 | ||
module load intelmpi | ||
module list | ||
module purge | ||
|
@@ -102,18 +103,29 @@ apt install -y unzip | |
apt install -y automake | ||
apt install -y xterm | ||
apt install -y texlive | ||
apt install -y cmake | ||
# This is for ecflow | ||
apt install -y qtcreator qtbase5-dev qt5-qmake | ||
apt install -y libqt5widgets5 | ||
apt install -y qt5-default | ||
apt install -y libqt5svg5-dev | ||
apt install -y qt5dxcb-plugin | ||
# For mysql | ||
apt install -y mysql-server | ||
# Test | ||
mysql -u root | ||
### # Remove AWS openmpi | ||
### apt remove -y openmpi40-aws | ||
# This is because boost doesn't work with the Intel compiler | ||
apt install -y libboost1.71-dev | ||
apt install -y libboost-chrono1.71-dev | ||
apt install -y libboost-date-time1.71-dev | ||
apt install -y libboost-exception1.71-dev | ||
apt install -y libboost-filesystem1.71-dev | ||
apt install -y libboost-program-options1.71-dev | ||
apt install -y libboost-python1.71-dev | ||
apt install -y libboost-regex1.71-dev | ||
apt install -y libboost-serialization1.71-dev | ||
apt install -y libboost-system1.71-dev | ||
apt install -y libboost-test1.71-dev | ||
apt install -y libboost-thread1.71-dev | ||
apt install -y libboost-timer1.71-dev | ||
# Python | ||
apt install -y python3-dev python3-pip | ||
|
@@ -122,16 +134,7 @@ apt install -y python3-dev python3-pip | |
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null | ||
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list | ||
apt-get update | ||
apt-get install -y intel-hpckit-2024.0/all | ||
exit | ||
# As ubuntu | ||
/opt/intel/modulefiles-setup.sh | ||
# Back to root | ||
sudo su | ||
mv /home/ubuntu/modulefiles /opt/intel/modulefiles | ||
echo "module unuse /opt/intel/mpi/2021.9.0/modulefiles" >> /etc/profile.d/z01_lmod.sh | ||
echo "module use /opt/intel/modulefiles" >> /etc/profile.d/z01_lmod.sh | ||
apt-get install -y intel-hpckit-2022.2.0/all | ||
# Docker | ||
# See https://docs.docker.com/engine/install/ubuntu/ | ||
|
@@ -155,7 +158,15 @@ service sshd restart | |
cd /usr/lib64/ | ||
ln -sf /usr/lib/x86_64-linux-gnu/libcrypt.so . | ||
cd /usr/include | ||
ln -sf python3.10/pyconfig.h . | ||
ln -sf python3.8/pyconfig.h . | ||
# Create swapfile - 100GB | ||
dd if=/dev/zero of=/swapfile bs=128M count=800 | ||
chmod 600 /swapfile | ||
mkswap /swapfile | ||
swapon /swapfile | ||
swapon -s | ||
echo "/swapfile swap swap defaults 0 0" >> /etc/fstab | ||
# Exit root session | ||
exit | ||
|
@@ -166,12 +177,73 @@ git config --global credential.helper cache | |
|
||
2. Log out and back in to enable x11 forwarding | ||
|
||
3. Create directory for spack-stack external packages | ||
3. Build ecflow outside of spack to be able to link against OS boost | ||
``` | ||
mkdir -p /home/ubuntu/spack-stack/external | ||
mkdir -p /home/ubuntu/jedi/ecflow-5.8.4/src | ||
cd /home/ubuntu/jedi/ecflow-5.8.4/src | ||
wget https://confluence.ecmwf.int/download/attachments/8650755/ecFlow-5.8.4-Source.tar.gz?api=v2 | ||
mv ecFlow-5.8.4-Source.tar.gz\?api\=v2 ecFlow-5.8.4-Source.tar.gz | ||
tar -xvzf ecFlow-5.8.4-Source.tar.gz | ||
export WK=/home/ubuntu/jedi/ecflow-5.8.4/src/ecFlow-5.8.4-Source | ||
export BOOST_ROOT=/usr | ||
# Build ecFlow | ||
cd $WK | ||
mkdir build | ||
cd build | ||
cmake .. -DPython3_EXECUTABLE=/usr/bin/python3 -DENABLE_STATIC_BOOST_LIBS=OFF -DCMAKE_INSTALL_PREFIX=/home/ubuntu/jedi/ecflow-5.8.4 2>&1 | tee log.cmake | ||
make -j4 2>&1 | tee log.make | ||
make install 2>&1 | tee log.install | ||
# Create a modulefiles directory with the following ecflow/5.8.4 module in it (w/o the '%%%%...' lines): | ||
mkdir -p /home/ubuntu/jedi/modulefiles/ecflow | ||
vi /home/ubuntu/jedi/modulefiles/ecflow/5.8.4 | ||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
#%Module1.0 | ||
module-whatis "Provides an ecflow-5.8.4 server+ui installation for use with spack." | ||
conflict ecflow | ||
proc ModulesHelp { } { | ||
puts stderr "Provides an ecflow-5.8.4 server+ui installation for use with spack." | ||
} | ||
# Set this value | ||
set ECFLOW_PATH "/home/ubuntu/jedi/ecflow-5.8.4" | ||
prepend-path PATH "${ECFLOW_PATH}/bin" | ||
prepend-path LD_LIBRARY_PATH "${ECFLOW_PATH}/lib" | ||
prepend-path LIBRARY_PATH "${ECFLOW_PATH}/lib" | ||
prepend-path CPATH "${ECFLOW_PATH}/include" | ||
prepend-path CMAKE_PREFIX_PATH "${ECFLOW_PATH}" | ||
prepend-path PYTHONPATH "${ECFLOW_PATH}/lib/python3.8/site-packages" | ||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | ||
``` | ||
|
||
4. Install MySQL community server | ||
``` | ||
cd /home/ubuntu/jedi | ||
mkdir -p mysql-8.0.31/src | ||
cd mysql-8.0.31/src | ||
wget https://dev.mysql.com/get/Downloads/MySQL-8.0/mysql-server_8.0.32-1ubuntu20.04_amd64.deb-bundle.tar | ||
tar -xvf mysql-server_8.0.32-1ubuntu20.04_amd64.deb-bundle.tar | ||
# Switch to root | ||
sudo su | ||
dpkg -i *.deb | ||
apt --fix-broken install | ||
dpkg -i *.deb | ||
# Use an empty password for root, choose legacy authentication method; test connection | ||
mysql -u root | ||
show databases; | ||
# exit mysql | ||
exit | ||
# exit root session | ||
exit | ||
rm *.deb | ||
``` | ||
|
||
4. Option 1: Testing existing site config in spack-stack (skip steps 5-7 afterwards) | ||
5. Option 1: Testing existing site config in spack-stack (skip steps 6-8 afterwards) | ||
``` | ||
mkdir -p /home/ubuntu/sandpit | ||
cd /home/ubuntu/sandpit | ||
|
@@ -188,7 +260,7 @@ spack module lmod refresh | |
spack stack setup-meta-modules | ||
``` | ||
|
||
5. Option 2: Test configuring site from scratch | ||
6. Option 2: Test configuring site from scratch | ||
``` | ||
mkdir /home/ubuntu/jedi && cd /home/ubuntu/jedi | ||
git clone -b develop --recursive https://github.com/jcsda/spack-stack spack-stack | ||
|
@@ -199,41 +271,39 @@ spack env activate -p envs/unified-env | |
export SPACK_SYSTEM_CONFIG_PATH=/home/ubuntu/jedi/spack-stack/envs/unified-env/site | ||
spack external find --scope system \ | ||
--exclude bison --exclude cmake \ | ||
--exclude curl --exclude openssl \ | ||
--exclude openssh | ||
spack external find --scope system | ||
spack external find --scope system perl | ||
spack external find --scope system python | ||
spack external find --scope system wget | ||
spack external find --scope system mysql | ||
spack external find --scope system texlive | ||
spack external find --scope system sed | ||
spack external find --scope system mysql | ||
# No external find for pre-installed intel-oneapi-mpi (from pcluster AMI), | ||
# and no way to add object entry to list using "spack config add". | ||
echo " intel-oneapi-mpi:" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " externals:" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " - spec: intel-oneapi-mpi@2021.9.0%[email protected]" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " - spec: intel-oneapi-mpi@2021.6.0%[email protected]" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " prefix: /opt/intel" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " modules:" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " - libfabric-aws/1.19.0amzn4.0" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " - libfabric-aws/1.16.0~amzn4.0" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " - intelmpi" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
# Add external openmpi | ||
echo " openmpi:" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " externals:" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " - spec: [email protected].6%[email protected]~cuda~cxx~cxx_exceptions~java~memchecker+pmi~static~wrapper-rpath" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " - spec: [email protected].4%[email protected]~cuda~cxx~cxx_exceptions~java~memchecker+pmi~static~wrapper-rpath" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " fabrics=ofi schedulers=slurm" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " prefix: /opt/amazon/openmpi" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " modules:" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " - libfabric-aws/1.19.0amzn4.0" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " - openmpi/4.1.6" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " - libfabric-aws/1.16.0~amzn4.0" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " - openmpi/4.1.4" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
# Can't find qt5 because qtpluginfo is broken, | ||
# and no way to add object entry to list using "spack config add". | ||
echo " qt:" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " buildable: False" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " externals:" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " - spec: qt@5.15.3" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " - spec: qt@5.12.8" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
echo " prefix: /usr" >> ${SPACK_SYSTEM_CONFIG_PATH}/packages.yaml | ||
# Add external boost | ||
|
@@ -255,15 +325,15 @@ spack compiler find --scope system | |
export -n SPACK_SYSTEM_CONFIG_PATH | ||
spack config add "packages:mpi:buildable:False" | ||
spack config add "packages:all:providers:mpi:[intel-oneapi-mpi@2021.9.0, openmpi@4.1.6]" | ||
spack config add "packages:all:providers:mpi:[intel-oneapi-mpi@2021.6.0, openmpi@4.1.4]" | ||
spack config add "packages:all:compiler:[[email protected], [email protected]]" | ||
# edit envs/unified-env/site/compilers.yaml and replace the following line in the **Intel** compiler section: | ||
# environment: {} | ||
# --> | ||
# environment: | ||
# prepend_path: | ||
# LD_LIBRARY_PATH: '/opt/intel/oneapi/compiler/2021.9.0/linux/compiler/lib/intel64_lin' | ||
# LD_LIBRARY_PATH: '/opt/intel/oneapi/compiler/2021.6.0/linux/compiler/lib/intel64_lin' | ||
# set: | ||
# I_MPI_PMI_LIBRARY: '/opt/slurm/lib/libpmi.so' | ||
``` | ||
|