diff --git a/modulefiles/gsi_gaea.intel.lua b/modulefiles/gsi_gaea.intel.lua index 96643202a7..799822caa8 100644 --- a/modulefiles/gsi_gaea.intel.lua +++ b/modulefiles/gsi_gaea.intel.lua @@ -1,18 +1,13 @@ help([[ ]]) -unload("intel") -unload("cray-mpich") -unload("cray-python") -unload("darshan") +prepend_path("MODULEPATH", "/ncrc/proj/epic/spack-stack/spack-stack-1.6.0/envs/gsi-addon-dev/install/modulefiles/Core") -prepend_path("MODULEPATH", "/lustre/f2/dev/wpo/role.epic/contrib/spack-stack/spack-stack-1.4.1-c4/envs/unified-env/install/modulefiles/Core") -prepend_path("MODULEPATH", "/lustre/f2/pdata/esrl/gsd/spack-stack/modulefiles") - -local stack_python_ver=os.getenv("stack_python_ver") or "3.9.12" -local stack_intel_ver=os.getenv("stack_intel_ver") or "2022.0.2" -local stack_cray_mpich_ver=os.getenv("stack_cray_mpich_ver") or "7.7.20" +local stack_python_ver=os.getenv("stack_python_ver") or "3.11.6" +local stack_intel_ver=os.getenv("stack_intel_ver") or "2023.1.0" +local stack_cray_mpich_ver=os.getenv("stack_cray_mpich_ver") or "8.1.25" local cmake_ver=os.getenv("cmake_ver") or "3.23.1" +local prod_util_ver=os.getenv("prod_util_ver") or "2.1.1" load(pathJoin("stack-intel", stack_intel_ver)) load(pathJoin("stack-cray-mpich", stack_cray_mpich_ver)) @@ -20,23 +15,18 @@ load(pathJoin("stack-python", stack_python_ver)) load(pathJoin("cmake", cmake_ver)) load("gsi_common") - -local prod_util_ver=os.getenv("prod_util_ver") or "1.2.2" -load(pathJoin("prod-util", prod_util_ver)) - --- Needed at runtime: -load("alps") +load(pathJoin("prod_util", prod_util_ver)) local MKLROOT="/opt/intel/oneapi/mkl/2022.0.2/" prepend_path("LD_LIBRARY_PATH",pathJoin(MKLROOT,"lib/intel64")) pushenv("MKLROOT", MKLROOT) -pushenv("GSI_BINARY_SOURCE_DIR", "/lustre/f2/dev/role.epic/contrib/GSI_data/fix/20240208") +pushenv("GSI_BINARY_SOURCE_DIR", "/gpfs/f5/ufs-ard/world-shared/GSI_data/fix/gsi/20240208") setenv("CC","cc") setenv("FC","ftn") setenv("CXX","CC") pushenv("CRAYPE_LINK_TYPE","dynamic") +unload("cray-libsci") whatis("Description: GSI environment on Gaea with Intel Compilers") - diff --git a/regression/regression_param.sh b/regression/regression_param.sh index a4f5d7035c..6ee72f14da 100755 --- a/regression/regression_param.sh +++ b/regression/regression_param.sh @@ -26,8 +26,8 @@ case $machine in ;; Gaea) sub_cmd="sub_gaea" - memnode=64 - numcore=36 + memnode=251 + numcore=128 ;; wcoss2) sub_cmd="sub_wcoss2" @@ -69,8 +69,8 @@ case $regtest in topts[1]="0:30:00" ; popts[1]="48/2" ; ropts[1]="/1" topts[2]="0:30:00" ; popts[2]="60/3" ; ropts[2]="/2" elif [[ "$machine" = "Gaea" ]]; then - topts[1]="0:10:00" ; popts[1]="18/8/" ; ropts[1]="/1" - topts[2]="0:10:00" ; popts[2]="18/10/" ; ropts[2]="/2" + topts[1]="0:10:00" ; popts[1]="12/8/" ; ropts[1]="/1" + topts[2]="0:10:00" ; popts[2]="12/10/" ; ropts[2]="/2" elif [[ "$machine" = "wcoss2" ]]; then topts[1]="0:10:00" ; popts[1]="12/8/" ; ropts[1]="/1" topts[2]="0:10:00" ; popts[2]="12/10/" ; ropts[2]="/2" @@ -99,8 +99,8 @@ case $regtest in topts[1]="0:15:00" ; popts[1]="5/4/" ; ropts[1]="/1" topts[2]="0:15:00" ; popts[2]="10/4/" ; ropts[2]="/1" elif [[ "$machine" = "Gaea" ]]; then - topts[1]="0:15:00" ; popts[1]="18/1/" ; ropts[1]="/1" - topts[2]="0:15:00" ; popts[2]="18/2/" ; ropts[2]="/1" + topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1" + topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1" elif [[ "$machine" = "wcoss2" ]]; then topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1" topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1" @@ -129,8 +129,8 @@ case $regtest in topts[1]="0:15:00" ; popts[1]="5/4/" ; ropts[1]="/1" topts[2]="0:15:00" ; popts[2]="10/4/" ; ropts[2]="/1" elif [[ "$machine" = "Gaea" ]]; then - topts[1]="0:15:00" ; popts[1]="18/1/" ; ropts[1]="/1" - topts[2]="0:15:00" ; popts[2]="18/2/" ; ropts[2]="/1" + topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1" + topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1" elif [[ "$machine" = "wcoss2" ]]; then topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1" topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1" @@ -158,8 +158,8 @@ case $regtest in topts[1]="0:15:00" ; popts[1]="5/4/" ; ropts[1]="/1" topts[2]="0:15:00" ; popts[2]="10/4/" ; ropts[2]="/1" elif [[ "$machine" = "Gaea" ]]; then - topts[1]="0:15:00" ; popts[1]="18/1/" ; ropts[1]="/1" - topts[2]="0:15:00" ; popts[2]="18/2/" ; ropts[2]="/1" + topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1" + topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1" elif [[ "$machine" = "wcoss2" ]]; then topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1" topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1" @@ -188,8 +188,8 @@ case $regtest in topts[1]="0:15:00" ; popts[1]="4/4/" ; ropts[1]="/1" topts[2]="0:15:00" ; popts[2]="6/6/" ; ropts[2]="/1" elif [[ "$machine" = "Gaea" ]]; then - topts[1]="0:15:00" ; popts[1]="4/4/" ; ropts[1]="/1" - topts[2]="0:15:00" ; popts[2]="6/6/" ; ropts[2]="/1" + topts[1]="0:15:00" ; popts[1]="28/1/" ; ropts[1]="/1" + topts[2]="0:15:00" ; popts[2]="28/2/" ; ropts[2]="/1" elif [[ "$machine" = "wcoss2" ]]; then topts[1]="0:15:00" ; popts[1]="28/1/" ; ropts[1]="/1" topts[2]="0:15:00" ; popts[2]="28/2/" ; ropts[2]="/1" @@ -218,8 +218,8 @@ case $regtest in topts[1]="0:30:00" ; popts[1]="6/12/" ; ropts[1]="/1" topts[2]="0:30:00" ; popts[2]="8/12/" ; ropts[2]="/1" elif [[ "$machine" = "Gaea" ]]; then - topts[1]="0:30:00" ; popts[1]="8/6/" ; ropts[1]="/1" - topts[2]="0:30:00" ; popts[2]="8/8/" ; ropts[2]="/1" + topts[1]="0:30:00" ; popts[1]="14/8/" ; ropts[1]="/1" + topts[2]="0:30:00" ; popts[2]="14/14/" ; ropts[2]="/1" elif [[ "$machine" = "wcoss2" ]]; then topts[1]="0:30:00" ; popts[1]="14/8/" ; ropts[1]="/1" topts[2]="0:30:00" ; popts[2]="14/14/" ; ropts[2]="/2" @@ -248,8 +248,8 @@ case $regtest in topts[1]="0:10:00" ; popts[1]="12/3/" ; ropts[1]="/1" topts[2]="0:10:00" ; popts[2]="12/5/" ; ropts[2]="/2" elif [[ "$machine" = "Gaea" ]]; then - topts[1]="0:10:00" ; popts[1]="12/3/" ; ropts[1]="/1" - topts[2]="0:10:00" ; popts[2]="12/5/" ; ropts[2]="/2" + topts[1]="0:10:00" ; popts[1]="16/2/" ; ropts[1]="/1" + topts[2]="0:10:00" ; popts[2]="16/4/" ; ropts[2]="/2" elif [[ "$machine" = "wcoss2" ]]; then topts[1]="0:10:00" ; popts[1]="16/2/" ; ropts[1]="/1" topts[2]="0:10:00" ; popts[2]="16/4/" ; ropts[2]="/2" @@ -315,7 +315,7 @@ elif [[ "$machine" = "Gaea" ]]; then export MPI_BUFS_PER_PROC=256 export MPI_BUFS_PER_HOST=256 export MPI_GROUP_MAX=256 - export APRUN="srun --export=ALL --mpi=pmi2 -n \$ntasks" + export APRUN="srun --export=ALL -n \$ntasks" elif [[ "$machine" = "wcoss2" ]]; then export OMP_PLACES=cores export OMP_STACKSIZE=2G diff --git a/regression/regression_var.sh b/regression/regression_var.sh index 315028675c..aebbccab8b 100755 --- a/regression/regression_var.sh +++ b/regression/regression_var.sh @@ -36,7 +36,7 @@ elif [[ -d /mnt/lfs4 || -d /jetmon || -d /mnt/lfs1 ]]; then # Jet export machine="Jet" elif [[ -d /discover ]]; then # NCCS Discover export machine="Discover" -elif [[ -d /sw/gaea ]]; then # Gaea +elif [[ -d /ncrc ]]; then # Gaea export machine="Gaea" elif [[ -d /data/prod ]]; then # S4 export machine="S4" @@ -52,17 +52,13 @@ echo "Running Regression Tests on '$machine'"; case $machine in Gaea) export queue="normal" - export noscrub="/lustre/f2/scratch/$LOGNAME/gsi_tmp/noscrub" - export ptmp="/lustre/f2/scratch/$LOGNAME/gsi_tmp/ptmp" - export casesdir="/lustre/f2/dev/role.epic/contrib/GSI_data/CASES/regtest" - - export group="global" - if [[ "$cmaketest" = "false" ]]; then - export basedir="/lustre/f2/dev/$LOGNAME/sandbox/GSI" - fi + export group="ufs-ard" + export noscrub="/gpfs/f5/${group}/scratch/${USER}/$LOGNAME/gsi_tmp/noscrub" + export ptmp="/gpfs/f5/${group}/scratch/${USER}/$LOGNAME/gsi_tmp/ptmp" + export casesdir="/gpfs/f5/ufs-ard/world-shared/GSI_data/CASES/regtest" export check_resource="no" - export accnt="nggps_emc" + export accnt="ufs-ard" ;; wcoss2) export local_or_default="${local_or_default:-/lfs/h2/emc/da/noscrub/$LOGNAME}" diff --git a/ush/module-setup.sh b/ush/module-setup.sh index f587842f0f..299e13aa4e 100755 --- a/ush/module-setup.sh +++ b/ush/module-setup.sh @@ -56,10 +56,8 @@ elif [[ $MACHINE_ID = gaea* ]] ; then # the module command fails. Hence we actually have to source # /etc/profile here. source /etc/profile - __ms_source_etc_profile=yes fi - - source /lustre/f2/dev/role.epic/contrib/Lmod_init.sh + module reset elif [[ $MACHINE_ID = expanse* ]]; then # We are on SDSC Expanse diff --git a/ush/sub_gaea b/ush/sub_gaea index afad6aa7ab..9c4e253c93 100755 --- a/ush/sub_gaea +++ b/ush/sub_gaea @@ -88,8 +88,8 @@ output=${output:-$jobname.out} myuser=$LOGNAME myhost=$(hostname) -if [ -d /lustre/f2/scratch/$LOGNAME ]; then - DATA=/lustre/f2/scratch/$LOGNAME/tmp +if [ -d /gpfs/f5/epic/scratch/${USER}/$LOGNAME ]; then + DATA=/gpfs/f5/epic/scratch/${USER}/$LOGNAME/tmp fi DATA=${DATA:-$ptmp/tmp} @@ -110,7 +110,7 @@ echo "" echo "#SBATCH --output=$output" >> $cfile echo "#SBATCH --job-name=$jobname" >> $cfile echo "#SBATCH --qos=$queue" >> $cfile -echo "#SBATCH --clusters=c4" >> $cfile +echo "#SBATCH --clusters=c5" >> $cfile echo "#SBATCH --time=$timew" >> $cfile echo "#SBATCH --nodes=$nodes --ntasks-per-node=$procs --cpus-per-task=$threads" >> $cfile echo "#SBATCH --account=$accnt" >> $cfile @@ -121,13 +121,13 @@ echo "export ntasks=$(( $nodes * $procs ))" >> $cfile echo "export ppn=$procs" >> $cfile echo "export threads=$threads" >> $cfile echo "export OMP_NUM_THREADS=$threads" >> $cfile -echo "ulimit -s unlimited" >> $cfile +echo "ulimit -s unlimited" >> $cfile echo "" >>$cfile echo ". "$(awk '{ print $1, $2, $3, $4, $5, $6, $7, $8, $9 }' $regdir/regression_var.out) >>$cfile echo "" >>$cfile -echo "source /lustre/f2/dev/role.epic/contrib/Lmod_init.sh" >> $cfile +echo "module reset" >> $cfile echo "module use $modulefiles" >> $cfile echo "module load gsi_gaea.intel" >> $cfile echo "module list" >> $cfile @@ -158,7 +158,7 @@ sbatch=${sbatch:-sbatch} ofile=$DATA/subout$$ >$ofile chmod 777 $ofile -$sbatch --export=ALL $cfile >$ofile +$sbatch $cfile >$ofile rc=$? cat $ofile if [[ -w $SUBLOG ]];then