From ac95b79c139f494a5e0769be46419d0c83ded9ae Mon Sep 17 00:00:00 2001 From: Jim Edwards Date: Tue, 16 Apr 2024 13:34:06 -0500 Subject: [PATCH] port to stampede3-spr --- config/cesm/machines/config_batch.xml | 21 +++- config/cesm/machines/config_compilers.xml | 28 ++++- config/cesm/machines/config_machines.xml | 112 +++++++++++++++--- tools/statistical_ensemble_test/ensemble.py | 4 +- tools/statistical_ensemble_test/single_run.py | 2 +- 5 files changed, 143 insertions(+), 24 deletions(-) diff --git a/config/cesm/machines/config_batch.xml b/config/cesm/machines/config_batch.xml index 896e37cc9cf..25f735ace9e 100644 --- a/config/cesm/machines/config_batch.xml +++ b/config/cesm/machines/config_batch.xml @@ -654,29 +654,40 @@ + + ssh login1 cd $CASEROOT ; sbatch + + + + + + + spr + + + - ssh stampede2.tacc.utexas.edu cd $CASEROOT ; sbatch + ssh login1 cd $CASEROOT ; sbatch - skx-normal + skx skx-dev - ssh stampede2.tacc.utexas.edu cd $CASEROOT ; sbatch + ssh login2 cd $CASEROOT ; sbatch - normal - development + icx diff --git a/config/cesm/machines/config_compilers.xml b/config/cesm/machines/config_compilers.xml index 73d44b42306..25df6f252bd 100644 --- a/config/cesm/machines/config_compilers.xml +++ b/config/cesm/machines/config_compilers.xml @@ -1274,6 +1274,15 @@ using a fortran linker. + + + -DHAVE_NANOTIME -DHAVE_SLASHPROC + + $ENV{TACC_NETCDF_DIR} + lustre + $ENV{TACC_PNETCDF_DIR} + + -DHAVE_NANOTIME @@ -1283,6 +1292,23 @@ using a fortran linker. $ENV{TACC_PNETCDF_DIR} + + + -xhost -no-fma + + + -xhost -no-fma + -mcmodel medium + + + -L$ENV{TACC_HDF5_LIB} -lhdf5 $(MKL) + + + $SHELL{${NETCDF_PATH}/bin/nf-config --flibs} -L$ENV{TACC_HDF5_LIB} -lhdf5 + + $ENV{TRILINOS_PATH} + + -xCOMMON-AVX512 -no-fma @@ -1292,7 +1318,7 @@ using a fortran linker. -mcmodel medium - -L$ENV{TACC_HDF5_LIB} -lhdf5 $(MKL) -zmuldefs -xCOMMON-AVX512 + -L$ENV{TACC_HDF5_LIB} -lhdf5 $(MKL) -xCOMMON-AVX512 $SHELL{${NETCDF_PATH}/bin/nf-config --flibs} -L$ENV{TACC_HDF5_LIB} -lhdf5 diff --git a/config/cesm/machines/config_machines.xml b/config/cesm/machines/config_machines.xml index 0bf72bf1c2f..da8110b050c 100644 --- a/config/cesm/machines/config_machines.xml +++ b/config/cesm/machines/config_machines.xml @@ -2554,9 +2554,77 @@ This allows using a different mpirun command to launch unit tests + + Intel Xeon CPU MAX 9480 ("Sapphire Rapids HBM") 112 cores on two sockets (2x 56 cores), batch system is SLURM + .*.stampede3.tacc.utexas.edu + LINUX + intel + impi,mvapich + $ENV{SCRATCH} + /work/02503/edwardsj/CESM/inputdata + /work/02503/edwardsj/CESM/inputdata/lmwg + $ENV{WORK}/archive/$CASE + /work/02503/edwardsj/CESM/cesm_baselines + /work/02503/edwardsj/CESM/cime/tools/cprnc/cprnc + 4 + slurm + cseg + 112 + 112 + + ibrun + + -n {{ total_tasks }} + + + + ibrun + + -n {{ total_tasks }} + + + + /opt/apps/lmod/lmod/init/perl + /opt/apps/lmod/lmod/init/env_modules_python.py + /opt/apps/lmod/lmod/init/sh + /opt/apps/lmod/lmod/init/csh + /opt/apps/lmod/lmod/libexec/lmod perl + /opt/apps/lmod/lmod/libexec/lmod python + module + module + + + TACC + intel/24.0 + cmake/3.28.1 + + + mvapich/3.0 + + + impi + + + pnetcdf/1.12.3 + parallel-netcdf/4.9.2 + phdf5/1.14.3 + + + netcdf/4.9.2 + + + + 256M + + + ifort + + + + + Intel Xeon Platinum 8160 ("Skylake"),48 cores on two sockets (24 cores/socket) , batch system is SLURM - .*stampede2 LINUX intel impi,mvapich2 @@ -2573,11 +2641,17 @@ This allows using a different mpirun command to launch unit tests 48 ibrun + + -n {{ total_tasks }} + ibrun + + -n {{ total_tasks }} + - + /opt/apps/lmod/lmod/init/perl /opt/apps/lmod/lmod/init/env_modules_python.py /opt/apps/lmod/lmod/init/sh @@ -2587,30 +2661,38 @@ This allows using a different mpirun command to launch unit tests module module - - TACC - python2/2.7.15 - intel/18.0.2 - cmake/3.16.1 + + TACC + intel/24.0 + cmake/3.28.1 - mvapich2/2.3.1 - pnetcdf/1.11 - parallel-netcdf/4.6.2 + mvapich/3.0 - mvapich2 - impi/18.0.2 - pnetcdf/1.11 - parallel-netcdf/4.6.2 + impi + + pnetcdf/1.12.3 + parallel-netcdf/4.9.2 + phdf5/1.14.3 + - netcdf/4.3.3.1 + netcdf/4.9.2 256M + + /work2/02503/edwardsj/stampede3/intel24.0/esmf/v8.6.1b04/lib/libO/Linux.intel.64.intelmpi.default/esmf.mk + ON + SUMMARY + /work/06242/tg855414/stampede2/FV3GFS/benchmark-inputs/2012010100/gfs/fcst + /work/06242/tg855414/stampede2/FV3GFS/fix_am + /work/06242/tg855414/stampede2/FV3GFS/addon + ifort + diff --git a/tools/statistical_ensemble_test/ensemble.py b/tools/statistical_ensemble_test/ensemble.py index 740ab8446f6..3b7932f4340 100644 --- a/tools/statistical_ensemble_test/ensemble.py +++ b/tools/statistical_ensemble_test/ensemble.py @@ -27,11 +27,11 @@ def get_pertlim_uf(rand_num): k = (i - 1)%100 if i%2 != 0: ll = j + (k/2)*18 - ippt = '{0:03d}'.format(ll) + ippt = '{0:03d}'.format(int(ll)) ptlim = "0."+ippt+"d-13" else: ll = j + ((k-1)/2)*18 - ippt = '{0:03d}'.format(ll) + ippt = '{0:03d}'.format(int(ll)) ptlim = "-0."+ippt+"d-13" return ptlim diff --git a/tools/statistical_ensemble_test/single_run.py b/tools/statistical_ensemble_test/single_run.py index 14014cd8249..66ec9e1c19e 100644 --- a/tools/statistical_ensemble_test/single_run.py +++ b/tools/statistical_ensemble_test/single_run.py @@ -307,7 +307,7 @@ def single_case(opts_dict, case_flags, stat_dir): f.write(text2) f.write(text3) if opts_dict['pertlim'] != "0": - text = "\npertlim = " + opts_dict['pertlim'] + text = "\npertlim = " + str(opts_dict['pertlim']) f.write(text) else: print("Warning: no user_nl_cam found")