Skip to content

Commit

Permalink
port to stampede3-spr
Browse files Browse the repository at this point in the history
  • Loading branch information
jedwards4b committed Apr 16, 2024
1 parent b5d9ed9 commit ac95b79
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 24 deletions.
21 changes: 16 additions & 5 deletions config/cesm/machines/config_batch.xml
Original file line number Diff line number Diff line change
Expand Up @@ -654,29 +654,40 @@
</queues>
</batch_system>

<!-- Slurm batch settings for the stampede3-spr machine. -->
<batch_system MACH="stampede3-spr" type="slurm">
  <!-- Submit command: ssh to login1, change to the case directory, then call sbatch. -->
  <batch_submit>ssh login1 cd $CASEROOT ; sbatch</batch_submit>
  <!-- Flags passed to the submit command, filled from the case's wallclock, queue and project values. -->
  <submit_args>
    <arg flag="--time" name="$JOB_WALLCLOCK_TIME"/>
    <arg flag="-p" name="$JOB_QUEUE"/>
    <arg flag="--account" name="$PROJECT"/>
  </submit_args>
  <!-- Single queue, marked as the default: 1 to 16 nodes, at most 24 hours of walltime. -->
  <queues>
    <queue walltimemax="24:00:00"
           nodemin="1"
           nodemax="16"
           default="true">spr</queue>
  </queues>
</batch_system>

<!-- Slurm batch settings for the stampede2-skx machine. -->
<!-- NOTE(review): this listing contains two batch_submit entries and two queues marked default="true"; presumably only one of each is meant to remain. Confirm against the committed file. -->
<batch_system MACH="stampede2-skx" type="slurm" >
<batch_submit>ssh stampede2.tacc.utexas.edu cd $CASEROOT ; sbatch</batch_submit>
<batch_submit>ssh login1 cd $CASEROOT ; sbatch</batch_submit>
<!-- Flags passed to the submit command, filled from the case's wallclock, queue and project values. -->
<submit_args>
<arg flag="--time" name="$JOB_WALLCLOCK_TIME"/>
<arg flag="-p" name="$JOB_QUEUE"/>
<arg flag="--account" name="$PROJECT"/>
</submit_args>
<!-- Queue names with their walltime and node-count limits. -->
<queues>
<queue walltimemax="48:00:00" nodemin="1" nodemax="256" default="true">skx-normal</queue>
<queue walltimemax="48:00:00" nodemin="1" nodemax="256" default="true">skx</queue>
<queue walltimemax="02:00:00" nodemin="1" nodemax="4" >skx-dev</queue>
</queues>
</batch_system>

<!-- Slurm batch settings for the stampede2-knl machine. -->
<!-- NOTE(review): this listing contains two batch_submit entries; presumably only one is meant to remain. Confirm against the committed file. -->
<batch_system MACH="stampede2-knl" type="slurm" >
<batch_submit>ssh stampede2.tacc.utexas.edu cd $CASEROOT ; sbatch</batch_submit>
<batch_submit>ssh login2 cd $CASEROOT ; sbatch</batch_submit>
<!-- Flags passed to the submit command, filled from the case's wallclock, queue and project values. -->
<submit_args>
<arg flag="--time" name="$JOB_WALLCLOCK_TIME"/>
<arg flag="-p" name="$JOB_QUEUE"/>
<arg flag="--account" name="$PROJECT"/>
</submit_args>
<!-- Queue names with their walltime and node-count limits; "development" carries the default flag. -->
<queues>
<queue walltimemax="48:00:00" nodemin="1" nodemax="256" >normal</queue>
<queue walltimemax="02:00:00" nodemin="1" nodemax="8" default="true">development</queue>
<queue walltimemax="48:00:00" nodemin="1" nodemax="256" >icx</queue>
</queues>
</batch_system>

Expand Down
28 changes: 27 additions & 1 deletion config/cesm/machines/config_compilers.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1274,6 +1274,15 @@ using a fortran linker.
</SLIBS>
</compiler>

<!-- Compiler-independent build settings for stampede3-spr. -->
<compiler MACH="stampede3-spr">
  <!-- Extra preprocessor definitions appended to the existing CPPDEFS. -->
  <CPPDEFS>
    <append> -DHAVE_NANOTIME -DHAVE_SLASHPROC </append>
  </CPPDEFS>
  <!-- NetCDF and PnetCDF locations are read from the TACC_* environment variables. -->
  <NETCDF_PATH>$ENV{TACC_NETCDF_DIR}</NETCDF_PATH>
  <PIO_FILESYSTEM_HINTS>lustre</PIO_FILESYSTEM_HINTS>
  <PNETCDF_PATH>$ENV{TACC_PNETCDF_DIR}</PNETCDF_PATH>
</compiler>

<compiler MACH="stampede2-skx">
<CPPDEFS>
<append> -DHAVE_NANOTIME </append>
Expand All @@ -1283,6 +1292,23 @@ using a fortran linker.
<PNETCDF_PATH>$ENV{TACC_PNETCDF_DIR}</PNETCDF_PATH>
</compiler>

<!-- Intel-compiler settings for stampede3-spr. -->
<compiler MACH="stampede3-spr" COMPILER="intel">
  <!-- C and Fortran both get -xhost and -no-fma appended. -->
  <CFLAGS>
    <append> -xhost -no-fma </append>
  </CFLAGS>
  <FFLAGS>
    <append> -xhost -no-fma </append>
    <!-- Additional Fortran flag applied only when MPILIB is mpi-serial. -->
    <append MPILIB="mpi-serial"> -mcmodel medium </append>
  </FFLAGS>
  <!-- Link against HDF5 from the TACC_HDF5_LIB directory, plus the $(MKL) setting. -->
  <LDFLAGS>
    <append>-L$ENV{TACC_HDF5_LIB} -lhdf5 $(MKL) </append>
  </LDFLAGS>
  <!-- System libraries: the Fortran NetCDF link flags reported by nf-config, then HDF5. -->
  <SLIBS>
    <append>$SHELL{${NETCDF_PATH}/bin/nf-config --flibs} -L$ENV{TACC_HDF5_LIB} -lhdf5</append>
  </SLIBS>
  <TRILINOS_PATH>$ENV{TRILINOS_PATH}</TRILINOS_PATH>
</compiler>

<compiler MACH="stampede2-skx" COMPILER="intel">
<CFLAGS>
<append> -xCOMMON-AVX512 -no-fma </append>
Expand All @@ -1292,7 +1318,7 @@ using a fortran linker.
<append MPILIB="mpi-serial"> -mcmodel medium </append>
</FFLAGS>
<LDFLAGS>
<append>-L$ENV{TACC_HDF5_LIB} -lhdf5 $(MKL) -zmuldefs -xCOMMON-AVX512</append>
<append>-L$ENV{TACC_HDF5_LIB} -lhdf5 $(MKL) -xCOMMON-AVX512</append>
</LDFLAGS>
<SLIBS>
<append>$SHELL{${NETCDF_PATH}/bin/nf-config --flibs} -L$ENV{TACC_HDF5_LIB} -lhdf5</append>
Expand Down
112 changes: 97 additions & 15 deletions config/cesm/machines/config_machines.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2554,9 +2554,77 @@ This allows using a different mpirun command to launch unit tests
</environment_variables>
</machine>

<!-- Machine definition for stampede3-spr: host detection, paths, MPI launch commands, modules and environment. -->
<machine MACH="stampede3-spr">
  <DESC>Intel Xeon CPU MAX 9480 ("Sapphire Rapids HBM") 112 cores on two sockets (2x 56 cores), batch system is SLURM</DESC>
  <!-- Dots are escaped so they match a literal "." in the host name rather than any character. -->
  <NODENAME_REGEX>.*\.stampede3\.tacc\.utexas\.edu</NODENAME_REGEX>
  <OS>LINUX</OS>
  <COMPILERS>intel</COMPILERS>
  <!-- Names listed here must match the mpilib attributes used by the mpirun and modules entries below. -->
  <MPILIBS>impi,mvapich2</MPILIBS>
  <CIME_OUTPUT_ROOT>$ENV{SCRATCH}</CIME_OUTPUT_ROOT>
  <DIN_LOC_ROOT>/work/02503/edwardsj/CESM/inputdata</DIN_LOC_ROOT>
  <DIN_LOC_ROOT_CLMFORC>/work/02503/edwardsj/CESM/inputdata/lmwg</DIN_LOC_ROOT_CLMFORC>
  <DOUT_S_ROOT>$ENV{WORK}/archive/$CASE</DOUT_S_ROOT>
  <BASELINE_ROOT>/work/02503/edwardsj/CESM/cesm_baselines</BASELINE_ROOT>
  <CCSM_CPRNC>/work/02503/edwardsj/CESM/cime/tools/cprnc/cprnc</CCSM_CPRNC>
  <GMAKE_J>4</GMAKE_J>
  <BATCH_SYSTEM>slurm</BATCH_SYSTEM>
  <SUPPORTED_BY>cseg</SUPPORTED_BY>
  <!-- Task limits equal the 112 cores per node given in DESC. -->
  <MAX_TASKS_PER_NODE>112</MAX_TASKS_PER_NODE>
  <MAX_MPITASKS_PER_NODE>112</MAX_MPITASKS_PER_NODE>
  <!-- Both MPI libraries launch through ibrun with the total task count. -->
  <mpirun mpilib="impi">
    <executable>ibrun</executable>
    <arguments>
      <arg name="ntasks"> -n {{ total_tasks }} </arg>
    </arguments>
  </mpirun>
  <mpirun mpilib="mvapich2">
    <executable>ibrun</executable>
    <arguments>
      <arg name="ntasks"> -n {{ total_tasks }} </arg>
    </arguments>
  </mpirun>
  <module_system type="module">
    <!-- Lmod initialisation scripts and module commands, one per scripting language. -->
    <init_path lang="perl">/opt/apps/lmod/lmod/init/perl</init_path>
    <init_path lang="python">/opt/apps/lmod/lmod/init/env_modules_python.py</init_path>
    <init_path lang="sh">/opt/apps/lmod/lmod/init/sh</init_path>
    <init_path lang="csh">/opt/apps/lmod/lmod/init/csh</init_path>
    <cmd_path lang="perl">/opt/apps/lmod/lmod/libexec/lmod perl</cmd_path>
    <cmd_path lang="python">/opt/apps/lmod/lmod/libexec/lmod python</cmd_path>
    <cmd_path lang="sh">module</cmd_path>
    <cmd_path lang="csh">module</cmd_path>
    <!-- Unconditional group: purge the environment, then load the base toolchain. -->
    <modules>
      <command name="purge"></command>
      <command name="load">TACC</command>
      <command name="load">intel/24.0</command>
      <command name="load">cmake/3.28.1</command>
    </modules>
    <!-- MPI library module, selected by the case's MPILIB. -->
    <modules mpilib="mvapich2">
      <command name="load">mvapich/3.0</command>
    </modules>
    <modules mpilib="impi">
      <command name="load">impi</command>
    </modules>
    <!-- Parallel I/O libraries for every MPILIB other than mpi-serial (the "!" prefix negates the match). -->
    <modules mpilib="!mpi-serial">
      <command name="load">pnetcdf/1.12.3</command>
      <command name="load">parallel-netcdf/4.9.2</command>
      <command name="load">phdf5/1.14.3</command>
    </modules>
    <!-- Serial NetCDF for mpi-serial builds. -->
    <modules mpilib="mpi-serial">
      <command name="load">netcdf/4.9.2</command>
    </modules>
  </module_system>
  <environment_variables>
    <env name="OMP_STACKSIZE">256M</env>
  </environment_variables>
  <!-- Set only for Intel MPI builds. -->
  <environment_variables mpilib="impi">
    <env name="I_MPI_F90">ifort</env>
  </environment_variables>
</machine>



<machine MACH="stampede2-skx">
<DESC>Intel Xeon Platinum 8160 ("Skylake"),48 cores on two sockets (24 cores/socket) , batch system is SLURM</DESC>
<NODENAME_REGEX>.*stampede2</NODENAME_REGEX>
<OS>LINUX</OS>
<COMPILERS>intel</COMPILERS>
<MPILIBS>impi,mvapich2</MPILIBS>
Expand All @@ -2573,11 +2641,17 @@ This allows using a different mpirun command to launch unit tests
<MAX_MPITASKS_PER_NODE>48</MAX_MPITASKS_PER_NODE>
<mpirun mpilib="impi">
<executable>ibrun</executable>
<arguments>
<arg name="ntasks"> -n {{ total_tasks }} </arg>
</arguments>
</mpirun>
<mpirun mpilib="mvapich2">
<executable>ibrun</executable>
<arguments>
<arg name="ntasks"> -n {{ total_tasks }} </arg>
</arguments>
</mpirun>
<module_system type="module">
<module_system type="module">
<init_path lang="perl">/opt/apps/lmod/lmod/init/perl</init_path>
<init_path lang="python">/opt/apps/lmod/lmod/init/env_modules_python.py</init_path>
<init_path lang="sh">/opt/apps/lmod/lmod/init/sh</init_path>
Expand All @@ -2587,30 +2661,38 @@ This allows using a different mpirun command to launch unit tests
<cmd_path lang="sh">module</cmd_path>
<cmd_path lang="csh">module</cmd_path>
<modules>
<command name="purge"></command>
<command name="load">TACC</command>
<command name="load">python2/2.7.15</command>
<command name="load">intel/18.0.2</command>
<command name="load">cmake/3.16.1</command>
<command name="purge"></command>
<command name="load">TACC</command>
<command name="load">intel/24.0</command>
<command name="load">cmake/3.28.1</command>
</modules>
<modules mpilib="mvapich2">
<command name="load">mvapich2/2.3.1</command>
<command name="load">pnetcdf/1.11</command>
<command name="load">parallel-netcdf/4.6.2</command>
<command name="load">mvapich/3.0</command>
</modules>
<modules mpilib="impi">
<command name="rm">mvapich2</command>
<command name="load">impi/18.0.2</command>
<command name="load">pnetcdf/1.11</command>
<command name="load">parallel-netcdf/4.6.2</command>
<command name="load">impi</command>
</modules>
<modules mpilib="!mpi-serial">
<command name="load">pnetcdf/1.12.3</command>
<command name="load">parallel-netcdf/4.9.2</command>
<command name="load">phdf5/1.14.3</command>
</modules>
<modules mpilib="mpi-serial">
<command name="load">netcdf/4.3.3.1</command>
<command name="load">netcdf/4.9.2</command>
</modules>
</module_system>
<environment_variables>
<env name="OMP_STACKSIZE">256M</env>
</environment_variables>
<environment_variables comp_interface="nuopc" mpilib="impi">
<env name="ESMFMKFILE">/work2/02503/edwardsj/stampede3/intel24.0/esmf/v8.6.1b04/lib/libO/Linux.intel.64.intelmpi.default/esmf.mk</env>
<env name="ESMF_RUNTIME_PROFILE">ON</env>
<env name="ESMF_RUNTIME_PROFILE_OUTPUT">SUMMARY</env>
<env name="UGCSINPUTPATH">/work/06242/tg855414/stampede2/FV3GFS/benchmark-inputs/2012010100/gfs/fcst</env>
<env name="UGCSFIXEDFILEPATH">/work/06242/tg855414/stampede2/FV3GFS/fix_am</env>
<env name="UGCSADDONPATH">/work/06242/tg855414/stampede2/FV3GFS/addon</env>
<env name="I_MPI_F90">ifort</env>
</environment_variables>
</machine>

<machine MACH="stampede2-knl">
Expand Down
4 changes: 2 additions & 2 deletions tools/statistical_ensemble_test/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ def get_pertlim_uf(rand_num):
k = (i - 1)%100
if i%2 != 0:
ll = j + (k/2)*18
ippt = '{0:03d}'.format(ll)
ippt = '{0:03d}'.format(int(ll))
ptlim = "0."+ippt+"d-13"
else:
ll = j + ((k-1)/2)*18
ippt = '{0:03d}'.format(ll)
ippt = '{0:03d}'.format(int(ll))
ptlim = "-0."+ippt+"d-13"
return ptlim

Expand Down
2 changes: 1 addition & 1 deletion tools/statistical_ensemble_test/single_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def single_case(opts_dict, case_flags, stat_dir):
f.write(text2)
f.write(text3)
if opts_dict['pertlim'] != "0":
text = "\npertlim = " + opts_dict['pertlim']
text = "\npertlim = " + str(opts_dict['pertlim'])
f.write(text)
else:
print("Warning: no user_nl_cam found")
Expand Down

0 comments on commit ac95b79

Please sign in to comment.