diff --git a/etc/lmod-setup.sh b/etc/lmod-setup.sh index 324868afce..c969778c12 100644 --- a/etc/lmod-setup.sh +++ b/etc/lmod-setup.sh @@ -31,7 +31,7 @@ if [ "$L_MACHINE" = macos ]; then module purge elif [ "$L_MACHINE" = linux ]; then - export BASH_ENV="/usr/share/share/lmod/init/bash" + export BASH_ENV="/usr/share/lmod/lmod/init/bash" source $BASH_ENV module purge diff --git a/modulefiles/build_linux_gnu.lua b/modulefiles/build_linux_gnu.lua index 7f40d763de..cc5f6831f1 100644 --- a/modulefiles/build_linux_gnu.lua +++ b/modulefiles/build_linux_gnu.lua @@ -14,7 +14,6 @@ load("hpc") load("hpc-python") load("hpc-gnu") -load("openmpi") load("hpc-openmpi") load("srw_common") diff --git a/modulefiles/build_macos_gnu.lua b/modulefiles/build_macos_gnu.lua index d92de9a4da..36c8b80cdb 100644 --- a/modulefiles/build_macos_gnu.lua +++ b/modulefiles/build_macos_gnu.lua @@ -18,7 +18,6 @@ load("hpc") load("hpc-python") load("hpc-gnu") -load("openmpi") load("hpc-openmpi") load("srw_common") diff --git a/modulefiles/wflow_linux.lua b/modulefiles/wflow_linux.lua index dd9aec7afa..6c4cc6949d 100644 --- a/modulefiles/wflow_linux.lua +++ b/modulefiles/wflow_linux.lua @@ -5,16 +5,35 @@ This module sets a path to activate conda environment needed for running the UFS whatis([===[This module sets a path for conda environment needed for running the UFS SRW App on Linux]===]) setenv("CMAKE_Platform", "linux") -setenv("VENV", pathJoin(os.getenv("HOME"), "condaenv/envs/regional_workflow")) ---[[ -local ROCOTOmod="/Users/username/modules" -prepend_path("MODULEPATH", ROCOTOmod) -load(rocoto) ---]] +-- Conda initialization function +function init_conda(conda_path) + local shell=myShellType() + local conda_file + if shell == "csh" then + conda_file=pathJoin(conda_path,"etc/profile.d/conda.csh") + else + conda_file=pathJoin(conda_path,"etc/profile.d/conda.sh") + end + local mcmd="source " .. 
conda_file + execute{cmd=mcmd, modeA={"load"}} +end + +-- initialize conda +local conda_path="/home/username/miniconda3" +init_conda(conda_path) + +-- add rocoto to path +local rocoto_path="/home/username/rocoto" +prepend_path("PATH", pathJoin(rocoto_path,"bin")) + +-- add fake slurm commands +local srw_path="/home/username/ufs-srweather-app" +prepend_path("PATH", pathJoin(srw_path, "ush/rocoto_fake_slurm")) +-- display conda activation message if mode() == "load" then LmodMsgRaw([===[Please do the following to activate conda: - > conda activate $VENV + > conda activate regional_workflow ]===]) end diff --git a/modulefiles/wflow_macos.lua b/modulefiles/wflow_macos.lua index 769f1bc05e..d7cf30e0a3 100644 --- a/modulefiles/wflow_macos.lua +++ b/modulefiles/wflow_macos.lua @@ -5,17 +5,36 @@ This module set a path needed to activate conda environement for running UFS SRW whatis([===[This module activates conda environment for running the UFS SRW App on macOS]===]) setenv("CMAKE_Platform", "macos") -setenv("VENV", pathJoin(os.getenv("HOME"), "condaenv/envs/regional_workflow")) ---[[ -local ROCOTOmod="/Users/username/modules" -prepend_path("MODULEPATH", ROCOTOmod) -load(rocoto) ---]] +-- Conda initialization function +function init_conda(conda_path) + local shell=myShellType() + local conda_file + if shell == "csh" then + conda_file=pathJoin(conda_path,"etc/profile.d/conda.csh") + else + conda_file=pathJoin(conda_path,"etc/profile.d/conda.sh") + end + local mcmd="source " .. 
conda_file + execute{cmd=mcmd, modeA={"load"}} +end + +-- initialize conda +local conda_path="/Users/username/miniconda3" +init_conda(conda_path) + +-- add rocoto to path +local rocoto_path="/Users/username/rocoto" +prepend_path("PATH", pathJoin(rocoto_path,"bin")) + +-- add fake slurm commands +local srw_path="/Users/username/ufs-srweather-app" +prepend_path("PATH", pathJoin(srw_path, "ush/rocoto_fake_slurm")) +-- display conda activation message if mode() == "load" then LmodMsgRaw([===[Please do the following to activate conda virtual environment: - > conda activate $VENV " + > conda activate regional_workflow ]===]) end diff --git a/parm/FV3LAM_wflow.xml b/parm/FV3LAM_wflow.xml index d46e70dee1..fda2b630c7 100644 --- a/parm/FV3LAM_wflow.xml +++ b/parm/FV3LAM_wflow.xml @@ -153,7 +153,7 @@ tasks; and the "FCST" type is used for the RUN_FCST_TN task. ]> - + {# Double quotes are required inside the strftime! Expect an error from reading the template if using single quotes. #} {{ cdate_first_cycl.strftime("%M %H %d %m %Y *") }} diff --git a/tests/WE2E/test_configs/wflow_features/config.get_from_NOMADS_ics_FV3GFS_lbcs_FV3GFS.yaml b/tests/WE2E/test_configs/wflow_features/config.get_from_NOMADS_ics_FV3GFS_lbcs_FV3GFS.yaml index 7b30698ef2..38fbbe5af6 100644 --- a/tests/WE2E/test_configs/wflow_features/config.get_from_NOMADS_ics_FV3GFS_lbcs_FV3GFS.yaml +++ b/tests/WE2E/test_configs/wflow_features/config.get_from_NOMADS_ics_FV3GFS_lbcs_FV3GFS.yaml @@ -10,8 +10,8 @@ platform: workflow: CCPP_PHYS_SUITE: FV3_GFS_2017_gfdlmp PREDEF_GRID_NAME: RRFS_CONUS_25km - DATE_FIRST_CYCL: date --utc --date="2 days ago" +%Y%m%d00 - DATE_LAST_CYCL: date --utc --date="2 days ago" +%Y%m%d00 + DATE_FIRST_CYCL: $DATE_UTIL --utc --date="2 days ago" +%Y%m%d00 + DATE_LAST_CYCL: $DATE_UTIL --utc --date="2 days ago" +%Y%m%d00 FCST_LEN_HRS: 6 PREEXISTING_DIR_METHOD: rename task_get_extrn_ics: diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 5baa934f43..4974e79f50 100644 --- 
a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -87,6 +87,10 @@ platform: # The number of cores available per node on the compute platform, now # configurable for all platforms. # + # TASKTHROTTLE: + # The maximum number of tasks that may be active simultaneously. On + # Linux/macOS, setting this to 1 makes sense. + # # BUILD_MOD_FN: # Name of alternative build module file to use if using an # unsupported platform. Is set automatically for supported machines. @@ -158,6 +162,7 @@ platform: # WORKFLOW_MANAGER: "" NCORES_PER_NODE: "" + TASKTHROTTLE: 1000 BUILD_MOD_FN: 'build_{{ user.MACHINE|lower() }}_{{ workflow.COMPILER }}' WFLOW_MOD_FN: 'wflow_{{ user.MACHINE|lower() }}' BUILD_VER_FN: 'build.ver.{{ user.MACHINE|lower() }}' diff --git a/ush/constants.yaml b/ush/constants.yaml index c4a3fa9d67..0ac61f5318 100644 --- a/ush/constants.yaml +++ b/ush/constants.yaml @@ -7,7 +7,7 @@ constants: #----------------------------------------------------------------------- # PI_GEOM: 3.14159265358979323846264338327 - DEGS_PER_RADIAN: 57.2957795131 + DEGS_PER_RADIAN: 57.29577951308232087679 RADIUS_EARTH: 6371200.0 # #----------------------------------------------------------------------- diff --git a/ush/machine/linux.yaml b/ush/machine/linux.yaml index db9749aa73..14dafe0e45 100644 --- a/ush/machine/linux.yaml +++ b/ush/machine/linux.yaml @@ -1,18 +1,31 @@ platform: - WORKFLOW_MANAGER: none + WORKFLOW_MANAGER: rocoto NCORES_PER_NODE: 8 - SCHED: none - RUN_CMD_FCST: 'mpirun -n ${PE_MEMBER01} ' - RUN_CMD_POST: 'mpirun -n 4 ' + TASKTHROTTLE: 1 + SCHED: slurm + CCPA_OBS_DIR: /home/username/DATA/UFS/obs_data/ccpa/proc + MRMS_OBS_DIR: /home/username/DATA/UFS/obs_data/mrms/proc + NDAS_OBS_DIR: /home/username/DATA/UFS/obs_data/ndas/proc + METPLUS_PATH: "" + MET_BIN_EXEC: bin + MET_INSTALL_DIR: "" + DOMAIN_PREGEN_BASEDIR: /home/username/DATA/UFS/FV3LAM_pregen + RUN_CMD_FCST: mpirun -n ${PE_MEMBER01} + RUN_CMD_POST: mpirun RUN_CMD_SERIAL: time - RUN_CMD_UTILS: mpirun -n 4 - PRE_TASK_CMDS: '{ 
ulimit -a; }' + RUN_CMD_UTILS: mpirun + PRE_TASK_CMDS: '{ ulimit -a; ulimit -s unlimited; }' + TEST_EXTRN_MDL_SOURCE_BASEDIR: /home/username/DATA/UFS/input_model_data + TEST_PREGEN_BASEDIR: /home/username/DATA/UFS/FV3LAM_pregen + TEST_ALT_EXTRN_MDL_SYSBASEDIR_ICS: /home/username/DATA/UFS/dummy_FV3GFS_sys_dir + TEST_ALT_EXTRN_MDL_SYSBASEDIR_LBCS: /home/username/DATA/UFS/dummy_FV3GFS_sys_dir FIXaer: /home/username/DATA/UFS/fix/fix_aer FIXgsm: /home/username/DATA/UFS/fix/fix_am FIXlut: /home/username/DATA/UFS/fix/fix_lut FIXorg: /home/username/DATA/UFS/fix/fix_orog FIXsfc: /home/username/DATA/UFS/fix/fix_sfc_climo FIXshp: /home/username/DATA/UFS/NaturalEarth + EXTRN_MDL_DATA_STORES: aws nomads data: ics_lbcs: FV3GFS: /home/username/DATA/UFS/FV3GFS diff --git a/ush/machine/macos.yaml b/ush/machine/macos.yaml index 7d16746174..3cca8ecc4c 100644 --- a/ush/machine/macos.yaml +++ b/ush/machine/macos.yaml @@ -1,18 +1,31 @@ platform: - WORKFLOW_MANAGER: none + WORKFLOW_MANAGER: rocoto NCORES_PER_NODE: 8 - SCHED: none - RUN_CMD_FCST: 'mpirun -n ${PE_MEMBER01} ' - RUN_CMD_POST: 'mpirun -n 4 ' + TASKTHROTTLE: 1 + SCHED: slurm + CCPA_OBS_DIR: /Users/username/DATA/UFS/obs_data/ccpa/proc + MRMS_OBS_DIR: /Users/username/DATA/UFS/obs_data/mrms/proc + NDAS_OBS_DIR: /Users/username/DATA/UFS/obs_data/ndas/proc + DOMAIN_PREGEN_BASEDIR: /Users/username/DATA/UFS/FV3LAM_pregen + METPLUS_PATH: "" + MET_BIN_EXEC: bin + MET_INSTALL_DIR: "" + RUN_CMD_FCST: mpirun -n ${PE_MEMBER01} + RUN_CMD_POST: mpirun RUN_CMD_SERIAL: time - RUN_CMD_UTILS: mpirun -n 4 - PRE_TASK_CMDS: '{ ulimit -a; }' + RUN_CMD_UTILS: mpirun + PRE_TASK_CMDS: '{ ulimit -a; ulimit -s unlimited; }' + TEST_EXTRN_MDL_SOURCE_BASEDIR: /Users/username/DATA/UFS/input_model_data + TEST_PREGEN_BASEDIR: /Users/username/DATA/UFS/FV3LAM_pregen + TEST_ALT_EXTRN_MDL_SYSBASEDIR_ICS: /Users/username/DATA/UFS/dummy_FV3GFS_sys_dir + TEST_ALT_EXTRN_MDL_SYSBASEDIR_LBCS: /Users/username/DATA/UFS/dummy_FV3GFS_sys_dir FIXaer: 
/Users/username/DATA/UFS/fix/fix_aer FIXgsm: /Users/username/DATA/UFS/fix/fix_am FIXlut: /Users/username/DATA/UFS/fix/fix_lut FIXorg: /Users/username/DATA/UFS/fix/fix_orog FIXsfc: /Users/username/DATA/UFS/fix/fix_sfc_climo FIXshp: /Users/username/DATA/UFS/NaturalEarth + EXTRN_MDL_DATA_STORES: aws nomads data: ics_lbcs: FV3GFS: /Users/username/DATA/UFS/FV3GFS diff --git a/ush/python_utils/config_parser.py b/ush/python_utils/config_parser.py index aa9d04aed3..c09ff8c9c5 100644 --- a/ush/python_utils/config_parser.py +++ b/ush/python_utils/config_parser.py @@ -26,7 +26,7 @@ except ModuleNotFoundError: pass # The rest of the formats: JSON/SHELL/INI/XML do not need -# external pakcages +# external packages import json import os import re @@ -398,8 +398,6 @@ def cfg_to_xml_str(cfg): ################## # CONFIG utils ################## - - def flatten_dict(dictionary, keys=None): """Flatten a recursive dictionary (e.g.yaml/json) to be one level deep diff --git a/ush/rocoto_fake_slurm/sacct b/ush/rocoto_fake_slurm/sacct new file mode 100755 index 0000000000..7b07f84720 --- /dev/null +++ b/ush/rocoto_fake_slurm/sacct @@ -0,0 +1,42 @@ +#!/bin/bash + +# Emulates slurm's sacct +if [[ "$1" = "--jobs="* ]]; then + PIDS="${1:7}" + PIDS="${PIDS//,/' '}" +elif [[ -f .job_database ]]; then + PIDS=$(cat .job_database | grep submitted | sort -u -k1,1 | awk '{print $3}') +fi + +# Output info the way rocoto calls sacct +FMT="%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s\n" +echo "JobID|User|JobName|Partition|Priority|Submit|Start|End|NCPUS|ExitCode|State" + +for pid in ${PIDS}; do + + t_sub="N/A" + t_start=$t_sub + t_end=$t_sub + name=$pid + user=${USER:-user} + exitc=0 + state="UNKNOWN" + + v=$(cat .job_database | grep "pid $pid submitted" | awk '{print $1" "$5}') + if [ ! -z "$v" ]; then + state="PENDING" + read name t_sub <<< "$v" + v=$(cat .job_database | grep "pid $pid started" | awk '{print $5" "$7}') + if [ ! 
-z "$v" ]; then + state="RUNNING" + read t_start t_end<<< "$v" + fi + v=$(cat .job_database | grep "pid $pid ended" | awk '{print $5" "$7}') + if [ ! -z "$v" ]; then + state="COMPLETED" + read t_end exitc <<< "$v" + fi + fi + + printf "$FMT" $pid ${user:0:30} ${name:0:30} linux 0.1 $t_sub $t_start $t_end 1 $exitc $state +done diff --git a/ush/rocoto_fake_slurm/sbatch b/ush/rocoto_fake_slurm/sbatch new file mode 100755 index 0000000000..b505c8ab9d --- /dev/null +++ b/ush/rocoto_fake_slurm/sbatch @@ -0,0 +1,48 @@ +#!/bin/bash + +# Emulates slurm's sbatch + +FD=${1:-/dev/stdin} + +#parse log file +LOG=`grep "#SBATCH -o" $FD | awk '{ print $3 }'` +if [ -z "$LOG" ]; then + LOG=/dev/null +fi + +#parse time +TIM=`grep "#SBATCH -t" $FD | awk '{ print $3 }'` +if [ -z "$TIM" ]; then + SECS= + CTIM= +else + SECS=`echo $TIM | awk 'BEGIN { FS = ":" } ; { secs = $1 * 3600 + $2 * 60 + $3; print secs };'` + CTIM="timeout ${SECS}s" +fi + +#parse job name +JOBNAME=`grep "#SBATCH --job-name" $FD | awk 'BEGIN { FS = "=" }; { print $2 }'` +if [ -z "$JOBNAME" ]; then + JOBNAME="default" +fi + +#command +CMD="`cat $FD`" + +#execute job in background +bash -c "\ + ds=\$(date --utc +%Y-%m-%d:%H:%M:%S); \ + de=\$(date --utc -d '$SECS sec' +%Y-%m-%d:%H:%M:%S); \ + echo $JOBNAME pid \$$ started \$ds ends \$de >>.job_database; \ + \ + ${CTIM} ${CMD} &>$LOG; \ + excode=\$?; \ + \ + de=\$(date --utc +%Y-%m-%d:%H:%M:%S); \ + echo $JOBNAME pid \$$ ended \$de exitcode \$excode >>.job_database;" & + +#submission info +pid=$! 
+dsub=$(date --utc +%Y-%m-%d:%H:%M:%S) +echo $JOBNAME pid $pid submitted $dsub >>.job_database +echo "Submitted batch job "$pid diff --git a/ush/rocoto_fake_slurm/scancel b/ush/rocoto_fake_slurm/scancel new file mode 100755 index 0000000000..94575f48c3 --- /dev/null +++ b/ush/rocoto_fake_slurm/scancel @@ -0,0 +1,4 @@ +#!/bin/bash + +# Emulates slurm's scancel +exec kill -9 -$1 diff --git a/ush/rocoto_fake_slurm/sinfo b/ush/rocoto_fake_slurm/sinfo new file mode 100755 index 0000000000..cde2d07bc7 --- /dev/null +++ b/ush/rocoto_fake_slurm/sinfo @@ -0,0 +1,4 @@ +#!/bin/bash + +# Emulates slurm's sinfo +exec lscpu diff --git a/ush/rocoto_fake_slurm/squeue b/ush/rocoto_fake_slurm/squeue new file mode 100755 index 0000000000..915ca58506 --- /dev/null +++ b/ush/rocoto_fake_slurm/squeue @@ -0,0 +1,42 @@ +#!/bin/bash + +# Emulates slurm's squeue +if [[ "$1" = "--jobs="* ]]; then + PIDS="${1:7}" + PIDS="${PIDS//,/' '}" +elif [[ -f .job_database ]]; then + PIDS=$(cat .job_database | grep submitted | sort -u -k1,1 | awk '{print $3}') +fi + +# Output info the way rocoto calls squeue +FMT="%-40s%-40s%-10s%-20s%-30s%-30s%-30s%-30s%-10s%-30s%-200s\n" +printf "$FMT" JOBID USER CPUS PARTITION SUBMIT_TIME START_TIME END_TIME PRIORITY EXIT_CODE STATE NAME + +for pid in ${PIDS}; do + + t_sub="N/A" + t_start=$t_sub + t_end=$t_sub + name=$pid + user=${USER:-user} + exitc=0 + state="UNKNOWN" + + v=$(cat .job_database | grep "pid $pid submitted" | awk '{print $1" "$5}') + if [ ! -z "$v" ]; then + state="PENDING" + read name t_sub <<< "$v" + v=$(cat .job_database | grep "pid $pid started" | awk '{print $5" "$7}') + if [ ! -z "$v" ]; then + state="RUNNING" + read t_start t_end<<< "$v" + fi + v=$(cat .job_database | grep "pid $pid ended" | awk '{print $5" "$7}') + if [ ! 
-z "$v" ]; then + state="COMPLETED" + read t_end exitc <<< "$v" + fi + fi + + printf "$FMT" $pid $user 1 linux $t_sub $t_start $t_end 0.1 $exitc $state $name +done diff --git a/ush/rocoto_fake_slurm/srun b/ush/rocoto_fake_slurm/srun new file mode 100755 index 0000000000..b5387a3d2d --- /dev/null +++ b/ush/rocoto_fake_slurm/srun @@ -0,0 +1,28 @@ +#!/bin/bash + +# Emulates slurm's srun: forwards the task count and the command to mpirun. +# Arrays keep every argument intact (no re-splitting on spaces or globbing). +OPTS=() +CMDS=() + +# Extract only --ntasks from options; everything else is passed through +while (( "$#" )); do + case "$1" in + -n|--ntasks) + # Abort on a missing value: "shift 2" would fail and loop forever + OPTS+=(-n "${2:?$1 requires a value}") + shift 2 + ;; + --ntasks=?*) + OPTS+=(-n "${1:9}") + shift 1 + ;; + *) + CMDS+=("$1") + shift + ;; + esac +done + +# Run with mpirun +exec mpirun "${OPTS[@]}" "${CMDS[@]}" diff --git a/ush/set_gridparams_ESGgrid.py b/ush/set_gridparams_ESGgrid.py index 3346fe0bfd..cf8ddb9ff8 100644 --- a/ush/set_gridparams_ESGgrid.py +++ b/ush/set_gridparams_ESGgrid.py @@ -90,7 +90,7 @@ def test_set_gridparams_ESGgrid(self): dely=3000.0, constants=dict( RADIUS_EARTH=6371200.0, - DEGS_PER_RADIAN=57.2957795131, + DEGS_PER_RADIAN=57.29577951308232087679, ), ) @@ -104,8 +104,8 @@ def test_set_gridparams_ESGgrid(self): 0.0, 6, 0.999, - 0.013489400626200717, - 0.013489400626200717, + 0.013489400626196555, + 0.013489400626196555, -1760, -1050, ],