Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support global-workflow using Rocky 8 on CSPs #2998

Open
wants to merge 25 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
64d4c81
using Rocky 8 on CSPs
weihuang-jedi Oct 2, 2024
ac435a2
compiled and run G-W with Rocky 8
weihuang-jedi Oct 8, 2024
b337ef4
Merge branch 'NOAA-EMC:develop' into csps-rocky8
weihuang-jedi Oct 10, 2024
d5f755c
Merge branch 'NOAA-EMC:develop' into csps-rocky8
weihuang-jedi Oct 30, 2024
56353c2
Merge branch 'NOAA-EMC:develop' into csps-rocky8
weihuang-jedi Nov 7, 2024
ef08872
Merge branch 'NOAA-EMC:develop' into csps-rocky8
weihuang-jedi Nov 12, 2024
feb2c2a
update submodules
weihuang-jedi Nov 14, 2024
fb19458
Merge branch 'csps-rocky8' of github.com:NOAA-EPIC/global-workflow-cl…
weihuang-jedi Nov 14, 2024
6785524
Merge branch 'NOAA-EMC:develop' into csps-rocky8
weihuang-jedi Nov 14, 2024
0502044
Merge branch 'NOAA-EMC:develop' into csps-rocky8
weihuang-jedi Nov 15, 2024
b078484
make AWS use different partitions for fcst and others
weihuang-jedi Nov 21, 2024
fb80afd
Merge branch 'NOAA-EMC:develop' into csps-rocky8
weihuang-jedi Nov 21, 2024
cf62b9b
sync with develop
weihuang-jedi Nov 21, 2024
86b973e
updates at Azure
weihuang-jedi Nov 21, 2024
3e0d031
Merge branch 'csps-rocky8' of github.com:NOAA-EPIC/global-workflow-cl…
weihuang-jedi Nov 21, 2024
944cfb3
GCP update to Rocky8
weihuang-jedi Nov 21, 2024
bc7c970
sync module versions
weihuang-jedi Nov 21, 2024
e97088d
update azure resource
weihuang-jedi Nov 21, 2024
3c85ff0
update azure resource
weihuang-jedi Nov 21, 2024
19e75a3
update gcp resource
weihuang-jedi Nov 21, 2024
4011950
Merge branch 'csps-rocky8' of github.com:NOAA-EPIC/global-workflow-cl…
weihuang-jedi Nov 21, 2024
fe0f3ee
wave runs fine on AWS for C48_S2SWA_gefs
weihuang-jedi Nov 22, 2024
9469504
Merge branch 'NOAA-EMC:develop' into csps-rocky8
weihuang-jedi Nov 25, 2024
ea07117
move CSPs specific packages to workflow/hosts/
weihuang-jedi Nov 26, 2024
c81c854
remove SUPPORT_WAVE from *pw.yaml, and setup_expt.py as WAVE works on…
weihuang-jedi Nov 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 151 additions & 11 deletions env/AWSPW.env
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,151 @@ else
exit 2
fi

if [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then
if [[ "${step}" = "prep" ]] || [[ "${step}" = "prepbufr" ]]; then

export POE="NO"
export BACK="NO"
export sys_tp="HERA"
export launcher_PREP="srun"

elif [[ "${step}" = "prepsnowobs" ]]; then

export APRUN_CALCFIMS="${APRUN_default}"

elif [[ "${step}" = "prep_emissions" ]]; then

export APRUN="${APRUN_default}"

elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}" = "wavepostsbs" ]] || [[ "${step}" = "wavepostbndpnt" ]] || [[ "${step}" = "wavepostbndpntbll" ]] || [[ "${step}" = "wavepostpnt" ]]; then

export CFP_MP="YES"
if [[ "${step}" = "waveprep" ]]; then export MP_PULSE=0 ; fi
export wavempexec=${launcher}
export wave_mpmd=${mpmd_opt}

elif [[ "${step}" = "atmanlvar" ]]; then

export NTHREADS_ATMANLVAR=${NTHREADSmax}
export APRUN_ATMANLVAR="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLVAR}"

elif [[ "${step}" = "atmensanlletkf" ]]; then

export NTHREADS_ATMENSANLLETKF=${NTHREADSmax}
export APRUN_ATMENSANLLETKF="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLLETKF}"

elif [[ "${step}" = "atmensanlfv3inc" ]]; then

export NTHREADS_ATMENSANLFV3INC=${NTHREADSmax}
export APRUN_ATMENSANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMENSANLFV3INC}"

elif [[ "${step}" = "aeroanlrun" ]]; then

export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}"

export NTHREADS_AEROANL=${NTHREADSmax}
export APRUN_AEROANL="${APRUN_default} --cpus-per-task=${NTHREADS_AEROANL}"

elif [[ "${step}" = "atmanlfv3inc" ]]; then

export NTHREADS_ATMANLFV3INC=${NTHREADSmax}
export APRUN_ATMANLFV3INC="${APRUN_default} --cpus-per-task=${NTHREADS_ATMANLFV3INC}"

elif [[ "${step}" = "prepobsaero" ]]; then

export NTHREADS_PREPOBSAERO=${NTHREADS1}
export APRUN_PREPOBSAERO="${APRUN_default} --cpus-per-task=${NTHREADS_PREPOBSAERO}"

elif [[ "${step}" = "snowanl" ]]; then

export NTHREADS_SNOWANL=${NTHREADSmax}
export APRUN_SNOWANL="${APRUN_default} --cpus-per-task=${NTHREADS_SNOWANL}"

export APRUN_APPLY_INCR="${launcher} -n 6"

elif [[ "${step}" = "marinebmat" ]]; then

# Single marinebmat branch: a second, identical "marinebmat" elif followed
# this one and could never be reached, so it has been removed.
# \$ncmd is escaped so the literal text "$ncmd" is stored in APRUNCFP.
export APRUNCFP="${launcher} -n \$ncmd --multi-prog"
export APRUN_MARINEBMAT="${APRUN_default}"

elif [[ "${step}" = "ocnanalrun" ]]; then

export APRUNCFP="${launcher} -n \$ncmd --multi-prog"

export APRUN_OCNANAL="${APRUN_default}"

elif [[ "${step}" = "ocnanalchkpt" ]]; then

export APRUNCFP="${launcher} -n \$ncmd --multi-prog"

export APRUN_OCNANAL="${APRUN_default}"

elif [[ "${step}" = "ocnanalecen" ]]; then

export NTHREADS_OCNANALECEN=${NTHREADSmax}
export APRUN_OCNANALECEN="${APRUN_default} --cpus-per-task=${NTHREADS_OCNANALECEN}"

elif [[ "${step}" = "marineanalletkf" ]]; then

export NTHREADS_MARINEANALLETKF=${NTHREADSmax}
export APRUN_MARINEANALLETKF="${APRUN_default} --cpus-per-task=${NTHREADS_MARINEANALLETKF}"

elif [[ "${step}" = "anal" ]] || [[ "${step}" = "analcalc" ]]; then

export MKL_NUM_THREADS=4
export MKL_CBWR=AUTO

export CFP_MP=${CFP_MP:-"YES"}
export USE_CFP=${USE_CFP:-"YES"}
export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}"

export NTHREADS_GSI=${NTHREADSmax}
export APRUN_GSI="${APRUN_default} --cpus-per-task=${NTHREADS_GSI}"

export NTHREADS_CALCINC=${threads_per_task_calcinc:-1}
[[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]] && export NTHREADS_CALCINC=${max_threads_per_task}
export APRUN_CALCINC="${launcher} \$ncmd --cpus-per-task=${NTHREADS_CALCINC}"

export NTHREADS_CYCLE=${threads_per_task_cycle:-12}
[[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]] && export NTHREADS_CYCLE=${max_tasks_per_node}
ntasks_cycle=${ntiles:-6}
export APRUN_CYCLE="${launcher} -n ${ntasks_cycle} --cpus-per-task=${NTHREADS_CYCLE}"

export NTHREADS_GAUSFCANL=1
ntasks_gausfcanl=${ntasks_gausfcanl:-1}
export APRUN_GAUSFCANL="${launcher} -n ${ntasks_gausfcanl} --cpus-per-task=${NTHREADS_GAUSFCANL}"

elif [[ "${step}" = "sfcanl" ]]; then

export NTHREADS_CYCLE=${threads_per_task:-14}
export APRUN_CYCLE="${APRUN_default} --cpus-per-task=${NTHREADS_CYCLE}"

elif [[ "${step}" = "eobs" ]]; then

export MKL_NUM_THREADS=4
export MKL_CBWR=AUTO

export NTHREADS_GSI=${NTHREADSmax}
export APRUN_GSI="${APRUN_default} --cpus-per-task=${NTHREADS_GSI}"

export CFP_MP=${CFP_MP:-"YES"}
export USE_CFP=${USE_CFP:-"YES"}
export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}"

elif [[ "${step}" = "eupd" ]]; then

export NTHREADS_ENKF=${NTHREADSmax}
export APRUN_ENKF="${launcher} -n ${ntasks_enkf:-${ntasks}} --cpus-per-task=${NTHREADS_ENKF}"

export CFP_MP=${CFP_MP:-"YES"}
export USE_CFP=${USE_CFP:-"YES"}
export APRUNCFP="${launcher} -n \$ncmd ${mpmd_opt}"

elif [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then

export launcher="srun --mpi=pmi2 -l"

Expand All @@ -52,22 +196,18 @@ elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}

elif [[ "${step}" = "post" ]]; then

export NTHREADS_NP=${NTHREADS1}
export APRUN_NP="${APRUN_default}"

export NTHREADS_DWN=${threads_per_task_dwn:-1}
[[ ${NTHREADS_DWN} -gt ${max_threads_per_task} ]] && export NTHREADS_DWN=${max_threads_per_task}
export APRUN_DWN="${launcher} -n ${ntasks_dwn}"

elif [[ "${step}" = "atmos_products" ]]; then

export USE_CFP="YES" # Use MPMD for downstream product generation on Hera
export NTHREADS_UPP=${NTHREADS1}
export APRUN_UPP="${APRUN_default} --cpus-per-task=${NTHREADS_UPP}"

elif [[ "${step}" = "oceanice_products" ]]; then

export NTHREADS_OCNICEPOST=${NTHREADS1}
export APRUN_OCNICEPOST="${launcher} -n 1 --cpus-per-task=${NTHREADS_OCNICEPOST}"

elif [[ "${step}" = "atmos_products" ]]; then

export USE_CFP="YES" # Use MPMD for downstream product generation on Hera

elif [[ "${step}" = "ecen" ]]; then

export NTHREADS_ECEN=${NTHREADSmax}
Expand Down
17 changes: 11 additions & 6 deletions env/AZUREPW.env
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ export mpmd_opt="--multi-prog --output=mpmd.%j.%t.out"
# Configure MPI environment
export OMP_STACKSIZE=2048000
export NTHSTACK=1024000000
export UCX_TLS=ud,sm,self

ulimit -s unlimited
ulimit -a
Expand Down Expand Up @@ -50,6 +51,10 @@ elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}
export wavempexec=${launcher}
export wave_mpmd=${mpmd_opt}

elif [[ "${step}" = "prep_emissions" ]]; then

export APRUN="${APRUN_default}"

elif [[ "${step}" = "post" ]]; then

export NTHREADS_NP=${NTHREADS1}
Expand All @@ -71,33 +76,33 @@ elif [[ "${step}" = "oceanice_products" ]]; then
elif [[ "${step}" = "ecen" ]]; then

export NTHREADS_ECEN=${NTHREADSmax}
export APRUN_ECEN="${APRUN}"
export APRUN_ECEN="${APRUN_default}"

export NTHREADS_CHGRES=${threads_per_task_chgres:-12}
[[ ${NTHREADS_CHGRES} -gt ${max_tasks_per_node} ]] && export NTHREADS_CHGRES=${max_tasks_per_node}
export APRUN_CHGRES="time"

export NTHREADS_CALCINC=${threads_per_task_calcinc:-1}
[[ ${NTHREADS_CALCINC} -gt ${max_threads_per_task} ]] && export NTHREADS_CALCINC=${max_threads_per_task}
export APRUN_CALCINC="${APRUN}"
export APRUN_CALCINC="${APRUN_default}"

elif [[ "${step}" = "esfc" ]]; then

export NTHREADS_ESFC=${NTHREADSmax}
export APRUN_ESFC="${APRUN}"
export APRUN_ESFC="${APRUN_default}"

export NTHREADS_CYCLE=${threads_per_task_cycle:-14}
[[ ${NTHREADS_CYCLE} -gt ${max_tasks_per_node} ]] && export NTHREADS_CYCLE=${max_tasks_per_node}
export APRUN_CYCLE="${APRUN}"
export APRUN_CYCLE="${APRUN_default}"

elif [[ "${step}" = "epos" ]]; then

export NTHREADS_EPOS=${NTHREADSmax}
export APRUN_EPOS="${APRUN}"
export APRUN_EPOS="${APRUN_default}"

elif [[ "${step}" = "fit2obs" ]]; then

export NTHREADS_FIT2OBS=${NTHREADS1}
export MPIRUN="${APRUN}"
export MPIRUN="${APRUN_default}"

fi
4 changes: 2 additions & 2 deletions env/GOOGLEPW.env
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ if [[ "${step}" = "fcst" ]] || [[ "${step}" = "efcs" ]]; then

elif [[ "${step}" = "prep_emissions" ]]; then

export APRUN
export APRUN="${APRUN_default}"

elif [[ "${step}" = "waveinit" ]] || [[ "${step}" = "waveprep" ]] || [[ "${step}" = "wavepostsbs" ]] || [[ "${step}" = "wavepostbndpnt" ]] || [[ "${step}" = "wavepostbndpntbll" ]] || [[ "${step}" = "wavepostpnt" ]]; then

Expand Down Expand Up @@ -102,6 +102,6 @@ elif [[ "${step}" = "epos" ]]; then
elif [[ "${step}" = "fit2obs" ]]; then

export NTHREADS_FIT2OBS=${NTHREADS1}
export MPIRUN="${APRUN}"
export MPIRUN="${APRUN_default}"

fi
3 changes: 3 additions & 0 deletions modulefiles/module_base.noaacloud.lua
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@ Load environment to run GFS on noaacloud
local spack_mod_path=(os.getenv("spack_mod_path") or "None")
prepend_path("MODULEPATH", spack_mod_path)

load("gnu")
weihuang-jedi marked this conversation as resolved.
Show resolved Hide resolved
load(pathJoin("stack-intel", (os.getenv("stack_intel_ver") or "None")))
load(pathJoin("stack-intel-oneapi-mpi", (os.getenv("stack_impi_ver") or "None")))
unload("gnu")

load(pathJoin("python", (os.getenv("python_ver") or "None")))

load(pathJoin("jasper", (os.getenv("jasper_ver") or "None")))
Expand Down
6 changes: 3 additions & 3 deletions modulefiles/module_gwci.noaacloud.lua
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ help([[
Load environment to run GFS workflow setup scripts on noaacloud
]])

prepend_path("MODULEPATH", "/contrib/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core")
prepend_path("MODULEPATH", "/contrib/spack-stack-rocky8/spack-stack-1.6.0/envs/ue-env/install/modulefiles/Core")

load(pathJoin("stack-intel", os.getenv("2021.3.0")))
load(pathJoin("stack-intel-oneapi-mpi", os.getenv("2021.3.0")))
-- Pin the compiler/MPI and netCDF versions directly. os.getenv() expects the
-- NAME of an environment variable; passing a version string to it looked up a
-- variable literally called e.g. "2021.10.0", which normally yields nil, so
-- the intended version was never handed to pathJoin().
load(pathJoin("stack-intel", "2021.10.0"))
load(pathJoin("stack-intel-oneapi-mpi", "2021.10.0"))

load(pathJoin("netcdf-c", "4.9.2"))
load(pathJoin("netcdf-fortran", "4.6.1"))
Expand Down
13 changes: 7 additions & 6 deletions modulefiles/module_gwsetup.noaacloud.lua
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,18 @@ Load environment to run GFS workflow setup scripts on noaacloud

load(pathJoin("rocoto"))

prepend_path("MODULEPATH", "/contrib/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core")
prepend_path("MODULEPATH", "/contrib/spack-stack-rocky8/spack-stack-1.6.0/envs/ue-intel/install/modulefiles/Core")

local stack_intel_ver=os.getenv("stack_intel_ver") or "2021.3.0"
local python_ver=os.getenv("python_ver") or "3.10.3"
load("gnu")
local stack_intel_ver=os.getenv("stack_intel_ver") or "2021.10.0"
local stack_mpi_ver=os.getenv("stack_mpi_ver") or "2021.10.0"

load(pathJoin("stack-intel", stack_intel_ver))
load(pathJoin("python", python_ver))
load(pathJoin("stack-intel-oneapi-mpi", stack_mpi_ver))
unload("gnu")

load("py-jinja2")
load("py-pyyaml")
load("py-numpy")
local git_ver=os.getenv("git_ver") or "1.8.3.1"
load(pathJoin("git", git_ver))

whatis("Description: GFS run setup environment")
6 changes: 3 additions & 3 deletions parm/config/gefs/config.resources
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,15 @@ case ${machine} in
;;
"AWSPW")
export PARTITION_BATCH="compute"
max_tasks_per_node=36
max_tasks_per_node=48
;;
"AZUREPW")
export PARTITION_BATCH="compute"
max_tasks_per_node=24
max_tasks_per_node=36
;;
"GOOGLEPW")
export PARTITION_BATCH="compute"
max_tasks_per_node=32
max_tasks_per_node=30
;;
*)
echo "FATAL ERROR: Unknown machine encountered by ${BASH_SOURCE[0]}"
Expand Down
58 changes: 58 additions & 0 deletions parm/config/gefs/config.resources.AWSPW
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,61 @@ unset memory
for mem_var in $(env | grep '^memory_' | cut -d= -f1); do
unset "${mem_var}"
done

# AWS (AWSPW) per-step resource overrides.
# The job step name is taken from the first positional parameter. Each step is
# mapped to a batch partition (PARTITION_BATCH) and a max_tasks_per_node value;
# prep_emissions and the wave steps additionally export their task layout.
step=$1

# Baseline: applies to "arch" and to any step without an entry below.
export PARTITION_BATCH="process"
max_tasks_per_node=24

case "${step}" in
  "fcst" | "efcs")
    PARTITION_BATCH="compute"
    max_tasks_per_node=48
    ;;

  "arch")
    : # baseline settings apply unchanged
    ;;

  "prep_emissions")
    export ntasks=1 threads_per_task=1
    export tasks_per_node=$(( max_tasks_per_node / threads_per_task ))
    ;;

  "waveinit")
    export ntasks=12 threads_per_task=1
    export tasks_per_node=$(( max_tasks_per_node / threads_per_task ))
    export NTASKS=${ntasks}
    ;;

  "wavepostpnt")
    PARTITION_BATCH="compute"
    max_tasks_per_node=48
    export ntasks=240 threads_per_task=1
    export tasks_per_node=$(( max_tasks_per_node / threads_per_task ))
    export NTASKS=${ntasks}
    ;;

  "wavepostsbs" | "wavepostbndpnt" | "wavepostbndpntbll")
    export ntasks=24 threads_per_task=1
    export tasks_per_node=$(( max_tasks_per_node / threads_per_task ))
    export NTASKS=${ntasks}
    ;;
esac

# Publish the final per-node task limit to child processes.
export max_tasks_per_node

Loading