Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Loki: Small fixes and clean-up; EC_PMON removal #111

Merged
merged 5 commits into from
Jan 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion arch/ecmwf/hpc2020/gnu/11.2.0/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ module_load gcc/11.2.0
module_load hpcx-openmpi/2.10.0
module_load boost/1.71.0
module_load hdf5/1.10.6
module_load cmake/3.20.2
module_load cmake/3.25.2
module_load python3/3.8.8-01
module_load java/11.0.6

Expand Down
2 changes: 1 addition & 1 deletion arch/ecmwf/hpc2020/gnu/9.3.0/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ module_load gcc/9.3.0
module_load openmpi/4.1.1.1
module_load boost/1.71.0
module_load hdf5/1.10.6
module_load cmake/3.20.2
module_load cmake/3.25.2
module_load python3/3.8.8-01
module_load java/11.0.6

Expand Down
2 changes: 1 addition & 1 deletion bundle.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ projects :

- field_api :
git : https://github.com/ecmwf-ifs/field_api.git
version : v0.3.3
version : v0.3.4
require : ecbuild
cmake : >
UTIL_MODULE_PATH=${CMAKE_SOURCE_DIR}/cloudsc-dwarf/src/common/module
Expand Down
31 changes: 2 additions & 29 deletions src/cloudsc_fortran/cloudsc_driver_field_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ MODULE CLOUDSC_DRIVER_FIELD_MOD
USE YOECLDP, ONLY : NCLV
USE CLOUDSC_MPI_MOD, ONLY: NUMPROC, IRANK
USE TIMER_MOD, ONLY : PERFORMANCE_TIMER, GET_THREAD_NUM
USE EC_PMON_MOD, ONLY: EC_PMON
USE CLOUDSC_FIELD_STATE_MOD, ONLY: CLOUDSC_AUX_TYPE, CLOUDSC_FLUX_TYPE, CLOUDSC_STATE_TYPE

IMPLICIT NONE
Expand Down Expand Up @@ -44,16 +43,6 @@ SUBROUTINE CLOUDSC_DRIVER_FIELD( &

TYPE(PERFORMANCE_TIMER) :: TIMER
INTEGER(KIND=JPIM) :: TID ! thread id from 0 .. NUMOMP - 1
INTEGER(KIND=JPIB) :: ENERGY, POWER, POWER_TOTAL, POWER_MAX, POWER_COUNT
LOGICAL :: LEC_PMON = .FALSE.
CHARACTER(LEN=1) :: CLEC_PMON

CALL GET_ENVIRONMENT_VARIABLE('EC_PMON', CLEC_PMON)
IF (CLEC_PMON == '1') LEC_PMON = .TRUE.

POWER_MAX = 0_JPIB
POWER_TOTAL = 0_JPIB
POWER_COUNT = 0_JPIB

NGPBLKS = (NGPTOT / NPROMA) + MIN(MOD(NGPTOT,NPROMA), 1)
1003 format(5x,'NUMPROC=',i0,', NUMOMP=',i0,', NGPTOTG=',i0,', NPROMA=',i0,', NGPBLKS=',i0)
Expand All @@ -64,14 +53,14 @@ SUBROUTINE CLOUDSC_DRIVER_FIELD( &
! Global timer for the parallel region
CALL TIMER%START(NUMOMP)

!$omp parallel default(shared) private(JKGLO,IBL,ICEND,TID,energy,power) &
!$omp parallel default(shared) private(JKGLO,IBL,ICEND,TID) &
!$omp& num_threads(NUMOMP) firstprivate(PAUX, FLUX, TENDENCY_TMP, TENDENCY_LOC)

! Local timer for each thread
TID = GET_THREAD_NUM()
CALL TIMER%THREAD_START(TID)

!$omp do schedule(runtime) reduction(+:power_total,power_count) reduction(max:power_max)
!$omp do schedule(runtime)
DO JKGLO=1,NGPTOT,NPROMA
IBL=(JKGLO-1)/NPROMA+1
ICEND=MIN(NPROMA,NGPTOT-JKGLO+1)
Expand Down Expand Up @@ -115,16 +104,6 @@ SUBROUTINE CLOUDSC_DRIVER_FIELD( &
& FLUX%PFPLSL, FLUX%PFPLSN, FLUX%PFHPSL, FLUX%PFHPSN, KFLDX, &
& YDOMCST, YDOETHF, YDECLDP)

IF (LEC_PMON) THEN
! Sample power consumption
IF (MOD(IBL, 100) == 0) THEN
CALL EC_PMON(ENERGY, POWER)
POWER_MAX = MAX(POWER_MAX, POWER)
POWER_TOTAL = POWER_TOTAL + POWER
POWER_COUNT = POWER_COUNT + 1
END IF
END IF

! Log number of columns processed by this thread
CALL TIMER%THREAD_LOG(TID, IGPC=ICEND)
ENDDO
Expand All @@ -140,12 +119,6 @@ SUBROUTINE CLOUDSC_DRIVER_FIELD( &
CALL TIMER%END()

CALL TIMER%PRINT_PERFORMANCE(NPROMA, NGPBLKS, NGPTOT)

IF (LEC_PMON) THEN
print *, "Power usage (sampled):: max: ", POWER_MAX, "avg:", &
& (REAL(POWER_TOTAL, KIND=JPRD) / REAL(POWER_COUNT, KIND=JPRD)), &
& "count:", POWER_COUNT
END IF

END SUBROUTINE CLOUDSC_DRIVER_FIELD

Expand Down
31 changes: 2 additions & 29 deletions src/cloudsc_fortran/cloudsc_driver_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ MODULE CLOUDSC_DRIVER_MOD
USE YOECLDP, ONLY : NCLV
USE CLOUDSC_MPI_MOD, ONLY: NUMPROC, IRANK
USE TIMER_MOD, ONLY : PERFORMANCE_TIMER, GET_THREAD_NUM
USE EC_PMON_MOD, ONLY: EC_PMON

IMPLICIT NONE

Expand Down Expand Up @@ -102,21 +101,11 @@ SUBROUTINE CLOUDSC_DRIVER( &

TYPE(PERFORMANCE_TIMER) :: TIMER
INTEGER(KIND=JPIM) :: TID ! thread id from 0 .. NUMOMP - 1
INTEGER(KIND=JPIB) :: ENERGY, POWER, POWER_TOTAL, POWER_MAX, POWER_COUNT
LOGICAL :: LEC_PMON = .FALSE.
CHARACTER(LEN=1) :: CLEC_PMON

TYPE(TOMCST) :: YDOMCST
TYPE(TOETHF) :: YDOETHF
TYPE(TECLDP) :: YDECLDP

CALL GET_ENVIRONMENT_VARIABLE('EC_PMON', CLEC_PMON)
IF (CLEC_PMON == '1') LEC_PMON = .TRUE.

POWER_MAX = 0_JPIB
POWER_TOTAL = 0_JPIB
POWER_COUNT = 0_JPIB

NGPBLKS = (NGPTOT / NPROMA) + MIN(MOD(NGPTOT,NPROMA), 1)
1003 format(5x,'NUMPROC=',i0,', NUMOMP=',i0,', NGPTOTG=',i0,', NPROMA=',i0,', NGPBLKS=',i0)
if (irank == 0) then
Expand All @@ -126,14 +115,14 @@ SUBROUTINE CLOUDSC_DRIVER( &
! Global timer for the parallel region
CALL TIMER%START(NUMOMP)

!$omp parallel default(shared) private(JKGLO,IBL,ICEND,TID,energy,power) &
!$omp parallel default(shared) private(JKGLO,IBL,ICEND,TID) &
!$omp& num_threads(NUMOMP)

! Local timer for each thread
TID = GET_THREAD_NUM()
CALL TIMER%THREAD_START(TID)

!$omp do schedule(runtime) reduction(+:power_total,power_count) reduction(max:power_max)
!$omp do schedule(runtime)
DO JKGLO=1,NGPTOT,NPROMA
IBL=(JKGLO-1)/NPROMA+1
ICEND=MIN(NPROMA,NGPTOT-JKGLO+1)
Expand Down Expand Up @@ -169,16 +158,6 @@ SUBROUTINE CLOUDSC_DRIVER( &
& KFLDX, &
& YDOMCST, YDOETHF, YDECLDP)

IF (LEC_PMON) THEN
! Sample power consumption
IF (MOD(IBL, 100) == 0) THEN
CALL EC_PMON(ENERGY, POWER)
POWER_MAX = MAX(POWER_MAX, POWER)
POWER_TOTAL = POWER_TOTAL + POWER
POWER_COUNT = POWER_COUNT + 1
END IF
END IF

! Log number of columns processed by this thread
CALL TIMER%THREAD_LOG(TID, IGPC=ICEND)
ENDDO
Expand All @@ -194,12 +173,6 @@ SUBROUTINE CLOUDSC_DRIVER( &
CALL TIMER%END()

CALL TIMER%PRINT_PERFORMANCE(NPROMA, NGPBLKS, NGPTOT)

IF (LEC_PMON) THEN
print *, "Power usage (sampled):: max: ", POWER_MAX, "avg:", &
& (REAL(POWER_TOTAL, KIND=JPRD) / REAL(POWER_COUNT, KIND=JPRD)), &
& "count:", POWER_COUNT
END IF

END SUBROUTINE CLOUDSC_DRIVER

Expand Down
15 changes: 3 additions & 12 deletions src/cloudsc_fortran/dwarf_cloudsc.F90
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@ PROGRAM DWARF_CLOUDSC

USE PARKIND1, ONLY: JPIM, JPIB
USE CLOUDSC_MPI_MOD, ONLY: CLOUDSC_MPI_INIT, CLOUDSC_MPI_END, NUMPROC, IRANK
USE CLOUDSC_GLOBAL_STATE_MOD, ONLY: CLOUDSC_GLOBAL_STATE
USE CLOUDSC_DRIVER_MOD, ONLY: CLOUDSC_DRIVER
USE EC_PMON_MOD, ONLY: EC_PMON

USE YOECLDP , ONLY : YRECLDP
USE YOMCST , ONLY : YRCST
Expand All @@ -26,6 +23,9 @@ PROGRAM DWARF_CLOUDSC
#ifdef CLOUDSC_FIELD
USE CLOUDSC_FIELD_STATE_MOD, ONLY: CLOUDSC_FIELD_STATE
USE CLOUDSC_DRIVER_FIELD_MOD, ONLY: CLOUDSC_DRIVER_FIELD
#else
USE CLOUDSC_GLOBAL_STATE_MOD, ONLY: CLOUDSC_GLOBAL_STATE
USE CLOUDSC_DRIVER_MOD, ONLY: CLOUDSC_DRIVER
#endif

IMPLICIT NONE
Expand All @@ -40,21 +40,12 @@ PROGRAM DWARF_CLOUDSC
INTEGER(KIND=JPIM) :: NPROMA = 32 ! NPROMA blocking factor (currently active)
INTEGER(KIND=JPIM) :: NGPTOT ! Local number of grid points

INTEGER(KIND=JPIB) :: ENERGY, POWER
CHARACTER(LEN=1) :: CLEC_PMON

#ifdef CLOUDSC_FIELD
TYPE(CLOUDSC_FIELD_STATE) :: GLOBAL_STATE
#else
TYPE(CLOUDSC_GLOBAL_STATE) :: GLOBAL_STATE
#endif

CALL GET_ENVIRONMENT_VARIABLE('EC_PMON', CLEC_PMON)
IF (CLEC_PMON == '1') THEN
CALL EC_PMON(ENERGY, POWER)
print *, "EC_PMON:: Initial (idle) power: ", POWER
END IF

IARGS = COMMAND_ARGUMENT_COUNT()

! Get the number of OpenMP threads to use for the benchmark
Expand Down
4 changes: 0 additions & 4 deletions src/cloudsc_loki/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -703,8 +703,6 @@ if( HAVE_CUDA )
${COMMON_MODULE}/yoecldp.F90
INCLUDES ${COMMON_INCLUDE}
DEFINITIONS CLOUDSC_GPU_TIMING
DATA_OFFLOAD
REMOVE_OPENMP
XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR}
BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-cuf-parametrise
OUTPUT
Expand Down Expand Up @@ -758,8 +756,6 @@ if( HAVE_CUDA )
${COMMON_MODULE}/yoecldp.F90
INCLUDES ${COMMON_INCLUDE}
DEFINITIONS CLOUDSC_GPU_TIMING
DATA_OFFLOAD
REMOVE_OPENMP
XMOD ${_TARGET_XMOD_DIR} ${XMOD_DIR}
BUILDDIR ${CMAKE_CURRENT_BINARY_DIR}/loki-scc-cuf-hoist
OUTPUT
Expand Down