From 178e48d0e758e6f3d3f98dbd06d7383338d75ccb Mon Sep 17 00:00:00 2001 From: lsampson Date: Thu, 13 Apr 2023 09:32:15 +0000 Subject: [PATCH 1/4] Initial regular grid GPU porting effort. --- model/src/w3pro2md.F90 | 54 +++++++++++++++++++++++++++++++++++++++--- model/src/w3uno2md.F90 | 17 +++++++++++++ model/src/w3wavemd.F90 | 6 +++++ 3 files changed, 74 insertions(+), 3 deletions(-) diff --git a/model/src/w3pro2md.F90 b/model/src/w3pro2md.F90 index 4ae31f242..79d4a5efa 100644 --- a/model/src/w3pro2md.F90 +++ b/model/src/w3pro2md.F90 @@ -814,6 +814,12 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) WRITE (NDST,9010) #endif ! +#ifdef W3_GPU +!$ACC DATA CREATE(VLCFLY, VLCFLX, VDXX, VDYY, VDXY, CXTOT, CYTOT) & +!$ACC CREATE(VFDIFX_FAC, VFDIFY_FAC, VFDIFC_FAC, VQ_OLD) & +!$ACC CREATE(HQFAC, HPFAC) +!$ACC KERNELS +#endif VLCFLX = 0. VLCFLY = 0. VFDIFX = 0. @@ -838,6 +844,8 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! #ifdef W3_OMPH !$OMP PARALLEL DO PRIVATE (ISEA, IXY) +#elif W3_GPU + !$ACC LOOP INDEPENDENT #endif ! DO ISEA=1, NSEA @@ -858,11 +866,17 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! #ifdef W3_OMPH !$OMP END PARALLEL DO +#elif W3_GPU + !$ACC END KERNELS #endif ! IF ( FLCUR ) THEN #ifdef W3_T WRITE (NDST,9022) +#endif +#ifdef W3_GPU +!$ACC KERNELS +!$ACC LOOP INDEPENDENT #endif DO ISEA=1, NSEA IXY = MAPSF(ISEA,3) @@ -873,11 +887,17 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) VQ(IXY), CXTOT(IXY), CYTOT(IXY) #endif END DO +#ifdef W3_GPU +!$ACC END KERNELS +#endif END IF ! #ifdef W3_OMPH !$OMP PARALLEL DO PRIVATE (ISEA, IX, IY, IXY, CP, CQ) +#elif W3_GPU +!$ACC KERNELS +!$ACC LOOP INDEPENDENT #endif ! DO ISEA=1, NSEA @@ -892,6 +912,8 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! #ifdef W3_OMPH !$OMP END PARALLEL DO +#elif W3_GPU + !$ACC END KERNELS #endif ! ! 
2.b Diffusion coefficients @@ -901,6 +923,9 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) #ifdef W3_OMPH !$OMP PARALLEL DO PRIVATE (ISEA, IX, IY, IXY, & !$OMP& DCELL, XWIND, TFAC, DSS, DNN) +#elif W3_GPU + !$ACC KERNELS + !$ACC LOOP INDEPENDENT #endif ! DO ISEA=1, NSEA @@ -937,6 +962,8 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! #ifdef W3_OMPH !$OMP END PARALLEL DO +#elif W3_GPU + !$ACC END KERNELS #endif ! END IF @@ -949,8 +976,10 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! #ifdef W3_OMPH !$OMP PARALLEL DO PRIVATE (ISEA, IX, IY, IXY ) +#elif W3_GPU + !$ACC KERNELS + !$ACC LOOP INDEPENDENT #endif - ! DO ISEA=1, NSEA IX = MAPSF(ISEA,1) IY = MAPSF(ISEA,2) @@ -960,6 +989,8 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! #ifdef W3_OMPH !$OMP END PARALLEL DO +#elif W3_GPU + !$ACC END KERNELS #endif ! IF ( YFIRST ) THEN @@ -1014,6 +1045,9 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! #ifdef W3_OMPH !$OMP PARALLEL DO PRIVATE (ISEA, IX, IY, IXY ) +#elif W3_GPU + !$ACC KERNELS + !$ACC LOOP INDEPENDENT #endif ! DO ISEA=1, NSEA @@ -1047,11 +1081,16 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) / CG(IK,ISEA) * CLATS(ISEA) END DO END IF +#ifdef W3_GPU + !$ACC END KERNELS +#endif ! ! 3.c Diffusion correction ! IF ( DTME .NE. 0. ) THEN - +#ifdef W3_GPU + !$ACC KERNELS +#endif IF ( GLOBAL ) THEN DO IY=1, NY VQ(IY+NX*NY) = VQ(IY) @@ -1088,8 +1127,9 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) #ifdef W3_OMPH !$OMP PARALLEL DO PRIVATE (ISEA, IX, IY, IXY, & !$OMP& QXX, QYY, QXY, DVQ ) +#elif W3_GPU + !$ACC LOOP INDEPENDENT #endif - ! DO IP=1, NACT IXY = MAPAXY(IP) ISEA = MAPFS(IXY) @@ -1159,6 +1199,8 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! #ifdef W3_OMPH !$OMP END PARALLEL DO +#elif W3_GPU + !$ACC END KERNELS #endif ! END IF @@ -1174,6 +1216,9 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! 
#ifdef W3_OMPH !$OMP PARALLEL DO PRIVATE (ISEA, IXY ) +#elif W3_GPU + !$ACC KERNELS + !$ACC LOOP INDEPENDENT #endif ! DO ISEA=1, NSEA @@ -1190,6 +1235,9 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY ) ! #ifdef W3_OMPH !$OMP END PARALLEL DO +#elif W3_GPU + !$ACC END KERNELS + !$ACC END DATA #endif ! RETURN diff --git a/model/src/w3uno2md.F90 b/model/src/w3uno2md.F90 index cb8fce7fc..93ff743d8 100644 --- a/model/src/w3uno2md.F90 +++ b/model/src/w3uno2md.F90 @@ -983,6 +983,10 @@ SUBROUTINE W3UNO2s (MX, MY, NX, NY, CFLL, TRANS, Q, BCLOSE, & ! ! 1. Initialize aux. array FLA and closure ------------------------- * ! +#ifdef W3_GPU + !$ACC DATA CREATE(FLA) + !$ACC KERNELS +#endif FLA = 0. ! IF ( BCLOSE ) THEN @@ -1026,6 +1030,8 @@ SUBROUTINE W3UNO2s (MX, MY, NX, NY, CFLL, TRANS, Q, BCLOSE, & !$OMP QBO, IX, IY, IY2, IX2, QN & #endif !$OMP IXYC, IXYD, QB) +#elif W3_GPU + !$ACC LOOP INDEPENDENT #endif ! DO IP=1, NB0 @@ -1073,6 +1079,9 @@ SUBROUTINE W3UNO2s (MX, MY, NX, NY, CFLL, TRANS, Q, BCLOSE, & WRITE (NDST,9011) NB1-NB0, 'BOUNDARY ABOVE' #endif ! +#ifdef W3_GPU + !$ACC LOOP INDEPENDENT +#endif DO IP=NB0+1, NB1 IXY = MAPBOU(IP) CFL = CFLL(IXY) @@ -1099,6 +1108,9 @@ SUBROUTINE W3UNO2s (MX, MY, NX, NY, CFLL, TRANS, Q, BCLOSE, & WRITE (NDST,9011) NB2-NB1, 'BOUNDARY BELOW' #endif ! +#ifdef W3_GPU + !$ACC LOOP INDEPENDENT +#endif DO IP=NB1+1, NB2 IXY = MAPBOU(IP) CFL = CFLL(IXY+INC) @@ -1141,6 +1153,8 @@ SUBROUTINE W3UNO2s (MX, MY, NX, NY, CFLL, TRANS, Q, BCLOSE, & !$OMP PRIVATE(QOLD), & #endif !$OMP PRIVATE (IP, IXY, JN, JP) +#elif W3_GPU + !$ACC LOOP INDEPENDENT #endif ! DO IP=1, NACT @@ -1172,6 +1186,9 @@ SUBROUTINE W3UNO2s (MX, MY, NX, NY, CFLL, TRANS, Q, BCLOSE, & ! #ifdef W3_OMPH !$OMP END PARALLEL DO +#elif W3_GPU + !$ACC END KERNELS + !$ACC END DATA #endif ! 
#ifdef W3_T0 diff --git a/model/src/w3wavemd.F90 b/model/src/w3wavemd.F90 index fb29f9152..41067fa00 100644 --- a/model/src/w3wavemd.F90 +++ b/model/src/w3wavemd.F90 @@ -1853,6 +1853,9 @@ SUBROUTINE W3WAVE ( IMOD, ODAT, TEND, STAMP, NO_OUT & ! ! ! Initialize FIELD variable +#ifdef W3_GPU +!$ACC UPDATE DEVICE(VA) +#endif FIELD = 0. ! DO ISPEC=1, NSPEC @@ -1919,6 +1922,9 @@ SUBROUTINE W3WAVE ( IMOD, ODAT, TEND, STAMP, NO_OUT & END IF END DO +#ifdef W3_GPU +!$ACC UPDATE SELF(VA) +#endif ! #ifdef W3_MPI IF ( NRQSG1 .GT. 0 ) THEN From 35455e6ecb34442c0c8584f6fe7807090b01f437 Mon Sep 17 00:00:00 2001 From: lsampson Date: Thu, 13 Apr 2023 09:57:32 +0000 Subject: [PATCH 2/4] Added switches for GPU regression tests --- regtests/ww3_tp2.2/input/switch_PR2_UNO_GPU | 1 + regtests/ww3_tp2.2/input/switch_PR2_UNO_MPI_GPU | 1 + regtests/ww3_tp2.3/input/switch_PR2_UNO_GPU | 1 + regtests/ww3_tp2.3/input/switch_PR2_UNO_MPI_GPU | 1 + regtests/ww3_tp2.4/input/switch_PR2_UNO_GPU | 1 + regtests/ww3_tp2.4/input/switch_PR2_UNO_MPI_GPU | 1 + 6 files changed, 6 insertions(+) create mode 100644 regtests/ww3_tp2.2/input/switch_PR2_UNO_GPU create mode 100644 regtests/ww3_tp2.2/input/switch_PR2_UNO_MPI_GPU create mode 100644 regtests/ww3_tp2.3/input/switch_PR2_UNO_GPU create mode 100644 regtests/ww3_tp2.3/input/switch_PR2_UNO_MPI_GPU create mode 100644 regtests/ww3_tp2.4/input/switch_PR2_UNO_GPU create mode 100644 regtests/ww3_tp2.4/input/switch_PR2_UNO_MPI_GPU diff --git a/regtests/ww3_tp2.2/input/switch_PR2_UNO_GPU b/regtests/ww3_tp2.2/input/switch_PR2_UNO_GPU new file mode 100644 index 000000000..733d91d56 --- /dev/null +++ b/regtests/ww3_tp2.2/input/switch_PR2_UNO_GPU @@ -0,0 +1 @@ +NOGRB SHRD GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11 diff --git a/regtests/ww3_tp2.2/input/switch_PR2_UNO_MPI_GPU b/regtests/ww3_tp2.2/input/switch_PR2_UNO_MPI_GPU new file mode 100644 index 000000000..21c790cc2 --- /dev/null +++ 
b/regtests/ww3_tp2.2/input/switch_PR2_UNO_MPI_GPU @@ -0,0 +1 @@ +NOGRB DIST MPI GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11 diff --git a/regtests/ww3_tp2.3/input/switch_PR2_UNO_GPU b/regtests/ww3_tp2.3/input/switch_PR2_UNO_GPU new file mode 100644 index 000000000..733d91d56 --- /dev/null +++ b/regtests/ww3_tp2.3/input/switch_PR2_UNO_GPU @@ -0,0 +1 @@ +NOGRB SHRD GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11 diff --git a/regtests/ww3_tp2.3/input/switch_PR2_UNO_MPI_GPU b/regtests/ww3_tp2.3/input/switch_PR2_UNO_MPI_GPU new file mode 100644 index 000000000..21c790cc2 --- /dev/null +++ b/regtests/ww3_tp2.3/input/switch_PR2_UNO_MPI_GPU @@ -0,0 +1 @@ +NOGRB DIST MPI GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11 diff --git a/regtests/ww3_tp2.4/input/switch_PR2_UNO_GPU b/regtests/ww3_tp2.4/input/switch_PR2_UNO_GPU new file mode 100644 index 000000000..733d91d56 --- /dev/null +++ b/regtests/ww3_tp2.4/input/switch_PR2_UNO_GPU @@ -0,0 +1 @@ +NOGRB SHRD GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11 diff --git a/regtests/ww3_tp2.4/input/switch_PR2_UNO_MPI_GPU b/regtests/ww3_tp2.4/input/switch_PR2_UNO_MPI_GPU new file mode 100644 index 000000000..21c790cc2 --- /dev/null +++ b/regtests/ww3_tp2.4/input/switch_PR2_UNO_MPI_GPU @@ -0,0 +1 @@ +NOGRB DIST MPI GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11 From e4559784d1d4ed23ccf42024a1ce1de9bcb4a22e Mon Sep 17 00:00:00 2001 From: UKMO-lsampson Date: Thu, 13 Apr 2023 10:10:21 +0000 Subject: [PATCH 3/4] Added fourth regression test switch file --- regtests/ww3_tp2.1/input/switch_PR2_UNO_GPU | 1 + regtests/ww3_tp2.1/input/switch_PR2_UNO_MPI_GPU | 1 + 2 files changed, 2 insertions(+) create mode 100644 
regtests/ww3_tp2.1/input/switch_PR2_UNO_GPU create mode 100644 regtests/ww3_tp2.1/input/switch_PR2_UNO_MPI_GPU diff --git a/regtests/ww3_tp2.1/input/switch_PR2_UNO_GPU b/regtests/ww3_tp2.1/input/switch_PR2_UNO_GPU new file mode 100644 index 000000000..733d91d56 --- /dev/null +++ b/regtests/ww3_tp2.1/input/switch_PR2_UNO_GPU @@ -0,0 +1 @@ +NOGRB SHRD GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11 diff --git a/regtests/ww3_tp2.1/input/switch_PR2_UNO_MPI_GPU b/regtests/ww3_tp2.1/input/switch_PR2_UNO_MPI_GPU new file mode 100644 index 000000000..21c790cc2 --- /dev/null +++ b/regtests/ww3_tp2.1/input/switch_PR2_UNO_MPI_GPU @@ -0,0 +1 @@ +NOGRB DIST MPI GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11 From 736b67910b0af683430f1a3072f8470c3ca8342c Mon Sep 17 00:00:00 2001 From: UKMO-lsampson Date: Thu, 13 Apr 2023 10:23:01 +0000 Subject: [PATCH 4/4] Updated matrix.base for regular grid propagation GPU tests --- regtests/bin/matrix.base | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/regtests/bin/matrix.base b/regtests/bin/matrix.base index e8d3e69dc..59da45feb 100755 --- a/regtests/bin/matrix.base +++ b/regtests/bin/matrix.base @@ -262,6 +262,10 @@ echo "$rtst -s PR2_UNO -w work_PR2_UNO $ww3 ww3_tp2.2" >> matrix.body echo "$rtst -s PR2_UNO -w work_PR2_UNO $ww3 ww3_tp2.3" >> matrix.body echo "$rtst -s PR2_UNO -w work_PR2_UNO $ww3 ww3_tp2.4" >> matrix.body + echo "$rtst -s PR2_UNO_GPU -w work_PR2_UNO_GPU $ww3 ww3_tp2.1" >> matrix.body + echo "$rtst -s PR2_UNO_GPU -w work_PR2_UNO_GPU $ww3 ww3_tp2.2" >> matrix.body + echo "$rtst -s PR2_UNO_GPU -w work_PR2_UNO_GPU $ww3 ww3_tp2.3" >> matrix.body + echo "$rtst -s PR2_UNO_GPU -w work_PR2_UNO_GPU $ww3 ww3_tp2.4" >> matrix.body echo "$rtst -s PR2_UNO -w work_PR2_UNO_curv -g curv $ww3 ww3_tp2.4" >> matrix.body echo "$rtst -s PR2_UNO -w work_PR2_UNO $ww3 ww3_tp2.5" >> matrix.body echo "$rtst -s PR2_UNO 
-w work_PR2_UNO_a -g a $ww3 ww3_tp2.9" >> matrix.body @@ -311,6 +315,10 @@ echo "$rtst -s PR2_UNO_MPI -w work_PR2_UNO_MPI -f -p $mpi -n $np $ww3 ww3_tp2.2" >> matrix.body echo "$rtst -s PR2_UNO_MPI -w work_PR2_UNO_MPI -f -p $mpi -n $np $ww3 ww3_tp2.3" >> matrix.body echo "$rtst -s PR2_UNO_MPI -w work_PR2_UNO_MPI -f -p $mpi -n $np $ww3 ww3_tp2.4" >> matrix.body + echo "$rtst -s PR2_UNO_MPI_GPU -w work_PR2_UNO_MPI_GPU -f -p $mpi -n $np $ww3 ww3_tp2.1" >> matrix.body + echo "$rtst -s PR2_UNO_MPI_GPU -w work_PR2_UNO_MPI_GPU -f -p $mpi -n $np $ww3 ww3_tp2.2" >> matrix.body + echo "$rtst -s PR2_UNO_MPI_GPU -w work_PR2_UNO_MPI_GPU -f -p $mpi -n $np $ww3 ww3_tp2.3" >> matrix.body + echo "$rtst -s PR2_UNO_MPI_GPU -w work_PR2_UNO_MPI_GPU -f -p $mpi -n $np $ww3 ww3_tp2.4" >> matrix.body echo "$rtst -s PR2_UNO_MPI -w work_PR2_UNO_curv_MPI -g curv -f -p $mpi -n $np $ww3 ww3_tp2.4" >> matrix.body echo "$rtst -s PR2_UNO_MPI -w work_PR2_UNO_MPI -f -p $mpi -n $np $ww3 ww3_tp2.5" >> matrix.body echo "$rtst -s PR2_UNO_MPI -w work_PR2_UNO_a_MPI -g a -f -p $mpi -n $np $ww3 ww3_tp2.9" >> matrix.body