Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fb reg gpuport #40

Open
wants to merge 15 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 51 additions & 3 deletions model/src/w3pro2md.F90
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,12 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY )
WRITE (NDST,9010)
#endif
!
#ifdef W3_GPU
!$ACC DATA CREATE(VLCFLY, VLCFLX, VDXX, VDYY, VDXY, CXTOT, CYTOT) &
!$ACC CREATE(VFDIFX_FAC, VFDIFY_FAC, VFDIFC_FAC, VQ_OLD) &
!$ACC CREATE(HQFAC, HPFAC)
!$ACC KERNELS
#endif
VLCFLX = 0.
VLCFLY = 0.
VFDIFX = 0.
Expand All @@ -838,6 +844,8 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY )
!
#ifdef W3_OMPH
!$OMP PARALLEL DO PRIVATE (ISEA, IXY)
#elif W3_GPU
!$ACC LOOP INDEPENDENT
#endif
!
DO ISEA=1, NSEA
Expand All @@ -858,11 +866,17 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY )
!
#ifdef W3_OMPH
!$OMP END PARALLEL DO
#elif W3_GPU
!$ACC END KERNELS
#endif
!
IF ( FLCUR ) THEN
#ifdef W3_T
WRITE (NDST,9022)
#endif
#ifdef W3_GPU
!$ACC KERNELS
!$ACC LOOP INDEPENDENT
#endif
DO ISEA=1, NSEA
IXY = MAPSF(ISEA,3)
Expand All @@ -873,11 +887,17 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY )
VQ(IXY), CXTOT(IXY), CYTOT(IXY)
#endif
END DO
#ifdef W3_GPU
!$ACC END KERNELS
#endif
END IF

!
#ifdef W3_OMPH
!$OMP PARALLEL DO PRIVATE (ISEA, IX, IY, IXY, CP, CQ)
#elif W3_GPU
!$ACC KERNELS
!$ACC LOOP INDEPENDENT
#endif
!
DO ISEA=1, NSEA
Expand All @@ -892,6 +912,8 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY )
!
#ifdef W3_OMPH
!$OMP END PARALLEL DO
#elif W3_GPU
!$ACC END KERNELS
#endif
!
! 2.b Diffusion coefficients
Expand All @@ -901,6 +923,9 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY )
#ifdef W3_OMPH
!$OMP PARALLEL DO PRIVATE (ISEA, IX, IY, IXY, &
!$OMP& DCELL, XWIND, TFAC, DSS, DNN)
#elif W3_GPU
!$ACC KERNELS
!$ACC LOOP INDEPENDENT
#endif
!
DO ISEA=1, NSEA
Expand Down Expand Up @@ -937,6 +962,8 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY )
!
#ifdef W3_OMPH
!$OMP END PARALLEL DO
#elif W3_GPU
!$ACC END KERNELS
#endif
!
END IF
Expand All @@ -949,8 +976,10 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY )
!
#ifdef W3_OMPH
!$OMP PARALLEL DO PRIVATE (ISEA, IX, IY, IXY )
#elif W3_GPU
!$ACC KERNELS
!$ACC LOOP INDEPENDENT
#endif
!
DO ISEA=1, NSEA
IX = MAPSF(ISEA,1)
IY = MAPSF(ISEA,2)
Expand All @@ -960,6 +989,8 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY )
!
#ifdef W3_OMPH
!$OMP END PARALLEL DO
#elif W3_GPU
!$ACC END KERNELS
#endif
!
IF ( YFIRST ) THEN
Expand Down Expand Up @@ -1014,6 +1045,9 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY )
!
#ifdef W3_OMPH
!$OMP PARALLEL DO PRIVATE (ISEA, IX, IY, IXY )
#elif W3_GPU
!$ACC KERNELS
!$ACC LOOP INDEPENDENT
#endif
!
DO ISEA=1, NSEA
Expand Down Expand Up @@ -1047,11 +1081,16 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY )
/ CG(IK,ISEA) * CLATS(ISEA)
END DO
END IF
#ifdef W3_GPU
!$ACC END KERNELS
#endif
!
! 3.c Diffusion correction
!
IF ( DTME .NE. 0. ) THEN

#ifdef W3_GPU
!$ACC KERNELS
#endif
IF ( GLOBAL ) THEN
DO IY=1, NY
VQ(IY+NX*NY) = VQ(IY)
Expand Down Expand Up @@ -1088,8 +1127,9 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY )
#ifdef W3_OMPH
!$OMP PARALLEL DO PRIVATE (ISEA, IX, IY, IXY, &
!$OMP& QXX, QYY, QXY, DVQ )
#elif W3_GPU
!$ACC LOOP INDEPENDENT
#endif
!
DO IP=1, NACT
IXY = MAPAXY(IP)
ISEA = MAPFS(IXY)
Expand Down Expand Up @@ -1159,6 +1199,8 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY )
!
#ifdef W3_OMPH
!$OMP END PARALLEL DO
#elif W3_GPU
!$ACC END KERNELS
#endif
!
END IF
Expand All @@ -1174,6 +1216,9 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY )
!
#ifdef W3_OMPH
!$OMP PARALLEL DO PRIVATE (ISEA, IXY )
#elif W3_GPU
!$ACC KERNELS
!$ACC LOOP INDEPENDENT
#endif
!
DO ISEA=1, NSEA
Expand All @@ -1190,6 +1235,9 @@ SUBROUTINE W3XYP2 ( ISP, DTG, MAPSTA, MAPFS, VQ, VGX, VGY )
!
#ifdef W3_OMPH
!$OMP END PARALLEL DO
#elif W3_GPU
!$ACC END KERNELS
!$ACC END DATA
#endif
!
RETURN
Expand Down
17 changes: 17 additions & 0 deletions model/src/w3uno2md.F90
Original file line number Diff line number Diff line change
Expand Up @@ -983,6 +983,10 @@ SUBROUTINE W3UNO2s (MX, MY, NX, NY, CFLL, TRANS, Q, BCLOSE, &
!
! 1. Initialize aux. array FLA and closure ------------------------- *
!
#ifdef W3_GPU
!$ACC DATA CREATE(FLA)
!$ACC KERNELS
#endif
FLA = 0.
!
IF ( BCLOSE ) THEN
Expand Down Expand Up @@ -1026,6 +1030,8 @@ SUBROUTINE W3UNO2s (MX, MY, NX, NY, CFLL, TRANS, Q, BCLOSE, &
!$OMP QBO, IX, IY, IY2, IX2, QN &
#endif
!$OMP IXYC, IXYD, QB)
#elif W3_GPU
!$ACC LOOP INDEPENDENT
#endif
!
DO IP=1, NB0
Expand Down Expand Up @@ -1073,6 +1079,9 @@ SUBROUTINE W3UNO2s (MX, MY, NX, NY, CFLL, TRANS, Q, BCLOSE, &
WRITE (NDST,9011) NB1-NB0, 'BOUNDARY ABOVE'
#endif
!
#ifdef W3_GPU
!$ACC LOOP INDEPENDENT
#endif
DO IP=NB0+1, NB1
IXY = MAPBOU(IP)
CFL = CFLL(IXY)
Expand All @@ -1099,6 +1108,9 @@ SUBROUTINE W3UNO2s (MX, MY, NX, NY, CFLL, TRANS, Q, BCLOSE, &
WRITE (NDST,9011) NB2-NB1, 'BOUNDARY BELOW'
#endif
!
#ifdef W3_GPU
!$ACC LOOP INDEPENDENT
#endif
DO IP=NB1+1, NB2
IXY = MAPBOU(IP)
CFL = CFLL(IXY+INC)
Expand Down Expand Up @@ -1141,6 +1153,8 @@ SUBROUTINE W3UNO2s (MX, MY, NX, NY, CFLL, TRANS, Q, BCLOSE, &
!$OMP PRIVATE(QOLD), &
#endif
!$OMP PRIVATE (IP, IXY, JN, JP)
#elif W3_GPU
!$ACC LOOP INDEPENDENT
#endif
!
DO IP=1, NACT
Expand Down Expand Up @@ -1172,6 +1186,9 @@ SUBROUTINE W3UNO2s (MX, MY, NX, NY, CFLL, TRANS, Q, BCLOSE, &
!
#ifdef W3_OMPH
!$OMP END PARALLEL DO
#elif W3_GPU
!$ACC END KERNELS
!$ACC END DATA
#endif
!
#ifdef W3_T0
Expand Down
6 changes: 6 additions & 0 deletions model/src/w3wavemd.F90
Original file line number Diff line number Diff line change
Expand Up @@ -1853,6 +1853,9 @@ SUBROUTINE W3WAVE ( IMOD, ODAT, TEND, STAMP, NO_OUT &
!
!
! Initialize FIELD variable
#ifdef W3_GPU
!$ACC UPDATE DEVICE(VA)
#endif
FIELD = 0.
!
DO ISPEC=1, NSPEC
Expand Down Expand Up @@ -1919,6 +1922,9 @@ SUBROUTINE W3WAVE ( IMOD, ODAT, TEND, STAMP, NO_OUT &

END IF
END DO
#ifdef W3_GPU
!$ACC UPDATE SELF(VA)
#endif
!
#ifdef W3_MPI
IF ( NRQSG1 .GT. 0 ) THEN
Expand Down
8 changes: 8 additions & 0 deletions regtests/bin/matrix.base
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,10 @@
echo "$rtst -s PR2_UNO -w work_PR2_UNO $ww3 ww3_tp2.2" >> matrix.body
echo "$rtst -s PR2_UNO -w work_PR2_UNO $ww3 ww3_tp2.3" >> matrix.body
echo "$rtst -s PR2_UNO -w work_PR2_UNO $ww3 ww3_tp2.4" >> matrix.body
echo "$rtst -s PR2_UNO_GPU -w work_PR2_UNO_GPU $ww3 ww3_tp2.1" >> matrix.body
echo "$rtst -s PR2_UNO_GPU -w work_PR2_UNO_GPU $ww3 ww3_tp2.2" >> matrix.body
echo "$rtst -s PR2_UNO_GPU -w work_PR2_UNO_GPU $ww3 ww3_tp2.3" >> matrix.body
echo "$rtst -s PR2_UNO_GPU -w work_PR2_UNO_GPU $ww3 ww3_tp2.4" >> matrix.body
echo "$rtst -s PR2_UNO -w work_PR2_UNO_curv -g curv $ww3 ww3_tp2.4" >> matrix.body
echo "$rtst -s PR2_UNO -w work_PR2_UNO $ww3 ww3_tp2.5" >> matrix.body
echo "$rtst -s PR2_UNO -w work_PR2_UNO_a -g a $ww3 ww3_tp2.9" >> matrix.body
Expand Down Expand Up @@ -311,6 +315,10 @@
echo "$rtst -s PR2_UNO_MPI -w work_PR2_UNO_MPI -f -p $mpi -n $np $ww3 ww3_tp2.2" >> matrix.body
echo "$rtst -s PR2_UNO_MPI -w work_PR2_UNO_MPI -f -p $mpi -n $np $ww3 ww3_tp2.3" >> matrix.body
echo "$rtst -s PR2_UNO_MPI -w work_PR2_UNO_MPI -f -p $mpi -n $np $ww3 ww3_tp2.4" >> matrix.body
echo "$rtst -s PR2_UNO_MPI_GPU -w work_PR2_UNO_MPI_GPU -f -p $mpi -n $np $ww3 ww3_tp2.1" >> matrix.body
echo "$rtst -s PR2_UNO_MPI_GPU -w work_PR2_UNO_MPI_GPU -f -p $mpi -n $np $ww3 ww3_tp2.2" >> matrix.body
echo "$rtst -s PR2_UNO_MPI_GPU -w work_PR2_UNO_MPI_GPU -f -p $mpi -n $np $ww3 ww3_tp2.3" >> matrix.body
echo "$rtst -s PR2_UNO_MPI_GPU -w work_PR2_UNO_MPI_GPU -f -p $mpi -n $np $ww3 ww3_tp2.4" >> matrix.body
echo "$rtst -s PR2_UNO_MPI -w work_PR2_UNO_curv_MPI -g curv -f -p $mpi -n $np $ww3 ww3_tp2.4" >> matrix.body
echo "$rtst -s PR2_UNO_MPI -w work_PR2_UNO_MPI -f -p $mpi -n $np $ww3 ww3_tp2.5" >> matrix.body
echo "$rtst -s PR2_UNO_MPI -w work_PR2_UNO_a_MPI -g a -f -p $mpi -n $np $ww3 ww3_tp2.9" >> matrix.body
Expand Down
1 change: 1 addition & 0 deletions regtests/ww3_tp2.1/input/switch_PR2_UNO_GPU
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
NOGRB SHRD GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11
1 change: 1 addition & 0 deletions regtests/ww3_tp2.1/input/switch_PR2_UNO_MPI_GPU
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
NOGRB DIST MPI GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11
1 change: 1 addition & 0 deletions regtests/ww3_tp2.2/input/switch_PR2_UNO_GPU
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
NOGRB SHRD GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11
1 change: 1 addition & 0 deletions regtests/ww3_tp2.2/input/switch_PR2_UNO_MPI_GPU
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
NOGRB DIST MPI GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11
1 change: 1 addition & 0 deletions regtests/ww3_tp2.3/input/switch_PR2_UNO_GPU
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
NOGRB SHRD GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11
1 change: 1 addition & 0 deletions regtests/ww3_tp2.3/input/switch_PR2_UNO_MPI_GPU
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
NOGRB DIST MPI GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11
1 change: 1 addition & 0 deletions regtests/ww3_tp2.4/input/switch_PR2_UNO_GPU
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
NOGRB SHRD GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11
1 change: 1 addition & 0 deletions regtests/ww3_tp2.4/input/switch_PR2_UNO_MPI_GPU
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
NOGRB DIST MPI GPU PR2 UNO FLX2 LN0 ST0 NL0 BT0 DB0 TR0 BS0 IC0 IS0 REF0 WNT1 WNX1 CRT1 CRX1 O0 O1 O2 O3 O4 O5 O6 O7 O10 O11