diff --git a/README.md b/README.md index 1993ce568..fd5b1a9f9 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # RTE+RRTMGP -This is the repository for RTE+RRTMGP, a set of codes for computing radiative fluxes in planetary atmospheres. RTE+RRTMGP is described in a [manuscript revised May 10, 2019](https://doi.org/10.1002/essoar.10500964.1) to [Journal of Advances in Modeling Earth Systems](http://james.agu.org). +This is the repository for RTE+RRTMGP, a set of codes for computing radiative fluxes in planetary atmospheres. RTE+RRTMGP is described in a [paper](https://doi.org/10.1029/2019MS001621) in [Journal of Advances in Modeling Earth Systems](http://james.agu.org). RRTMGP uses a k-distribution to provide an optical description (absorption and possibly Rayleigh optical depth) of the gaseous atmosphere, along with the relevant source functions, on a pre-determined spectral grid given temperatures, pressures, and gas concentration. The k-distribution currently distributed with this package is applicable to the Earth's atmosphere under present-day, pre-industrial, and 4xCO2 conditions. @@ -11,13 +11,13 @@ Example programs and documenation are evolving - please see examples/ in the rep ## Building the libraries. 1. `cd build` -2. Set environment variables `FC` (the Fortran 2003 compiler) and `FCFLAGS` (compiler flags). Alternately create a Makefile.conf that sets these variables. You could also link to an existing file. -3. Set environment variable `RTE_KERNELS` to `openacc` if you want the OpenACC kernels rather than the default. +2. Set environment variables `FC` (the Fortran 2003 compiler) and `FCFLAGS` (compiler flags). Alternately create a Makefile.conf that sets these variables. You could also link to an existing file. +3. Set environment variable `RTE_KERNELS` to `openacc` if you want the OpenACC kernels rather than the default. 4. `make` ## Building and running the examples. -1. From the root RTE+RRTMGP directory: `cd examples/rfmip-clear-sky`. -2. Set environment variables `NCHOME` and `NFHOME` to the root directories of the Netcdf C and Fortran libraries respectively. Set environment variable `RRTMGP_DIR` to the location of the libraries (`../../build`) in the default layout). +1. From the root RTE+RRTMGP directory: `cd examples/rfmip-clear-sky`. +2. Set environment variables `NCHOME` and `NFHOME` to the root directories of the Netcdf C and Fortran libraries respectively. Set environment variable `RRTMGP_DIR` to the location of the libraries (`../../build`) in the default layout). 3. `make` 4. Python scripts are provided to stage the files needed (`stage_files.py`), run the examples `run-rfmip-examples.py`), and compare to results computed on an example host (`compare-to-reference.py`). The python scripts require modules xarray and netCDF. diff --git a/build/Makefile.conf.ifort b/build/Makefile.conf.ifort index 3e6c66648..b2b47bfb3 100644 --- a/build/Makefile.conf.ifort +++ b/build/Makefile.conf.ifort @@ -8,8 +8,8 @@ export FC = ifort # # Optimized # -export FCFLAGS += -m64 -O3 -traceback -assume realloc_lhs -extend-source 132 -export F77FLAGS += -m64 -O3 -traceback +export FCFLAGS += -m64 -O3 -g -traceback -assume realloc_lhs -extend-source 132 +export F77FLAGS += -m64 -O3 -g -traceback # can add -qopt-report-phase=vec # # Debugging diff --git a/build/Makefile.conf.pgfortran-cscs b/build/Makefile.conf.pgfortran-cscs index 19eafc6e8..743a25ff1 100644 --- a/build/Makefile.conf.pgfortran-cscs +++ b/build/Makefile.conf.pgfortran-cscs @@ -2,10 +2,11 @@ # Load the following modules to compile with PGI for CPU # -# module swap PrgEnv-cray PrgEnv-pgi -# module swap pgi pgi/18.10.0 -# module load cray-netcdf cray-hdf5 -# module unload cray-libsci_acc +# module load cdt/19.06 +# module swap PrgEnv-cray PrgEnv-pgi +# module load cray-netcdf cray-hdf5 +# module load craype-accel-nvidia60 +# module unload cray-libsci_acc # # # Fortran compiler command diff --git a/build/Makefile.conf.pgfortran-cscs-gpu b/build/Makefile.conf.pgfortran-cscs-gpu index 21288b37f..571589b1d 100644 --- a/build/Makefile.conf.pgfortran-cscs-gpu +++ b/build/Makefile.conf.pgfortran-cscs-gpu @@ -2,10 +2,11 @@ # Load the following modules # -# module swap PrgEnv-cray PrgEnv-pgi -# module load cray-netcdf cray-hdf5 -# module load craype-accel-nvidia60 -# module unload cray-libsci_acc +# module load cdt/19.06 +# module swap PrgEnv-cray PrgEnv-pgi +# module load cray-netcdf cray-hdf5 +# module load craype-accel-nvidia60 +# module unload cray-libsci_acc # # # Fortran compiler command @@ -14,7 +15,7 @@ NCHOME = # Fortran compiler flags #FCFLAGS = -g -Minfo -Mbounds -Mchkptr -Mstandard -Kieee -Mchkstk -Mipa=fast,inline -acc -ta=tesla:6.5 -FCFLAGS = -g -ta=tesla:cc60,cuda9.1 -Minfo -Mbounds -Mchkptr -Mstandard -Kieee -Mchkstk -Mallocatable=03 +FCFLAGS = -g -ta=tesla:cc60,cuda9.2 -Minfo -Mbounds -Mchkptr -Mstandard -Kieee -Mchkstk -Mallocatable=03 # Fortran .mod files, e.g. -I if you have headers in a nonstandard directory FCINCLUDE = diff --git a/examples/mo_load_coefficients.F90 b/examples/mo_load_coefficients.F90 index 9c3a88093..edf2ea4a5 100644 --- a/examples/mo_load_coefficients.F90 +++ b/examples/mo_load_coefficients.F90 @@ -97,7 +97,7 @@ subroutine load_and_init(kdist, filename, available_gases) ! ! How big are the various arrays? ! - if(nf90_open(trim(fileName), NF90_WRITE, ncid) /= NF90_NOERR) & + if(nf90_open(trim(fileName), NF90_NOWRITE, ncid) /= NF90_NOERR) & call stop_on_err("load_and_init(): can't open file " // trim(fileName)) ntemps = get_dim_size(ncid,'temperature') npress = get_dim_size(ncid,'pressure') diff --git a/examples/rfmip-clear-sky/Makefile.libs.pgfortran-cscs b/examples/rfmip-clear-sky/Makefile.libs.pgfortran-cscs index c71d62c45..3bd5a213b 100644 --- a/examples/rfmip-clear-sky/Makefile.libs.pgfortran-cscs +++ b/examples/rfmip-clear-sky/Makefile.libs.pgfortran-cscs @@ -1,8 +1,11 @@ -# Load the following modules +# Load the following modules and set the library path # -# module swap PrgEnv-cray PrgEnv-pgi -# module load cray-netcdf cray-hdf5 -# module unload cray-libsci_acc +# module load cdt/19.06 +# module swap PrgEnv-cray PrgEnv-pgi +# module load cray-netcdf cray-hdf5 +# module load craype-accel-nvidia60 +# module unload cray-libsci_acc +# export LD_LIBRARY_PATH=$CRAY_LD_LIBRARY_PATH:$LD_LIBRARY_PATH export FC = ftn export FCFLAGS = -g -Minfo -Mbounds -Mchkptr -Mstandard -Kieee -Mchkstk -Mipa=fast,inline -Mallocatable=03 diff --git a/examples/rfmip-clear-sky/compare-to-reference.py b/examples/rfmip-clear-sky/compare-to-reference.py index ed4cb5bdd..dc805a605 100755 --- a/examples/rfmip-clear-sky/compare-to-reference.py +++ b/examples/rfmip-clear-sky/compare-to-reference.py @@ -5,12 +5,31 @@ import os import numpy as np import xarray as xr +import urllib.request ref_dir = "reference" tst_dir = "." -rrtmg_suffix = "_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc" +# +# Download reference results +# +print("Downloading reference results") +ref_dir = "./reference/" +if not os.path.exists(ref_dir): + os.makedirs(ref_dir) +urllib.request.urlretrieve("https://owncloud.gwdg.de/index.php/s/kbhl3JOSccGtR0m/download", \ + os.path.join(ref_dir, "rld_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc")) +urllib.request.urlretrieve("https://owncloud.gwdg.de/index.php/s/iFa28GFxRaNGKU1/download", \ + os.path.join(ref_dir, "rlu_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc")) +urllib.request.urlretrieve("https://owncloud.gwdg.de/index.php/s/uCemCHlGxbGK0gJ/download", \ + os.path.join(ref_dir, "rsd_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc")) +urllib.request.urlretrieve("https://owncloud.gwdg.de/index.php/s/l8ZG28j9ttZWD9r/download", \ + os.path.join(ref_dir, "rsu_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc")) +# +# Comparing reference and test results +# +rrtmg_suffix = "_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc" tst = xr.open_mfdataset(os.path.join(tst_dir, "r??" + rrtmg_suffix)) ref = xr.open_mfdataset(os.path.join(ref_dir, "r??" + rrtmg_suffix)) diff --git a/examples/rfmip-clear-sky/rrtmgp_rfmip_lw.F90 b/examples/rfmip-clear-sky/rrtmgp_rfmip_lw.F90 index 464239062..5c43e5d1d 100644 --- a/examples/rfmip-clear-sky/rrtmgp_rfmip_lw.F90 +++ b/examples/rfmip-clear-sky/rrtmgp_rfmip_lw.F90 @@ -225,10 +225,9 @@ program rrtmgp_rfmip_lw ! NOTE: these are causing problems right now, most likely due to a compiler ! bug related to the use of Fortran classes on the GPU. ! - !!!$acc enter data create(sfc_emis_spec) - !!$acc enter data create(optical_props, optical_props%tau) - !!$acc enter data create(source, source%lay_source, source%lev_source_inc, source%lev_source_dec) - !!$acc enter data create(source%sfc_source, source%band2gpt, source%gpt2band, source%band_lims_wvn) + !$acc enter data create(sfc_emis_spec) + !$acc enter data create(optical_props, optical_props%tau) + !$acc enter data create(source, source%lay_source, source%lev_source_inc, source%lev_source_dec, source%sfc_source) ! -------------------------------------------------- #ifdef USE_TIMING ! @@ -251,7 +250,7 @@ program rrtmgp_rfmip_lw ! Expand the spectrally-constant surface emissivity to a per-band emissivity for each column ! (This is partly to show how to keep work on GPUs using OpenACC) ! - !!$acc parallel loop collapse(2) copyin(sfc_emis) + !$acc parallel loop collapse(2) copyin(sfc_emis) do icol = 1, block_size do ibnd = 1, nbnd sfc_emis_spec(ibnd,icol) = sfc_emis(icol,b) @@ -299,10 +298,10 @@ program rrtmgp_rfmip_lw ret = gptlpr(block_size) ret = gptlfinalize() #endif - !!!$acc exit data delete(sfc_emis_spec) - !!$acc exit data delete(optical_props%tau, optical_props) - !!$acc exit data delete(source%lay_source, source%lev_source_inc, source%lev_source_dec, source%sfc_source) - !!$acc exit data delete(source%band2gpt, source%gpt2band, source%band_lims_wvn, source) + !$acc exit data delete(sfc_emis_spec) + !$acc exit data delete(optical_props%tau, optical_props) + !$acc exit data delete(source%lay_source, source%lev_source_inc, source%lev_source_dec, source%sfc_source) + !$acc exit data delete(source) ! --------------------------------------------------m call unblock_and_write(trim(flxup_file), 'rlu', flux_up) call unblock_and_write(trim(flxdn_file), 'rld', flux_dn) diff --git a/examples/rfmip-clear-sky/rrtmgp_rfmip_sw.F90 b/examples/rfmip-clear-sky/rrtmgp_rfmip_sw.F90 index 845050a89..cdf2e72c8 100644 --- a/examples/rfmip-clear-sky/rrtmgp_rfmip_sw.F90 +++ b/examples/rfmip-clear-sky/rrtmgp_rfmip_sw.F90 @@ -46,6 +46,10 @@ program rrtmgp_rfmip_sw ! use mo_rte_kind, only: wp ! + ! Array utilities + ! + use mo_util_array, only: zero_array + ! ! Optical properties of the atmosphere as array of values ! In the longwave we include only absorption optical depth (_1scl) ! Shortwave calculations use optical depth, single-scattering albedo, asymmetry parameter (_2str) @@ -224,11 +228,9 @@ program rrtmgp_rfmip_sw flux_dn(block_size, nlay+1, nblocks)) allocate(mu0(block_size), sfc_alb_spec(nbnd,block_size)) call stop_on_err(optical_props%alloc_2str(block_size, nlay, k_dist)) - ! Handle GPU data. Leave mu0, sfc_alb_spec, toa_flux, and def_tsi on CPU for - ! now, and let compiler or CUDA runtime handle data movement because not - ! everything is in kernels at the next level down yet. - !!$acc enter data create(optical_props, optical_props%tau, optical_props%ssa, optical_props%g) - !!!$acc enter data create(mu0,sfc_alb_spec,toa_flux,def_tsi) + !$acc enter data create(optical_props, optical_props%tau, optical_props%ssa, optical_props%g) + !$acc enter data create (toa_flux, def_tsi) + !$acc enter data create (sfc_alb_spec, mu0) ! -------------------------------------------------- #ifdef USE_TIMING ! @@ -265,25 +267,27 @@ program rrtmgp_rfmip_sw #endif ! Boundary conditions ! (This is partly to show how to keep work on GPUs using OpenACC in a host application) - ! ! What's the total solar irradiance assumed by RRTMGP? - ! The first two loops could be more expressed more ompactly as def_tsi(1:block_size) = sum(toa_flux, dim=2) ! - !!!$acc parallel loop - do icol = 1, block_size - def_tsi(icol) = toa_flux(icol, 1) - end do - !!!$acc parallel loop collapse(2) +#ifdef _OPENACC + call zero_array(block_size, def_tsi) + !$acc parallel loop collapse(2) copy(def_tsi) copyin(toa_flux) do igpt = 1, ngpt do icol = 1, block_size - !!$acc atomic update + !$acc atomic update def_tsi(icol) = def_tsi(icol) + toa_flux(icol, igpt) end do end do +#else + ! + ! More compactly... + ! + def_tsi(1:block_size) = sum(toa_flux, dim=2) +#endif ! ! Normalize incoming solar flux to match RFMIP specification ! - !!!$acc parallel loop collapse(2) + !$acc parallel loop collapse(2) copyin(total_solar_irradiance, def_tsi) copy(toa_flux) do igpt = 1, ngpt do icol = 1, block_size toa_flux(icol,igpt) = toa_flux(icol,igpt) * total_solar_irradiance(icol,b)/def_tsi(icol) @@ -292,7 +296,7 @@ program rrtmgp_rfmip_sw ! ! Expand the spectrally-constant surface albedo to a per-band albedo for each column ! - !!!$acc parallel loop collapse(2) + !$acc parallel loop collapse(2) copyin(surface_albedo) do icol = 1, block_size do ibnd = 1, nbnd sfc_alb_spec(ibnd,icol) = surface_albedo(icol,b) @@ -301,7 +305,7 @@ program rrtmgp_rfmip_sw ! ! Cosine of the solar zenith angle ! - !!!$acc parallel loop + !$acc parallel loop copyin(solar_zenith_angle, usecol) do icol = 1, block_size mu0(icol) = merge(cos(solar_zenith_angle(icol,b)*deg_to_rad), 1._wp, usecol(icol,b)) end do @@ -341,8 +345,9 @@ program rrtmgp_rfmip_sw ret = gptlpr(block_size) ret = gptlfinalize() #endif - !!$acc exit data delete(optical_props, optical_props%tau, optical_props%ssa, optical_props%g) - !!!$acc exit data delete(mu0,sfc_alb_spec,toa_flux,def_tsi) + !$acc exit data delete(optical_props%tau, optical_props%ssa, optical_props%g, optical_props) + !$acc exit data delete(sfc_alb_spec, mu0) + !$acc exit data delete(toa_flux, def_tsi) ! -------------------------------------------------- call unblock_and_write(trim(flxup_file), 'rsu', flux_up) call unblock_and_write(trim(flxdn_file), 'rsd', flux_dn) diff --git a/examples/rfmip-clear-sky/stage_files.py b/examples/rfmip-clear-sky/stage_files.py index 35f340a4f..a1556e2ab 100755 --- a/examples/rfmip-clear-sky/stage_files.py +++ b/examples/rfmip-clear-sky/stage_files.py @@ -32,21 +32,4 @@ urllib.request.urlretrieve(conds_url, conds_file) print("Dowloading scripts for generating output templates") urllib.request.urlretrieve(templ_scr_url, templ_scr) -#%run -i generate-output-file-templates.py --source_id RTE-RRTMGP-181204 subprocess.run(["python3", templ_scr, "--source_id", "RTE-RRTMGP-181204"]) - -# -# Reference results -# -print("Downloading reference results") -ref_dir = "./reference/" -if not os.path.exists(ref_dir): - os.makedirs(ref_dir) -urllib.request.urlretrieve("https://owncloud.gwdg.de/index.php/s/kbhl3JOSccGtR0m/download", \ - os.path.join(ref_dir, "rld_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc")) -urllib.request.urlretrieve("https://owncloud.gwdg.de/index.php/s/iFa28GFxRaNGKU1/download", \ - os.path.join(ref_dir, "rlu_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc")) -urllib.request.urlretrieve("https://owncloud.gwdg.de/index.php/s/uCemCHlGxbGK0gJ/download", \ - os.path.join(ref_dir, "rsd_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc")) -urllib.request.urlretrieve("https://owncloud.gwdg.de/index.php/s/l8ZG28j9ttZWD9r/download", \ - os.path.join(ref_dir, "rsu_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc")) diff --git a/rrtmgp/kernels-openacc/mo_gas_optics_kernels.F90 b/rrtmgp/kernels-openacc/mo_gas_optics_kernels.F90 index 0889b8807..d51319fad 100644 --- a/rrtmgp/kernels-openacc/mo_gas_optics_kernels.F90 +++ b/rrtmgp/kernels-openacc/mo_gas_optics_kernels.F90 @@ -17,10 +17,6 @@ module mo_gas_optics_kernels use mo_rte_kind, only: wp, wl implicit none - - interface zero_array - module procedure zero_array_3D, zero_array_4D - end interface contains ! -------------------------------------------------------------------------------------- ! Compute interpolation coefficients @@ -454,7 +450,8 @@ subroutine gas_optical_depths_minor(ncol,nlay,ngpt, & tau_minor = 0._wp iflav = gpt_flv(idx_tropo,igpt) ! eta interpolation depends on flavor minor_loc = minor_start + (igpt - gptS) ! add offset to starting point - kminor_loc = interpolate2D(fminor(:,:,iflav,icol,ilay), kminor, minor_loc, jeta(:,iflav,icol,ilay), jtemp(icol,ilay)) + kminor_loc = interpolate2D(fminor(:,:,iflav,icol,ilay), kminor, minor_loc, & + jeta(:,iflav,icol,ilay), jtemp(icol,ilay)) tau_minor = kminor_loc * scaling !$acc atomic update tau(igpt,ilay,icol) = tau(igpt,ilay,icol) + tau_minor @@ -553,7 +550,7 @@ subroutine compute_Planck_source( & real(wp) :: planck_function(nbnd,nlay+1,ncol) ! ----------------- - !$acc enter data copyin(tlay,tlev,tsfc,fmajor,jeta,tropo,jtemp,jpress,gpoint_bands,temp_ref_min,totplnk_delta,pfracin,totplnk,gpoint_flavor,one) + !$acc enter data copyin(tlay,tlev,tsfc,fmajor,jeta,tropo,jtemp,jpress,gpoint_bands,pfracin,totplnk,gpoint_flavor,one) !$acc enter data create(sfc_src,lay_src,lev_src_inc,lev_src_dec) !$acc enter data create(pfrac,planck_function) @@ -636,9 +633,9 @@ subroutine compute_Planck_source( & end do ! ilay end do ! icol - !$acc exit data delete(tlay,tlev,tsfc,fmajor,jeta,tropo,jtemp,jpress,gpoint_bands,temp_ref_min,totplnk_delta,pfracin,totplnk,gpoint_flavor,one) - !$acc exit data copyout(sfc_src,lay_src,lev_src_inc,lev_src_dec) + !$acc exit data delete(tlay,tlev,tsfc,fmajor,jeta,tropo,jtemp,jpress,gpoint_bands,pfracin,totplnk,gpoint_flavor,one) !$acc exit data delete(pfrac,planck_function) + !$acc exit data copyout(sfc_src,lay_src,lev_src_inc,lev_src_dec) end subroutine compute_Planck_source ! ---------------------------------------------------------- @@ -784,42 +781,4 @@ subroutine combine_and_reorder_nstr(ncol, nlay, ngpt, nmom, tau_abs, tau_rayleig end do end subroutine combine_and_reorder_nstr ! ---------------------------------------------------------- - subroutine zero_array_3D(ni, nj, nk, array) bind(C, name="zero_array_3D") - integer, intent(in) :: ni, nj, nk - real(wp), dimension(ni, nj, nk), intent(out) :: array - ! ----------------------- - integer :: i,j,k - ! ----------------------- - !$acc parallel loop collapse(3) & - !$acc& copyout(array(:ni,:nj,:nk)) - do k = 1, nk - do j = 1, nj - do i = 1, ni - array(i,j,k) = 0.0_wp - end do - end do - end do - - end subroutine zero_array_3D - ! ---------------------------------------------------------- - subroutine zero_array_4D(ni, nj, nk, nl, array) bind(C, name="zero_array_4D") - integer, intent(in) :: ni, nj, nk, nl - real(wp), dimension(ni, nj, nk, nl), intent(out) :: array - ! ----------------------- - integer :: i,j,k,l - ! ----------------------- - !$acc parallel loop collapse(4) & - !$acc& copyout(array(:ni,:nj,:nk,:nl)) - do l = 1, nl - do k = 1, nk - do j = 1, nj - do i = 1, ni - array(i,j,k,l) = 0.0_wp - end do - end do - end do - end do - - end subroutine zero_array_4D - ! ---------------------------------------------------------- end module mo_gas_optics_kernels diff --git a/rrtmgp/kernels-openacc/mo_reorder_kernels.F90 b/rrtmgp/kernels-openacc/mo_reorder_kernels.F90 deleted file mode 100644 index ab683ac09..000000000 --- a/rrtmgp/kernels-openacc/mo_reorder_kernels.F90 +++ /dev/null @@ -1,61 +0,0 @@ -! This code is part of -! RRTM for GCM Applications - Parallel (RRTMGP) -! -! Eli Mlawer and Robert Pincus -! Andre Wehe and Jennifer Delamere -! email: rrtmgp@aer.com -! -! Copyright 2018, Atmospheric and Environmental Research and -! Regents of the University of Colorado. All right reserved. -! -! Use and duplication is permitted under the terms of the -! BSD 3-clause license, see http://opensource.org/licenses/BSD-3-Clause -! -! Description: Kernels to permute arrays - -module mo_reorder_kernels - use mo_rte_kind, only: wp - implicit none -contains - ! ---------------------------------------------------------------------------- - subroutine reorder_123x312_kernel(d1, d2, d3, array_in, array_out) & - bind(C, name = "reorder_123x312_kernel") - integer, intent( in) :: d1, d2, d3 - real(wp), dimension(d1, d2, d3), intent( in) :: array_in - real(wp), dimension(d3, d1, d2), intent(out) :: array_out - - integer :: i1, i2, i3 - - !$acc parallel loop collapse(3) & - !$acc& copyout(array_out(:d3,:d1,:d2)) & - !$acc& copyin(array_in(:d1,:d2,:d3)) - do i2 = 1, d2 - do i1 = 1, d1 - do i3 = 1, d3 - array_out(i3,i1,i2) = array_in(i1,i2,i3) - end do - end do - end do - end subroutine reorder_123x312_kernel - ! ---------------------------------------------------------------------------- - subroutine reorder_123x321_kernel(d1, d2, d3, array_in, array_out) & - bind(C, name="reorder_123x321_kernel") - integer, intent( in) :: d1, d2, d3 - real(wp), dimension(d1, d2, d3), intent( in) :: array_in - real(wp), dimension(d3, d2, d1), intent(out) :: array_out - - integer :: i1, i2, i3 - - !$acc parallel loop collapse(3) & - !$acc& copyout(array_out(:d3,:d2,:d1)) & - !$acc& copyin(array_in(:d1,:d2,:d3)) - do i1 = 1, d1 - do i2 = 1, d2 - do i3 = 1, d3 - array_out(i3,i2,i1) = array_in(i1,i2,i3) - end do - end do - end do - end subroutine reorder_123x321_kernel - ! ---------------------------------------------------------------------------- -end module mo_reorder_kernels diff --git a/rrtmgp/kernels/mo_gas_optics_kernels.F90 b/rrtmgp/kernels/mo_gas_optics_kernels.F90 index ce1e0f592..9b9f99e7e 100644 --- a/rrtmgp/kernels/mo_gas_optics_kernels.F90 +++ b/rrtmgp/kernels/mo_gas_optics_kernels.F90 @@ -17,10 +17,6 @@ module mo_gas_optics_kernels use mo_rte_kind, only : wp, wl implicit none - - interface zero_array - module procedure zero_array_3D, zero_array_4D - end interface contains ! -------------------------------------------------------------------------------------- ! Compute interpolation coefficients @@ -774,39 +770,4 @@ pure subroutine combine_and_reorder_nstr(ncol, nlay, ngpt, nmom, tau_abs, tau_ra end do end do end subroutine combine_and_reorder_nstr - ! ---------------------------------------------------------- - pure subroutine zero_array_3D(ni, nj, nk, array) bind(C, name="zero_array_3D") - integer, intent(in) :: ni, nj, nk - real(wp), dimension(ni, nj, nk), intent(out) :: array - ! ----------------------- - integer :: i,j,k - ! ----------------------- - do k = 1, nk - do j = 1, nj - do i = 1, ni - array(i,j,k) = 0.0_wp - end do - end do - end do - - end subroutine zero_array_3D - ! ---------------------------------------------------------- - pure subroutine zero_array_4D(ni, nj, nk, nl, array) bind(C, name="zero_array_4D") - integer, intent(in) :: ni, nj, nk, nl - real(wp), dimension(ni, nj, nk, nl), intent(out) :: array - ! ----------------------- - integer :: i,j,k,l - ! ----------------------- - do l = 1, nl - do k = 1, nk - do j = 1, nj - do i = 1, ni - array(i,j,k,l) = 0.0_wp - end do - end do - end do - end do - - end subroutine zero_array_4D - ! ---------------------------------------------------------- end module mo_gas_optics_kernels diff --git a/rrtmgp/kernels/mo_reorder_kernels.F90 b/rrtmgp/kernels/mo_reorder_kernels.F90 index c8d06c88c..ab683ac09 100644 --- a/rrtmgp/kernels/mo_reorder_kernels.F90 +++ b/rrtmgp/kernels/mo_reorder_kernels.F90 @@ -26,6 +26,9 @@ subroutine reorder_123x312_kernel(d1, d2, d3, array_in, array_out) & integer :: i1, i2, i3 + !$acc parallel loop collapse(3) & + !$acc& copyout(array_out(:d3,:d1,:d2)) & + !$acc& copyin(array_in(:d1,:d2,:d3)) do i2 = 1, d2 do i1 = 1, d1 do i3 = 1, d3 @@ -43,6 +46,9 @@ subroutine reorder_123x321_kernel(d1, d2, d3, array_in, array_out) & integer :: i1, i2, i3 + !$acc parallel loop collapse(3) & + !$acc& copyout(array_out(:d3,:d2,:d1)) & + !$acc& copyin(array_in(:d1,:d2,:d3)) do i1 = 1, d1 do i2 = 1, d2 do i3 = 1, d3 diff --git a/rrtmgp/mo_gas_optics_rrtmgp.F90 b/rrtmgp/mo_gas_optics_rrtmgp.F90 index 653f66a33..e33ee4efa 100644 --- a/rrtmgp/mo_gas_optics_rrtmgp.F90 +++ b/rrtmgp/mo_gas_optics_rrtmgp.F90 @@ -23,12 +23,12 @@ module mo_gas_optics_rrtmgp use mo_rte_kind, only: wp, wl use mo_rrtmgp_constants, only: avogad, m_dry, m_h2o, grav - use mo_util_array, only: any_vals_less_than, any_vals_outside + use mo_util_array, only: zero_array, any_vals_less_than, any_vals_outside use mo_optical_props, only: ty_optical_props use mo_source_functions, only: ty_source_func_lw use mo_gas_optics_kernels, only: interpolation, & compute_tau_absorption, compute_tau_rayleigh, compute_Planck_source, & - combine_and_reorder_2str, combine_and_reorder_nstr, zero_array + combine_and_reorder_2str, combine_and_reorder_nstr use mo_util_string, only: lower_case, string_in_array, string_loc_in_array use mo_gas_concentrations, only: ty_gas_concs @@ -326,6 +326,7 @@ function gas_optics_ext(this, & integer, dimension(2, get_nflav(this),size(play,dim=1), size(play,dim=2)) :: jeta integer :: ncol, nlay, ngpt, nband, ngas, nflav + integer :: igpt, icol ! ---------------------------------------------------------- ncol = size(play,dim=1) nlay = size(play,dim=2) @@ -352,8 +353,12 @@ function gas_optics_ext(this, & ! error_msg = check_extent(toa_src, ncol, ngpt, 'toa_src') if(error_msg /= '') return - toa_src(:,:) = spread(this%solar_src(:), dim=1, ncopies=ncol) - + !$acc parallel loop collapse(2) + do igpt = 1,ngpt + do icol = 1,ncol + toa_src(icol,igpt) = this%solar_src(igpt) + end do + end do end function gas_optics_ext !------------------------------------------------------------------------------------------ ! @@ -494,12 +499,10 @@ function compute_gas_taus(this, & ! !$acc enter data create(jtemp, jpress, jeta, tropo, fmajor) !$acc enter data create(tau, tau_rayleigh) - !$acc enter data copyin(play, tlay, col_gas) !$acc enter data create(col_mix, fminor) + !$acc enter data copyin(play, tlay, col_gas) !$acc enter data copyin(this) - !$acc enter data copyin(this%flavor, this%press_ref_log, this%vmr_ref, this%gpoint_flavor) - !$acc enter data copyin(this%temp_ref) ! this one causes problems - !$acc enter data copyin(this%kminor_lower, this%kminor_upper) + !$acc enter data copyin(this%gpoint_flavor) call zero_array(ngpt, nlay, ncol, tau) call interpolation( & ncol,nlay, & ! problem dimensions @@ -565,12 +568,11 @@ function compute_gas_taus(this, & ! Combine optical depths and reorder for radiative transfer solver. call combine_and_reorder(tau, tau_rayleigh, allocated(this%krayl), optical_props) - !$acc exit data copyout(jtemp, jpress, jeta, tropo, fmajor) !$acc exit data delete(tau, tau_rayleigh) - !$acc exit data delete(play, tlay, col_gas, col_mix, fminor) - !$acc exit data delete(this%flavor, this%press_ref_log, this%vmr_ref, this%gpoint_flavor) - !!!$acc exit data delete(this%temp_ref) ! this one causes problems - !!!$acc exit data delete(this%kminor_lower, this%kminor_upper) + !$acc exit data delete(play, tlay, col_gas) + !$acc exit data delete(col_mix, fminor) + !$acc exit data delete(this%gpoint_flavor) + !$acc exit data copyout(jtemp, jpress, jeta, tropo, fmajor) end function compute_gas_taus !------------------------------------------------------------------------------------------ ! @@ -585,24 +587,24 @@ function source(this, & result(error_msg) ! inputs class(ty_gas_optics_rrtmgp), intent(in ) :: this - integer, intent(in ) :: ncol, nlay, nbnd, ngpt - real(wp), dimension(ncol,nlay), intent(in ) :: play ! layer pressures [Pa, mb] - real(wp), dimension(ncol,nlay+1), intent(in ) :: plev ! level pressures [Pa, mb] - real(wp), dimension(ncol,nlay), intent(in ) :: tlay ! layer temperatures [K] - real(wp), dimension(ncol), intent(in ) :: tsfc ! surface skin temperatures [K] + integer, intent(in ) :: ncol, nlay, nbnd, ngpt + real(wp), dimension(ncol,nlay), intent(in ) :: play ! layer pressures [Pa, mb] + real(wp), dimension(ncol,nlay+1), intent(in ) :: plev ! level pressures [Pa, mb] + real(wp), dimension(ncol,nlay), intent(in ) :: tlay ! layer temperatures [K] + real(wp), dimension(ncol), intent(in ) :: tsfc ! surface skin temperatures [K] ! Interplation coefficients - integer, dimension(ncol,nlay), intent(in ) :: jtemp, jpress - logical(wl), dimension(ncol,nlay), intent(in ) :: tropo + integer, dimension(ncol,nlay), intent(in ) :: jtemp, jpress + logical(wl), dimension(ncol,nlay), intent(in ) :: tropo real(wp), dimension(2,2,2,get_nflav(this),ncol,nlay), & - intent(in ) :: fmajor + intent(in ) :: fmajor integer, dimension(2, get_nflav(this),ncol,nlay), & - intent(in ) :: jeta + intent(in ) :: jeta class(ty_source_func_lw ), intent(inout) :: sources - real(wp), dimension(ncol,nlay+1), intent(in ), & + real(wp), dimension(ncol,nlay+1), intent(in ), & optional, target :: tlev ! level temperatures [K] character(len=128) :: error_msg ! ---------------------------------------------------------- - integer :: icol, ilay + integer :: icol, ilay, igpt real(wp), dimension(ngpt,nlay,ncol) :: lay_source_t, lev_source_inc_t, lev_source_dec_t real(wp), dimension(ngpt, ncol) :: sfc_source_t ! Variables for temperature at layer edges [K] (ncol, nlay+1) @@ -644,6 +646,9 @@ function source(this, & !------------------------------------------------------------------- ! Compute internal (Planck) source functions at layers and levels, ! which depend on mapping from spectral space that creates k-distribution. + !$acc enter data copyin(sources) + !$acc enter data create(sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source) + !$acc enter data create(sfc_source_t, lay_source_t, lev_source_inc_t, lev_source_dec_t) attach(tlev_wk) call compute_Planck_source(ncol, nlay, nbnd, ngpt, & get_nflav(this), this%get_neta(), this%get_npres(), this%get_ntemp(), this%get_nPlanckTemp(), & tlay, tlev_wk, tsfc, merge(1,nlay,play(1,1) > play(1,nlay)), & @@ -651,10 +656,18 @@ function source(this, & this%get_gpoint_bands(), this%get_band_lims_gpoint(), this%planck_frac, this%temp_ref_min,& this%totplnk_delta, this%totplnk, this%gpoint_flavor, & sfc_source_t, lay_source_t, lev_source_inc_t, lev_source_dec_t) - sources%sfc_source = transpose(sfc_source_t) + !$acc parallel loop collapse(2) + do igpt = 1, ngpt + do icol = 1, ncol + sources%sfc_source(icol,igpt) = sfc_source_t(igpt,icol) + end do + end do call reorder123x321(lay_source_t, sources%lay_source) call reorder123x321(lev_source_inc_t, sources%lev_source_inc) call reorder123x321(lev_source_dec_t, sources%lev_source_dec) + !$acc exit data delete(sfc_source_t, lay_source_t, lev_source_inc_t, lev_source_dec_t) detach(tlev_wk) + !$acc exit data copyout(sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source) + !$acc exit data copyout(sources) end function source !-------------------------------------------------------------------------------------------------------------------- ! @@ -1535,7 +1548,7 @@ subroutine combine_and_reorder(tau, tau_rayleigh, has_rayleigh, optical_props) ncol = size(tau, 3) nlay = size(tau, 2) ngpt = size(tau, 1) - + !$acc enter data copyin(optical_props) if (.not. has_rayleigh) then ! index reorder (ngpt, nlay, ncol) -> (ncol,nlay,gpt) !$acc enter data copyin(tau) @@ -1579,6 +1592,7 @@ subroutine combine_and_reorder(tau, tau_rayleigh, has_rayleigh, optical_props) end select !$acc exit data delete(tau, tau_rayleigh) end if + !$acc exit data copyout(optical_props) end subroutine combine_and_reorder !-------------------------------------------------------------------------------------------------------------------- diff --git a/rte/Make.depends b/rte/Make.depends index 0f0aeee3e..13123d913 100644 --- a/rte/Make.depends +++ b/rte/Make.depends @@ -36,6 +36,7 @@ mo_fluxes.o: mo_rte_kind.o mo_fluxes_broadband_kernels.o mo mo_rte_solver_kernels.o: mo_rte_kind.o mo_rte_solver_kernels.F90 mo_rte_lw.o: mo_rte_kind.o \ + mo_util_array.o \ mo_optical_props.o \ mo_source_functions.o \ mo_fluxes.o \ @@ -43,6 +44,7 @@ mo_rte_lw.o: mo_rte_kind.o \ mo_rte_lw.F90 mo_rte_sw.o: mo_rte_kind.o \ + mo_util_array.o \ mo_optical_props.o \ mo_source_functions.o \ mo_fluxes.o \ diff --git a/rte/kernels-openacc/mo_fluxes_broadband_kernels.F90 b/rte/kernels-openacc/mo_fluxes_broadband_kernels.F90 deleted file mode 100644 index a698fe0c0..000000000 --- a/rte/kernels-openacc/mo_fluxes_broadband_kernels.F90 +++ /dev/null @@ -1,109 +0,0 @@ -! This code is part of Radiative Transfer for Energetics (RTE) -! -! Eli Mlawer and Robert Pincus -! email: rrtmgp@aer.com -! -! Copyright 2015-2018, Atmospheric and Environmental Research and -! Regents of the University of Colorado. All right reserved. -! -! Use and duplication is permitted under the terms of the -! BSD 3-clause license, see http://opensource.org/licenses/BSD-3-Clause -! ------------------------------------------------------------------------------------------------- -! -! Kernels for computing broadband fluxes by summing over all elements in the spectral dimension -! -! ------------------------------------------------------------------------------------------------- -module mo_fluxes_broadband_kernels - use, intrinsic :: iso_c_binding - use mo_rte_kind, only: wp - implicit none - private - public :: sum_broadband, net_broadband - - interface net_broadband - module procedure net_broadband_full, net_broadband_precalc - end interface net_broadband -contains - ! ---------------------------------------------------------------------------- - ! - ! Spectral reduction over all points - ! - subroutine sum_broadband(ncol, nlev, ngpt, spectral_flux, broadband_flux) bind (C, name="sum_broadband") - integer, intent(in ) :: ncol, nlev, ngpt - real(wp), dimension(ncol, nlev, ngpt), intent(in ) :: spectral_flux - real(wp), dimension(ncol, nlev), intent(out) :: broadband_flux - - integer :: icol, ilev, igpt - real(wp) :: total - !$acc enter data copyin(spectral_flux) create(broadband_flux) - !$acc parallel loop collapse(2) - do ilev = 1, nlev - do icol = 1, ncol - broadband_flux(icol,ilev) = 0._wp - end do - end do - - !$acc parallel loop collapse(3) - do ilev = 1, nlev - do icol = 1, ncol - do igpt = 1, ngpt - !$acc atomic update - broadband_flux(icol,ilev) = broadband_flux(icol,ilev) + spectral_flux(icol, ilev, igpt) - end do - end do - end do - !$acc exit data delete(spectral_flux) copyout(broadband_flux) - end subroutine sum_broadband - ! ---------------------------------------------------------------------------- - ! - ! Net flux: Spectral reduction over all points - ! - subroutine net_broadband_full(ncol, nlev, ngpt, spectral_flux_dn, spectral_flux_up, broadband_flux_net) & - bind(C, name="net_broadband_full") - integer, intent(in ) :: ncol, nlev, ngpt - real(wp), dimension(ncol, nlev, ngpt), intent(in ) :: spectral_flux_dn, spectral_flux_up - real(wp), dimension(ncol, nlev), intent(out) :: broadband_flux_net - integer :: icol, ilev, igpt - real(wp) :: total, tmp - - !$acc enter data copyin(spectral_flux_dn, spectral_flux_up) create(broadband_flux_net) - !$acc parallel loop collapse(2) - do ilev = 1, nlev - do icol = 1, ncol - broadband_flux_net(icol,ilev) = 0._wp - end do - end do - - !$acc parallel loop collapse(3) - do ilev = 1, nlev - do icol = 1, ncol - do igpt = 1, ngpt - tmp = spectral_flux_dn(icol, ilev, igpt) - spectral_flux_up(icol, ilev, igpt) - !$acc atomic update - broadband_flux_net(icol,ilev) = broadband_flux_net(icol,ilev) + tmp - end do - end do - end do - !$acc exit data delete(spectral_flux_dn, spectral_flux_up) copyout(broadband_flux_net) - end subroutine net_broadband_full - ! ---------------------------------------------------------------------------- - ! - ! Net flux when bradband flux up and down are already available - ! - subroutine net_broadband_precalc(ncol, nlay, flux_dn, flux_up, broadband_flux_net) & - bind (C, name="net_broadband_precalc") - integer, intent(in ) :: ncol, nlay - real(wp), dimension(ncol, nlay), intent(in ) :: flux_dn, flux_up - real(wp), dimension(ncol, nlay), intent(out) :: broadband_flux_net - integer :: icol, ilay - !$acc enter data copyin(flux_dn, flux_up) create(broadband_flux_net) - !$acc parallel loop collapse(2) - do ilay = 1, nlay - do icol = 1, ncol - broadband_flux_net(icol,ilay) = flux_dn(icol,ilay) - flux_up(icol,ilay) - end do - end do - !$acc exit data delete(flux_dn, flux_up) copyout(broadband_flux_net) - end subroutine net_broadband_precalc - ! ---------------------------------------------------------------------------- -end module mo_fluxes_broadband_kernels diff --git a/rte/kernels-openacc/mo_rte_solver_kernels.F90 b/rte/kernels-openacc/mo_rte_solver_kernels.F90 index 4f5659f15..da7e0d307 100644 --- a/rte/kernels-openacc/mo_rte_solver_kernels.F90 +++ b/rte/kernels-openacc/mo_rte_solver_kernels.F90 @@ -106,15 +106,15 @@ subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, lev_source_dn => lev_source_dec end if - !$acc enter data copyin(d,tau,sfc_src,sfc_emis,lev_source_dec,lev_source_inc,lay_source,radn_dn) async - !$acc enter data create(tau_loc,trans,source_dn,source_up,source_sfc,sfc_albedo,radn_up) async - !$acc enter data attach(lev_source_up,lev_source_dn) async + !$acc enter data copyin(d,tau,sfc_src,sfc_emis,lev_source_dec,lev_source_inc,lay_source,radn_dn) + !$acc enter data create(tau_loc,trans,source_dn,source_up,source_sfc,sfc_albedo,radn_up) + !$acc enter data attach(lev_source_up,lev_source_dn) ! NOTE: This kernel produces small differences between GPU and CPU ! implementations on Ascent with PGI, we assume due to floating point ! differences in the exp() function. These differences are small in the ! RFMIP test case (10^-6). - !$acc parallel loop collapse(3) async + !$acc parallel loop collapse(3) do igpt = 1, ngpt do ilev = 1, nlay do icol = 1, ncol @@ -127,7 +127,7 @@ subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, end do end do - !$acc parallel loop collapse(2) async + !$acc parallel loop collapse(2) do igpt = 1, ngpt do icol = 1, ncol ! @@ -160,7 +160,7 @@ subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, ! ! Convert intensity to flux assuming azimuthal isotropy and quadrature weight ! - !$acc parallel loop collapse(3) async + !$acc parallel loop collapse(3) do igpt = 1, ngpt do ilev = 1, nlay+1 do icol = 1, ncol @@ -170,9 +170,9 @@ subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, end do end do - !$acc exit data copyout(radn_dn,radn_up) async - !$acc exit data delete(d,tau,sfc_src,sfc_emis,lev_source_dec,lev_source_inc,lay_source,tau_loc,trans,source_dn,source_up,source_sfc,sfc_albedo) async - !$acc exit data detach(lev_source_up,lev_source_dn) async + !$acc exit data copyout(radn_dn,radn_up) + !$acc exit data delete(d,tau,sfc_src,sfc_emis,lev_source_dec,lev_source_inc,lay_source,tau_loc,trans,source_dn,source_up,source_sfc,sfc_albedo) + !$acc exit data detach(lev_source_up,lev_source_dn) end subroutine lw_solver_noscat ! --------------------------------------------------------------- @@ -209,12 +209,12 @@ subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weig integer :: imu, top_level integer :: icol, ilev, igpt - !$acc enter data copyin(Ds,weights,tau,lay_source,lev_source_inc,lev_source_dec,sfc_emis,sfc_src,flux_dn) async - !$acc enter data create(flux_up,radn_dn,radn_up,Ds_ncol) async + !$acc enter data copyin(Ds,weights,tau,lay_source,lev_source_inc,lev_source_dec,sfc_emis,sfc_src,flux_dn) + !$acc enter data create(flux_up,radn_dn,radn_up,Ds_ncol,flux_top) ! ------------------------------------ ! ------------------------------------ - !$acc parallel loop collapse(2) async + !$acc parallel loop collapse(2) do igpt = 1, ngpt do icol = 1, ncol Ds_ncol(icol, igpt) = Ds(1) @@ -229,7 +229,7 @@ subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weig ! For more than one angle use local arrays ! top_level = MERGE(1, nlay+1, top_at_1) - !$acc parallel loop collapse(2) async + !$acc parallel loop collapse(2) do igpt = 1,ngpt do icol = 1,ncol flux_top(icol,igpt) = flux_dn(icol,top_level,igpt) @@ -238,7 +238,7 @@ subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weig call apply_BC(ncol, nlay, ngpt, top_at_1, flux_top, radn_dn) do imu = 2, nmus - !$acc parallel loop collapse(2) async + !$acc parallel loop collapse(2) do igpt = 1, ngpt do icol = 1, ncol Ds_ncol(icol, igpt) = Ds(imu) @@ -248,7 +248,7 @@ subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weig top_at_1, Ds_ncol, weights(imu), tau, & lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & radn_up, radn_dn) - !$acc parallel loop collapse(3) async + !$acc parallel loop collapse(3) do igpt = 1, ngpt do ilev = 1, nlay+1 do icol = 1, ncol @@ -260,10 +260,8 @@ subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weig end do ! imu loop - !$acc exit data copyout(flux_up,flux_dn) async - !$acc exit data delete(Ds,weights,tau,lay_source,lev_source_inc,lev_source_dec,sfc_emis,sfc_src,radn_dn,radn_up,Ds_ncol) async - - !$acc wait + !$acc exit data copyout(flux_up,flux_dn) + !$acc exit data delete(Ds,weights,tau,lay_source,lev_source_inc,lev_source_dec,sfc_emis,sfc_src,radn_dn,radn_up,Ds_ncol,flux_top) end subroutine lw_solver_noscat_GaussQuad ! ------------------------------------------------------------------------------------------------- ! @@ -494,7 +492,7 @@ subroutine lw_source_noscat(ncol, nlay, ngpt, lay_source, lev_source_up, lev_sou real(wp), parameter :: tau_thresh = sqrt(epsilon(tau)) ! --------------------------------------------------------------- ! --------------------------------------------------------------- - !$acc parallel loop collapse(3) async + !$acc parallel loop collapse(3) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -545,7 +543,7 @@ subroutine lw_transport_noscat(ncol, nlay, ngpt, top_at_1, & ! ! Top of domain is index 1 ! - !$acc parallel loop collapse(2) async + !$acc parallel loop collapse(2) do igpt = 1, ngpt do icol = 1, ncol ! Downward propagation @@ -566,7 +564,7 @@ subroutine lw_transport_noscat(ncol, nlay, ngpt, top_at_1, & ! ! Top of domain is index nlay+1 ! - !$acc parallel loop collapse(2) async + !$acc parallel loop collapse(2) do igpt = 1, ngpt do icol = 1, ncol ! Downward propagation @@ -1104,21 +1102,20 @@ subroutine apply_BC_gpt(ncol, nlay, ngpt, top_at_1, inc_flux, flux_dn) bind (C, ! -------------- ! Upper boundary condition if(top_at_1) then - !$acc parallel loop collapse(2) async + !$acc parallel loop collapse(2) do igpt = 1, ngpt do icol = 1, ncol flux_dn(icol, 1, igpt) = inc_flux(icol,igpt) end do end do else - !$acc parallel loop collapse(2) async + !$acc parallel loop collapse(2) do igpt = 1, ngpt do icol = 1, ncol flux_dn(icol, nlay+1, igpt) = inc_flux(icol,igpt) end do end do end if - !$acc wait end subroutine apply_BC_gpt ! --------------------- subroutine apply_BC_factor(ncol, nlay, ngpt, top_at_1, inc_flux, factor, flux_dn) bind (C, name="apply_BC_factor") @@ -1134,21 +1131,20 @@ subroutine apply_BC_factor(ncol, nlay, ngpt, top_at_1, inc_flux, factor, flux_dn ! Upper boundary condition if(top_at_1) then - !$acc parallel loop collapse(2) async + !$acc parallel loop collapse(2) do igpt = 1, ngpt do icol = 1, ncol flux_dn(icol, 1, igpt) = inc_flux(icol,igpt) * factor(icol) end do end do else - !$acc parallel loop collapse(2) async + !$acc parallel loop collapse(2) do igpt = 1, ngpt do icol = 1, ncol flux_dn(icol, nlay+1, igpt) = inc_flux(icol,igpt) * factor(icol) end do end do end if - !$acc wait end subroutine apply_BC_factor ! --------------------- subroutine apply_BC_0(ncol, nlay, ngpt, top_at_1, flux_dn) bind (C, name="apply_BC_0") @@ -1162,21 +1158,20 @@ subroutine apply_BC_0(ncol, nlay, ngpt, top_at_1, flux_dn) bind (C, name="apply_ ! Upper boundary condition if(top_at_1) then - !$acc parallel loop collapse(2) async + !$acc parallel loop collapse(2) do igpt = 1, ngpt do icol = 1, ncol flux_dn(icol, 1, igpt) = 0._wp end do end do else - !$acc parallel loop collapse(2) async + !$acc parallel loop collapse(2) do igpt = 1, ngpt do icol = 1, ncol flux_dn(icol, nlay+1, igpt) = 0._wp end do end do end if - !$acc wait end subroutine apply_BC_0 end module mo_rte_solver_kernels diff --git a/rte/kernels-openacc/mo_util_array.F90 b/rte/kernels-openacc/mo_util_array.F90 deleted file mode 100644 index 37efadd37..000000000 --- a/rte/kernels-openacc/mo_util_array.F90 +++ /dev/null @@ -1,73 +0,0 @@ -! This code is part of Radiative Transfer for Energetics (RTE) -! -! Contacts: Robert Pincus and Eli Mlawer -! email: rrtmgp@aer.com -! -! Copyright 2015-2019, Atmospheric and Environmental Research and -! Regents of the University of Colorado. All right reserved. -! -! Use and duplication is permitted under the terms of the -! BSD 3-clause license, see http://opensource.org/licenses/BSD-3-Clause -! ------------------------------------------------------------------------------------------------- -module mo_util_array -! -! This module provide utilites for sanitizing input arrays: -! checking values and sizes -! These are in a module so code can be written for both CPUs and GPUs -! Used only by Fortran classes so routines don't need C bindings and can use assumed-shape -! Currently only for 3D arrays; could extend through overloading to other ranks -! -! - use mo_rte_kind, only: wp - implicit none - private - public :: any_vals_less_than, any_vals_outside -contains -!------------------------------------------------------------------------------------------------- - logical function any_vals_less_than(array, minVal) - real(wp), dimension(:,:,:), intent(in) :: array - real(wp), intent(in) :: minVal - - real(wp) :: minValue - integer :: i, j, k - - ! This could be written far more compactly as - ! any_vals_less_than = any(array < minVal) - ! but an explicit loop also works on GPUs - minValue = minVal - !$acc parallel loop collapse(3) copyin(array) reduction(min:minValue) - do k = 1, size(array,3) - do j = 1, size(array,2) - do i = 1, size(array,1) - minValue = min(array(i,j,k), minValue) - end do - end do - end do - any_vals_less_than = (minValue < minVal) - end function any_vals_less_than - ! --------------------------------- - logical function any_vals_outside(array, minVal, maxVal) - real(wp), dimension(:,:,:), intent(in) :: array - real(wp), intent(in) :: minVal, maxVal - - real(wp) :: minValue, maxValue - integer :: i, j, k - - ! This could be written far more compactly as - ! any_vals_outside = any(array < minVal .or. array > maxVal) - ! but an explicit loop also works on GPUs - minValue = minVal - maxValue = maxVal - !$acc parallel loop collapse(3) copyin(array) reduction(min:minValue) reduction(max:maxValue) - do k = 1, size(array,3) - do j = 1, size(array,2) - do i = 1, size(array,1) - minValue = min(array(i,j,k), minValue) - maxValue = max(array(i,j,k), minValue) - end do - end do - end do - any_vals_outside = (minValue < minVal .or. maxValue > maxVal) - end function any_vals_outside -! --------------------------------- -end module mo_util_array diff --git a/rte/kernels/mo_fluxes_broadband_kernels.F90 b/rte/kernels/mo_fluxes_broadband_kernels.F90 index b8206e0e8..cfc3bb7a0 100644 --- a/rte/kernels/mo_fluxes_broadband_kernels.F90 +++ b/rte/kernels/mo_fluxes_broadband_kernels.F90 @@ -35,18 +35,23 @@ pure subroutine sum_broadband(ncol, nlev, ngpt, spectral_flux, broadband_flux) b integer :: icol, ilev, igpt + !$acc enter data copyin(spectral_flux) create(broadband_flux) + !$acc parallel loop collapse(2) do ilev = 1, nlev do icol = 1, ncol broadband_flux(icol, ilev) = spectral_flux(icol, ilev, 1) end do end do + !$acc parallel loop collapse(3) do igpt = 2, ngpt do ilev = 1, nlev do icol = 1, ncol + !$acc atomic update broadband_flux(icol, ilev) = broadband_flux(icol, ilev) + spectral_flux(icol, ilev, igpt) end do end do end do + !$acc exit data delete(spectral_flux) copyout(broadband_flux) end subroutine sum_broadband ! ---------------------------------------------------------------------------- ! @@ -58,33 +63,48 @@ pure subroutine net_broadband_full(ncol, nlev, ngpt, spectral_flux_dn, spectral_ real(wp), dimension(ncol, nlev, ngpt), intent(in ) :: spectral_flux_dn, spectral_flux_up real(wp), dimension(ncol, nlev), intent(out) :: broadband_flux_net - integer :: icol, ilev, igpt + integer :: icol, ilev, igpt + real(wp) :: diff + !$acc enter data copyin(spectral_flux_dn, spectral_flux_up) create(broadband_flux_net) + !$acc parallel loop collapse(2) do ilev = 1, nlev do icol = 1, ncol - broadband_flux_net(icol, ilev) = spectral_flux_dn(icol, ilev, 1 ) - spectral_flux_up(icol, ilev, 1) + diff = spectral_flux_dn(icol, ilev, 1 ) - spectral_flux_up(icol, ilev, 1) + broadband_flux_net(icol, ilev) = diff end do end do + !$acc parallel loop collapse(3) do igpt = 2, ngpt do ilev = 1, nlev do icol = 1, ncol - broadband_flux_net(icol, ilev) = broadband_flux_net(icol, ilev) + & - spectral_flux_dn(icol, ilev, igpt) - spectral_flux_up(icol, ilev, igpt) + diff = spectral_flux_dn(icol, ilev, igpt) - spectral_flux_up(icol, ilev, igpt) + !$acc atomic update + broadband_flux_net(icol, ilev) = broadband_flux_net(icol, ilev) + diff end do end do end do + !$acc exit data delete(spectral_flux_dn, spectral_flux_up) copyout(broadband_flux_net) end subroutine net_broadband_full ! ---------------------------------------------------------------------------- ! ! Net flux when bradband flux up and down are already available ! - pure subroutine net_broadband_precalc(ncol, nlay, flux_dn, flux_up, broadband_flux_net) & + pure subroutine net_broadband_precalc(ncol, nlev, flux_dn, flux_up, broadband_flux_net) & bind(C, name="net_broadband_precalc") - integer, intent(in ) :: ncol, nlay - real(wp), dimension(ncol, nlay), intent(in ) :: flux_dn, flux_up - real(wp), dimension(ncol, nlay), intent(out) :: broadband_flux_net + integer, intent(in ) :: ncol, nlev + real(wp), dimension(ncol, nlev), intent(in ) :: flux_dn, flux_up + real(wp), dimension(ncol, nlev), intent(out) :: broadband_flux_net - broadband_flux_net(1:ncol,1:nlay) = flux_dn(1:ncol,1:nlay) - flux_up(1:ncol,1:nlay) + integer :: icol, ilev + !$acc enter data copyin(flux_dn, flux_up) create(broadband_flux_net) + !$acc parallel loop collapse(2) + do ilev = 1, nlev + do icol = 1, ncol + broadband_flux_net(icol,ilev) = flux_dn(icol,ilev) - flux_up(icol,ilev) + end do + end do + !$acc exit data delete(flux_dn, flux_up) copyout(broadband_flux_net) end subroutine net_broadband_precalc ! ---------------------------------------------------------------------------- end module mo_fluxes_broadband_kernels diff --git a/rte/kernels/mo_util_array.F90 b/rte/kernels/mo_util_array.F90 deleted file mode 100644 index e9c374490..000000000 --- a/rte/kernels/mo_util_array.F90 +++ /dev/null @@ -1,40 +0,0 @@ -! This code is part of Radiative Transfer for Energetics (RTE) -! -! Contacts: Robert Pincus and Eli Mlawer -! email: rrtmgp@aer.com -! -! Copyright 2015-2019, Atmospheric and Environmental Research and -! Regents of the University of Colorado. All right reserved. -! -! Use and duplication is permitted under the terms of the -! BSD 3-clause license, see http://opensource.org/licenses/BSD-3-Clause -! ------------------------------------------------------------------------------------------------- -module mo_util_array -! -! This module provide utilites for sanitizing input arrays: -! checking values and sizes -! These are in a module so code can be written for both CPUs and GPUs -! Used only by Fortran classes so routines don't need C bindings and can use assumed-shape -! Currently only for 3D arrays; could extend through overloading to other ranks -! - use mo_rte_kind, only: wp - implicit none - private - public :: any_vals_less_than, any_vals_outside -contains -!------------------------------------------------------------------------------------------------- - logical function any_vals_less_than(array, minVal) - real(wp), dimension(:,:,:), intent(in) :: array - real(wp), intent(in) :: minVal - - any_vals_less_than = any(array < minVal) - end function any_vals_less_than - ! --------------------------------- - logical function any_vals_outside(array, minVal, maxVal) - real(wp), dimension(:,:,:), intent(in) :: array - real(wp), intent(in) :: minVal, maxVal - - any_vals_outside = any(array < minVal .or. array > maxVal) - end function any_vals_outside -! --------------------------------- -end module mo_util_array diff --git a/rte/mo_rte_lw.F90 b/rte/mo_rte_lw.F90 index dc0b24d93..9439f0b73 100644 --- a/rte/mo_rte_lw.F90 +++ b/rte/mo_rte_lw.F90 @@ -35,6 +35,7 @@ ! ------------------------------------------------------------------------------------------------- module mo_rte_lw use mo_rte_kind, only: wp, wl + use mo_util_array, only: any_vals_less_than, any_vals_outside use mo_optical_props, only: ty_optical_props, & ty_optical_props_arry, ty_optical_props_1scl, ty_optical_props_2str, ty_optical_props_nstr use mo_source_functions, & @@ -132,7 +133,7 @@ function rte_lw(optical_props, top_at_1, & ! if(any([size(sfc_emis,1), size(sfc_emis,2)] /= [nband, ncol])) & error_msg = "rte_lw: sfc_emis inconsistently sized" - if(any(sfc_emis < 0._wp .or. sfc_emis > 1._wp)) & + if(any_vals_outside(sfc_emis, 0._wp, 1._wp)) & error_msg = "rte_lw: sfc_emis has values < 0 or > 1" if(len_trim(error_msg) > 0) return @@ -142,7 +143,7 @@ function rte_lw(optical_props, top_at_1, & if(present(inc_flux)) then if(any([size(inc_flux,1), size(inc_flux,2)] /= [ncol, ngpt])) & error_msg = "rte_lw: inc_flux inconsistently sized" - if(any(inc_flux < 0._wp)) & + if(any_vals_less_than(inc_flux, 0._wp)) & error_msg = "rte_lw: inc_flux has values < 0" end if if(len_trim(error_msg) > 0) return @@ -174,8 +175,8 @@ function rte_lw(optical_props, top_at_1, & ! allocate(gpt_flux_up (ncol, nlay+1, ngpt), gpt_flux_dn(ncol, nlay+1, ngpt)) allocate(sfc_emis_gpt(ncol, ngpt)) - !$acc enter data copyin(sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source) - !$acc enter data copyin(gauss_Ds, gauss_wts) + !!$acc enter data copyin(sources, sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source) + !$acc enter data copyin(optical_props) !$acc enter data create(gpt_flux_dn, gpt_flux_up) !$acc enter data create(sfc_emis_gpt) call expand_and_transpose(optical_props, sfc_emis, sfc_emis_gpt) @@ -202,6 +203,8 @@ function rte_lw(optical_props, top_at_1, & ! No scattering two-stream calculation ! !$acc enter data copyin(optical_props%tau) + error_msg = optical_props%validate() + if(len_trim(error_msg) > 0) return call lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, logical(top_at_1, wl), & n_quad_angs, gauss_Ds(1:n_quad_angs,n_quad_angs), gauss_wts(1:n_quad_angs,n_quad_angs), & optical_props%tau, & @@ -214,6 +217,8 @@ function rte_lw(optical_props, top_at_1, & ! two-stream calculation with scattering ! !$acc enter data copyin(optical_props%tau, optical_props%ssa, optical_props%g) + error_msg = optical_props%validate() + if(len_trim(error_msg) > 0) return call lw_solver_2stream(ncol, nlay, ngpt, logical(top_at_1, wl), & optical_props%tau, optical_props%ssa, optical_props%g, & sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, & @@ -232,9 +237,10 @@ function rte_lw(optical_props, top_at_1, & ! ...and reduce spectral fluxes to desired output quantities ! error_msg = fluxes%reduce(gpt_flux_up, gpt_flux_dn, optical_props, top_at_1) - !$acc exit data delete(sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source) - !$acc exit data delete(sfc_emis_gpt, gauss_Ds, gauss_wts) + !$acc exit data delete(sfc_emis_gpt) !$acc exit data delete(gpt_flux_up,gpt_flux_dn) + !$acc exit data delete(optical_props) + !!$acc exit data delete(sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source,sources) end function rte_lw !-------------------------------------------------------------------------------------------------------------------- ! diff --git a/rte/mo_rte_sw.F90 b/rte/mo_rte_sw.F90 index 0a5aec0bd..bb69a37d9 100644 --- a/rte/mo_rte_sw.F90 +++ b/rte/mo_rte_sw.F90 @@ -29,6 +29,7 @@ ! ------------------------------------------------------------------------------------------------- module mo_rte_sw use mo_rte_kind, only: wp, wl + use mo_util_array, only: any_vals_less_than, any_vals_outside use mo_optical_props, only: ty_optical_props, & ty_optical_props_arry, ty_optical_props_1scl, ty_optical_props_2str, ty_optical_props_nstr use mo_fluxes, only: ty_fluxes @@ -87,35 +88,29 @@ function rte_sw(atmos, top_at_1, & ! if( size(mu0, 1) /= ncol ) & error_msg = "rte_sw: mu0 inconsistently sized" - if(any(mu0 <= 0._wp .or. mu0 > 1)) & + if(any_vals_outside(mu0, 0._wp, 1._wp)) & error_msg = "rte_sw: one or more mu0 <= 0 or > 1" if(any([size(inc_flux, 1), size(inc_flux, 2)] /= [ncol, ngpt])) & error_msg = "rte_sw: inc_flux inconsistently sized" - if(any(inc_flux < 0._wp)) & + if(any_vals_less_than(inc_flux, 0._wp)) & error_msg = "rte_sw: one or more inc_flux < 0" if(present(inc_flux_dif)) then if(any([size(inc_flux_dif, 1), size(inc_flux_dif, 2)] /= [ncol, ngpt])) & error_msg = "rte_sw: inc_flux_dif inconsistently sized" - if(any(inc_flux_dif < 0._wp)) & + if(any_vals_less_than(inc_flux_dif, 0._wp)) & error_msg = "rte_sw: one or more inc_flux_dif < 0" end if if(any([size(sfc_alb_dir, 1), size(sfc_alb_dir, 2)] /= [nband, ncol])) & error_msg = "rte_sw: sfc_alb_dir inconsistently sized" - if(any(sfc_alb_dir < 0._wp .or. sfc_alb_dir > 1._wp)) & + if(any_vals_outside(sfc_alb_dir, 0._wp, 1._wp)) & error_msg = "rte_sw: sfc_alb_dir out of bounds [0,1]" if(any([size(sfc_alb_dif, 1), size(sfc_alb_dif, 2)] /= [nband, ncol])) & error_msg = "rte_sw: sfc_alb_dif inconsistently sized" - if(any(sfc_alb_dif < 0._wp .or. sfc_alb_dif > 1._wp)) & + if(any_vals_outside(sfc_alb_dif, 0._wp, 1._wp)) & error_msg = "rte_sw: sfc_alb_dif out of bounds [0,1]" - if(len_trim(error_msg) > 0) return - - ! - ! Ensure values of tau, ssa, and g are reasonable - ! - error_msg = atmos%validate() if(len_trim(error_msg) > 0) then if(len_trim(atmos%get_name()) > 0) & error_msg = trim(atmos%get_name()) // ': ' // trim(error_msg) @@ -161,6 +156,8 @@ function rte_sw(atmos, top_at_1, & ! Direct beam only ! !$acc enter data copyin(atmos, atmos%tau) + error_msg = atmos%validate() + if(len_trim(error_msg) > 0) return call sw_solver_noscat(ncol, nlay, ngpt, logical(top_at_1, wl), & atmos%tau, mu0, & gpt_flux_dir) @@ -175,6 +172,8 @@ function rte_sw(atmos, top_at_1, & ! two-stream calculation with scattering ! !$acc enter data copyin(atmos, atmos%tau, atmos%ssa, atmos%g) + error_msg = atmos%validate() + if(len_trim(error_msg) > 0) return call sw_solver_2stream(ncol, nlay, ngpt, logical(top_at_1, wl), & atmos%tau, atmos%ssa, atmos%g, mu0, & sfc_alb_dir_gpt, sfc_alb_dif_gpt, & @@ -189,7 +188,11 @@ function rte_sw(atmos, top_at_1, & ! error_msg = 'sw_solver(...ty_optical_props_nstr...) not yet implemented' end select - if (error_msg /= '') return + if(len_trim(error_msg) > 0) then + if(len_trim(atmos%get_name()) > 0) & + error_msg = trim(atmos%get_name()) // ': ' // trim(error_msg) + return + end if ! ! ...and reduce spectral fluxes to desired output quantities ! diff --git a/rte/mo_util_array.F90 b/rte/mo_util_array.F90 index 37efadd37..28e8f254d 100644 --- a/rte/mo_util_array.F90 +++ b/rte/mo_util_array.F90 @@ -15,47 +15,151 @@ module mo_util_array ! checking values and sizes ! These are in a module so code can be written for both CPUs and GPUs ! Used only by Fortran classes so routines don't need C bindings and can use assumed-shape -! Currently only for 3D arrays; could extend through overloading to other ranks -! ! use mo_rte_kind, only: wp implicit none + interface any_vals_less_than + module procedure any_vals_less_than_1D, any_vals_less_than_2D, any_vals_less_than_3D + end interface + interface any_vals_outside + module procedure any_vals_outside_1D, any_vals_outside_2D, any_vals_outside_3D + end interface + interface zero_array + module procedure zero_array_1D, zero_array_3D, zero_array_4D + end interface + private - public :: any_vals_less_than, any_vals_outside + public :: zero_array, any_vals_less_than, any_vals_outside contains + !------------------------------------------------------------------------------------------------- + ! Values less than a floor + !------------------------------------------------------------------------------------------------- + logical function any_vals_less_than_1D(array, minVal) + real(wp), dimension(:), intent(in) :: array + real(wp), intent(in) :: minVal + +#ifdef _OPENACC + ! Compact version using intrinsics below + ! but an explicit loop is the only current solution on GPUs + real(wp) :: minValue + integer :: i + + minValue = minVal + !$acc parallel loop copyin(array) reduction(min:minValue) + do i = 1, size(array,1) + minValue = min(array(i), minValue) + end do + any_vals_less_than_1D = (minValue < minVal) +#else + any_vals_less_than_1D = any(array < minVal) +#endif +end function any_vals_less_than_1D + !------------------------------------------------------------------------------------------------- + logical function any_vals_less_than_2D(array, minVal) + real(wp), dimension(:,:), intent(in) :: array + real(wp), intent(in) :: minVal + +#ifdef _OPENACC + ! Compact version using intrinsics below + ! but an explicit loop is the only current solution on GPUs + real(wp) :: minValue + integer :: i, j + + minValue = minVal + !$acc parallel loop collapse(2) copyin(array) reduction(min:minValue) + do j = 1, size(array,2) + do i = 1, size(array,1) + minValue = min(array(i,j), minValue) + end do + end do + any_vals_less_than_2D = (minValue < minVal) +#else + any_vals_less_than_2D = any(array < minVal) +#endif + end function any_vals_less_than_2D !------------------------------------------------------------------------------------------------- - logical function any_vals_less_than(array, minVal) + logical function any_vals_less_than_3D(array, minVal) real(wp), dimension(:,:,:), intent(in) :: array real(wp), intent(in) :: minVal - real(wp) :: minValue - integer :: i, j, k +#ifdef _OPENACC + ! Compact version using intrinsics below + ! but an explicit loop is the only current solution on GPUs + real(wp) :: minValue + integer :: i, j, k - ! This could be written far more compactly as - ! any_vals_less_than = any(array < minVal) - ! but an explicit loop also works on GPUs + minValue = minVal + !$acc parallel loop collapse(3) copyin(array) reduction(min:minValue) + do k = 1, size(array,3) + do j = 1, size(array,2) + do i = 1, size(array,1) + minValue = min(array(i,j,k), minValue) + end do + end do + end do + any_vals_less_than_3D = (minValue < minVal) +#else + any_vals_less_than_3D = any(array < minVal) +#endif + end function any_vals_less_than_3D + !------------------------------------------------------------------------------------------------- + ! Values outside a range + !------------------------------------------------------------------------------------------------- + logical function any_vals_outside_1D(array, minVal, maxVal) + real(wp), dimension(:), intent(in) :: array + real(wp), intent(in) :: minVal, maxVal + +#ifdef _OPENACC + ! Compact version using intrinsics below + ! but an explicit loop is the only current solution on GPUs + real(wp) :: minValue, maxValue + integer :: i minValue = minVal - !$acc parallel loop collapse(3) copyin(array) reduction(min:minValue) - do k = 1, size(array,3) - do j = 1, size(array,2) - do i = 1, size(array,1) - minValue = min(array(i,j,k), minValue) - end do + maxValue = maxVal + !$acc parallel loop copyin(array) reduction(min:minValue) reduction(max:maxValue) + do i = 1, size(array,1) + minValue = min(array(i), minValue) + maxValue = max(array(i), maxValue) + end do + any_vals_outside_1D = (minValue < minVal .or. maxValue > maxVal) +#else + any_vals_outside_1D = any(array < minVal .or. array > maxVal) +#endif + end function any_vals_outside_1D +! ---------------------------------------------------------- + logical function any_vals_outside_2D(array, minVal, maxVal) + real(wp), dimension(:,:), intent(in) :: array + real(wp), intent(in) :: minVal, maxVal + +#ifdef _OPENACC + ! Compact version using intrinsics below + ! but an explicit loop is the only current solution on GPUs + real(wp) :: minValue, maxValue + integer :: i, j + minValue = minVal + maxValue = maxVal + !$acc parallel loop collapse(2) copyin(array) reduction(min:minValue) reduction(max:maxValue) + do j = 1, size(array,2) + do i = 1, size(array,1) + minValue = min(array(i,j), minValue) + maxValue = max(array(i,j), maxValue) end do end do - any_vals_less_than = (minValue < minVal) - end function any_vals_less_than - ! --------------------------------- - logical function any_vals_outside(array, minVal, maxVal) + any_vals_outside_2D = (minValue < minVal .or. maxValue > maxVal) +#else + any_vals_outside_2D = any(array < minVal .or. array > maxVal) +#endif + end function any_vals_outside_2D +! ---------------------------------------------------------- + logical function any_vals_outside_3D(array, minVal, maxVal) real(wp), dimension(:,:,:), intent(in) :: array real(wp), intent(in) :: minVal, maxVal +#ifdef _OPENACC + ! Compact version using intrinsics below + ! but an explicit loop is the only current solution on GPUs real(wp) :: minValue, maxValue integer :: i, j, k - - ! This could be written far more compactly as - ! any_vals_outside = any(array < minVal .or. array > maxVal) - ! but an explicit loop also works on GPUs minValue = minVal maxValue = maxVal !$acc parallel loop collapse(3) copyin(array) reduction(min:minValue) reduction(max:maxValue) @@ -63,11 +167,64 @@ logical function any_vals_outside(array, minVal, maxVal) do j = 1, size(array,2) do i = 1, size(array,1) minValue = min(array(i,j,k), minValue) - maxValue = max(array(i,j,k), minValue) + maxValue = max(array(i,j,k), maxValue) + end do + end do + end do + any_vals_outside_3D = (minValue < minVal .or. maxValue > maxVal) +#else + any_vals_outside_3D = any(array < minVal .or. array > maxVal) +#endif + end function any_vals_outside_3D + !------------------------------------------------------------------------------------------------- + ! Initializing arrays to 0 + !------------------------------------------------------------------------------------------------- + subroutine zero_array_1D(ni, array) bind(C, name="zero_array_1D") + integer, intent(in ) :: ni + real(wp), dimension(ni), intent(out) :: array + ! ----------------------- + integer :: i + ! ----------------------- + !$acc parallel loop copyout(array) + do i = 1, ni + array(i) = 0.0_wp + end do + end subroutine zero_array_1D + ! ---------------------------------------------------------- + subroutine zero_array_3D(ni, nj, nk, array) bind(C, name="zero_array_3D") + integer, intent(in ) :: ni, nj, nk + real(wp), dimension(ni, nj, nk), intent(out) :: array + ! ----------------------- + integer :: i,j,k + ! ----------------------- + !$acc parallel loop collapse(3) copyout(array) + do k = 1, nk + do j = 1, nj + do i = 1, ni + array(i,j,k) = 0.0_wp end do end do end do - any_vals_outside = (minValue < minVal .or. maxValue > maxVal) - end function any_vals_outside -! --------------------------------- + + end subroutine zero_array_3D + ! ---------------------------------------------------------- + subroutine zero_array_4D(ni, nj, nk, nl, array) bind(C, name="zero_array_4D") + integer, intent(in ) :: ni, nj, nk, nl + real(wp), dimension(ni, nj, nk, nl), intent(out) :: array + ! ----------------------- + integer :: i,j,k,l + ! ----------------------- + !$acc parallel loop collapse(4) copyout(array) + do l = 1, nl + do k = 1, nk + do j = 1, nj + do i = 1, ni + array(i,j,k,l) = 0.0_wp + end do + end do + end do + end do + + end subroutine zero_array_4D +! ---------------------------------------------------------- end module mo_util_array