From aca56b723e8c35d0fc0166c25ead3c906530cf72 Mon Sep 17 00:00:00 2001 From: Robert Pincus Date: Wed, 14 Apr 2021 23:22:50 -0400 Subject: [PATCH] Streamlined LW calculations, OpenMP GPU support, small efficiency changes, updates to continuous integration (#110) OpenMP GPU offload, tested with CCE 11.0.0. Longwave Jacobians aren't computed if the optional arguments aren't provided. Finalization methods for optical properties arrays are introduced. Make is streamlined to use environment variables and omit config files. Small changes for efficiency and robustness. Continuous integration is evolved; includes gfortran 9 and 10 on Github actions; current Nvidia and Intel HPC compilers in containers; Cray CPU and OpenMP GPU and PGI 20 OpenACC. Co-authored-by: Nichols Romero Co-authored-by: Dustin Swales Co-authored-by: Peter Ukkonen Co-authored-by: Chiel van Heerwaarden --- .github/workflows/containerized-ci.yml | 77 ++ .github/workflows/continuous-integration.yml | 142 +++ .github/workflows/main.yml | 110 --- Compiler-flags.md | 30 + Makefile | 27 + README.md | 14 +- azure-pipelines.yml | 72 +- build/Makefile | 4 +- build/Makefile.conf.gfortran | 17 - build/Makefile.conf.ifort | 18 - build/Makefile.conf.nagfor | 12 - build/Makefile.conf.olcf_pgi | 19 - build/Makefile.conf.pgfortran | 12 - build/Makefile.conf.pgfortran-cscs | 36 - build/Makefile.conf.pgfortran-cscs-gpu | 36 - examples/all-sky/Makefile | 18 +- examples/all-sky/README.md | 5 +- examples/all-sky/mo_garand_atmos_io.F90 | 0 examples/all-sky/rrtmgp_allsky.F90 | 812 ++++++++-------- examples/rfmip-clear-sky/Makefile | 44 +- examples/rfmip-clear-sky/Makefile.libs.macos | 10 - examples/rfmip-clear-sky/Makefile.libs.olcf | 10 - .../Makefile.libs.pgfortran-cscs | 20 - examples/rfmip-clear-sky/Readme.md | 6 +- .../rfmip-clear-sky/compare-to-reference.py | 12 + examples/rfmip-clear-sky/rrtmgp_rfmip_lw.F90 | 11 +- examples/rfmip-clear-sky/rrtmgp_rfmip_sw.F90 | 17 +- examples/rfmip-clear-sky/stage_files.sh | 5 + 
extensions/cloud_optics/mo_cloud_optics.F90 | 35 +- extensions/mo_fluxes_byband.F90 | 2 + extensions/mo_fluxes_byband_kernels.F90 | 8 +- rrtmgp/Make.depends | 2 +- .../kernels-openacc/mo_gas_optics_kernels.F90 | 110 ++- rrtmgp/kernels/mo_gas_optics_kernels.F90 | 17 +- .../mo_rrtmgp_util_reorder_kernels.F90 | 2 + rrtmgp/mo_gas_concentrations.F90 | 86 ++ rrtmgp/mo_gas_optics_rrtmgp.F90 | 72 +- .../mo_optical_props_kernels.F90 | 111 ++- rte/kernels-openacc/mo_rte_solver_kernels.F90 | 917 +++++++----------- rte/kernels/mo_fluxes_broadband_kernels.F90 | 17 +- rte/kernels/mo_rte_solver_kernels.F90 | 589 ++++------- rte/mo_optical_props.F90 | 69 +- rte/mo_rte_lw.F90 | 149 ++- rte/mo_rte_sw.F90 | 15 + rte/mo_rte_util_array.F90 | 65 ++ tests/Makefile | 26 +- tests/validation-plots.py | 6 +- 47 files changed, 2021 insertions(+), 1873 deletions(-) create mode 100644 .github/workflows/containerized-ci.yml create mode 100644 .github/workflows/continuous-integration.yml delete mode 100644 .github/workflows/main.yml create mode 100644 Compiler-flags.md create mode 100644 Makefile delete mode 100644 build/Makefile.conf.gfortran delete mode 100644 build/Makefile.conf.ifort delete mode 100644 build/Makefile.conf.nagfor delete mode 100755 build/Makefile.conf.olcf_pgi delete mode 100644 build/Makefile.conf.pgfortran delete mode 100644 build/Makefile.conf.pgfortran-cscs delete mode 100644 build/Makefile.conf.pgfortran-cscs-gpu mode change 100755 => 100644 examples/all-sky/Makefile mode change 100755 => 100644 examples/all-sky/mo_garand_atmos_io.F90 mode change 100755 => 100644 examples/all-sky/rrtmgp_allsky.F90 delete mode 100644 examples/rfmip-clear-sky/Makefile.libs.macos delete mode 100644 examples/rfmip-clear-sky/Makefile.libs.olcf delete mode 100644 examples/rfmip-clear-sky/Makefile.libs.pgfortran-cscs create mode 100644 examples/rfmip-clear-sky/stage_files.sh mode change 100755 => 100644 rrtmgp/mo_gas_concentrations.F90 diff --git a/.github/workflows/containerized-ci.yml 
b/.github/workflows/containerized-ci.yml new file mode 100644 index 000000000..ec473d15d --- /dev/null +++ b/.github/workflows/containerized-ci.yml @@ -0,0 +1,77 @@ +name: Continuous integration in a box +on: [push, pull_request] + +jobs: + Containerized-CI: + runs-on: ubuntu-20.04 + strategy: + matrix: + rte-kernels: [default, openacc] + container: ["earthsystemradiation/rte-rrtmgp-ci:ifort","earthsystemradiation/rte-rrtmgp-ci:nvfortran"] + container: + image: ${{ matrix.container }} + env: + NCHOME: /home/runner/netcdf-c + NFHOME: /home/runner/netcdf-fortran + RFMIP_DIR: /home/runner/rfmip-files + steps: + ############################################################################ + # Checks out repository under $GITHUB_WORKSPACE + - name: Check out code + uses: actions/checkout@v2 + - name: Environmental variables + # This might be able to be set in the ENV section above + run: echo "RRTMGP_ROOT=${GITHUB_WORKSPACE}" >> $GITHUB_ENV + - name: Environmental variables - ifort + if: contains(matrix.container, 'ifort') + run: echo "FCFLAGS=-m64 -g -traceback -heap-arrays -assume realloc_lhs -extend-source 132 -check bounds,uninit,pointers,stack -stand f08" >> $GITHUB_ENV + - name: Environmental variables - nvfortran + if: contains(matrix.container, 'nvfortran') + run: echo "FCFLAGS=-Mallocatable=03 -Mstandard -Mbounds -Mchkptr -Kieee -Mchkstk" >> $GITHUB_ENV + + - name: Make library, examples, tests + shell: bash + env: + RTE_KERNELS: ${{ matrix.rte-kernels }} + run: | + source /opt/intel/oneapi/setvars.sh || true + cd ${RRTMGP_ROOT} + ${FC} --version + make libs + ############################################################################ + - name: Cache RFMIP files + id: cache-rfmip-files + uses: actions/cache@v2 + with: + path: /home/runner/rfmip-files # Same as #{RFMIP_DIR} + key: rfmip-files + + - name: Stage RFMIP files + if: steps.cache-rfmip-files.outputs.cache-hit != 'true' + run: | + mkdir -p ${RFMIP_DIR} + cd ${RFMIP_DIR} + python 
${RRTMGP_ROOT}/examples/rfmip-clear-sky/stage_files.py + ############################################################################ + - name: Run examples, tests + shell: bash + env: + LD_LIBRARY_PATH: /home/runner/netcdf-c/lib + run: | + source /opt/intel/oneapi/setvars.sh || true + export LD_LIBRARY_PATH=${NFHOME}/lib:${LD_LIBRARY_PATH} + make tests + - name: Comparison + run: make check + ############################################################################ + - name: Validation plots + if: contains(matrix.container, 'ifort') && contains(matrix.rte-kernels, 'default') + run: | + cd ${RRTMGP_ROOT}/tests + python validation-plots.py + - name: Upload plots + if: contains(matrix.container, 'ifort') && contains(matrix.rte-kernels, 'default') + uses: actions/upload-artifact@v2 + with: + name: validation-plot + path: tests/validation-figures.pdf diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml new file mode 100644 index 000000000..674e217ff --- /dev/null +++ b/.github/workflows/continuous-integration.yml @@ -0,0 +1,142 @@ +name: Continuous Integration +on: [push, pull_request] + +jobs: + CI: + runs-on: ubuntu-20.04 + strategy: + matrix: + fortran-compiler: [gfortran-9, gfortran-10] + rte-kernels: [default, openacc] + env: + FC: ${{ matrix.fortran-compiler }} + FCFLAGS: "-ffree-line-length-none -m64 -std=f2008 -march=native -fbounds-check -finit-real=nan -g -DUSE_CBOOL" + CC: gcc + NCHOME: /home/runner/netcdf-c + NFHOME: /home/runner/netcdf-fortran + RFMIP_DIR: /home/runner/rfmip-files + steps: + - name: Update system packages + run: sudo apt-get update + ############################################################################ + # + # Compilers.... 
+ # + # Gfortran 10 not available in Github CI stack, so install + # + - name: gfortran-10 setup compiler + if: contains(matrix.fortran-compiler, 'gfortran-10') + run: | + sudo apt-get install gfortran-10 gcc-10 + echo "CC=gcc-10" >> $GITHUB_ENV + + ############################################################################ + # + # Netcdf C and Fortran + # + - name: Install HDF5 library + run: | + sudo apt-get install libhdf5-dev libcurl4-gnutls-dev hdf5-helpers + dpkg -L libhdf5-dev + + # Skipping this for now - netCDF configure doesn't see the HDF libararies + - name: cache-netcdf-c + id: cache-netcdf-c + uses: actions/cache@v2 + with: + path: /home/runner/netcdf-c + key: netcdf-c-4.7.4a-${{ runner.os }}-${{ matrix.fortran-compiler }} + + - name: Install netcdf C library from source + if: steps.cache-netcdf-c.outputs.cache-hit != 'true' + env: + CPPFLAGS: -I/usr/include/hdf5/serial + LDFLAGS: -L/usr/lib/x86_64-linux-gnu/hdf5/serial/ + run: | + ${CC} --version + git clone https://github.com/Unidata/netcdf-c.git --branch v4.7.4 + cd netcdf-c + ls /usr/include + ./configure --prefix=${NCHOME} + make -j + sudo make install + + # Would be great to encode version info + - name: cache-netcdf-fortran + id: cache-netcdf-fortran + uses: actions/cache@v2 + with: + path: /home/runner/netcdf-fortran + key: netcdf-fortran-4.5.3-${{ runner.os }}-${{ matrix.fortran-compiler }} + + - name: Build NetCDF Fortran library + # Here too it would be nice to use the environment to specify netcdf-c location + env: + CPPFLAGS: -I/home/runner/netcdf-c/include + LDFLAGS: -L/home/runner/netcdf-c/lib + LD_LIBRARY_PATH: /home/runner/netcdf-c/lib + FCFLAGS: -fPIC + if: steps.cache-netcdf-fortran.outputs.cache-hit != 'true' + run: | + echo ${TEST} + ${FC} --version + git clone https://github.com/Unidata/netcdf-fortran.git --branch v4.5.3 + cd netcdf-fortran + echo ${CPPFLAGS} + ./configure --prefix=${NFHOME} + make -j + sudo make install + 
############################################################################ + # Checks out repository under $GITHUB_WORKSPACE + - name: Check out code + uses: actions/checkout@v2 + + - name: Environmental variables + run: echo "RRTMGP_ROOT=${GITHUB_WORKSPACE}" >> $GITHUB_ENV + + - name: Make library, examples, tests + env: + RTE_KERNELS: ${{ matrix.rte-kernels }} + run: | + cd ${RRTMGP_ROOT} + ${FC} --version + make libs + + ############################################################################ + # Set up Python and packages + # + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.7 + - name: Setup conda + uses: s-weigand/setup-conda@v1 + with: + python-version: 3.7 + - name: Install python packages + run: conda install --yes urllib3 netcdf4 xarray dask scipy matplotlib seaborn colorcet + ############################################################################ + - name: Cache RFMIP files + id: cache-rfmip-files + uses: actions/cache@v2 + with: + path: /home/runner/rfmip-files # Same as #{RFMIP_DIR} + key: rfmip-files + + - name: Stage RFMIP files + if: steps.cache-rfmip-files.outputs.cache-hit != 'true' + run: | + mkdir -p ${RFMIP_DIR} + cd ${RFMIP_DIR} + python ${RRTMGP_ROOT}/examples/rfmip-clear-sky/stage_files.py + ############################################################################ + # Would be great to encode version info + - name: Run examples, tests + env: + LD_LIBRARY_PATH: /home/runner/netcdf-c/lib + run: | + export LD_LIBRARY_PATH=${NFHOME}/lib:${LD_LIBRARY_PATH} + make tests + - name: Comparison + run: | + make check diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index 7dcdfce66..000000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,110 +0,0 @@ -name: Continuous Integration -on: [push, pull_request] - -jobs: - CI: - runs-on: ubuntu-18.04 - strategy: - matrix: - fortran-compiler: [gfortran-8, gfortran-9, ifort] - rte-kernels: [default, openacc] - env: 
- FC: ${{ matrix.fortran-compiler }} - NCHOME: /usr - NFHOME: /usr - # I would like to use this variable within the run scripts - # i.e. to source files and change FCFLAGS, but can't figure out how - USING_IFORT: ${{ contains(matrix.fortran-compiler, 'ifort') }} - steps: - # - # Checks-out repository under $GITHUB_WORKSPACE - # - - uses: actions/checkout@v2 - # - # Set up Python and dependencies - # - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: 3.7 - - name: Setup conda - uses: s-weigand/setup-conda@v1 - with: - python-version: 3.7 - - name: Install python dependencies - run: conda install --yes urllib3 netcdf4 xarray dask scipy - # - # Install NetCDF library - # - - name: Install netcdf C library - run: sudo apt-get install libnetcdf-dev - # - # Intel compilers and libraries if needed - # https://software.intel.com/content/www/us/en/develop/articles/oneapi-repo-instructions.html#aptpkg - # - - name: Install Intel compilers libraries - if: contains(matrix.fortran-compiler, 'ifort') - run: | - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB - sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB - sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" - sudo apt-get update - sudo apt-get install intel-oneapi-common-licensing - sudo apt-get install intel-oneapi-common-vars - sudo apt-get install intel-oneapi-dev-utilities - sudo apt-get install intel-oneapi-ifort - # - # NetCDF FORTRAN library - # - - name: Build NetCDF Fortran library - run: | - if [[ -e /opt/intel/inteloneapi/setvars.sh ]]; then source /opt/intel/inteloneapi/setvars.sh; fi - git clone https://github.com/Unidata/netcdf-fortran.git --branch v4.4.4 - cd netcdf-fortran - ./configure --prefix=${NFHOME} F77=${FC} - make - sudo make install - - name: Stage RFMIP files - run: | - export RRTMGP_ROOT=${GITHUB_WORKSPACE} - cd ${RRTMGP_ROOT}/examples/rfmip-clear-sky - python ./stage_files.py - - name: Make library, 
examples, tests - # Compiler flags for gfortran 8 and 9. Over-ridden in run script if using ifort - env: - FCFLAGS: "-ffree-line-length-none -m64 -std=f2008 -march=native -fbounds-check -finit-real=nan -DUSE_CBOOL" - run: | - export RRTMGP_ROOT=${GITHUB_WORKSPACE} - export RTE_KERNELS=${{ matrix.rte-kernels }} - if [[ -e /opt/intel/inteloneapi/setvars.sh ]]; then - source /opt/intel/inteloneapi/setvars.sh; - export FCFLAGS="-m64 -g -traceback -heap-arrays -assume realloc_lhs -extend-source 132 -check bounds,uninit,pointers,stack -stand f08"; - fi - cd ${RRTMGP_ROOT} - ${FC} --version - make -C build -j 2 - make -C tests -j 1 - make -C examples/all-sky -j 2 - export RRTMGP_BUILD=${RRTMGP_ROOT}/build - make -C examples/rfmip-clear-sky -j 2 - - name: Run examples, tests - run: | - export RRTMGP_ROOT=${GITHUB_WORKSPACE} - if [[ -e /opt/intel/inteloneapi/setvars.sh ]]; then source /opt/intel/inteloneapi/setvars.sh; fi - cd ${RRTMGP_ROOT}/examples/rfmip-clear-sky - python ./run-rfmip-examples.py --block_size 8 - cd ${RRTMGP_ROOT}/examples/all-sky - python ./run-allsky-example.py - cd ${RRTMGP_ROOT}/tests - cp ${RRTMGP_ROOT}/examples/rfmip-clear-sky/multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc test_atmospheres.nc - ./clear_sky_regression test_atmospheres.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-lw-g256-2018-12-04.nc - ./clear_sky_regression test_atmospheres.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-sw-g224-2018-12-04.nc - - name: Comparison - run: | - export RRTMGP_ROOT=${GITHUB_WORKSPACE} - cd ${RRTMGP_ROOT}/examples/rfmip-clear-sky - python ./compare-to-reference.py --fail=7.e-4 - cd ${RRTMGP_ROOT}/examples/all-sky - python ./compare-to-reference.py - cd ${RRTMGP_ROOT}/tests - python verification.py diff --git a/Compiler-flags.md b/Compiler-flags.md new file mode 100644 index 000000000..cef9c3f42 --- /dev/null +++ b/Compiler-flags.md @@ -0,0 +1,30 @@ +# Compiler flag Examples + +Before using the Makefiles supplied with the `RTE+RRTMGP` repository, 
the environment variables `FC` and +`FCFLAGS`, identifying the Fortran compiler and flags passed to it, need to be set. Here are some examples +used during development and testing. + +To build any of the executables in `examples/` or `tests` the locations of the C and Fortran netCDF libraries +need to be set via environment variables `NCHOME` and `NFHOME`, and the variable `RRTMGP_ROOT` must be set to the +root of the RTE+RRTMGP installation. + +## Gnu Fortran +`FC: gfortran-8` or `gfortran-9` or `gfortran-10` +### Debugging flags +`FCFLAGS: "-ffree-line-length-none -m64 -std=f2008 -march=native -fbounds-check -finit-real=nan -DUSE_CBOOL"` +### Even stricter debugging flags +`FCFLAGS: "-ffree-line-length-none -m64 -std=f2008 -march=native -fbounds-check -fbacktrace -finit-real=nan -DUSE_CBOOL -pedantic -g -Wall"` + +## Intel Fortran +`FC: ifort` +### Debugging flags +`FCFLAGS: "-m64 -g -traceback -heap-arrays -assume realloc_lhs -extend-source 132 -check bounds,uninit,pointers,stack -stand f08"` +### Optimization flags: +`FCFLAGS:"-m64 -O3 -g -traceback -heap-arrays -assume realloc_lhs -extend-source 132"` + +## PGI Fortran +`FC: pgfortran` or `FC: nvfortran` (if using the Nvidia HPC SDK) +### Debugging flags +`FCFLAGS: "-g -Minfo -Mbounds -Mchkptr -Mstandard -Kieee -Mchkstk -Mallocatable=03 -Mpreprocess"` +### Optimization flags: +`FCFLAGS: "-m64 -O3 -g -traceback -heap-arrays -assume realloc_lhs -extend-source 132"` diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..9f57b43d8 --- /dev/null +++ b/Makefile @@ -0,0 +1,27 @@ +# +# Top-level Makefile +# +.PHONY: libs tests check +all: libs tests check + +libs: + make -C build -j + make -C tests -j 1 + make -C examples/all-sky -j + make -C examples/rfmip-clear-sky -j + +tests: + make -C examples/rfmip-clear-sky tests + make -C examples/all-sky tests + make -C tests tests + +check: + make -C examples/rfmip-clear-sky check + make -C examples/all-sky check + make -C tests check + +clean: + make -C 
build clean + make -C examples/rfmip-clear-sky clean + make -C examples/all-sky clean + make -C tests clean diff --git a/README.md b/README.md index 13c42f8ea..9b618f210 100644 --- a/README.md +++ b/README.md @@ -18,13 +18,15 @@ Example programs and documentation are evolving - please see examples/ in the re Relative to commit `69d36c9` to `master` on Apr 20, 2020, the required arguments to both the longwave and shortwave versions of `ty_gas_optics_rrtmgp%load()`have changed. -## Building the libraries. +## Building the libraries, examples, and unit-testing codes. -1. `cd build` -2. Set environment variables `FC` (the Fortran 2003 compiler) and `FCFLAGS` (compiler flags). Alternately create a Makefile.conf that sets these variables. You could also link to an existing file. -3. Set environment variable `RTE_KERNELS` to `openacc` if you want the OpenACC kernels rather than the default. -4. `make` +1. Set environment variables `FC` (the Fortran 2003 compiler) and `FCFLAGS` (compiler flags). Examples are provided in the `Compiler-flags.md` file. +2. Set environment variables `RRTMGP_ROOT` to the top-level RTE+RRTMGP directory and `RTE_KERNELS` to `openacc` if you want the OpenACC/OpenMP kernels rather than the default. +3. `make libs` in the top-level directory will make the RTE and RRTMGP libraries. +4. The examples and testing codes use netCDF. Set the variables `NCHOME` and `NFHOME` to the roots of the C and Fortran netCDF installations, then `make tests` to build and run these. (A few files need to be downloaded for `examples/rfmip-clear-sky`. The default is to download these with `wget` but a Python script is also available.) +5. Evaluating the results of the tests requires `Python` with the `xarray` package and its dependencies installed. Comparisons can be made with `make check` in the top level directory. +6. `make` invoked without a target in the top level attempts all three steps. 
## Examples -Two examples are provided, one for clear skies and one including clouds. See the README file and codes in each directory for further information. +Two examples are provided in `examples/`, one for clear skies and one including clouds. Directory `tests/` contains regression testing (e.g. to ensure that answers are independent of orientation) and unit testing (to be sure all the code paths are tested). See the README file and codes in each directory for further information. diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 166ff69e8..624c3af20 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -3,13 +3,6 @@ jobs: pool: CSCS strategy: matrix: - pgi_19_9_gpu: - compiler_base: pgi - compiler_module: PGI/19.9 - accel_module: cudatoolkit - FCFLAGS: "-O3 -ta=tesla:cc60,cuda10.1 -Mallocatable=03 -gopt -Minline,reshape,maxsize:40" - RTE_KERNELS: openacc - RUN_CMD: "srun -C gpu -A c15 -p cscsci" pgi_default_gpu: compiler_base: pgi compiler_module: pgi @@ -17,27 +10,21 @@ jobs: # Generic accelerator flag FCFLAGS: "-O3 -acc -Mallocatable=03 -gopt" RTE_KERNELS: openacc - RUN_CMD: "srun -C gpu -A c15 -p cscsci" - pgi_19_10_cpu: - compiler_base: pgi - compiler_module: PGI/19.10 - accel_module: - # Error checking flags - FCFLAGS: "-Mallocatable=03 -Mstandard -Mbounds -Mchkptr -Kieee -Mchkstk" - RUN_CMD: - pgi_19_9_cpu: - compiler_base: pgi - compiler_module: PGI/19.9 - accel_module: - # Error checking flags - FCFLAGS: "-Mallocatable=03 -Mstandard -Mbounds -Mchkptr -Kieee -Mchkstk" - RUN_CMD: + RUN_CMD: "srun -C gpu -A pr55 -p cscsci" cce-cpu-icon-production: compiler_base: cray - compiler_module: ccce-icon/9.0.2-classic + compiler_module: cce-icon/11.0.0 accel_module: # Production flags for Icon model FCFLAGS: "-hadd_paren -r am -Ktrap=divz,ovf,inv -hflex_mp=intolerant -hfp1 -hnoacc -O1,cache0" + cce-openmp: + compiler_base: cray + compiler_module: cce/11.0.0 + accel_module: craype-accel-nvidia60 + # OpenMP flags from Nichols Romero (Argonne) + 
FCFLAGS: "-hnoacc -homp -O0" + RTE_KERNELS: openacc + RUN_CMD: "srun -C gpu -A pr55 -p cscsci" maxParallel: 2 workspace: @@ -47,7 +34,7 @@ jobs: - script: | set -e - echo " + echo ' module load daint-gpu export PATH=$CRAY_BINUTILS_BIN:$PATH module swap PrgEnv-cray PrgEnv-$(compiler_base) @@ -55,49 +42,33 @@ jobs: module load $(accel_module) module load cray-netcdf cray-hdf5 export LD_LIBRARY_PATH=$CRAY_LD_LIBRARY_PATH:$LD_LIBRARY_PATH + export CUDA_HOME=$CUDATOOLKIT_HOME echo Compiler Environment: module list echo LD_LIBRARY_PATH is: echo $LD_LIBRARY_PATH - " > compiler_modules + ' > compiler_modules displayName: 'Create module environment' - script: | set -e - module load daint-gpu - module load netcdf-python cd examples/rfmip-clear-sky - python ./stage_files.py + source ./stage_files.sh displayName: 'Stage files' - script: | set -e source compiler_modules export RRTMGP_ROOT=$PWD - export RRTMGP_BUILD=$PWD/build export FC=ftn - make -C build/ clean - - make -C build/ -j 8 - make -C tests clean - make -C tests -j 1 - make -C examples/all-sky clean - make -C examples/all-sky -j 8 - make -C examples/rfmip-clear-sky clean - make -C examples/rfmip-clear-sky -j 8 + make clean + make libs displayName: 'Make' - script: | set -e source compiler_modules - module load cray-python/3.6.5.7 + module load cray-python export RRTMGP_ROOT=$PWD - cd ${RRTMGP_ROOT}/examples/rfmip-clear-sky - ${RUN_CMD} python ./run-rfmip-examples.py --block_size 1800 - cd ${RRTMGP_ROOT}/examples/all-sky - ${RUN_CMD} python ./run-allsky-example.py - cd ${RRTMGP_ROOT}/tests - cp ${RRTMGP_ROOT}/examples/rfmip-clear-sky/multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc test_atmospheres.nc - ${RUN_CMD} ./clear_sky_regression test_atmospheres.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-lw-g256-2018-12-04.nc - ${RUN_CMD} ./clear_sky_regression test_atmospheres.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-sw-g224-2018-12-04.nc + make tests displayName: 'Run' - script: | set -e @@ -105,10 +76,5 @@ 
jobs: export RRTMGP_ROOT=$PWD # This module will unload some of the build modules, so do the checks separately module load netcdf-python - cd ${RRTMGP_ROOT}/examples/rfmip-clear-sky - python ./compare-to-reference.py --fail=7.e-4 - cd ${RRTMGP_ROOT}/examples/all-sky - python ./compare-to-reference.py - cd ${RRTMGP_ROOT}/tests - python verification.py + make check displayName: 'Check results' diff --git a/build/Makefile b/build/Makefile index d18d52159..ec79c985f 100644 --- a/build/Makefile +++ b/build/Makefile @@ -13,9 +13,7 @@ RTE_KERNEL_DIR += ../rte/kernels RRTMGP_KERNEL_DIR += ../rrtmgp/kernels VPATH = $(RTE_DIR):$(RTE_KERNEL_DIR):$(RRTMGP_DIR):$(RRTMGP_KERNEL_DIR) # -# Compiler variables FC, FCFLAGS can be set in the environment or in Makefile.conf -# --include Makefile.conf +# Compiler variables FC, FCFLAGS must be set in the environment all: librte.a librrtmgp.a diff --git a/build/Makefile.conf.gfortran b/build/Makefile.conf.gfortran deleted file mode 100644 index ff8deddf2..000000000 --- a/build/Makefile.conf.gfortran +++ /dev/null @@ -1,17 +0,0 @@ -# -# Gfortran 7 -# - -# Fortran compiler command -export FC = gfortran - -# Fortran compiler flags -# -# Optimized -# -export FCFLAGS = -ffree-line-length-none -m64 -std=f2003 -march=native -O3 -DUSE_CBOOL -# -# Debugging - -mno-avx is particular to (broken) gcc setups using MacPorts -# -#export FCFLAGS = -ffree-line-length-none -m64 -std=f2003 -march=native -mno-avx -pedantic -g -fbounds-check -Wall -fbacktrace -finit-real=nan -DUSE_CBOOL -#export FCFLAGS = -ffree-line-length-none -m64 -std=f2003 -march=native -pedantic -g -fbounds-check -Wall -fbacktrace -finit-real=nan -DUSE_CBOOL diff --git a/build/Makefile.conf.ifort b/build/Makefile.conf.ifort deleted file mode 100644 index 325892b63..000000000 --- a/build/Makefile.conf.ifort +++ /dev/null @@ -1,18 +0,0 @@ -# -# Intel Fortran 18 -# -# Fortran compiler command -export FC = ifort -# -# Fortran compiler flags -# -# Optimized -# -export FCFLAGS += -m64 -O3 -g 
-traceback -heap-arrays -assume realloc_lhs -extend-source 132 -export F77FLAGS += -m64 -O3 -g -traceback -# can add -qopt-report-phase=vec -# -# Debugging -# -# export FCFLAGS = -m64 -g -traceback -heap-arrays -assume realloc_lhs -extend-source 132 -check bounds,uninit,pointers,stack -stand f08 -# export F77FLAGS = -m64 -g -traceback -check bounds,uninit,pointers,stack diff --git a/build/Makefile.conf.nagfor b/build/Makefile.conf.nagfor deleted file mode 100644 index 9e130c569..000000000 --- a/build/Makefile.conf.nagfor +++ /dev/null @@ -1,12 +0,0 @@ -# -# NAG Fortran -# -# Fortran compiler command -export FC = nagfor - -# Fortran compiler flags -# -# Debugging -# -export FCFLAGS = -g -f2003 -ieee=nonstd -gline -colour -C -C=dangling -export F77FLAGS = -g -ieee=nonstd -gline -colour -C -C=dangling -dusty diff --git a/build/Makefile.conf.olcf_pgi b/build/Makefile.conf.olcf_pgi deleted file mode 100755 index dbb69b694..000000000 --- a/build/Makefile.conf.olcf_pgi +++ /dev/null @@ -1,19 +0,0 @@ -# -# PGI Fortran on Ascent/Summit -# -# Fortran compiler command -export FC = mpif90 - -# Fortran compiler flags -# -# NOTE: There is a bug in the PGI OpenACC implementation related to optimization -# and C-compatible logical types (c_bool). The temporary workaround (until the -# compiler bug is fixed) is to compile without optimization. Alternatively, we -# can change all variables of type logical(wl) or logical(kind=c_bool) with -# intent(in) to intent(inout), which access memory differently. Requiring -# building without optimization seemed like the less intrusive approach for the -# time being though. This allows the tests to build and run, and gives the right -# answers against baselines, but will need to be fixed before running big -# expensive jobs probably. 
-# -export FCFLAGS = -O0 -g -ta=nvidia,cc70,ptxinfo -Minfo=accel diff --git a/build/Makefile.conf.pgfortran b/build/Makefile.conf.pgfortran deleted file mode 100644 index 33b3c065b..000000000 --- a/build/Makefile.conf.pgfortran +++ /dev/null @@ -1,12 +0,0 @@ -# -# PG Fortran -# -# Fortran compiler command -export FC = ftn - -# Fortran compiler flags -# -# Debugging -# -export FCFLAGS = -g -Mallocatable=03 -Mbounds -Mchkptr -Mstandard -Kieee -Mchkstk -Mpreprocess -export F77FLAGS = $(FCFLAGS) diff --git a/build/Makefile.conf.pgfortran-cscs b/build/Makefile.conf.pgfortran-cscs deleted file mode 100644 index 1e3f28ef1..000000000 --- a/build/Makefile.conf.pgfortran-cscs +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/make - -# Load the following modules to compile with PGI for CPU -# -# module load cdt/19.06 -# module swap PrgEnv-cray PrgEnv-pgi -# module load cray-netcdf cray-hdf5 -# module load craype-accel-nvidia60 -# module unload cray-libsci_acc -# -# -# Fortran compiler command -FC = ftn -NCHOME = - -# Fortran compiler flags -#FCFLAGS = -g -Minfo -Mbounds -Mchkptr -Mstandard -Kieee -Mchkstk -Mipa=fast,inline -Mallocatable=03 -FCFLAGS = -g -Minfo -Mbounds -Mchkptr -Mstandard -Kieee -Mchkstk -Mallocatable=03 -Mpreprocess - -# Fortran .mod files, e.g. -I if you have headers in a nonstandard directory -FCINCLUDE = - -# linker flags, e.g. -L if you have libraries in a nonstandard directory -#LDFLAGS = -L/opt/nvidia/cudatoolkit6.5/6.5.14-1.0502.9836.8.1/lib64 -ta=tesla6.5 -lcudart_static -Bdynamic -#LDFLAGS = -ta=tesla -LDFLAGS = - -# -L/sw/squeeze-x64/netcdf_fortran-4.2.0-static-pgi15/lib -lnetcdff -L/sw/squeeze-x64/szip-2.1-static/lib -L/sw/squeeze-x64/hdf5-1.8.12-static/lib -L/sw/squeeze-x64/netcdf-4.3.1.1-static/lib -lnetcdf -lnetcdf -lhdf5_hl -lhdf5 -lsz -lz -lm -ldl -# libraries to pass to the linker, e.g. 
-l -LIBS = - -# build directory -BUILDDIR = build - -# these files will not be compiled -#BLACKLIST = mo_gas_desc.F90 diff --git a/build/Makefile.conf.pgfortran-cscs-gpu b/build/Makefile.conf.pgfortran-cscs-gpu deleted file mode 100644 index 8042ad70a..000000000 --- a/build/Makefile.conf.pgfortran-cscs-gpu +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/make - -# Load the following modules -# -# module load cdt/19.06 -# module swap PrgEnv-cray PrgEnv-pgi -# module load cray-netcdf cray-hdf5 -# module load craype-accel-nvidia60 -# module unload cray-libsci_acc -# -# -# Fortran compiler command -FC = ftn -NCHOME = - -# Fortran compiler flags -#FCFLAGS = -g -Minfo -Mbounds -Mchkptr -Mstandard -Kieee -Mchkstk -Mipa=fast,inline -acc -ta=tesla:6.5 -FCFLAGS = -g -ta=tesla:cc60,cuda9.2 -Minfo -Mbounds -Mchkptr -Mstandard -Kieee -Mchkstk -Mallocatable=03 -Mpreprocess - -# Fortran .mod files, e.g. -I if you have headers in a nonstandard directory -FCINCLUDE = - -# linker flags, e.g. -L if you have libraries in a nonstandard directory -#LDFLAGS = -L/opt/nvidia/cudatoolkit6.5/6.5.14-1.0502.9836.8.1/lib64 -ta=tesla6.5 -lcudart_static -Bdynamic -#LDFLAGS = -ta=tesla -LDFLAGS = - -# -L/sw/squeeze-x64/netcdf_fortran-4.2.0-static-pgi15/lib -lnetcdff -L/sw/squeeze-x64/szip-2.1-static/lib -L/sw/squeeze-x64/hdf5-1.8.12-static/lib -L/sw/squeeze-x64/netcdf-4.3.1.1-static/lib -lnetcdf -lnetcdf -lhdf5_hl -lhdf5 -lsz -lz -lm -ldl -# libraries to pass to the linker, e.g. -l -LIBS = - -# build directory -BUILDDIR = build - -# these files will not be compiled -#BLACKLIST = mo_gas_desc.F90 diff --git a/examples/all-sky/Makefile b/examples/all-sky/Makefile old mode 100755 new mode 100644 index 919d6b5b2..1d58ddc76 --- a/examples/all-sky/Makefile +++ b/examples/all-sky/Makefile @@ -1,10 +1,7 @@ +# # Location of RTE+RRTMGP libraries, module files. 
+# RRTMGP_BUILD = $(RRTMGP_ROOT)/build -# Sets macros FC, FCFLAGS consistent with RTE+RRTMGP --include $(RRTMGP_BUILD)/Makefile.conf - -# Location of netcdf C and Fortran libraries. Could specify with environment variables if file doesn't exist --include $(RRTMGP_ROOT)/examples/rfmip-clear-sky/Makefile.libs # # RRTMGP library, module files # @@ -14,7 +11,7 @@ FCINCLUDE += -I$(RRTMGP_BUILD) # # netcdf library, module files -# C and Fortran interfaces respectively +# Environment variables NCHOME and NFHOME point to root of C and Fortran interfaces respectively - # FCINCLUDE += -I$(NFHOME)/include LDFLAGS += -L$(NFHOME)/lib -L$(NCHOME)/lib @@ -28,7 +25,6 @@ VPATH = ../:$(RRTMGP_ROOT)/extensions/cloud_optics %: %.o $(FC) $(FCFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) - # # Extra sources -- extensions to RRTMGP classes, shared infrastructure, local sources # @@ -49,5 +45,13 @@ mo_load_coefficients.o: mo_simple_netcdf.o mo_load_coefficients.F90 mo_garand_atmos_io.o: mo_simple_netcdf.o mo_garand_atmos_io.F90 mo_load_cloud_coefficients.o: mo_simple_netcdf.o mo_cloud_optics.o mo_load_cloud_coefficients.F90 +tests: + cp garand-atmos-1.nc rrtmgp-allsky.nc + $(RUN_CMD) ./rrtmgp_allsky rrtmgp-allsky.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-lw-g256-2018-12-04.nc ${RRTMGP_ROOT}/extensions/cloud_optics/rrtmgp-cloud-optics-coeffs-lw.nc 128 + $(RUN_CMD) ./rrtmgp_allsky rrtmgp-allsky.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-sw-g224-2018-12-04.nc ${RRTMGP_ROOT}/extensions/cloud_optics/rrtmgp-cloud-optics-coeffs-sw.nc 128 + +check: + python ./compare-to-reference.py + clean: -rm rrtmgp_allsky *.o *.optrpt ../*.optrpt *.mod diff --git a/examples/all-sky/README.md b/examples/all-sky/README.md index 62916f9e7..ff894bbd4 100644 --- a/examples/all-sky/README.md +++ b/examples/all-sky/README.md @@ -4,7 +4,8 @@ This example provides a modestly more realistic setting the clear-sky problem do The example uses the first of the Garand atmosphere used for developing RRTMGP, as described in the 
[paper](https://doi.org/10.1029/2019MS001621) documenting the code, repeats the column a user-specified number of times, computes the optical properties of an arbitrary cloud in each column, and computes the broadband fluxes. Fractional cloudiness is not considered, and the clouds are extensive but quite boring, with uniform condensate amount and particle size everywhere (though with different values for liquid and ice). -1. Build the RTE+RRTMGP libraries in `../../build/`. This will require setting environmental variables `FC` for the Fortran compiler and `FCFLAGS`, or creating `../../build/Makefile.conf` with that information. -2. Build the executables in this directory, which will require providing the locations of the netCDF C and Fortran libraries and module files as environmental variables (NCHOME and NFHOME) or via file `Makefile.libs` +1. Build the RTE+RRTMGP libraries in `../../build/`. This will require setting environmental variables `FC` for the Fortran compiler and `FCFLAGS`. +2. Build the executables in this directory, which will require providing the locations of the netCDF C and Fortran libraries and module files as environmental +variables `NCHOME` and `NFHOME`, as well a variable `RRTMGP_ROOT` pointing to the root of the installation (the absolute path to `../../`). 4. Use Python script `run-rfmip-examples.py` to run the examples. The script takes some optional arguments, see `run-rfmip-examples.py -h` 5. Python script `compare-to-reference.py` will compare the results to reference answers for 128 columns, produced on a Mac with Intel 19 Fortran compiler. 
diff --git a/examples/all-sky/mo_garand_atmos_io.F90 b/examples/all-sky/mo_garand_atmos_io.F90 old mode 100755 new mode 100644 diff --git a/examples/all-sky/rrtmgp_allsky.F90 b/examples/all-sky/rrtmgp_allsky.F90 old mode 100755 new mode 100644 index a1c3ea666..3279a191c --- a/examples/all-sky/rrtmgp_allsky.F90 +++ b/examples/all-sky/rrtmgp_allsky.F90 @@ -1,390 +1,422 @@ -subroutine stop_on_err(error_msg) - use iso_fortran_env, only : error_unit - character(len=*), intent(in) :: error_msg - - if(error_msg /= "") then - write (error_unit,*) trim(error_msg) - write (error_unit,*) "rte_rrtmgp_clouds stopping" - stop - end if -end subroutine stop_on_err - -subroutine vmr_2d_to_1d(gas_concs, gas_concs_garand, name, sz1, sz2) - use mo_gas_concentrations, only: ty_gas_concs - use mo_rte_kind, only: wp - - type(ty_gas_concs), intent(in) :: gas_concs_garand - type(ty_gas_concs), intent(inout) :: gas_concs - character(len=*), intent(in) :: name - integer, intent(in) :: sz1, sz2 - - real(wp) :: tmp(sz1, sz2), tmp_col(sz2) - - !$acc data create(tmp, tmp_col) - call stop_on_err(gas_concs_garand%get_vmr(name, tmp)) - !$acc kernels - tmp_col(:) = tmp(1, :) - !$acc end kernels - - call stop_on_err(gas_concs%set_vmr (name, tmp_col)) - !$acc end data -end subroutine vmr_2d_to_1d -! 
---------------------------------------------------------------------------------- -program rte_rrtmgp_clouds - use mo_rte_kind, only: wp, i8 - use mo_optical_props, only: ty_optical_props, & - ty_optical_props_arry, ty_optical_props_1scl, ty_optical_props_2str - use mo_gas_optics_rrtmgp, only: ty_gas_optics_rrtmgp - use mo_cloud_optics, only: ty_cloud_optics - use mo_gas_concentrations, only: ty_gas_concs - use mo_source_functions, only: ty_source_func_lw - use mo_fluxes, only: ty_fluxes_broadband - use mo_rte_lw, only: rte_lw - use mo_rte_sw, only: rte_sw - use mo_load_coefficients, only: load_and_init - use mo_load_cloud_coefficients, & - only: load_cld_lutcoeff, load_cld_padecoeff - use mo_garand_atmos_io, only: read_atmos, write_lw_fluxes, write_sw_fluxes - implicit none - ! ---------------------------------------------------------------------------------- - ! Variables - ! ---------------------------------------------------------------------------------- - ! Arrays: dimensions (col, lay) - real(wp), dimension(:,:), allocatable :: p_lay, t_lay, p_lev - real(wp), dimension(:,:), allocatable :: col_dry - real(wp), dimension(:,:), allocatable :: temp_array - - ! - ! Longwave only - ! - real(wp), dimension(:,:), allocatable :: t_lev - real(wp), dimension(:), allocatable :: t_sfc - real(wp), dimension(:,:), allocatable :: emis_sfc ! First dimension is band - ! - ! Shortwave only - ! - real(wp), dimension(:), allocatable :: mu0 - real(wp), dimension(:,:), allocatable :: sfc_alb_dir, sfc_alb_dif ! First dimension is band - ! - ! Source functions - ! - ! Longwave - type(ty_source_func_lw), save :: lw_sources - ! Shortwave - real(wp), dimension(:,:), allocatable, save :: toa_flux - ! - ! Clouds - ! - real(wp), allocatable, dimension(:,:) :: lwp, iwp, rel, rei - logical, allocatable, dimension(:,:) :: cloud_mask - ! - ! Output variables - ! - real(wp), dimension(:,:), target, & - allocatable :: flux_up, flux_dn, flux_dir - ! - ! 
Derived types from the RTE and RRTMGP libraries - ! - type(ty_gas_optics_rrtmgp) :: k_dist - type(ty_cloud_optics) :: cloud_optics - type(ty_gas_concs) :: gas_concs, gas_concs_garand, gas_concs_1col - class(ty_optical_props_arry), & - allocatable :: atmos, clouds - type(ty_fluxes_broadband) :: fluxes - - ! - ! Inputs to RRTMGP - ! - logical :: top_at_1, is_sw, is_lw - - integer :: ncol, nlay, nbnd, ngpt - integer :: icol, ilay, ibnd, iloop, igas - real(wp) :: rel_val, rei_val - - character(len=8) :: char_input - integer :: nUserArgs=0, nloops - logical :: use_luts = .true., write_fluxes = .true. - integer, parameter :: ngas = 8 - character(len=3), dimension(ngas) & - :: gas_names = ['h2o', 'co2', 'o3 ', 'n2o', 'co ', 'ch4', 'o2 ', 'n2 '] - - character(len=256) :: input_file, k_dist_file, cloud_optics_file - ! - ! Timing variables - ! - integer(kind=i8) :: start, finish, start_all, finish_all, clock_rate - real(wp) :: avg - integer(kind=i8), allocatable :: elapsed(:) - !$omp threadprivate( lw_sources, toa_flux, flux_up, flux_dn, flux_dir ) - ! ---------------------------------------------------------------------------------- - ! Code - ! ---------------------------------------------------------------------------------- - ! - ! Parse command line for any file names, block size - ! - ! rrtmgp_clouds rrtmgp-clouds.nc $RRTMGP_ROOT/rrtmgp/data/rrtmgp-data-lw-g256-2018-12-04.nc $RRTMGP_ROOT/extensions/cloud_optics/rrtmgp-cloud-optics-coeffs-lw.nc 128 1 - ! 
rrtmgp_clouds rrtmgp-clouds.nc $RRTMGP_ROOT/rrtmgp/data/rrtmgp-data-sw-g224-2018-12-04.nc $RRTMGP_ROOT/extensions/cloud_optics/rrtmgp-cloud-optics-coeffs-sw.nc 128 1 - nUserArgs = command_argument_count() - nloops = 1 - if (nUserArgs < 4) call stop_on_err("Need to supply input_file k_distribution_file ncol.") - if (nUserArgs >= 1) call get_command_argument(1,input_file) - if (nUserArgs >= 2) call get_command_argument(2,k_dist_file) - if (nUserArgs >= 3) call get_command_argument(3,cloud_optics_file) - if (nUserArgs >= 4) then - call get_command_argument(4, char_input) - read(char_input, '(i8)') ncol - if(ncol <= 0) call stop_on_err("Specify positive ncol.") - end if - if (nUserArgs >= 5) then - call get_command_argument(5, char_input) - read(char_input, '(i8)') nloops - if(nloops <= 0) call stop_on_err("Specify positive nloops.") - end if - if (nUserArgs > 6) print *, "Ignoring command line arguments beyond the first five..." - if(trim(input_file) == '-h' .or. trim(input_file) == "--help") then - call stop_on_err("rrtmgp_clouds input_file absorption_coefficients_file cloud_optics_file ncol") - end if - ! - ! Read temperature, pressure, gas concentrations. - ! Arrays are allocated as they are read - ! - call read_atmos(input_file, & - p_lay, t_lay, p_lev, t_lev, & - gas_concs_garand, col_dry) - deallocate(col_dry) - nlay = size(p_lay, 2) - ! For clouds we'll use the first column, repeated over and over - call stop_on_err(gas_concs%init(gas_names)) - do igas = 1, ngas - call vmr_2d_to_1d(gas_concs, gas_concs_garand, gas_names(igas), size(p_lay, 1), nlay) - end do - ! 
If we trusted in Fortran allocate-on-assign we could skip the temp_array here - allocate(temp_array(ncol, nlay)) - temp_array = spread(p_lay(1,:), dim = 1, ncopies=ncol) - call move_alloc(temp_array, p_lay) - allocate(temp_array(ncol, nlay)) - temp_array = spread(t_lay(1,:), dim = 1, ncopies=ncol) - call move_alloc(temp_array, t_lay) - allocate(temp_array(ncol, nlay+1)) - temp_array = spread(p_lev(1,:), dim = 1, ncopies=ncol) - call move_alloc(temp_array, p_lev) - allocate(temp_array(ncol, nlay+1)) - temp_array = spread(t_lev(1,:), dim = 1, ncopies=ncol) - call move_alloc(temp_array, t_lev) - ! This puts pressure and temperature arrays on the GPU - !$acc enter data copyin(p_lay, p_lev, t_lay, t_lev) - ! ---------------------------------------------------------------------------- - ! load data into classes - call load_and_init(k_dist, k_dist_file, gas_concs) - is_sw = k_dist%source_is_external() - is_lw = .not. is_sw - ! - ! Should also try with Pade calculations - ! call load_cld_padecoeff(cloud_optics, cloud_optics_file) - ! - if(use_luts) then - call load_cld_lutcoeff (cloud_optics, cloud_optics_file) - else - call load_cld_padecoeff(cloud_optics, cloud_optics_file) - end if - call stop_on_err(cloud_optics%set_ice_roughness(2)) - ! ---------------------------------------------------------------------------- - ! - ! Problem sizes - ! - nbnd = k_dist%get_nband() - ngpt = k_dist%get_ngpt() - top_at_1 = p_lay(1, 1) < p_lay(1, nlay) - - ! ---------------------------------------------------------------------------- - ! LW calculations neglect scattering; SW calculations use the 2-stream approximation - ! Here we choose the right variant of optical_props. - ! - if(is_sw) then - allocate(ty_optical_props_2str::atmos) - allocate(ty_optical_props_2str::clouds) - else - allocate(ty_optical_props_1scl::atmos) - allocate(ty_optical_props_1scl::clouds) - end if - ! Clouds optical props are defined by band - call stop_on_err(clouds%init(k_dist%get_band_lims_wavenumber())) - ! 
- ! Allocate arrays for the optical properties themselves. - ! - select type(atmos) - class is (ty_optical_props_1scl) - !$acc enter data copyin(atmos) - call stop_on_err(atmos%alloc_1scl(ncol, nlay, k_dist)) - !$acc enter data copyin(atmos) create(atmos%tau) - class is (ty_optical_props_2str) - call stop_on_err(atmos%alloc_2str( ncol, nlay, k_dist)) - !$acc enter data copyin(atmos) create(atmos%tau, atmos%ssa, atmos%g) - class default - call stop_on_err("rte_rrtmgp_clouds: Don't recognize the kind of optical properties ") - end select - select type(clouds) - class is (ty_optical_props_1scl) - call stop_on_err(clouds%alloc_1scl(ncol, nlay)) - !$acc enter data copyin(clouds) create(clouds%tau) - class is (ty_optical_props_2str) - call stop_on_err(clouds%alloc_2str(ncol, nlay)) - !$acc enter data copyin(clouds) create(clouds%tau, clouds%ssa, clouds%g) - class default - call stop_on_err("rte_rrtmgp_clouds: Don't recognize the kind of optical properties ") - end select - ! ---------------------------------------------------------------------------- - ! Boundary conditions depending on whether the k-distribution being supplied - ! is LW or SW - if(is_sw) then - ! toa_flux is threadprivate - !$omp parallel - allocate(toa_flux(ncol, ngpt)) - !$omp end parallel - ! - allocate(sfc_alb_dir(nbnd, ncol), sfc_alb_dif(nbnd, ncol), mu0(ncol)) - !$acc enter data create(sfc_alb_dir, sfc_alb_dif, mu0) - ! Ocean-ish values for no particular reason - !$acc kernels - sfc_alb_dir = 0.06_wp - sfc_alb_dif = 0.06_wp - mu0 = .86_wp - !$acc end kernels - else - ! lw_sorces is threadprivate - !$omp parallel - call stop_on_err(lw_sources%alloc(ncol, nlay, k_dist)) - !$omp end parallel - - allocate(t_sfc(ncol), emis_sfc(nbnd, ncol)) - !$acc enter data create(t_sfc, emis_sfc) - ! Surface temperature - !$acc kernels - t_sfc = t_lev(1, merge(nlay+1, 1, top_at_1)) - emis_sfc = 0.98_wp - !$acc end kernels - end if - ! ---------------------------------------------------------------------------- - ! 
- ! Fluxes - ! - !$omp parallel - allocate(flux_up(ncol,nlay+1), flux_dn(ncol,nlay+1)) - !$omp end parallel - - !$acc enter data create(flux_up, flux_dn) - if(is_sw) then - allocate(flux_dir(ncol,nlay+1)) - !$acc enter data create(flux_dir) - end if - ! - ! Clouds - ! - allocate(lwp(ncol,nlay), iwp(ncol,nlay), & - rel(ncol,nlay), rei(ncol,nlay), cloud_mask(ncol,nlay)) - !$acc enter data create(cloud_mask, lwp, iwp, rel, rei) - - ! Restrict clouds to troposphere (> 100 hPa = 100*100 Pa) - ! and not very close to the ground (< 900 hPa), and - ! put them in 2/3 of the columns since that's roughly the - ! total cloudiness of earth - rel_val = 0.5 * (cloud_optics%get_min_radius_liq() + cloud_optics%get_max_radius_liq()) - rei_val = 0.5 * (cloud_optics%get_min_radius_ice() + cloud_optics%get_max_radius_ice()) - !$acc parallel loop collapse(2) copyin(t_lay) copyout(lwp, iwp, rel, rei) - do ilay=1,nlay - do icol=1,ncol - cloud_mask(icol,ilay) = p_lay(icol,ilay) > 100._wp * 100._wp .and. & - p_lay(icol,ilay) < 900._wp * 100._wp .and. & - mod(icol, 3) /= 0 - ! - ! Ice and liquid will overlap in a few layers - ! - lwp(icol,ilay) = merge(10._wp, 0._wp, cloud_mask(icol,ilay) .and. t_lay(icol,ilay) > 263._wp) - iwp(icol,ilay) = merge(10._wp, 0._wp, cloud_mask(icol,ilay) .and. t_lay(icol,ilay) < 273._wp) - rel(icol,ilay) = merge(rel_val, 0._wp, lwp(icol,ilay) > 0._wp) - rei(icol,ilay) = merge(rei_val, 0._wp, iwp(icol,ilay) > 0._wp) - end do - end do - !$acc exit data delete(cloud_mask) - ! ---------------------------------------------------------------------------- - ! - ! Multiple iterations for big problem sizes, and to help identify data movement - ! For CPUs we can introduce OpenMP threading over loop iterations - ! - allocate(elapsed(nloops)) - ! - call system_clock(start_all) - ! - !$omp parallel do firstprivate(fluxes) - do iloop = 1, nloops - call system_clock(start) - call stop_on_err( & - cloud_optics%cloud_optics(lwp, iwp, rel, rei, clouds)) - ! - ! Solvers - ! 
- fluxes%flux_up => flux_up(:,:) - fluxes%flux_dn => flux_dn(:,:) - if(is_lw) then - !$acc enter data create(lw_sources, lw_sources%lay_source, lw_sources%lev_source_inc, lw_sources%lev_source_dec, lw_sources%sfc_source) - call stop_on_err(k_dist%gas_optics(p_lay, p_lev, & - t_lay, t_sfc, & - gas_concs, & - atmos, & - lw_sources, & - tlev = t_lev)) - call stop_on_err(clouds%increment(atmos)) - call stop_on_err(rte_lw(atmos, top_at_1, & - lw_sources, & - emis_sfc, & - fluxes)) - !$acc exit data delete(lw_sources%lay_source, lw_sources%lev_source_inc, lw_sources%lev_source_dec, lw_sources%sfc_source, lw_sources) - else - !$acc enter data create(toa_flux) - fluxes%flux_dn_dir => flux_dir(:,:) - - call stop_on_err(k_dist%gas_optics(p_lay, p_lev, & - t_lay, & - gas_concs, & - atmos, & - toa_flux)) - call stop_on_err(clouds%delta_scale()) - call stop_on_err(clouds%increment(atmos)) - call stop_on_err(rte_sw(atmos, top_at_1, & - mu0, toa_flux, & - sfc_alb_dir, sfc_alb_dif, & - fluxes)) - !$acc exit data delete(toa_flux) - end if - !print *, "******************************************************************" - call system_clock(finish, clock_rate) - elapsed(iloop) = finish - start - end do - ! - call system_clock(finish_all, clock_rate) - ! 
- !$acc exit data delete(lwp, iwp, rel, rei) - !$acc exit data delete(p_lay, p_lev, t_lay, t_lev) - -#ifdef _OPENACC - avg = sum( elapsed(merge(2,1,nloops>1):) ) / real(merge(nloops-1,nloops,nloops>1)) - - print *, "Execution times - min(s) :", minval(elapsed) / real(clock_rate) - print *, " - avg(s) :", avg / real(clock_rate) - print *, " - per column(ms):", avg / real(ncol) / (1.0e-3*clock_rate) -#else - print *, "Execution times - total(s) :", (finish_all-start_all) / real(clock_rate) - print *, " - per column(ms):", (finish_all-start_all) / real(ncol*nloops) / (1.0e-3*clock_rate) -#endif - - if(is_lw) then - !$acc exit data copyout(flux_up, flux_dn) - if(write_fluxes) call write_lw_fluxes(input_file, flux_up, flux_dn) - !$acc exit data delete(t_sfc, emis_sfc) - else - !$acc exit data copyout(flux_up, flux_dn, flux_dir) - if(write_fluxes) call write_sw_fluxes(input_file, flux_up, flux_dn, flux_dir) - !$acc exit data delete(sfc_alb_dir, sfc_alb_dif, mu0) - end if - !$acc enter data create(lwp, iwp, rel, rei) -end program rte_rrtmgp_clouds +subroutine stop_on_err(error_msg) + use iso_fortran_env, only : error_unit + character(len=*), intent(in) :: error_msg + + if(error_msg /= "") then + write (error_unit,*) trim(error_msg) + write (error_unit,*) "rte_rrtmgp_clouds stopping" + error stop 1 + end if +end subroutine stop_on_err + +subroutine vmr_2d_to_1d(gas_concs, gas_concs_garand, name, sz1, sz2) + use mo_gas_concentrations, only: ty_gas_concs + use mo_rte_kind, only: wp + + type(ty_gas_concs), intent(in) :: gas_concs_garand + type(ty_gas_concs), intent(inout) :: gas_concs + character(len=*), intent(in) :: name + integer, intent(in) :: sz1, sz2 + + real(wp) :: tmp(sz1, sz2), tmp_col(sz2) + + !$acc data create(tmp, tmp_col) + !$omp target data map(alloc:tmp, tmp_col) + call stop_on_err(gas_concs_garand%get_vmr(name, tmp)) + !$acc kernels + !$omp target + tmp_col(:) = tmp(1, :) + !$acc end kernels + !$omp end target + + call stop_on_err(gas_concs%set_vmr (name, 
tmp_col)) + !$acc end data + !$omp end target data +end subroutine vmr_2d_to_1d +! ---------------------------------------------------------------------------------- +program rte_rrtmgp_clouds + use mo_rte_kind, only: wp, i8 + use mo_optical_props, only: ty_optical_props, & + ty_optical_props_arry, ty_optical_props_1scl, ty_optical_props_2str + use mo_gas_optics_rrtmgp, only: ty_gas_optics_rrtmgp + use mo_cloud_optics, only: ty_cloud_optics + use mo_gas_concentrations, only: ty_gas_concs + use mo_source_functions, only: ty_source_func_lw + use mo_fluxes, only: ty_fluxes_broadband + use mo_rte_lw, only: rte_lw + use mo_rte_sw, only: rte_sw + use mo_load_coefficients, only: load_and_init + use mo_load_cloud_coefficients, & + only: load_cld_lutcoeff, load_cld_padecoeff + use mo_garand_atmos_io, only: read_atmos, write_lw_fluxes, write_sw_fluxes + implicit none + ! ---------------------------------------------------------------------------------- + ! Variables + ! ---------------------------------------------------------------------------------- + ! Arrays: dimensions (col, lay) + real(wp), dimension(:,:), allocatable :: p_lay, t_lay, p_lev + real(wp), dimension(:,:), allocatable :: col_dry + real(wp), dimension(:,:), allocatable :: temp_array + + ! + ! Longwave only + ! + real(wp), dimension(:,:), allocatable :: t_lev + real(wp), dimension(:), allocatable :: t_sfc + real(wp), dimension(:,:), allocatable :: emis_sfc ! First dimension is band + ! + ! Shortwave only + ! + real(wp), dimension(:), allocatable :: mu0 + real(wp), dimension(:,:), allocatable :: sfc_alb_dir, sfc_alb_dif ! First dimension is band + ! + ! Source functions + ! + ! Longwave + type(ty_source_func_lw), save :: lw_sources + ! Shortwave + real(wp), dimension(:,:), allocatable, save :: toa_flux + ! + ! Clouds + ! + real(wp), allocatable, dimension(:,:) :: lwp, iwp, rel, rei + logical, allocatable, dimension(:,:) :: cloud_mask + ! + ! Output variables + ! 
+ real(wp), dimension(:,:), target, & + allocatable :: flux_up, flux_dn, flux_dir + ! + ! Derived types from the RTE and RRTMGP libraries + ! + type(ty_gas_optics_rrtmgp) :: k_dist + type(ty_cloud_optics) :: cloud_optics + type(ty_gas_concs) :: gas_concs, gas_concs_garand, gas_concs_1col + class(ty_optical_props_arry), & + allocatable :: atmos, clouds + type(ty_fluxes_broadband) :: fluxes + + ! + ! Inputs to RRTMGP + ! + logical :: top_at_1, is_sw, is_lw + + integer :: ncol, nlay, nbnd, ngpt + integer :: icol, ilay, ibnd, iloop, igas + real(wp) :: rel_val, rei_val + + character(len=8) :: char_input + integer :: nUserArgs=0, nloops + logical :: use_luts = .true., write_fluxes = .true. + integer, parameter :: ngas = 8 + character(len=3), dimension(ngas) & + :: gas_names = ['h2o', 'co2', 'o3 ', 'n2o', 'co ', 'ch4', 'o2 ', 'n2 '] + + character(len=256) :: input_file, k_dist_file, cloud_optics_file + ! + ! Timing variables + ! + integer(kind=i8) :: start, finish, start_all, finish_all, clock_rate + real(wp) :: avg + integer(kind=i8), allocatable :: elapsed(:) + ! NAR OpenMP CPU directives in compatible with OpenMP GPU directives + !!$omp threadprivate( lw_sources, toa_flux, flux_up, flux_dn, flux_dir ) + ! ---------------------------------------------------------------------------------- + ! Code + ! ---------------------------------------------------------------------------------- + ! + ! Parse command line for any file names, block size + ! + ! rrtmgp_clouds rrtmgp-clouds.nc $RRTMGP_ROOT/rrtmgp/data/rrtmgp-data-lw-g256-2018-12-04.nc $RRTMGP_ROOT/extensions/cloud_optics/rrtmgp-cloud-optics-coeffs-lw.nc 128 1 + ! 
rrtmgp_clouds rrtmgp-clouds.nc $RRTMGP_ROOT/rrtmgp/data/rrtmgp-data-sw-g224-2018-12-04.nc $RRTMGP_ROOT/extensions/cloud_optics/rrtmgp-cloud-optics-coeffs-sw.nc 128 1 + nUserArgs = command_argument_count() + nloops = 1 + if (nUserArgs < 4) call stop_on_err("Need to supply input_file k_distribution_file ncol.") + if (nUserArgs >= 1) call get_command_argument(1,input_file) + if (nUserArgs >= 2) call get_command_argument(2,k_dist_file) + if (nUserArgs >= 3) call get_command_argument(3,cloud_optics_file) + if (nUserArgs >= 4) then + call get_command_argument(4, char_input) + read(char_input, '(i8)') ncol + if(ncol <= 0) call stop_on_err("Specify positive ncol.") + end if + if (nUserArgs >= 5) then + call get_command_argument(5, char_input) + read(char_input, '(i8)') nloops + if(nloops <= 0) call stop_on_err("Specify positive nloops.") + end if + if (nUserArgs > 6) print *, "Ignoring command line arguments beyond the first five..." + if(trim(input_file) == '-h' .or. trim(input_file) == "--help") then + call stop_on_err("rrtmgp_clouds input_file absorption_coefficients_file cloud_optics_file ncol") + end if + ! + ! Read temperature, pressure, gas concentrations. + ! Arrays are allocated as they are read + ! + call read_atmos(input_file, & + p_lay, t_lay, p_lev, t_lev, & + gas_concs_garand, col_dry) + deallocate(col_dry) + nlay = size(p_lay, 2) + ! For clouds we'll use the first column, repeated over and over + call stop_on_err(gas_concs%init(gas_names)) + do igas = 1, ngas + call vmr_2d_to_1d(gas_concs, gas_concs_garand, gas_names(igas), size(p_lay, 1), nlay) + end do + ! 
If we trusted in Fortran allocate-on-assign we could skip the temp_array here + allocate(temp_array(ncol, nlay)) + temp_array = spread(p_lay(1,:), dim = 1, ncopies=ncol) + call move_alloc(temp_array, p_lay) + allocate(temp_array(ncol, nlay)) + temp_array = spread(t_lay(1,:), dim = 1, ncopies=ncol) + call move_alloc(temp_array, t_lay) + allocate(temp_array(ncol, nlay+1)) + temp_array = spread(p_lev(1,:), dim = 1, ncopies=ncol) + call move_alloc(temp_array, p_lev) + allocate(temp_array(ncol, nlay+1)) + temp_array = spread(t_lev(1,:), dim = 1, ncopies=ncol) + call move_alloc(temp_array, t_lev) + ! This puts pressure and temperature arrays on the GPU + !$acc enter data copyin(p_lay, p_lev, t_lay, t_lev) + !$omp target enter data map(to:p_lay, p_lev, t_lay, t_lev) + ! ---------------------------------------------------------------------------- + ! load data into classes + call load_and_init(k_dist, k_dist_file, gas_concs) + is_sw = k_dist%source_is_external() + is_lw = .not. is_sw + ! + ! Should also try with Pade calculations + ! call load_cld_padecoeff(cloud_optics, cloud_optics_file) + ! + if(use_luts) then + call load_cld_lutcoeff (cloud_optics, cloud_optics_file) + else + call load_cld_padecoeff(cloud_optics, cloud_optics_file) + end if + call stop_on_err(cloud_optics%set_ice_roughness(2)) + ! ---------------------------------------------------------------------------- + ! + ! Problem sizes + ! + nbnd = k_dist%get_nband() + ngpt = k_dist%get_ngpt() + top_at_1 = p_lay(1, 1) < p_lay(1, nlay) + + ! ---------------------------------------------------------------------------- + ! LW calculations neglect scattering; SW calculations use the 2-stream approximation + ! Here we choose the right variant of optical_props. + ! + if(is_sw) then + allocate(ty_optical_props_2str::atmos) + allocate(ty_optical_props_2str::clouds) + else + allocate(ty_optical_props_1scl::atmos) + allocate(ty_optical_props_1scl::clouds) + end if + ! 
Clouds optical props are defined by band + call stop_on_err(clouds%init(k_dist%get_band_lims_wavenumber())) + ! + ! Allocate arrays for the optical properties themselves. + ! + select type(atmos) + class is (ty_optical_props_1scl) + !$acc enter data copyin(atmos) + call stop_on_err(atmos%alloc_1scl(ncol, nlay, k_dist)) + !$acc enter data copyin(atmos) create(atmos%tau) + !$omp target enter data map(alloc:atmos%tau) + class is (ty_optical_props_2str) + call stop_on_err(atmos%alloc_2str( ncol, nlay, k_dist)) + !$acc enter data copyin(atmos) create(atmos%tau, atmos%ssa, atmos%g) + !$omp target enter data map(alloc:atmos%tau, atmos%ssa, atmos%g) + class default + call stop_on_err("rte_rrtmgp_clouds: Don't recognize the kind of optical properties ") + end select + select type(clouds) + class is (ty_optical_props_1scl) + call stop_on_err(clouds%alloc_1scl(ncol, nlay)) + !$acc enter data copyin(clouds) create(clouds%tau) + !$omp target enter data map(alloc:clouds%tau) + class is (ty_optical_props_2str) + call stop_on_err(clouds%alloc_2str(ncol, nlay)) + !$acc enter data copyin(clouds) create(clouds%tau, clouds%ssa, clouds%g) + !$omp target enter data map(alloc:clouds%tau, clouds%ssa, clouds%g) + class default + call stop_on_err("rte_rrtmgp_clouds: Don't recognize the kind of optical properties ") + end select + ! ---------------------------------------------------------------------------- + ! Boundary conditions depending on whether the k-distribution being supplied + ! is LW or SW + if(is_sw) then + ! toa_flux is threadprivate + !!$omp parallel + allocate(toa_flux(ncol, ngpt)) + !!$omp end parallel + ! + allocate(sfc_alb_dir(nbnd, ncol), sfc_alb_dif(nbnd, ncol), mu0(ncol)) + !$acc enter data create(sfc_alb_dir, sfc_alb_dif, mu0) + !$omp target enter data map(alloc:sfc_alb_dir, sfc_alb_dif, mu0) + ! 
Ocean-ish values for no particular reason + !$acc kernels + !$omp target + sfc_alb_dir = 0.06_wp + sfc_alb_dif = 0.06_wp + mu0 = .86_wp + !$acc end kernels + !$omp end target + else + ! lw_sorces is threadprivate + !!$omp parallel + call stop_on_err(lw_sources%alloc(ncol, nlay, k_dist)) + !!$omp end parallel + + allocate(t_sfc(ncol), emis_sfc(nbnd, ncol)) + !$acc enter data create(t_sfc, emis_sfc) + !$omp target enter data map(alloc:t_sfc, emis_sfc) + ! Surface temperature + !$acc kernels + !$omp target + t_sfc = t_lev(1, merge(nlay+1, 1, top_at_1)) + emis_sfc = 0.98_wp + !$acc end kernels + !$omp end target + end if + ! ---------------------------------------------------------------------------- + ! + ! Fluxes + ! + !!$omp parallel + allocate(flux_up(ncol,nlay+1), flux_dn(ncol,nlay+1)) + !!$omp end parallel + + !$acc enter data create(flux_up, flux_dn) + !$omp target enter data map(alloc:flux_up, flux_dn) + if(is_sw) then + allocate(flux_dir(ncol,nlay+1)) + !$acc enter data create(flux_dir) + !$omp target enter data map(alloc:flux_dir) + end if + ! + ! Clouds + ! + allocate(lwp(ncol,nlay), iwp(ncol,nlay), & + rel(ncol,nlay), rei(ncol,nlay), cloud_mask(ncol,nlay)) + !$acc enter data create(cloud_mask, lwp, iwp, rel, rei) + !$omp target enter data map(alloc:cloud_mask, lwp, iwp, rel, rei) + + ! Restrict clouds to troposphere (> 100 hPa = 100*100 Pa) + ! and not very close to the ground (< 900 hPa), and + ! put them in 2/3 of the columns since that's roughly the + ! total cloudiness of earth + rel_val = 0.5 * (cloud_optics%get_min_radius_liq() + cloud_optics%get_max_radius_liq()) + rei_val = 0.5 * (cloud_optics%get_min_radius_ice() + cloud_optics%get_max_radius_ice()) + !$acc parallel loop collapse(2) copyin(t_lay) copyout(lwp, iwp, rel, rei) + !$omp target teams distribute parallel do simd collapse(2) map(to:t_lay) map(from:lwp, iwp, rel, rei) + do ilay=1,nlay + do icol=1,ncol + cloud_mask(icol,ilay) = p_lay(icol,ilay) > 100._wp * 100._wp .and. 
& + p_lay(icol,ilay) < 900._wp * 100._wp .and. & + mod(icol, 3) /= 0 + ! + ! Ice and liquid will overlap in a few layers + ! + lwp(icol,ilay) = merge(10._wp, 0._wp, cloud_mask(icol,ilay) .and. t_lay(icol,ilay) > 263._wp) + iwp(icol,ilay) = merge(10._wp, 0._wp, cloud_mask(icol,ilay) .and. t_lay(icol,ilay) < 273._wp) + rel(icol,ilay) = merge(rel_val, 0._wp, lwp(icol,ilay) > 0._wp) + rei(icol,ilay) = merge(rei_val, 0._wp, iwp(icol,ilay) > 0._wp) + end do + end do + !$acc exit data delete(cloud_mask) + !$omp target exit data map(release:cloud_mask) + ! ---------------------------------------------------------------------------- + ! + ! Multiple iterations for big problem sizes, and to help identify data movement + ! For CPUs we can introduce OpenMP threading over loop iterations + ! + allocate(elapsed(nloops)) + ! + call system_clock(start_all) + ! + !!$omp parallel do firstprivate(fluxes) + do iloop = 1, nloops + call system_clock(start) + call stop_on_err( & + cloud_optics%cloud_optics(lwp, iwp, rel, rei, clouds)) + ! + ! Solvers + ! 
+ fluxes%flux_up => flux_up(:,:) + fluxes%flux_dn => flux_dn(:,:) + if(is_lw) then + !$acc enter data create(lw_sources, lw_sources%lay_source, lw_sources%lev_source_inc, lw_sources%lev_source_dec, lw_sources%sfc_source) + !$omp target enter data map(alloc:lw_sources%lay_source, lw_sources%lev_source_inc, lw_sources%lev_source_dec, lw_sources%sfc_source) + call stop_on_err(k_dist%gas_optics(p_lay, p_lev, & + t_lay, t_sfc, & + gas_concs, & + atmos, & + lw_sources, & + tlev = t_lev)) + call stop_on_err(clouds%increment(atmos)) + call stop_on_err(rte_lw(atmos, top_at_1, & + lw_sources, & + emis_sfc, & + fluxes)) + !$acc exit data delete(lw_sources%lay_source, lw_sources%lev_source_inc, lw_sources%lev_source_dec, lw_sources%sfc_source, lw_sources) + !$omp target exit data map(release:lw_sources%lay_source, lw_sources%lev_source_inc, lw_sources%lev_source_dec, lw_sources%sfc_source) + else + !$acc enter data create(toa_flux) + !$omp target enter data map(alloc:toa_flux) + fluxes%flux_dn_dir => flux_dir(:,:) + + call stop_on_err(k_dist%gas_optics(p_lay, p_lev, & + t_lay, & + gas_concs, & + atmos, & + toa_flux)) + call stop_on_err(clouds%delta_scale()) + call stop_on_err(clouds%increment(atmos)) + call stop_on_err(rte_sw(atmos, top_at_1, & + mu0, toa_flux, & + sfc_alb_dir, sfc_alb_dif, & + fluxes)) + !$acc exit data delete(toa_flux) + !$omp target exit data map(release:toa_flux) + end if + !print *, "******************************************************************" + call system_clock(finish, clock_rate) + elapsed(iloop) = finish - start + end do + ! + call system_clock(finish_all, clock_rate) + ! 
+ !$acc exit data delete(lwp, iwp, rel, rei) + !$omp target exit data map(release:lwp, iwp, rel, rei) + !$acc exit data delete(p_lay, p_lev, t_lay, t_lev) + !$omp target exit data map(release:p_lay, p_lev, t_lay, t_lev) + +#if defined(_OPENACC) || defined(_OPENMP) + avg = sum( elapsed(merge(2,1,nloops>1):) ) / real(merge(nloops-1,nloops,nloops>1)) + + print *, "Execution times - min(s) :", minval(elapsed) / real(clock_rate) + print *, " - avg(s) :", avg / real(clock_rate) + print *, " - per column(ms):", avg / real(ncol) / (1.0e-3*clock_rate) +#else + print *, "Execution times - total(s) :", (finish_all-start_all) / real(clock_rate) + print *, " - per column(ms):", (finish_all-start_all) / real(ncol*nloops) / (1.0e-3*clock_rate) +#endif + + if(is_lw) then + !$acc exit data copyout(flux_up, flux_dn) + !$omp target exit data map(from:flux_up, flux_dn) + if(write_fluxes) call write_lw_fluxes(input_file, flux_up, flux_dn) + !$acc exit data delete(t_sfc, emis_sfc) + !$omp target exit data map(release:t_sfc, emis_sfc) + else + !$acc exit data copyout(flux_up, flux_dn, flux_dir) + !$omp target exit data map(from:flux_up, flux_dn, flux_dir) + if(write_fluxes) call write_sw_fluxes(input_file, flux_up, flux_dn, flux_dir) + !$acc exit data delete(sfc_alb_dir, sfc_alb_dif, mu0) + !$omp target exit data map(release:sfc_alb_dir, sfc_alb_dif, mu0) + end if + !$acc enter data create(lwp, iwp, rel, rei) + !$omp target enter data map(alloc:lwp, iwp, rel, rei) +end program rte_rrtmgp_clouds diff --git a/examples/rfmip-clear-sky/Makefile b/examples/rfmip-clear-sky/Makefile index 74d59c44c..500f29966 100644 --- a/examples/rfmip-clear-sky/Makefile +++ b/examples/rfmip-clear-sky/Makefile @@ -1,40 +1,35 @@ # -# Here set variables RRTMGP_BUILD, NCHOME, NFHOME, TIME_DIR (for GPTL) -# or have those variables set in the environment +# Location of RTE+RRTMGP libraries, module files. 
# -RRTMGP_BUILD=$(RRTMGP_ROOT)/build --include Makefile.libs --include $(RRTMGP_BUILD)/Makefile.conf +RRTMGP_BUILD = $(RRTMGP_ROOT)/build # # RRTMGP library, module files # -LDFLAGS += -L$(RRTMGP_BUILD) -LIBS += -lrrtmgp -lrte +# LDFLAGS += -L$(RRTMGP_BUILD) +# LIBS += -lrrtmgp -lrte FCINCLUDE += -I$(RRTMGP_BUILD) # # netcdf library, module files -# C and Fortran interfaces respectively +# Environment variables NCHOME and NFHOME point to root of C and Fortran interfaces respectively - # FCINCLUDE += -I$(NFHOME)/include LDFLAGS += -L$(NFHOME)/lib -L$(NCHOME)/lib LIBS += -lnetcdff -lnetcdf # -# Setting macro TIMING=yes uses routines from the General Purpose Timing Library -# https://jmrosinski.github.io/GPTL/ +# General Purpose Timing Library https://jmrosinski.github.io/GPTL/ +# Set environment variable GPTL_DIR to the root of a GPTL installation to build +# the RFMIP example with timers # -TIMING=no -# Compiler specific - e.g. turn off for pgfortran, set to -cpp for gfortran -#FCFLAGS += -fpp -ifeq ($(TIMING),yes) +ifneq ($(origin GPTL_DIR),undefined) # # Timing library # - FCINCLUDE += -I$(TIME_DIR)/include + FCINCLUDE += -I$(GPTL_DIR)/include # Compiler specific - FCFLAGS += -DUSE_TIMING - LDFLAGS += -L$(TIME_DIR)/lib + FCFLAGS += -DUSE_TIMING + LDFLAGS += -L$(GPTL_DIR)/lib LIBS += -lgptl endif @@ -58,7 +53,7 @@ rrtmgp_rfmip_lw: rrtmgp_rfmip_lw.o $(ADDITIONS) $(RRTMGP_BUILD)/librte.a $(R rrtmgp_rfmip_lw.o: rrtmgp_rfmip_lw.F90 $(ADDITIONS) -rrtmgp_rfmip_sw: rrtmgp_rfmip_sw.o $(ADDITIONS) $(RRTMGP_BUILD)/librte.a $(RRTMGP_BUILD)/librrtmgp.a +rrtmgp_rfmip_sw: rrtmgp_rfmip_sw.o $(ADDITIONS) $(RRTMGP_BUILD)/librrtmgp.a $(RRTMGP_BUILD)/librte.a rrtmgp_rfmip_sw.o: rrtmgp_rfmip_sw.F90 $(ADDITIONS) @@ -66,5 +61,16 @@ mo_rfmip_io.o: mo_rfmip_io.F90 mo_simple_netcdf.o mo_load_coefficients.o: mo_load_coefficients.F90 mo_simple_netcdf.o +tests: multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc # Also the template files + # Files need to have been 
generated/downloaded before + $(RUN_CMD) ./rrtmgp_rfmip_lw 8 multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-lw-g256-2018-12-04.nc + $(RUN_CMD) ./rrtmgp_rfmip_sw 8 multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-sw-g224-2018-12-04.nc + +check: + cd ${RRTMGP_ROOT}/examples/rfmip-clear-sky && python ./compare-to-reference.py --fail=7.e-4 + +multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc: + bash ./stage_files.sh + clean: - -rm rrtmgp_rfmip_sw rrtmgp_rfmip_lw *.o *.mod *.optrpt + -rm rrtmgp_rfmip_sw rrtmgp_rfmip_lw *.o *.mod *.optrpt *.nc diff --git a/examples/rfmip-clear-sky/Makefile.libs.macos b/examples/rfmip-clear-sky/Makefile.libs.macos deleted file mode 100644 index 465cdd3aa..000000000 --- a/examples/rfmip-clear-sky/Makefile.libs.macos +++ /dev/null @@ -1,10 +0,0 @@ -# Location of RTE+RRTMGP libraries, module files. -export RRTMGP_BUILD = $(RRTMGP_ROOT)/build -# Sets macros FC, FCFLAGS consistent with RTE+RRTMGP - -# NetCDF C and Fortran libraries, module files -export NCHOME = /opt/local -export NFHOME = $(HOME)/Applications/$(FC) - -# GPTL libraries and module files if desired (https://jmrosinski.github.io/GPTL/) -export TIME_DIR = $(HOME)/Codes/GPTL/gptl-v5.5.3/macos diff --git a/examples/rfmip-clear-sky/Makefile.libs.olcf b/examples/rfmip-clear-sky/Makefile.libs.olcf deleted file mode 100644 index 38ebdc75c..000000000 --- a/examples/rfmip-clear-sky/Makefile.libs.olcf +++ /dev/null @@ -1,10 +0,0 @@ -# Location of RTE+RRTMGP libraries, module files. 
-export RRTMGP_BUILD = ../../build -# Sets macros FC, FCFLAGS consistent with RTE+RRTMGP - -# NetCDF C and Fortran libraries, module files -NCHOME = $(OLCF_NETCDF_ROOT) -NFHOME = $(OLCF_NETCDF_FORTRAN_ROOT) - -# GPTL libraries and module files if desired (https://jmrosinski.github.io/GPTL/) -# export TIME_DIR diff --git a/examples/rfmip-clear-sky/Makefile.libs.pgfortran-cscs b/examples/rfmip-clear-sky/Makefile.libs.pgfortran-cscs deleted file mode 100644 index 3bd5a213b..000000000 --- a/examples/rfmip-clear-sky/Makefile.libs.pgfortran-cscs +++ /dev/null @@ -1,20 +0,0 @@ -# Load the following modules and set the library path -# -# module load cdt/19.06 -# module swap PrgEnv-cray PrgEnv-pgi -# module load cray-netcdf cray-hdf5 -# module load craype-accel-nvidia60 -# module unload cray-libsci_acc -# export LD_LIBRARY_PATH=$CRAY_LD_LIBRARY_PATH:$LD_LIBRARY_PATH - -export FC = ftn -export FCFLAGS = -g -Minfo -Mbounds -Mchkptr -Mstandard -Kieee -Mchkstk -Mipa=fast,inline -Mallocatable=03 - -# Where to find Fortran .mod files -export FCINCLUDE = - -# Libraries including netCDF and Fortran interface to netCDF -export LDFLAGS = - -#export LIBS = -lnetcdff -lnetcdf # Not necessary if ftn wrapper is used -export LIBS = diff --git a/examples/rfmip-clear-sky/Readme.md b/examples/rfmip-clear-sky/Readme.md index a2d2323e6..511bb315c 100644 --- a/examples/rfmip-clear-sky/Readme.md +++ b/examples/rfmip-clear-sky/Readme.md @@ -4,11 +4,11 @@ the [RTE+RRTMGP](https://github.com/RobertPincus/rte-rrtmgp) radiation parameter [RFMIP](https://www.earthsystemcog.org/projects/rfmip/) cases. 1. Build the RTE+RRTMGP libraries in `../../build/`. This will require setting -environmental variables `FC` for the Fortran compiler and `FCFLAGS`, or creating -`../../build/Makefile.conf` with that information. +environmental variables `FC` for the Fortran compiler and `FCFLAGS`. 2. 
Build the executables in this directory, which will require providing the locations of the netCDF C and Fortran libraries and module files as environmental -variables (NCHOME and NFHOME) or via file `Makefile.libs` +variables `NCHOME` and `NFHOME`, as well a variable `RRTMGP_ROOT` pointing to the root of the installation +(the absolute path to `../../`). 3. Use Python script `stage_files.py` to download relevant files from the [RFMIP web site](https://www.earthsystemcog.org/projects/rfmip/resources/).This script invokes another Python script to create empty output files. 4. Use Python script `run-rfmip-examples.py` to run the examples. The script takes diff --git a/examples/rfmip-clear-sky/compare-to-reference.py b/examples/rfmip-clear-sky/compare-to-reference.py index f1b87e66c..a8f104db0 100755 --- a/examples/rfmip-clear-sky/compare-to-reference.py +++ b/examples/rfmip-clear-sky/compare-to-reference.py @@ -12,11 +12,23 @@ tst_dir = "." rrtmgp_suffix = "_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc" +def construct_esgf_remote_name(var): + # + # For a given variable name, provide the OpenDAP URL for the RTE+RRTMGP RFMIP results + # This doesn't seem to work on CSCS Piz Daint within the netcdf-python module + # + esgf_url_base = "http://esgf3.dkrz.de/thredds/dodsC/cmip6/RFMIP/RTE-RRTMGP-Consortium/RTE-RRTMGP-181204/rad-irf/r1i1p1f1/Efx/" + # DKRZ server has been unstable - better to try the other if one fails + esgf_url_base = "http://esgf-data1.llnl.gov/thredds/dodsC/css03_data/CMIP6/RFMIP/RTE-RRTMGP-Consortium/RTE-RRTMGP-181204/rad-irf/r1i1p1f1/Efx/" + esgf_url_ver = "gn/v20191007/" + return(os.path.join(esgf_url_base, var, esgf_url_ver, var+rrtmgp_suffix)) # # Construct URL for RTE+RRTMGP results for RFMIP from ESGF # def construct_esgf_file(var): esgf_url_base = "http://esgf3.dkrz.de/thredds/fileServer/cmip6/RFMIP/RTE-RRTMGP-Consortium/RTE-RRTMGP-181204/rad-irf/r1i1p1f1/Efx/" + # DKRZ node goes down frequently + esgf_url_base = 
"http://esgf-data1.llnl.gov/thredds/fileServer/css03_data/CMIP6/RFMIP/RTE-RRTMGP-Consortium/RTE-RRTMGP-181204/rad-irf/r1i1p1f1/Efx/" esgf_url_ver = "gn/v20191007/" return (os.path.join(esgf_url_base, var, esgf_url_ver, var+rrtmgp_suffix)) # diff --git a/examples/rfmip-clear-sky/rrtmgp_rfmip_lw.F90 b/examples/rfmip-clear-sky/rrtmgp_rfmip_lw.F90 index 5c43e5d1d..b41fd82fc 100644 --- a/examples/rfmip-clear-sky/rrtmgp_rfmip_lw.F90 +++ b/examples/rfmip-clear-sky/rrtmgp_rfmip_lw.F90 @@ -29,7 +29,7 @@ subroutine stop_on_err(error_msg) if(error_msg /= "") then write (error_unit,*) trim(error_msg) write (error_unit,*) "rrtmgp_rfmip_lw stopping" - stop + error stop 1 end if end subroutine stop_on_err ! ------------------------------------------------------------------------------------------------- @@ -226,8 +226,11 @@ program rrtmgp_rfmip_lw ! bug related to the use of Fortran classes on the GPU. ! !$acc enter data create(sfc_emis_spec) + !$omp target enter data map(alloc:sfc_emis_spec) !$acc enter data create(optical_props, optical_props%tau) + !$omp target enter data map(alloc:optical_props%tau) !$acc enter data create(source, source%lay_source, source%lev_source_inc, source%lev_source_dec, source%sfc_source) + !$omp target enter data map(alloc:source%lay_source, source%lev_source_inc, source%lev_source_dec, source%sfc_source) ! -------------------------------------------------- #ifdef USE_TIMING ! @@ -241,7 +244,7 @@ program rrtmgp_rfmip_lw ! Loop over blocks ! #ifdef USE_TIMING - do i = 1, 32 + do i = 1, 4 #endif do b = 1, nblocks fluxes%flux_up => flux_up(:,:,b) @@ -251,6 +254,7 @@ program rrtmgp_rfmip_lw ! (This is partly to show how to keep work on GPUs using OpenACC) ! 
!$acc parallel loop collapse(2) copyin(sfc_emis) + !$omp target teams distribute parallel do simd collapse(2) map(to:sfc_emis) do icol = 1, block_size do ibnd = 1, nbnd sfc_emis_spec(ibnd,icol) = sfc_emis(icol,b) @@ -299,8 +303,11 @@ program rrtmgp_rfmip_lw ret = gptlfinalize() #endif !$acc exit data delete(sfc_emis_spec) + !$omp target exit data map(release:sfc_emis_spec) !$acc exit data delete(optical_props%tau, optical_props) + !$omp target exit data map(release:optical_props%tau) !$acc exit data delete(source%lay_source, source%lev_source_inc, source%lev_source_dec, source%sfc_source) + !$omp target exit data map(release:source%lay_source, source%lev_source_inc, source%lev_source_dec, source%sfc_source) !$acc exit data delete(source) ! --------------------------------------------------m call unblock_and_write(trim(flxup_file), 'rlu', flux_up) diff --git a/examples/rfmip-clear-sky/rrtmgp_rfmip_sw.F90 b/examples/rfmip-clear-sky/rrtmgp_rfmip_sw.F90 index 3c01f8dda..52215ade3 100644 --- a/examples/rfmip-clear-sky/rrtmgp_rfmip_sw.F90 +++ b/examples/rfmip-clear-sky/rrtmgp_rfmip_sw.F90 @@ -29,7 +29,7 @@ subroutine stop_on_err(error_msg) if(error_msg /= "") then write (error_unit,*) trim(error_msg) write (error_unit,*) "rrtmgp_rfmip_sw stopping" - stop + error stop 1 end if end subroutine stop_on_err ! ------------------------------------------------------------------------------------------------- @@ -229,8 +229,11 @@ program rrtmgp_rfmip_sw allocate(mu0(block_size), sfc_alb_spec(nbnd,block_size)) call stop_on_err(optical_props%alloc_2str(block_size, nlay, k_dist)) !$acc enter data create(optical_props, optical_props%tau, optical_props%ssa, optical_props%g) + !$omp target enter data map(alloc:optical_props%tau, optical_props%ssa, optical_props%g) !$acc enter data create (toa_flux, def_tsi) + !$omp target enter data map(alloc:toa_flux, def_tsi) !$acc enter data create (sfc_alb_spec, mu0) + !$omp target enter data map(alloc:sfc_alb_spec, mu0) ! 
-------------------------------------------------- #ifdef USE_TIMING ! @@ -244,7 +247,7 @@ program rrtmgp_rfmip_sw ! Loop over blocks ! #ifdef USE_TIMING - do i = 1, 32 + do i = 1, 4 #endif do b = 1, nblocks fluxes%flux_up => flux_up(:,:,b) @@ -269,12 +272,14 @@ program rrtmgp_rfmip_sw ! (This is partly to show how to keep work on GPUs using OpenACC in a host application) ! What's the total solar irradiance assumed by RRTMGP? ! -#ifdef _OPENACC +#if defined(_OPENACC) || defined(_OPENMP) call zero_array(block_size, def_tsi) !$acc parallel loop collapse(2) copy(def_tsi) copyin(toa_flux) + !$omp target teams distribute parallel do simd collapse(2) map(tofrom:def_tsi) map(to:toa_flux) do igpt = 1, ngpt do icol = 1, block_size !$acc atomic update + !$omp atomic update def_tsi(icol) = def_tsi(icol) + toa_flux(icol, igpt) end do end do @@ -288,6 +293,7 @@ program rrtmgp_rfmip_sw ! Normalize incoming solar flux to match RFMIP specification ! !$acc parallel loop collapse(2) copyin(total_solar_irradiance, def_tsi) copy(toa_flux) + !$omp target teams distribute parallel do simd collapse(2) map(to:total_solar_irradiance, def_tsi) map(tofrom:toa_flux) do igpt = 1, ngpt do icol = 1, block_size toa_flux(icol,igpt) = toa_flux(icol,igpt) * total_solar_irradiance(icol,b)/def_tsi(icol) @@ -297,6 +303,7 @@ program rrtmgp_rfmip_sw ! Expand the spectrally-constant surface albedo to a per-band albedo for each column ! !$acc parallel loop collapse(2) copyin(surface_albedo) + !$omp target teams distribute parallel do simd collapse(2) map(to:surface_albedo) do icol = 1, block_size do ibnd = 1, nbnd sfc_alb_spec(ibnd,icol) = surface_albedo(icol,b) @@ -306,6 +313,7 @@ program rrtmgp_rfmip_sw ! Cosine of the solar zenith angle ! 
!$acc parallel loop copyin(solar_zenith_angle, usecol) + !$omp target teams distribute parallel do simd map(to:solar_zenith_angle, usecol) do icol = 1, block_size mu0(icol) = merge(cos(solar_zenith_angle(icol,b)*deg_to_rad), 1._wp, usecol(icol,b)) end do @@ -346,8 +354,11 @@ program rrtmgp_rfmip_sw ret = gptlfinalize() #endif !$acc exit data delete(optical_props%tau, optical_props%ssa, optical_props%g, optical_props) + !$omp target exit data map(release:optical_props%tau, optical_props%ssa, optical_props%g) !$acc exit data delete(sfc_alb_spec, mu0) + !$omp target exit data map(release:sfc_alb_spec, mu0) !$acc exit data delete(toa_flux, def_tsi) + !$omp target exit data map(release:toa_flux, def_tsi) ! -------------------------------------------------- call unblock_and_write(trim(flxup_file), 'rsu', flux_up) call unblock_and_write(trim(flxdn_file), 'rsd', flux_dn) diff --git a/examples/rfmip-clear-sky/stage_files.sh b/examples/rfmip-clear-sky/stage_files.sh new file mode 100644 index 000000000..53ed9318e --- /dev/null +++ b/examples/rfmip-clear-sky/stage_files.sh @@ -0,0 +1,5 @@ +wget https://psl.noaa.gov/thredds/fileServer/Datasets/rte-rrtmgp/continuous-integration/multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc +wget https://psl.noaa.gov/thredds/fileServer/Datasets/rte-rrtmgp/continuous-integration/rld_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc +wget https://psl.noaa.gov/thredds/fileServer/Datasets/rte-rrtmgp/continuous-integration/rlu_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc +wget https://psl.noaa.gov/thredds/fileServer/Datasets/rte-rrtmgp/continuous-integration/rsd_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc +wget https://psl.noaa.gov/thredds/fileServer/Datasets/rte-rrtmgp/continuous-integration/rsu_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc diff --git a/extensions/cloud_optics/mo_cloud_optics.F90 b/extensions/cloud_optics/mo_cloud_optics.F90 index bd812ed88..3e4309eb7 100644 --- a/extensions/cloud_optics/mo_cloud_optics.F90 +++ 
b/extensions/cloud_optics/mo_cloud_optics.F90 @@ -151,6 +151,9 @@ function load_lut(this, band_lims_wvn, & !$acc enter data create(this) & !$acc create(this%lut_extliq, this%lut_ssaliq, this%lut_asyliq) & !$acc create(this%lut_extice, this%lut_ssaice, this%lut_asyice) + !$omp target enter data & + !$omp map(alloc:this%lut_extliq, this%lut_ssaliq, this%lut_asyliq) & + !$omp map(alloc:this%lut_extice, this%lut_ssaice, this%lut_asyice) ! Load LUT constants this%radliq_lwr = radliq_lwr this%radliq_upr = radliq_upr @@ -159,6 +162,7 @@ function load_lut(this, band_lims_wvn, & ! Load LUT coefficients !$acc kernels + !$omp target this%lut_extliq = lut_extliq this%lut_ssaliq = lut_ssaliq this%lut_asyliq = lut_asyliq @@ -166,6 +170,7 @@ function load_lut(this, band_lims_wvn, & this%lut_ssaice = lut_ssaice this%lut_asyice = lut_asyice !$acc end kernels + !$omp end target ! ! Set default ice roughness - min values ! @@ -277,10 +282,16 @@ function load_pade(this, band_lims_wvn, & !$acc create(this%pade_extice, this%pade_ssaice, this%pade_asyice) & !$acc create(this%pade_sizreg_extliq, this%pade_sizreg_ssaliq, this%pade_sizreg_asyliq) & !$acc create(this%pade_sizreg_extice, this%pade_sizreg_ssaice, this%pade_sizreg_asyice) + !$omp target enter data & + !$omp map(alloc:this%pade_extliq, this%pade_ssaliq, this%pade_asyliq) & + !$omp map(alloc:this%pade_extice, this%pade_ssaice, this%pade_asyice) & + !$omp map(alloc:this%pade_sizreg_extliq, this%pade_sizreg_ssaliq, this%pade_sizreg_asyliq) & + !$omp map(alloc:this%pade_sizreg_extice, this%pade_sizreg_ssaice, this%pade_sizreg_asyice) ! ! Load data ! !$acc kernels + !$omp target this%pade_extliq = pade_extliq this%pade_ssaliq = pade_ssaliq this%pade_asyliq = pade_asyliq @@ -294,6 +305,7 @@ function load_pade(this, band_lims_wvn, & this%pade_sizreg_ssaice = pade_sizreg_ssaice this%pade_sizreg_asyice = pade_sizreg_asyice !$acc end kernels + !$omp end target ! ! Set default ice roughness - min values ! 
@@ -318,6 +330,8 @@ subroutine finalize(this) !$acc exit data delete(this%lut_extliq, this%lut_ssaliq, this%lut_asyliq) & !$acc delete(this%lut_extice, this%lut_ssaice, this%lut_asyice) & !$acc delete(this) + !$omp target exit data map(release:this%lut_extliq, this%lut_ssaliq, this%lut_asyliq) & + !$omp map(release:this%lut_extice, this%lut_ssaice, this%lut_asyice) deallocate(this%lut_extliq, this%lut_ssaliq, this%lut_asyliq, & @@ -336,6 +350,10 @@ subroutine finalize(this) !$acc delete(this%pade_sizreg_extliq, this%pade_sizreg_ssaliq, this%pade_sizreg_asyliq) & !$acc delete(this%pade_sizreg_extice, this%pade_sizreg_ssaice, this%pade_sizreg_asyice) & !$acc delete(this) + !$omp target exit data map(release:this%pade_extliq, this%pade_ssaliq, this%pade_asyliq) & + !$omp map(release:this%pade_extice, this%pade_ssaice, this%pade_asyice) & + !$omp map(release:this%pade_sizreg_extliq, this%pade_sizreg_ssaliq, this%pade_sizreg_asyliq) & + !$omp map(release:this%pade_sizreg_extice, this%pade_sizreg_ssaice, this%pade_sizreg_asyice) deallocate(this%pade_extliq, this%pade_ssaliq, this%pade_asyliq, & this%pade_extice, this%pade_ssaice, this%pade_asyice, & @@ -420,10 +438,14 @@ function cloud_optics(this, & !$acc data copyin(clwp, ciwp, reliq, reice) & !$acc create(ltau, ltaussa, ltaussag, itau, itaussa, itaussag) & !$acc create(liqmsk,icemsk) + !$omp target data map(to:clwp, ciwp, reliq, reice) & + !$omp map(alloc:ltau, ltaussa, ltaussag, itau, itaussa, itaussag) & + !$omp map(alloc:liqmsk, icemsk) ! ! Cloud masks; don't need value re values if there's no cloud ! !$acc parallel loop gang vector default(none) collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do ilay = 1, nlay do icol = 1, ncol liqmsk(icol,ilay) = clwp(icol,ilay) > 0._wp @@ -441,8 +463,8 @@ function cloud_optics(this, & error_msg = 'cloud optics: ice effective radius is out of bounds' if(any_vals_less_than(clwp, liqmsk, 0._wp) .or. 
any_vals_less_than(ciwp, icemsk, 0._wp)) & error_msg = 'cloud optics: negative clwp or ciwp where clouds are supposed to be' - if(error_msg == "") then end if + if(error_msg == "") then ! ! ! ---------------------------------------- @@ -500,6 +522,8 @@ function cloud_optics(this, & type is (ty_optical_props_1scl) !$acc parallel loop gang vector default(none) collapse(3) & !$acc copyin(optical_props) copyout(optical_props%tau) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp map(from:optical_props%tau) do ibnd = 1, nbnd do ilay = 1, nlay @@ -513,6 +537,8 @@ function cloud_optics(this, & type is (ty_optical_props_2str) !$acc parallel loop gang vector default(none) collapse(3) & !$acc copyin(optical_props) copyout(optical_props%tau, optical_props%ssa, optical_props%g) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp map(from:optical_props%tau, optical_props%ssa, optical_props%g) do ibnd = 1, nbnd do ilay = 1, nlay do icol = 1,ncol @@ -528,9 +554,9 @@ function cloud_optics(this, & type is (ty_optical_props_nstr) error_msg = "cloud optics: n-stream calculations not yet supported" end select - - end if ! error_msg == "" + end if !$acc end data + !$omp end target data end function cloud_optics !-------------------------------------------------------------------------------------------------------------------- ! @@ -616,6 +642,7 @@ subroutine compute_all_from_table(ncol, nlay, nbnd, mask, lwp, re, & real(wp) :: t, ts, tsg ! tau, tau*ssa, tau*ssa*g ! 
--------------------------- !$acc parallel loop gang vector default(present) collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do ibnd = 1, nbnd do ilay = 1,nlay do icol = 1, ncol @@ -670,6 +697,7 @@ subroutine compute_all_from_pade(ncol, nlay, nbnd, nsizes, & real(wp) :: t, ts !$acc parallel loop gang vector default(present) collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do ibnd = 1, nbnd do ilay = 1, nlay do icol = 1, ncol @@ -742,6 +770,7 @@ end function pade_eval_nbnd ! function pade_eval_1(iband, nbnd, nrads, m, n, irad, re, pade_coeffs) !$acc routine seq + !$omp declare target ! integer, intent(in) :: iband, nbnd, nrads, m, n, irad real(wp), dimension(nbnd, nrads, 0:m+n), & diff --git a/extensions/mo_fluxes_byband.F90 b/extensions/mo_fluxes_byband.F90 index c330f63a6..a83261e0e 100644 --- a/extensions/mo_fluxes_byband.F90 +++ b/extensions/mo_fluxes_byband.F90 @@ -98,6 +98,7 @@ function reduce_byband(this, gpt_flux_up, gpt_flux_dn, spectral_disc, top_at_1, ! ------- !$acc enter data copyin(band_lims) + !$omp target enter data map(to:band_lims) ! Band-by-band fluxes ! Up flux if(associated(this%bnd_flux_up)) then @@ -128,6 +129,7 @@ function reduce_byband(this, gpt_flux_up, gpt_flux_dn, spectral_disc, top_at_1, end if end if !$acc exit data delete(band_lims) + !$omp target exit data map(release:band_lims) end function reduce_byband ! -------------------------------------------------------------------------------------- ! Are any fluxes desired from this set of g-point fluxes? We can tell because memory will diff --git a/extensions/mo_fluxes_byband_kernels.F90 b/extensions/mo_fluxes_byband_kernels.F90 index 0e69e46e5..708853aa4 100644 --- a/extensions/mo_fluxes_byband_kernels.F90 +++ b/extensions/mo_fluxes_byband_kernels.F90 @@ -28,7 +28,7 @@ module mo_fluxes_byband_kernels ! ! Spectral reduction over all points ! 
- pure subroutine sum_byband(ncol, nlev, ngpt, nbnd, band_lims, spectral_flux, byband_flux) bind (C) + subroutine sum_byband(ncol, nlev, ngpt, nbnd, band_lims, spectral_flux, byband_flux) bind (C) integer, intent(in ) :: ncol, nlev, ngpt, nbnd integer, dimension(2, nbnd), intent(in ) :: band_lims real(wp), dimension(ncol, nlev, ngpt), intent(in ) :: spectral_flux @@ -36,6 +36,7 @@ pure subroutine sum_byband(ncol, nlev, ngpt, nbnd, band_lims, spectral_flux, byb integer :: icol, ilev, igpt, ibnd !$acc parallel loop collapse(3) copyin(spectral_flux, band_lims) copyout(byband_flux) + !$omp target teams distribute parallel do collapse(3) map(to:spectral_flux, band_lims) map(from:byband_flux) do ibnd = 1, nbnd do ilev = 1, nlev do icol = 1, ncol @@ -52,7 +53,7 @@ end subroutine sum_byband ! ! Net flux: Spectral reduction over all points ! - pure subroutine net_byband_full(ncol, nlev, ngpt, nbnd, band_lims, spectral_flux_dn, spectral_flux_up, byband_flux_net) bind (C) + subroutine net_byband_full(ncol, nlev, ngpt, nbnd, band_lims, spectral_flux_dn, spectral_flux_up, byband_flux_net) bind (C) integer, intent(in ) :: ncol, nlev, ngpt, nbnd integer, dimension(2, nbnd), intent(in ) :: band_lims real(wp), dimension(ncol, nlev, ngpt), intent(in ) :: spectral_flux_dn, spectral_flux_up @@ -61,6 +62,7 @@ pure subroutine net_byband_full(ncol, nlev, ngpt, nbnd, band_lims, spectral_flux integer :: icol, ilev, igpt, ibnd !$acc parallel loop collapse(3) copyin(spectral_flux_dn, spectral_flux_up, band_lims) copyout(byband_flux_net) + !$omp target teams distribute parallel do collapse(3) map(to:spectral_flux_dn, spectral_flux_up, band_lims) map(from:byband_flux_net) do ibnd = 1, nbnd do ilev = 1, nlev do icol = 1, ncol @@ -77,7 +79,7 @@ pure subroutine net_byband_full(ncol, nlev, ngpt, nbnd, band_lims, spectral_flux end do end subroutine net_byband_full ! 
---------------------------------------------------------------------------- - pure subroutine net_byband_precalc(ncol, nlev, nbnd, byband_flux_dn, byband_flux_up, byband_flux_net) bind (C) + subroutine net_byband_precalc(ncol, nlev, nbnd, byband_flux_dn, byband_flux_up, byband_flux_net) bind (C) integer, intent(in ) :: ncol, nlev, nbnd real(wp), dimension(ncol, nlev, nbnd), intent(in ) :: byband_flux_dn, byband_flux_up real(wp), dimension(ncol, nlev, nbnd), intent(out) :: byband_flux_net diff --git a/rrtmgp/Make.depends b/rrtmgp/Make.depends index 1395b71c5..26733df03 100644 --- a/rrtmgp/Make.depends +++ b/rrtmgp/Make.depends @@ -28,7 +28,7 @@ mo_gas_concentrations.o: mo_rte_kind.o mo_rte_config.o mo_rte_util_array.o mo_rr # # Gas optics # -mo_gas_optics_kernels.o: mo_rte_kind.o mo_gas_optics_kernels.F90 +mo_gas_optics_kernels.o: mo_rte_kind.o mo_rte_util_array.o mo_gas_optics_kernels.F90 mo_gas_optics.o: mo_rte_kind.o mo_rte_config.o mo_gas_concentrations.o \ mo_optical_props.o mo_source_functions.o \ diff --git a/rrtmgp/kernels-openacc/mo_gas_optics_kernels.F90 b/rrtmgp/kernels-openacc/mo_gas_optics_kernels.F90 index bbfc2ba36..75cc0a5dd 100644 --- a/rrtmgp/kernels-openacc/mo_gas_optics_kernels.F90 +++ b/rrtmgp/kernels-openacc/mo_gas_optics_kernels.F90 @@ -16,6 +16,7 @@ module mo_gas_optics_kernels use mo_rte_kind, only: wp, wl + use mo_rte_util_array,only: zero_array implicit none public contains @@ -67,10 +68,14 @@ subroutine interpolation( & integer :: icol, ilay, iflav, igases(2), itropo, itemp !$acc enter data copyin(flavor,press_ref_log,temp_ref,vmr_ref,play,tlay,col_gas) + !$omp target enter data map(to:flavor, press_ref_log, temp_ref, vmr_ref, play, tlay, col_gas) !$acc enter data create(jtemp,jpress,tropo,jeta,col_mix,fmajor,fminor) + !$omp target enter data map(alloc:jtemp, jpress, tropo, jeta, col_mix, fmajor, fminor) !$acc enter data create(ftemp,fpress) + !$omp target enter data map(alloc:ftemp, fpress) !$acc parallel loop gang vector collapse(2) + 
!$omp target teams distribute parallel do simd collapse(2) do ilay = 1, nlay do icol = 1, ncol ! index and factor for temperature interpolation @@ -92,6 +97,7 @@ subroutine interpolation( & ! PGI BUG WORKAROUND: if present(vmr_ref) isn't there, OpenACC runtime ! thinks it isn't present. !$acc parallel loop gang vector collapse(4) private(igases) present(vmr_ref) + !$omp target teams distribute parallel do simd collapse(4) private(igases) do ilay = 1, nlay do icol = 1, ncol ! loop over implemented combinations of major species @@ -127,8 +133,11 @@ subroutine interpolation( & end do !$acc exit data delete(flavor,press_ref_log,temp_ref,vmr_ref,play,tlay,col_gas) + !$omp target exit data map(release:flavor, press_ref_log, temp_ref, vmr_ref, play, tlay, col_gas) !$acc exit data copyout(jtemp,jpress,tropo,jeta,col_mix,fmajor,fminor) + !$omp target exit data map(from:jtemp, jpress, tropo, jeta, col_mix, fmajor, fminor) !$acc exit data delete(ftemp,fpress) + !$omp target exit data map(release:ftemp, fpress) end subroutine interpolation ! -------------------------------------------------------------------------------------- @@ -213,7 +222,9 @@ subroutine compute_tau_absorption( & ! ---------------------------------------------------------------- !$acc enter data create(itropo_lower, itropo_upper) + !$omp target enter data map(alloc:itropo_lower, itropo_upper) !$acc enter data copyin(play, tlay, tropo, gpoint_flavor, jeta, jtemp, col_gas, fminor, tau) + !$omp target enter data map(to:play, tlay, tropo, gpoint_flavor, jeta, jtemp, col_gas, fminor, tau) ! --------------------- ! 
Layer limits of upper, lower atmospheres @@ -221,19 +232,31 @@ subroutine compute_tau_absorption( & top_at_1 = play(1,1) < play(1, nlay) if(top_at_1) then !$acc parallel loop + !$omp target teams distribute parallel do simd do icol = 1,ncol itropo_lower(icol,2) = nlay +#ifdef _CRAYFTN + itropo_upper(icol,1) = 1 + call minmaxloc(icol, tropo, play, itropo_lower(icol,1), itropo_upper(icol,2)) +#else itropo_lower(icol,1) = minloc(play(icol,:), dim=1, mask=tropo(icol,:)) itropo_upper(icol,1) = 1 itropo_upper(icol,2) = maxloc(play(icol,:), dim=1, mask=(.not. tropo(icol,:))) +#endif end do else !$acc parallel loop + !$omp target teams distribute parallel do simd do icol = 1,ncol itropo_lower(icol,1) = 1 +#ifdef _CRAYFTN + itropo_upper(icol,2) = nlay + call minmaxloc(icol, tropo, play, itropo_lower(icol,2), itropo_upper(icol,1)) +#else itropo_lower(icol,2) = minloc(play(icol,:), dim=1, mask=tropo(icol,:)) itropo_upper(icol,2) = nlay itropo_upper(icol,1) = maxloc(play(icol,:), dim=1, mask=(.not.tropo(icol,:))) +#endif end do end if ! --------------------- @@ -292,8 +315,11 @@ subroutine compute_tau_absorption( & tau) !$acc exit data delete(itropo_lower,itropo_upper) + !$omp target exit data map(release:itropo_lower, itropo_upper) !$acc exit data delete(play, tlay, tropo, gpoint_flavor, jeta, jtemp, col_gas, fminor) + !$omp target exit data map(release:play, tlay, tropo, gpoint_flavor, jeta, jtemp, col_gas, fminor) !$acc exit data copyout(tau) + !$omp target exit data map(from:tau) end subroutine compute_tau_absorption ! -------------------------------------------------------------------------------------- @@ -338,6 +364,7 @@ subroutine gas_optical_depths_major(ncol,nlay,nbnd,ngpt,& ! optical depth calculation for major species !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do ilay = 1, nlay do icol = 1, ncol ! 
optical depth calculation for major species @@ -415,6 +442,7 @@ subroutine gas_optical_depths_minor(ncol,nlay,ngpt, & max_gpt_diff = maxval( minor_limits_gpt(2,:) - minor_limits_gpt(1,:) ) !$acc parallel loop gang vector collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do ilay = 1 , nlay do icol = 1, ncol do igpt0 = 0, max_gpt_diff @@ -446,7 +474,7 @@ subroutine gas_optical_depths_minor(ncol,nlay,ngpt, & if (scale_by_complement(imnr)) then ! scale by densities of all gases but the special one scaling = scaling * (1._wp - mycol_gas_imnr * vmr_fact * dry_fact) else - scaling = scaling * mycol_gas_imnr * vmr_fact * dry_fact + scaling = scaling * (mycol_gas_imnr * vmr_fact * dry_fact) endif endif endif @@ -474,6 +502,7 @@ subroutine gas_optical_depths_minor(ncol,nlay,ngpt, & tau_minor = kminor_loc * scaling !$acc atomic update + !$omp atomic update tau(igpt,ilay,icol) = tau(igpt,ilay,icol) + tau_minor endif @@ -517,6 +546,7 @@ subroutine compute_tau_rayleigh(ncol,nlay,nbnd,ngpt, & ! ----------------- !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do ilay = 1, nlay do icol = 1, ncol do igpt = 1, ngpt @@ -576,12 +606,17 @@ subroutine compute_Planck_source( & ! ----------------- !$acc enter data copyin(tlay,tlev,tsfc,fmajor,jeta,tropo,jtemp,jpress,gpoint_bands,pfracin,totplnk,gpoint_flavor) + !$omp target enter data map(to:tlay, tlev, tsfc, fmajor, jeta, tropo, jtemp, jpress, gpoint_bands, pfracin, totplnk, gpoint_flavor) !$acc enter data create(sfc_src,lay_src,lev_src_inc,lev_src_dec) + !$omp target enter data map(alloc:sfc_src, lay_src, lev_src_inc, lev_src_dec) !$acc enter data create(pfrac,planck_function) + !$omp target enter data map(alloc:pfrac, planck_function) !$acc enter data create(sfc_source_Jac) + !$omp target enter data map(alloc:sfc_source_Jac) ! 
Calculation of fraction of band's Planck irradiance associated with each g-point !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do icol = 1, ncol do ilay = 1, nlay do igpt = 1, ngpt @@ -601,6 +636,7 @@ subroutine compute_Planck_source( & ! Compute surface source irradiance for g-point, equals band irradiance x fraction for g-point ! !$acc parallel loop + !$omp target teams distribute parallel do simd do icol = 1, ncol call interpolate1D(tsfc(icol) , temp_ref_min, totplnk_delta, totplnk, planck_function(1:nbnd,1,icol)) call interpolate1D(tsfc(icol) + delta_Tsurf, temp_ref_min, totplnk_delta, totplnk, planck_function(1:nbnd,2,icol)) @@ -609,6 +645,7 @@ subroutine compute_Planck_source( & ! Map to g-points ! !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol sfc_src (igpt,icol) = pfrac(igpt,sfc_lay,icol) * planck_function(gpoint_bands(igpt),1,icol) @@ -618,6 +655,7 @@ subroutine compute_Planck_source( & end do ! icol !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do icol = 1, ncol do ilay = 1, nlay ! Compute layer source irradiance for g-point, equals band irradiance x fraction for g-point @@ -631,6 +669,7 @@ subroutine compute_Planck_source( & ! Helps to achieve higher bandwidth ! !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do icol = 1, ncol, 2 do ilay = 1, nlay do igpt = 1, ngpt @@ -643,11 +682,13 @@ subroutine compute_Planck_source( & ! 
compute level source irradiances for each g-point, one each for upward and downward paths !$acc parallel loop + !$omp target teams distribute parallel do simd do icol = 1, ncol call interpolate1D(tlev(icol, 1), temp_ref_min, totplnk_delta, totplnk, planck_function(1:nbnd, 1,icol)) end do !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do icol = 1, ncol do ilay = 2, nlay+1 call interpolate1D(tlev(icol,ilay), temp_ref_min, totplnk_delta, totplnk, planck_function(1:nbnd,ilay,icol)) @@ -660,6 +701,7 @@ subroutine compute_Planck_source( & ! Same unrolling as mentioned before ! !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do icol = 1, ncol, 2 do ilay = 1, nlay do igpt = 1, ngpt @@ -674,9 +716,13 @@ subroutine compute_Planck_source( & end do ! icol !$acc exit data delete(tlay,tlev,tsfc,fmajor,jeta,tropo,jtemp,jpress,gpoint_bands,pfracin,totplnk,gpoint_flavor) + !$omp target exit data map(release:tlay, tlev, tsfc, fmajor, jeta, tropo, jtemp, jpress, gpoint_bands, pfracin, totplnk, gpoint_flavor) !$acc exit data delete(pfrac,planck_function) + !$omp target exit data map(release:pfrac, planck_function) !$acc exit data copyout(sfc_src,lay_src,lev_src_inc,lev_src_dec) + !$omp target exit data map(from:sfc_src, lay_src, lev_src_inc, lev_src_dec) !$acc exit data copyout(sfc_source_Jac) + !$omp target exit data map(from:sfc_source_Jac) end subroutine compute_Planck_source ! ---------------------------------------------------------- @@ -685,6 +731,7 @@ end subroutine compute_Planck_source ! subroutine interpolate1D(val, offset, delta, table, res) !$acc routine seq + !$omp declare target ! input real(wp), intent(in) :: val, & ! axis value at which to evaluate table offset, & ! minimum of table axis @@ -709,6 +756,7 @@ end subroutine interpolate1D ! 
function interpolate2D(fminor, k, igpt, jeta, jtemp) result(res) !$acc routine seq + !$omp declare target real(wp), dimension(2,2), intent(in) :: fminor ! interpolation fractions for minor species ! index(1) : reference eta level (temperature dependent) ! index(2) : reference temperature level @@ -728,6 +776,7 @@ end function interpolate2D ! interpolation in temperature, pressure, and eta function interpolate3D(scaling, fmajor, k, igpt, jeta, jtemp, jpress) result(res) !$acc routine seq + !$omp declare target real(wp), dimension(2), intent(in) :: scaling real(wp), dimension(2,2,2), intent(in) :: fmajor ! interpolation fractions for major species ! index(1) : reference eta level (temperature dependent) @@ -770,12 +819,15 @@ subroutine combine_and_reorder_2str(ncol, nlay, ngpt, tau_abs, tau_rayleigh, tau !$acc data copy(tau, ssa, g) & !$acc copyin(tau_rayleigh, tau_abs) + call zero_array(ncol, nlay, ngpt, g) ! We are using blocking memory accesses here to improve performance ! of the transpositions. See also comments in mo_rrtmgp_util_reorder_kernels.F90 ! !$acc parallel default(none) vector_length(tile*tile) !$acc loop gang collapse(3) - do ilay = 1, nlay + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:tau, ssa, g) map(to:tau_rayleigh, tau_abs) + do ilay = 1, nlay do icol0 = 1, ncol, tile do igpt0 = 1, ngpt, tile @@ -785,16 +837,13 @@ subroutine combine_and_reorder_2str(ncol, nlay, ngpt, tau_abs, tau_rayleigh, tau icol = icol0 + icdiff igpt = igpt0 + igdiff if (icol > ncol .or. 
igpt > ngpt) cycle - - t = tau_abs(igpt,ilay,icol) + tau_rayleigh(igpt,ilay,icol) - tau(icol,ilay,igpt) = t - g (icol,ilay,igpt) = 0._wp - if(t > 2._wp * tiny(t)) then - ssa(icol,ilay,igpt) = tau_rayleigh(igpt,ilay,icol) / t - else - ssa(icol,ilay,igpt) = 0._wp - end if - + t = tau_abs(igpt,ilay,icol) + tau_rayleigh(igpt,ilay,icol) + tau(icol,ilay,igpt) = t + if(t > 2._wp * tiny(t)) then + ssa(icol,ilay,igpt) = tau_rayleigh(igpt,ilay,icol) / t + else + ssa(icol,ilay,igpt) = 0._wp + end if end do end do @@ -823,6 +872,9 @@ subroutine combine_and_reorder_nstr(ncol, nlay, ngpt, nmom, tau_abs, tau_rayleig !$acc parallel loop collapse(3) & !$acc& copy(tau, ssa, p) & !$acc& copyin(tau_rayleigh(:ngpt,:nlay,:ncol),tau_abs(:ngpt,:nlay,:ncol)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:tau, ssa, p) & + !$omp& map(to:tau_rayleigh(:ngpt, :nlay, :ncol), tau_abs(:ngpt, :nlay, :ncol)) do icol = 1, ncol do ilay = 1, nlay do igpt = 1, ngpt @@ -841,5 +893,39 @@ subroutine combine_and_reorder_nstr(ncol, nlay, ngpt, nmom, tau_abs, tau_rayleig end do end do end subroutine combine_and_reorder_nstr + + ! ---------------------------------------------------------- + ! + ! In-house subroutine for handling minloc and maxloc for + ! compilers which do not support GPU versions + ! + subroutine minmaxloc(i, mask, a, minl, maxl) + !$acc routine seq + !$omp declare target + implicit none + integer :: i, minl, maxl + logical(wl) :: mask(:,:) + real(wp) :: a(:,:) + integer :: j, n + real(wp) :: aij, amax, amin + n = size(a,2) + amax = -huge(amax) + amin = huge(amin) + do j = 1, n + aij = a(i,j) + if (mask(i,j)) then + if (aij.lt.amin) then + amin = aij + minl = j + end if + else + if (aij.gt.amax) then + amax = aij + maxl = j + end if + end if + end do + end subroutine ! 
---------------------------------------------------------- + end module mo_gas_optics_kernels diff --git a/rrtmgp/kernels/mo_gas_optics_kernels.F90 b/rrtmgp/kernels/mo_gas_optics_kernels.F90 index 4f064aa6c..857377a42 100644 --- a/rrtmgp/kernels/mo_gas_optics_kernels.F90 +++ b/rrtmgp/kernels/mo_gas_optics_kernels.F90 @@ -16,6 +16,7 @@ module mo_gas_optics_kernels use mo_rte_kind, only : wp, wl + use mo_rte_util_array,only : zero_array implicit none public contains @@ -364,7 +365,7 @@ subroutine gas_optical_depths_minor(ncol,nlay,ngpt, & real(wp), dimension(ngpt,nlay,ncol), intent(inout) :: tau ! ----------------- ! local variables - real(wp), parameter :: PaTohPa = 0.01 + real(wp), parameter :: PaTohPa = 0.01_wp real(wp) :: vmr_fact, dry_fact ! conversion from column abundance to dry vol. mixing ratio; real(wp) :: scaling, kminor_loc ! minor species absorption coefficient, optical depth integer :: icol, ilay, iflav, igpt, imnr @@ -403,7 +404,7 @@ subroutine gas_optical_depths_minor(ncol,nlay,ngpt, & if (scale_by_complement(imnr)) then ! scale by densities of all gases but the special one scaling = scaling * (1._wp - col_gas(icol,ilay,idx_minor_scaling(imnr)) * vmr_fact * dry_fact) else - scaling = scaling * col_gas(icol,ilay,idx_minor_scaling(imnr)) * vmr_fact * dry_fact + scaling = scaling * (col_gas(icol,ilay,idx_minor_scaling(imnr)) * vmr_fact * dry_fact) endif endif endif @@ -542,7 +543,7 @@ subroutine compute_Planck_source( & ! do icol = 1, ncol planck_function(1:nbnd,1,icol) = interpolate1D(tsfc(icol) , temp_ref_min, totplnk_delta, totplnk) - planck_function(1:nbnd,2,icol) = interpolate1D(tsfc(icol) + delta_Tsurf, temp_ref_min, totplnk_delta, totplnk) + planck_function(1:nbnd,2,icol) = interpolate1D(tsfc(icol) + delta_Tsurf, temp_ref_min, totplnk_delta, totplnk) ! ! Map to g-points ! @@ -720,7 +721,7 @@ end function interpolate3D_byflav ! ! Combine absorption and Rayleigh optical depths for total tau, ssa, g !
- pure subroutine combine_and_reorder_2str(ncol, nlay, ngpt, tau_abs, tau_rayleigh, tau, ssa, g) & + subroutine combine_and_reorder_2str(ncol, nlay, ngpt, tau_abs, tau_rayleigh, tau, ssa, g) & bind(C, name="combine_and_reorder_2str") integer, intent(in) :: ncol, nlay, ngpt real(wp), dimension(ngpt,nlay,ncol), intent(in ) :: tau_abs, tau_rayleigh @@ -729,12 +730,12 @@ pure subroutine combine_and_reorder_2str(ncol, nlay, ngpt, tau_abs, tau_rayleigh integer :: icol, ilay, igpt real(wp) :: t ! ----------------------- - do icol = 1, ncol - do ilay = 1, nlay - do igpt = 1, ngpt + call zero_array(ncol, nlay, ngpt, g) + do ilay = 1, nlay + do igpt = 1, ngpt + do icol = 1, ncol t = tau_abs(igpt,ilay,icol) + tau_rayleigh(igpt,ilay,icol) tau(icol,ilay,igpt) = t - g (icol,ilay,igpt) = 0._wp if(t > 2._wp * tiny(t)) then ssa(icol,ilay,igpt) = tau_rayleigh(igpt,ilay,icol) / t else diff --git a/rrtmgp/kernels/mo_rrtmgp_util_reorder_kernels.F90 b/rrtmgp/kernels/mo_rrtmgp_util_reorder_kernels.F90 index 9d5d850c2..8d876a907 100644 --- a/rrtmgp/kernels/mo_rrtmgp_util_reorder_kernels.F90 +++ b/rrtmgp/kernels/mo_rrtmgp_util_reorder_kernels.F90 @@ -41,6 +41,7 @@ subroutine reorder_123x312_kernel(d1, d2, d3, array_in, array_out) & !$acc& copyout(array_out) & !$acc& copyin(array_in) !$acc loop gang collapse(3) + !$omp target teams distribute parallel do simd collapse(3) map(to:array_in) map(from:array_out) do i2 = 1, d2 do i10 = 1, d1, tile do i30 = 1, d3, tile @@ -79,6 +80,7 @@ subroutine reorder_123x321_kernel(d1, d2, d3, array_in, array_out) & !$acc& copyin(array_in) !$acc loop gang collapse(3) ! 
private(cache(:,:)) + !$omp target teams distribute parallel do simd collapse(3) map(to:array_in) map(from:array_out) do i2 = 1, d2 do i10 = 1, d1, tile do i30 = 1, d3, tile diff --git a/rrtmgp/mo_gas_concentrations.F90 b/rrtmgp/mo_gas_concentrations.F90 old mode 100755 new mode 100644 index 2c6e14879..46de19fd4 --- a/rrtmgp/mo_gas_concentrations.F90 +++ b/rrtmgp/mo_gas_concentrations.F90 @@ -107,6 +107,7 @@ function init(this, gas_names) result(error_msg) allocate(this%gas_name(ngas), this%concs(ngas)) !$acc enter data copyin(this) !$acc enter data copyin(this%concs) + !$omp target enter data map(to:this%concs) this%gas_name(:) = gas_names(:) end function @@ -122,6 +123,7 @@ function set_vmr_scalar(this, gas, w) result(error_msg) real(wp), intent(in ) :: w character(len=128) :: error_msg ! --------- + real(wp), dimension(:,:), pointer :: p integer :: igas ! --------- error_msg = '' @@ -142,6 +144,7 @@ function set_vmr_scalar(this, gas, w) result(error_msg) if (associated(this%concs(igas)%conc)) then if ( any(shape(this%concs(igas)%conc) /= [1, 1]) ) then !$acc exit data delete(this%concs(igas)%conc) + !$omp target exit data map(release:this%concs(igas)%conc) deallocate(this%concs(igas)%conc) nullify (this%concs(igas)%conc) end if @@ -149,11 +152,19 @@ function set_vmr_scalar(this, gas, w) result(error_msg) if (.not. associated(this%concs(igas)%conc)) then allocate(this%concs(igas)%conc(1,1)) !$acc enter data create(this%concs(igas)%conc) + !$omp target enter data map(alloc:this%concs(igas)%conc) end if + p => this%concs(igas)%conc(:,:) !$acc kernels + !$omp target map(to:w) +#ifdef _CRAYFTN + p(:,:) = w +#else this%concs(igas)%conc(:,:) = w +#endif !$acc end kernels + !$omp end target end function set_vmr_scalar ! 
------------------------------------------------------------------------------------- function set_vmr_1d(this, gas, w) result(error_msg) @@ -164,6 +175,7 @@ function set_vmr_1d(this, gas, w) result(error_msg) intent(in ) :: w character(len=128) :: error_msg ! --------- + real(wp), dimension(:,:), pointer :: p integer :: igas ! --------- error_msg = '' @@ -191,6 +203,7 @@ function set_vmr_1d(this, gas, w) result(error_msg) if (associated(this%concs(igas)%conc)) then if ( any(shape(this%concs(igas)%conc) /= [1, this%nlay]) ) then !$acc exit data delete(this%concs(igas)%conc) + !$omp target exit data map(release:this%concs(igas)%conc) deallocate(this%concs(igas)%conc) nullify (this%concs(igas)%conc) end if @@ -198,11 +211,19 @@ function set_vmr_1d(this, gas, w) result(error_msg) if (.not. associated(this%concs(igas)%conc)) then allocate(this%concs(igas)%conc(1,this%nlay)) !$acc enter data create(this%concs(igas)%conc) + !$omp target enter data map(alloc:this%concs(igas)%conc) end if + p => this%concs(igas)%conc(:,:) !$acc kernels copyin(w) + !$omp target map(to:w) +#ifdef _CRAYFTN + p(1,:) = w +#else this%concs(igas)%conc(1,:) = w +#endif !$acc end kernels + !$omp end target !$acc exit data delete(w) end function set_vmr_1d @@ -215,6 +236,7 @@ function set_vmr_2d(this, gas, w) result(error_msg) intent(in ) :: w character(len=128) :: error_msg ! --------- + real(wp), dimension(:,:), pointer :: p integer :: igas ! --------- error_msg = '' @@ -249,6 +271,7 @@ function set_vmr_2d(this, gas, w) result(error_msg) if (associated(this%concs(igas)%conc)) then if ( any(shape(this%concs(igas)%conc) /= [this%ncol,this%nlay]) ) then !$acc exit data delete(this%concs(igas)%conc) + !$omp target exit data map(release:this%concs(igas)%conc) deallocate(this%concs(igas)%conc) nullify (this%concs(igas)%conc) end if @@ -256,11 +279,19 @@ function set_vmr_2d(this, gas, w) result(error_msg) if (.not. 
associated(this%concs(igas)%conc)) then allocate(this%concs(igas)%conc(this%ncol,this%nlay)) !$acc enter data create(this%concs(igas)%conc) + !$omp target enter data map(alloc:this%concs(igas)%conc) end if + p => this%concs(igas)%conc(:,:) !$acc kernels copyin(w) + !$omp target map(to:w) +#ifdef _CRAYFTN + p(:,:) = w(:,:) +#else this%concs(igas)%conc(:,:) = w(:,:) +#endif !$acc end kernels + !$omp end target end function set_vmr_2d ! ------------------------------------------------------------------------------------- ! @@ -276,6 +307,7 @@ function get_vmr_1d(this, gas, array) result(error_msg) real(wp), dimension(:), intent(out) :: array character(len=128) :: error_msg ! --------------------- + real(wp), dimension(:,:), pointer :: p integer :: igas ! --------------------- error_msg = '' @@ -294,17 +326,32 @@ function get_vmr_1d(this, gas, array) result(error_msg) end if if(error_msg /= "") return + p => this%concs(igas)%conc(:,:) !$acc data copyout (array) present(this) + !$omp target data map(from:array) if(size(this%concs(igas)%conc, 2) > 1) then !$acc kernels default(none) + !$omp target +#ifdef _CRAYFTN + array(:) = p(1,:) +#else array(:) = this%concs(igas)%conc(1,:) +#endif !$acc end kernels + !$omp end target else !$acc kernels default(none) + !$omp target +#ifdef _CRAYFTN + array(:) = p(1,1) +#else array(:) = this%concs(igas)%conc(1,1) +#endif !$acc end kernels + !$omp end target end if !$acc end data + !$omp end target data end function get_vmr_1d ! ------------------------------------------------------------------------------------- @@ -317,6 +364,7 @@ function get_vmr_2d(this, gas, array) result(error_msg) real(wp), dimension(:,:), intent(out) :: array character(len=128) :: error_msg ! --------------------- + real(wp), dimension(:,:), pointer :: p integer :: icol, ilay, igas ! 
--------------------- error_msg = '' @@ -338,31 +386,49 @@ function get_vmr_2d(this, gas, array) result(error_msg) end if if(error_msg /= "") return + p => this%concs(igas)%conc(:,:) !$acc data copyout (array) present(this, this%concs) + !$omp target data map(from:array) if(size(this%concs(igas)%conc, 1) > 1) then ! Concentration stored as 2D !$acc parallel loop collapse(2) default(none) + !$omp target teams distribute parallel do simd do ilay = 1, size(array,2) do icol = 1, size(array,1) !print *, (size(this%concs)) +#ifdef _CRAYFTN + array(icol,ilay) = p(icol,ilay) +#else array(icol,ilay) = this%concs(igas)%conc(icol,ilay) +#endif end do end do else if(size(this%concs(igas)%conc, 2) > 1) then ! Concentration stored as 1D !$acc parallel loop collapse(2) default(none) + !$omp target teams distribute parallel do simd do ilay = 1, size(array,2) do icol = 1, size(array,1) +#ifdef _CRAYFTN + array(icol,ilay) = p(1,ilay) +#else array(icol, ilay) = this%concs(igas)%conc(1,ilay) +#endif end do end do else ! Concentration stored as scalar !$acc parallel loop collapse(2) default(none) + !$omp target teams distribute parallel do simd do ilay = 1, size(array,2) do icol = 1, size(array,1) +#ifdef _CRAYFTN + array(icol,ilay) = p(1,1) +#else array(icol,ilay) = this%concs(igas)%conc(1,1) +#endif end do end do end if !$acc end data + !$omp end target data end function get_vmr_2d ! ------------------------------------------------------------------------------------- @@ -376,6 +442,7 @@ function get_subset_range(this, start, n, subset) result(error_msg) class(ty_gas_concs), intent(inout) :: subset character(len=128) :: error_msg ! --------------------- + real(wp), dimension(:,:), pointer :: p1, p2 integer :: i ! --------------------- error_msg = '' @@ -391,6 +458,7 @@ function get_subset_range(this, start, n, subset) result(error_msg) allocate(subset%gas_name(size(this%gas_name)), & subset%concs (size(this%concs))) ! 
These two arrays should be the same length !$acc enter data create(subset, subset%concs) + !$omp target enter data map(alloc:subset%concs) subset%nlay = this%nlay subset%ncol = merge(n, 0, this%ncol > 0) subset%gas_name(:) = this%gas_name(:) @@ -402,15 +470,30 @@ function get_subset_range(this, start, n, subset) result(error_msg) ! allocate(subset%concs(i)%conc(min(max(subset%ncol,1), size(this%concs(i)%conc, 1)), & min( subset%nlay, size(this%concs(i)%conc, 2)))) + p1 => subset%concs(i)%conc(:,:) + p2 => this%concs(i)%conc(:,:) !$acc enter data create(subset%concs(i)%conc) + !$omp target enter data map(alloc:subset%concs(i)%conc) if(size(this%concs(i)%conc, 1) > 1) then ! Concentration stored as 2D !$acc kernels + !$omp target +#ifdef _CRAYFTN + p1(:,:) = p2(start:(start+n-1),:) +#else subset%concs(i)%conc(:,:) = this%concs(i)%conc(start:(start+n-1),:) +#endif !$acc end kernels + !$omp end target else !$acc kernels + !$omp target +#ifdef _CRAYFTN + p1(:,:) = p2(:,:) +#else subset%concs(i)%conc(:,:) = this%concs(i)%conc(:,:) +#endif !$acc end kernels + !$omp end target end if end do @@ -432,11 +515,13 @@ subroutine reset(this) do i = 1, size(this%concs) if(associated(this%concs(i)%conc)) then !$acc exit data delete(this%concs(i)%conc) + !$omp target exit data map(release:this%concs(i)%conc) deallocate(this%concs(i)%conc) nullify(this%concs(i)%conc) end if end do !$acc exit data delete(this%concs) + !$omp target exit data map(release:this%concs) deallocate(this%concs) end if end subroutine reset @@ -477,6 +562,7 @@ function find_gas(this, gas) ! ----------------- find_gas = GAS_NOT_IN_LIST if(.not. allocated(this%gas_name)) return + ! search gases using a loop. 
Fortran intrinsic findloc would be faster, but only supported since gfortran 9 do igas = 1, size(this%gas_name) if (lower_case(trim(this%gas_name(igas))) == lower_case(trim(gas))) then find_gas = igas diff --git a/rrtmgp/mo_gas_optics_rrtmgp.F90 b/rrtmgp/mo_gas_optics_rrtmgp.F90 index 3982f0978..944e04a9a 100644 --- a/rrtmgp/mo_gas_optics_rrtmgp.F90 +++ b/rrtmgp/mo_gas_optics_rrtmgp.F90 @@ -257,6 +257,7 @@ function gas_optics_int(this, & ! Gas optics ! !$acc enter data create(jtemp, jpress, tropo, fmajor, jeta) + !$omp target enter data map(alloc:jtemp, jpress, tropo, fmajor, jeta) error_msg = compute_gas_taus(this, & ncol, nlay, ngpt, nband, & play, plev, tlay, gas_desc, & @@ -271,6 +272,7 @@ function gas_optics_int(this, & ! input data sizes and values ! !$acc enter data copyin(tsfc, tlev) ! Should be fine even if tlev is not supplied + !$omp target enter data map(to:tsfc, tlev) if(check_extents) then if(.not. extents_are(tsfc, ncol)) & @@ -312,6 +314,7 @@ function gas_optics_int(this, & sources, & tlev) !$acc exit data delete(tlev) + !$omp target exit data map(release:tlev) else error_msg = source(this, & ncol, nlay, nband, ngpt, & @@ -320,7 +323,9 @@ function gas_optics_int(this, & sources) end if !$acc exit data delete(tsfc) + !$omp target exit data map(release:tsfc) !$acc exit data delete(jtemp, jpress, tropo, fmajor, jeta) + !$omp target exit data map(release:jtemp, jpress, tropo, fmajor, jeta) end function gas_optics_int !------------------------------------------------------------------------------------------ ! @@ -366,6 +371,7 @@ function gas_optics_ext(this, & ! Gas optics ! 
!$acc enter data create(jtemp, jpress, tropo, fmajor, jeta) + !$omp target enter data map(alloc:jtemp, jpress, tropo, fmajor, jeta) error_msg = compute_gas_taus(this, & ncol, nlay, ngpt, nband, & play, plev, tlay, gas_desc, & @@ -373,6 +379,7 @@ function gas_optics_ext(this, & jtemp, jpress, jeta, tropo, fmajor, & col_dry) !$acc exit data delete(jtemp, jpress, tropo, fmajor, jeta) + !$omp target exit data map(release:jtemp, jpress, tropo, fmajor, jeta) if(error_msg /= '') return ! ---------------------------------------------------------- @@ -380,6 +387,7 @@ function gas_optics_ext(this, & ! External source function is constant ! !$acc enter data create(toa_src) + !$omp target enter data map(alloc:toa_src) if(check_extents) then if(.not. extents_are(toa_src, ncol, ngpt)) & error_msg = "gas_optics(): array toa_src has wrong size" @@ -387,12 +395,14 @@ function gas_optics_ext(this, & if(error_msg /= '') return !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1,ngpt do icol = 1,ncol toa_src(icol,igpt) = this%solar_source(igpt) end do end do !$acc exit data copyout(toa_src) + !$omp target exit data map(from:toa_src) end function gas_optics_ext !------------------------------------------------------------------------------------------ ! @@ -469,6 +479,7 @@ function compute_gas_taus(this, & ! Check input data sizes and values ! !$acc enter data copyin(play,plev,tlay) + !$omp target enter data map(to:play, plev, tlay) if(check_extents) then if(.not. 
extents_are(play, ncol, nlay )) & error_msg = "gas_optics(): array play has wrong size" @@ -490,7 +501,7 @@ function compute_gas_taus(this, & if(check_values) then if(any_vals_outside(play, this%press_ref_min,this%press_ref_max)) & error_msg = "gas_optics(): array play has values outside range" - if(any_vals_outside(plev, this%press_ref_min,this%press_ref_max)) & + if(any_vals_less_than(plev, 0._wp)) & error_msg = "gas_optics(): array plev has values outside range" if(any_vals_outside(tlay, this%temp_ref_min, this%temp_ref_max)) & error_msg = "gas_optics(): array tlay has values outside range" @@ -517,6 +528,7 @@ function compute_gas_taus(this, & ! Fill out the array of volume mixing ratios ! !$acc enter data create(vmr) + !$omp target enter data map(alloc:vmr) do igas = 1, ngas ! ! Get vmr if gas is provided in ty_gas_concs @@ -533,6 +545,7 @@ function compute_gas_taus(this, & idx_h2o = string_loc_in_array('h2o', this%gas_names) col_dry_wk => col_dry_arr !$acc enter data create(col_dry_wk, col_dry_arr, col_gas) + !$omp target enter data map(alloc:col_dry_wk, col_dry_arr, col_gas) if (present(col_dry)) then col_dry_wk => col_dry else @@ -543,12 +556,14 @@ function compute_gas_taus(this, & ! compute column gas amounts [molec/cm^2] ! !$acc parallel loop gang vector collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do ilay = 1, nlay do icol = 1, ncol col_gas(icol,ilay,0) = col_dry_wk(icol,ilay) end do end do !$acc parallel loop gang vector collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igas = 1, ngas do ilay = 1, nlay do icol = 1, ncol @@ -557,15 +572,20 @@ function compute_gas_taus(this, & end do end do !$acc exit data delete(vmr) + !$omp target exit data map(release:vmr) ! ! ---- calculate gas optical depths ---- ! 
!$acc enter data create(jtemp, jpress, jeta, tropo, fmajor) + !$omp target enter data map(alloc:jtemp, jpress, jeta, tropo, fmajor) !$acc enter data create(tau, tau_rayleigh) + !$omp target enter data map(alloc:tau, tau_rayleigh) !$acc enter data create(col_mix, fminor) + !$omp target enter data map(alloc:col_mix, fminor) !$acc enter data copyin(this) !$acc enter data copyin(this%gpoint_flavor) + !$omp target enter data map(to:this%gpoint_flavor) call zero_array(ngpt, nlay, ncol, tau) call interpolation( & ncol,nlay, & ! problem dimensions @@ -616,6 +636,7 @@ function compute_gas_taus(this, & tau) if (allocated(this%krayl)) then !$acc enter data attach(col_dry_wk) copyin(this%krayl) + !$omp target enter data map(to:col_dry_wk) map(to:this%krayl) call compute_tau_rayleigh( & !Rayleigh scattering optical depths ncol,nlay,nband,ngpt, & ngas,nflav,neta,npres,ntemp, & ! dimensions @@ -626,16 +647,22 @@ function compute_gas_taus(this, & fminor,jeta,tropo,jtemp, & ! local input tau_rayleigh) !$acc exit data detach(col_dry_wk) delete(this%krayl) + !$omp target exit data map(from:col_dry_wk) map(release:this%krayl) end if if (error_msg /= '') return ! Combine optical depths and reorder for radiative transfer solver. 
call combine_and_reorder(tau, tau_rayleigh, allocated(this%krayl), optical_props) !$acc exit data delete(play, tlay, plev) + !$omp target exit data map(release:play, tlay, plev) !$acc exit data delete(tau, tau_rayleigh) + !$omp target exit data map(release:tau, tau_rayleigh) !$acc exit data delete(col_dry_wk, col_dry_arr, col_gas, col_mix, fminor) + !$omp target exit data map(release:col_dry_wk, col_dry_arr, col_gas, col_mix, fminor) !$acc exit data delete(this%gpoint_flavor) + !$omp target exit data map(release:this%gpoint_flavor) !$acc exit data copyout(jtemp, jpress, jeta, tropo, fmajor) + !$omp target exit data map(from:jtemp, jpress, jeta, tropo, fmajor) end function compute_gas_taus !------------------------------------------------------------------------------------------ ! @@ -678,6 +705,7 @@ function set_solar_variability(this, & ! Calculate solar source function for provided facular and sunspot indices ! !$acc parallel loop + !$omp target teams distribute parallel do simd do igpt = 1, size(this%solar_source_quiet) this%solar_source(igpt) = this%solar_source_quiet(igpt) + & (mg_index - a_offset) * this%solar_source_facular(igpt) + & @@ -708,9 +736,11 @@ function set_tsi(this, tsi) result(error_msg) ! Scale the solar source function to the input tsi ! !$acc kernels + !$omp target norm = 1._wp/sum(this%solar_source(:)) this%solar_source(:) = this%solar_source(:) * tsi * norm !$acc end kernels + !$omp end target end if end function set_tsi @@ -790,9 +820,13 @@ function source(this, & ! which depend on mapping from spectral space that creates k-distribution. 
!$acc enter data copyin(sources) !$acc enter data create(sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source) + !$omp target enter data map(alloc:sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source) !$acc enter data create(sfc_source_t, lay_source_t, lev_source_inc_t, lev_source_dec_t) attach(tlev_wk) + !$omp target enter data map(alloc:sfc_source_t, lay_source_t, lev_source_inc_t, lev_source_dec_t) map(to:tlev_wk) !$acc enter data create(sfc_source_Jac) + !$omp target enter data map(alloc:sfc_source_Jac) !$acc enter data create(sources%sfc_source_Jac) + !$omp target enter data map(alloc:sources%sfc_source_Jac) call compute_Planck_source(ncol, nlay, nbnd, ngpt, & get_nflav(this), this%get_neta(), this%get_npres(), this%get_ntemp(), this%get_nPlanckTemp(), & tlay, tlev_wk, tsfc, merge(1,nlay,play(1,1) > play(1,nlay)), & @@ -802,6 +836,7 @@ function source(this, & sfc_source_t, lay_source_t, lev_source_inc_t, lev_source_dec_t, & sfc_source_Jac) !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol sources%sfc_source (icol,igpt) = sfc_source_t (igpt,icol) @@ -815,9 +850,13 @@ function source(this, & ! Transposition of a 2D array, for which we don't have a routine in mo_rrtmgp_util_reorder. ! 
!$acc exit data delete(sfc_source_Jac) + !$omp target exit data map(release:sfc_source_Jac) !$acc exit data delete(sfc_source_t, lay_source_t, lev_source_inc_t, lev_source_dec_t) detach(tlev_wk) + !$omp target exit data map(release:sfc_source_t, lay_source_t, lev_source_inc_t, lev_source_dec_t) map(from:tlev_wk) !$acc exit data copyout(sources%sfc_source_Jac) + !$omp target exit data map(from:sources%sfc_source_Jac) !$acc exit data copyout(sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source) + !$omp target exit data map(from:sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source) !$acc exit data copyout(sources) end function source !-------------------------------------------------------------------------------------------------------------------- @@ -905,11 +944,14 @@ function load_int(this, available_gases, gas_names, key_species, & this%planck_frac (size(planck_frac,1), size(planck_frac,2), size(planck_frac,3), size(planck_frac,4)), & this%optimal_angle_fit(size(optimal_angle_fit, 1), size(optimal_angle_fit, 2))) !$acc enter data create(this%totplnk, this%planck_frac, this%optimal_angle_fit) + !$omp target enter data map(alloc:this%totplnk, this%planck_frac, this%optimal_angle_fit) !$acc kernels + !$omp target this%totplnk = totplnk this%planck_frac = planck_frac this%optimal_angle_fit = optimal_angle_fit !$acc end kernels + !$omp end target ! Temperature steps for Planck function interpolation ! 
Assumes that temperature minimum and max are the same for the absorption coefficient grid and the @@ -1014,11 +1056,14 @@ function load_ext(this, available_gases, gas_names, key_species, & allocate(this%solar_source_quiet(ngpt), this%solar_source_facular(ngpt), & this%solar_source_sunspot(ngpt), this%solar_source(ngpt)) !$acc enter data create(this%solar_source_quiet, this%solar_source_facular, this%solar_source_sunspot, this%solar_source) + !$omp target enter data map(alloc:this%solar_source_quiet, this%solar_source_facular, this%solar_source_sunspot, this%solar_source) !$acc kernels + !$omp target this%solar_source_quiet = solar_quiet this%solar_source_facular = solar_facular this%solar_source_sunspot = solar_sunspot !$acc end kernels + !$omp end target err_message = this%set_solar_variability(mg_default, sb_default) end function load_ext !-------------------------------------------------------------------------------------------------------------------- @@ -1168,6 +1213,7 @@ function init_abs_coeffs(this, & this%temp_ref = temp_ref this%kmajor = kmajor !$acc enter data copyin(this%kmajor) + !$omp target enter data map(to:this%kmajor) if(allocated(rayl_lower) .neqv. allocated(rayl_upper)) then @@ -1185,6 +1231,7 @@ function init_abs_coeffs(this, & allocate(this%press_ref_log(size(this%press_ref))) this%press_ref_log(:) = log(this%press_ref(:)) !$acc enter data copyin(this%press_ref_log) + !$omp target enter data map(to:this%press_ref_log) ! log scale of reference pressure @@ -1377,20 +1424,24 @@ function get_col_dry(vmr_h2o, plev, latitude) result(col_dry) ncol = size(plev, dim=1) nlev = size(plev, dim=2) !$acc enter data create(g0) + !$omp target enter data map(alloc:g0) if(present(latitude)) then ! A purely OpenACC implementation would probably compute g0 within the kernel below !$acc parallel loop + !$omp target teams distribute parallel do simd do icol = 1, ncol g0(icol) = helmert1 - helmert2 * cos(2.0_wp * pi * latitude(icol) / 180.0_wp) ! 
acceleration due to gravity [m/s^2] end do else !$acc parallel loop + !$omp target teams distribute parallel do simd do icol = 1, ncol g0(icol) = grav end do end if !$acc parallel loop gang vector collapse(2) copyin(plev,vmr_h2o) copyout(col_dry) + !$omp target teams distribute parallel do simd collapse(2) map(to:plev, vmr_h2o) map(from:col_dry) do ilev = 1, nlev-1 do icol = 1, ncol delta_plev = abs(plev(icol,ilev) - plev(icol,ilev+1)) @@ -1401,6 +1452,7 @@ function get_col_dry(vmr_h2o, plev, latitude) result(col_dry) end do end do !$acc exit data delete (g0) + !$omp target exit data map(release:g0) end function get_col_dry !-------------------------------------------------------------------------------------------------------------------- ! @@ -1432,6 +1484,7 @@ function compute_optimal_angles(this, optical_props, optimal_angles) result(err_ ! column transmissivity ! !$acc parallel loop gang vector collapse(2) copyin(optical_props, optical_props%tau, optical_props%gpt2band) copyout(optimal_angles) + !$omp target teams distribute parallel do simd collapse(2) map(to: optical_props%tau, optical_props%gpt2band) map(from:optimal_angles) do icol = 1, ncol do igpt = 1, ngpt ! @@ -1720,6 +1773,7 @@ subroutine reduce_minor_arrays(available_gases, & enddo endif !$acc enter data copyin(kminor_atm_red) + !$omp target enter data map(to:kminor_atm_red) end subroutine reduce_minor_arrays @@ -1782,45 +1836,61 @@ subroutine combine_and_reorder(tau, tau_rayleigh, has_rayleigh, optical_props) if (.not. has_rayleigh) then ! index reorder (ngpt, nlay, ncol) -> (ncol,nlay,gpt) !$acc enter data copyin(tau) + !$omp target enter data map(to:tau) !$acc enter data create(optical_props%tau) + !$omp target enter data map(alloc:optical_props%tau) call reorder123x321(tau, optical_props%tau) select type(optical_props) type is (ty_optical_props_2str) !$acc enter data create(optical_props%ssa, optical_props%g) + !!$omp target enter data map(alloc:optical_props%ssa, optical_props%g) ! 
Not needed with Cray compiler call zero_array( ncol,nlay,ngpt,optical_props%ssa) call zero_array( ncol,nlay,ngpt,optical_props%g ) !$acc exit data copyout(optical_props%ssa, optical_props%g) + !!$omp target exit data map(from:optical_props%ssa, optical_props%g) ! Not needed with Cray compiler type is (ty_optical_props_nstr) ! We ought to be able to combine this with above nmom = size(optical_props%p, 1) !$acc enter data create(optical_props%ssa, optical_props%p) + !$omp target enter data map(alloc:optical_props%ssa, optical_props%p) call zero_array( ncol,nlay,ngpt,optical_props%ssa) call zero_array(nmom,ncol,nlay,ngpt,optical_props%p ) !$acc exit data copyout(optical_props%ssa, optical_props%p) + !$omp target exit data map(from:optical_props%ssa, optical_props%p) end select !$acc exit data copyout(optical_props%tau) + !$omp target exit data map(from:optical_props%tau) !$acc exit data delete(tau) + !$omp target exit data map(release:tau) else ! combine optical depth and rayleigh scattering !$acc enter data copyin(tau, tau_rayleigh) + !$omp target enter data map(to:tau, tau_rayleigh) select type(optical_props) type is (ty_optical_props_1scl) ! User is asking for absorption optical depth !$acc enter data create(optical_props%tau) + !$omp target enter data map(alloc:optical_props%tau) call reorder123x321(tau, optical_props%tau) !$acc exit data copyout(optical_props%tau) + !$omp target exit data map(from:optical_props%tau) type is (ty_optical_props_2str) !$acc enter data create(optical_props%tau, optical_props%ssa, optical_props%g) + !$omp target enter data map(alloc:optical_props%tau, optical_props%ssa, optical_props%g) call combine_and_reorder_2str(ncol, nlay, ngpt, tau, tau_rayleigh, & optical_props%tau, optical_props%ssa, optical_props%g) !$acc exit data copyout(optical_props%tau, optical_props%ssa, optical_props%g) + !$omp target exit data map(from:optical_props%tau, optical_props%ssa, optical_props%g) type is (ty_optical_props_nstr) ! 
We ought to be able to combine this with above nmom = size(optical_props%p, 1) !$acc enter data create(optical_props%tau, optical_props%ssa, optical_props%p) + !$omp target enter data map(alloc:optical_props%tau, optical_props%ssa, optical_props%p) call combine_and_reorder_nstr(ncol, nlay, ngpt, nmom, tau, tau_rayleigh, & optical_props%tau, optical_props%ssa, optical_props%p) !$acc exit data copyout(optical_props%tau, optical_props%ssa, optical_props%p) + !$omp target exit data map(from:optical_props%tau, optical_props%ssa, optical_props%p) end select !$acc exit data delete(tau, tau_rayleigh) + !$omp target exit data map(release:tau, tau_rayleigh) end if !$acc exit data copyout(optical_props) end subroutine combine_and_reorder diff --git a/rte/kernels-openacc/mo_optical_props_kernels.F90 b/rte/kernels-openacc/mo_optical_props_kernels.F90 index 65c91eae4..711a64778 100644 --- a/rte/kernels-openacc/mo_optical_props_kernels.F90 +++ b/rte/kernels-openacc/mo_optical_props_kernels.F90 @@ -63,6 +63,10 @@ subroutine delta_scale_2str_f_k(ncol, nlay, ngpt, tau, ssa, g, f) & !$acc& copy(ssa(:ncol,:nlay,:ngpt),tau(:ncol,:nlay,:ngpt)) & !$acc& copyin(f(:ncol,:nlay,:ngpt)) & !$acc& copy(g(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:ssa(:ncol, :nlay, :ngpt), tau(:ncol, :nlay, :ngpt)) & + !$omp& map(to:f(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:g(:ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -92,6 +96,8 @@ subroutine delta_scale_2str_k(ncol, nlay, ngpt, tau, ssa, g) & !$acc parallel loop collapse(3) & !$acc& copy(tau(:ncol,:nlay,:ngpt),ssa(:ncol,:nlay,:ngpt),g(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:tau(:ncol, :nlay, :ngpt), ssa(:ncol, :nlay, :ngpt), g(:ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -137,6 +143,9 @@ subroutine increment_1scalar_by_1scalar(ncol, nlay, ngpt, & !$acc parallel loop 
collapse(3) & !$acc& copyin(tau2(:ncol,:nlay,:ngpt)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:tau2(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -162,6 +171,10 @@ subroutine increment_1scalar_by_2stream(ncol, nlay, ngpt, & !$acc& copyin(tau2(:ncol,:nlay,:ngpt)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt)) & !$acc& copyin(ssa2(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:tau2(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:ssa2(:ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -188,6 +201,10 @@ subroutine increment_1scalar_by_nstream(ncol, nlay, ngpt, & !$acc& copyin(tau2(:ncol,:nlay,:ngpt)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt)) & !$acc& copyin(ssa2(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:tau2(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:ssa2(:ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -216,6 +233,10 @@ subroutine increment_2stream_by_1scalar(ncol, nlay, ngpt, & !$acc& copy(ssa1(:ncol,:nlay,:ngpt)) & !$acc& copyin(tau2(:ncol,:nlay,:ngpt)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:tau2(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -248,6 +269,11 @@ subroutine increment_2stream_by_2stream(ncol, nlay, ngpt, & !$acc& copy(ssa1(:ncol,:nlay,:ngpt)) & !$acc& copyin(ssa2(:ncol,:nlay,:ngpt),tau2(:ncol,:nlay,:ngpt)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt),g1(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:g2(:ncol, 
:nlay, :ngpt)) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:ssa2(:ncol, :nlay, :ngpt), tau2(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt), g1(:ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -289,6 +315,11 @@ subroutine increment_2stream_by_nstream(ncol, nlay, ngpt, nmom2, & !$acc& copy(ssa1(:ncol,:nlay,:ngpt)) & !$acc& copyin(ssa2(:ncol,:nlay,:ngpt),tau2(:ncol,:nlay,:ngpt)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt),g1(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:p2(:1, :ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:ssa2(:ncol, :nlay, :ngpt), tau2(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt), g1(:ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -328,6 +359,10 @@ subroutine increment_nstream_by_1scalar(ncol, nlay, ngpt, & !$acc& copy(ssa1(:ncol,:nlay,:ngpt)) & !$acc& copyin(tau2(:ncol,:nlay,:ngpt)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:tau2(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -364,8 +399,15 @@ subroutine increment_nstream_by_2stream(ncol, nlay, ngpt, nmom1, & !$acc& copyin(ssa2(:ncol,:nlay,:ngpt)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt)) & !$acc& copyin(g2(:ncol,:nlay,:ngpt)) & - !$acc& copy(temp_moms(:nmom1)) & + !$acc& private(temp_moms) & !$acc& copyin(tau2(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:p1(:nmom1, :ncol, :nlay, :ngpt), ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:ssa2(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:g2(:ncol, :nlay, :ngpt)) & + !$omp& private(temp_moms) & + !$omp& map(to:tau2(:ncol, :nlay, :ngpt)) do 
igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -415,6 +457,11 @@ subroutine increment_nstream_by_nstream(ncol, nlay, ngpt, nmom1, nmom2, & !$acc& copy(ssa1(:ncol,:nlay,:ngpt)) & !$acc& copyin(ssa2(:ncol,:nlay,:ngpt),tau2(:ncol,:nlay,:ngpt)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt),p1(:mom_lim,:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:p2(:mom_lim, :ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:ssa2(:ncol, :nlay, :ngpt), tau2(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt), p1(:mom_lim, :ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -457,6 +504,10 @@ subroutine inc_1scalar_by_1scalar_bybnd(ncol, nlay, ngpt, & !$acc& copyin(tau2(:ncol,:nlay,:nbnd)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt)) & !$acc& copyin(gpt_lims(:,:nbnd)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:tau2(:ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:gpt_lims(:, :nbnd)) do igpt = 1 , ngpt do ilay = 1 , nlay do icol = 1 , ncol @@ -485,6 +536,10 @@ subroutine inc_1scalar_by_2stream_bybnd(ncol, nlay, ngpt, & !$acc& copyin(tau2(:ncol,:nlay,:nbnd),ssa2(:ncol,:nlay,:nbnd)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt)) & !$acc& copyin(gpt_lims(:,:nbnd)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:tau2(:ncol, :nlay, :nbnd), ssa2(:ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:gpt_lims(:, :nbnd)) do igpt = 1 , ngpt do ilay = 1 , nlay do icol = 1 , ncol @@ -513,6 +568,10 @@ subroutine inc_1scalar_by_nstream_bybnd(ncol, nlay, ngpt, & !$acc& copyin(gpt_lims(:,:nbnd),tau2(:ncol,:nlay,:nbnd)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt)) & !$acc& copyin(ssa2(:ncol,:nlay,:nbnd)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:gpt_lims(:, :nbnd), tau2(:ncol, :nlay, :nbnd)) & + !$omp& 
map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:ssa2(:ncol, :nlay, :nbnd)) do igpt = 1 , ngpt do ilay = 1 , nlay do icol = 1 , ncol @@ -545,6 +604,11 @@ subroutine inc_2stream_by_1scalar_bybnd(ncol, nlay, ngpt, & !$acc& copyin(tau2(:ncol,:nlay,:nbnd)) & !$acc& copy(ssa1(:ncol,:nlay,:ngpt)) & !$acc& copyin(gpt_lims(:,:nbnd)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:tau2(:ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:gpt_lims(:, :nbnd)) do igpt = 1 , ngpt do ilay = 1, nlay do icol = 1, ncol @@ -580,6 +644,13 @@ subroutine inc_2stream_by_2stream_bybnd(ncol, nlay, ngpt, & !$acc& copyin(gpt_lims(:,:nbnd)) & !$acc& copy(g1(:ncol,:nlay,:ngpt)) & !$acc& copyin(g2(:ncol,:nlay,:nbnd)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:tau2(:ncol, :nlay, :nbnd), ssa2(:ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:gpt_lims(:, :nbnd)) & + !$omp& map(tofrom:g1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:g2(:ncol, :nlay, :nbnd)) do igpt = 1 , ngpt do ilay = 1, nlay do icol = 1, ncol @@ -624,6 +695,12 @@ subroutine inc_2stream_by_nstream_bybnd(ncol, nlay, ngpt, nmom2, & !$acc& copy(ssa1(:ncol,:nlay,:ngpt)) & !$acc& copyin(p2(:1,:ncol,:nlay,:nbnd),gpt_lims(:,:nbnd)) & !$acc& copy(g1(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:tau2(:ncol, :nlay, :nbnd), ssa2(:ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:p2(:1, :ncol, :nlay, :nbnd), gpt_lims(:, :nbnd)) & + !$omp& map(tofrom:g1(:ncol, :nlay, :ngpt)) do igpt = 1 , ngpt do ilay = 1, nlay do icol = 1, ncol @@ -666,6 +743,11 @@ subroutine inc_nstream_by_1scalar_bybnd(ncol, nlay, ngpt, & !$acc& copyin(tau2(:ncol,:nlay,:nbnd)) & !$acc& 
copy(ssa1(:ncol,:nlay,:ngpt)) & !$acc& copyin(gpt_lims(:,:nbnd)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:tau2(:ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:gpt_lims(:, :nbnd)) do igpt = 1 , ngpt do ilay = 1, nlay do icol = 1, ncol @@ -704,8 +786,15 @@ subroutine inc_nstream_by_2stream_bybnd(ncol, nlay, ngpt, nmom1, & !$acc& copyin(ssa2(:ncol,:nlay,:nbnd)) & !$acc& copy(ssa1(:ncol,:nlay,:ngpt),p1(:nmom1,:ncol,:nlay,:ngpt)) & !$acc& copyin(tau2(:ncol,:nlay,:nbnd)) & - !$acc& copy(temp_moms(:nmom1)) & + !$acc& private(temp_moms) & !$acc& copyin(gpt_lims(:,:nbnd),g2(:ncol,:nlay,:nbnd)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:ssa2(:ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt), p1(:nmom1, :ncol, :nlay, :ngpt)) & + !$omp& map(to:tau2(:ncol, :nlay, :nbnd)) & + !$omp& private(temp_moms) & + !$omp& map(to:gpt_lims(:, :nbnd), g2(:ncol, :nlay, :nbnd)) do igpt = 1 , ngpt do ilay = 1, nlay do icol = 1, ncol @@ -760,6 +849,14 @@ subroutine inc_nstream_by_nstream_bybnd(ncol, nlay, ngpt, nmom1, nmom2, & !$acc& copyin(tau2(:ncol,:nlay,:nbnd)) & !$acc& copy(p1(:mom_lim,:ncol,:nlay,:ngpt)) & !$acc& copyin(gpt_lims(:,:nbnd)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:p2(:mom_lim, :ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:ssa2(:ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:tau2(:ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:p1(:mom_lim, :ncol, :nlay, :ngpt)) & + !$omp& map(to:gpt_lims(:, :nbnd)) do igpt = 1 , ngpt do ilay = 1, nlay do icol = 1, ncol @@ -802,6 +899,9 @@ subroutine extract_subset_dim1_3d(ncol, nlay, ngpt, array_in, colS, colE, array_ !$acc parallel loop collapse(3) & !$acc& 
copyout(array_out(:cole-cols+1,:nlay,:ngpt)) & !$acc& copyin(array_in(cols:cole,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(from:array_out) & + !$omp& map(to:array_in) do igpt = 1, ngpt do ilay = 1, nlay do icol = colS, colE @@ -825,6 +925,9 @@ subroutine extract_subset_dim2_4d(nmom, ncol, nlay, ngpt, array_in, colS, colE, !$acc parallel loop collapse(4) & !$acc& copyout(array_out(:nmom,:cole-cols+1,:nlay,:ngpt)) & !$acc& copyin(array_in(:nmom,cols:cole,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(4) & + !$omp& map(from:array_out) & + !$omp& map(to:array_in) do igpt = 1, ngpt do ilay = 1, nlay do icol = colS, colE @@ -855,6 +958,10 @@ subroutine extract_subset_absorption_tau(ncol, nlay, ngpt, tau_in, ssa_in, & !$acc& copyin(ssa_in(cols:cole,:nlay,:ngpt)) & !$acc& copyout(tau_out(:cole-cols+1,:nlay,:ngpt)) & !$acc& copyin(tau_in(cols:cole,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:ssa_in) & + !$omp& map(from:tau_out) & + !$omp& map(to:tau_in) do igpt = 1, ngpt do ilay = 1, nlay do icol = colS, colE diff --git a/rte/kernels-openacc/mo_rte_solver_kernels.F90 b/rte/kernels-openacc/mo_rte_solver_kernels.F90 index 775c57520..719922a92 100644 --- a/rte/kernels-openacc/mo_rte_solver_kernels.F90 +++ b/rte/kernels-openacc/mo_rte_solver_kernels.F90 @@ -39,11 +39,8 @@ module mo_rte_solver_kernels lw_solver_noscat, lw_solver_noscat_GaussQuad, lw_solver_2stream, & sw_solver_noscat, sw_solver_2stream - public :: lw_solver_1rescl_GaussQuad, lw_solver_1rescl ! These routines don't really need to be visible but making them so is useful for testing. 
- public :: lw_source_noscat, lw_combine_sources, & - lw_source_2str, sw_source_2str, & - lw_two_stream, sw_two_stream, & + public :: lw_two_stream, sw_two_stream, & adding real(wp), parameter :: pi = acos(-1._wp) @@ -62,42 +59,55 @@ module mo_rte_solver_kernels subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, & tau, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & radn_up, radn_dn, & - sfc_srcJac, radn_upJac) bind(C, name="lw_solver_noscat") - integer, intent( in) :: ncol, nlay, ngpt ! Number of columns, layers, g-points - logical(wl), intent( in) :: top_at_1 - real(wp), dimension(ncol, ngpt), intent( in) :: D ! secant of propagation angle [] - real(wp), intent( in) :: weight ! quadrature weight - real(wp), dimension(ncol,nlay, ngpt), intent( in) :: tau ! Absorption optical thickness [] - real(wp), dimension(ncol,nlay, ngpt), intent( in) :: lay_source ! Planck source at layer average temperature [W/m2] + do_Jacobians, sfc_srcJac, radn_upJac, & + do_rescaling, ssa, g) bind(C, name="lw_solver_noscat") + integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points + logical(wl), intent(in ) :: top_at_1 + real(wp), dimension(ncol, ngpt), intent(in ) :: D ! secant of propagation angle [] + real(wp), intent(in ) :: weight ! quadrature weight + real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: tau ! Absorption optical thickness [] + real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: lay_source ! Planck source at layer average temperature [W/m2] ! Planck source at layer edge for radiation in increasing/decreasing ilay direction ! lev_source_dec applies the mapping in layer i to the Planck function at layer i ! lev_source_inc applies the mapping in layer i to the Planck function at layer i+1 real(wp), dimension(ncol,nlay, ngpt), target, & - intent( in) :: lev_source_inc, lev_source_dec - real(wp), dimension(ncol, ngpt), intent( in) :: sfc_emis ! 
Surface emissivity [] - real(wp), dimension(ncol, ngpt), intent( in) :: sfc_src ! Surface source function [W/m2] - real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_dn ! Radiances [W/m2-str] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: radn_up ! Radiances [W/m2-str] - ! Top level must contain incident flux boundary condition - - real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_srcJac ! surface temperature Jacobian of surface source function [W/m2/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent(out) :: radn_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] - ! Local variables, no g-point dependency + intent(in ) :: lev_source_inc, lev_source_dec + real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_emis ! Surface emissivity [] + real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_src ! Surface source function [W/m2] + real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: radn_up ! Radiances [W/m2-str] + real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_dn ! Top level must contain incident flux boundary condition + + ! + ! Optional variables - arrays aren't referenced if corresponding logical == False + ! + logical(wl), intent(in ) :: do_Jacobians + real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_srcJac ! surface temperature Jacobian of surface source function [W/m2/K] + real(wp), dimension(ncol,nlay+1 ), intent( out) :: radn_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] + logical(wl), intent(in ) :: do_rescaling + real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: ssa, g ! single-scattering albedo, asymmetry parameter + ! ------------------------------------------------------------------------------------------------- + ! Local variables real(wp), dimension(ncol,nlay,ngpt) :: tau_loc, & ! path length (tau/mu) trans ! 
transmissivity = exp(-tau) real(wp), dimension(ncol,nlay,ngpt) :: source_dn, source_up - real(wp), dimension(ncol, ngpt) :: source_sfc, sfc_albedo, source_sfcJac real(wp), dimension(:,:,:), pointer :: lev_source_up, lev_source_dn ! Mapping increasing/decreasing indicies to up/down real(wp), parameter :: pi = acos(-1._wp) - integer :: icol, ilev, igpt, top_level, ilay + integer :: icol, ilay, ilev, igpt, top_level, sfc_level real(wp) :: fact real(wp), parameter :: tau_thresh = sqrt(epsilon(tau_loc)) - ! ------------------------------------ - - + ! + ! For Jacobians + ! + real(wp), dimension(ncol,nlay+1,ngpt) :: gpt_Jac + real(wp) :: scalar_Jac ! Temp variable for reduction + ! + ! Used when approximating scattering + ! + real(wp) :: ssal, wb, scaleTau, scaling + real(wp), dimension(ncol,nlay, ngpt) :: An, Cn ! ------------------------------------ ! Which way is up? ! Level Planck sources for upward and downward radiation @@ -105,19 +115,33 @@ subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, ! 
lev_source_dn => lev_source_inc, and vice-versa if(top_at_1) then top_level = 1 + sfc_level = nlay+1 lev_source_up => lev_source_dec lev_source_dn => lev_source_inc else top_level = nlay+1 + sfc_level = 1 lev_source_up => lev_source_inc lev_source_dn => lev_source_dec end if - !$acc enter data copyin(d,tau,sfc_src,sfc_emis,lev_source_dec,lev_source_inc,lay_source,radn_dn) - !$acc enter data create(tau_loc,trans,source_dn,source_up,source_sfc,sfc_albedo,radn_up) - !$acc enter data attach(lev_source_up,lev_source_dn) + !$acc enter data copyin(d,tau,sfc_src,sfc_emis,lev_source_dec,lev_source_inc,lay_source,radn_dn) + !$omp target enter data map(to:d, tau, sfc_src, sfc_emis, lev_source_dec, lev_source_inc, lay_source, radn_dn) + !$acc enter data attach(lev_source_up,lev_source_dn) + !$omp target enter data map(to:lev_source_up, lev_source_dn) + !$acc enter data create( tau_loc,trans,source_dn,source_up,radn_up) + !$omp target enter data map(alloc:tau_loc,trans,source_dn,source_up,radn_up) + + !$acc enter data copyin(sfc_srcJac) if(do_Jacobians) + !$omp target enter data map(to:sfc_srcJac) if(do_Jacobians) + !$acc enter data create( radn_upJac, gpt_Jac) if(do_Jacobians) + !$omp target enter data map(alloc:radn_upJac, gpt_Jac) if(do_Jacobians) + + !$acc enter data create( An, Cn) if(do_rescaling) + !$omp target enter data map(alloc:An, Cn) if(do_rescaling) !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol ! @@ -125,70 +149,126 @@ subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, ! convert flux at top of domain to intensity assuming azimuthal isotropy ! radn_dn(icol,top_level,igpt) = radn_dn(icol,top_level,igpt)/(2._wp * pi * weight) - ! - ! Surface albedo, surface source function - ! 
- sfc_albedo(icol,igpt) = 1._wp - sfc_emis(icol,igpt) - source_sfc(icol,igpt) = sfc_emis(icol,igpt) * sfc_src(icol,igpt) end do end do - !$acc enter data copyin(sfc_srcJac) - !$acc enter data create(source_sfcJac, radn_upJac) - !$acc parallel loop collapse(2) - do igpt = 1, ngpt - do icol = 1, ncol - source_sfcJac(icol,igpt) = sfc_emis(icol,igpt) * sfc_srcJac(icol,igpt) - end do - end do - ! NOTE: This kernel produces small differences between GPU and CPU - ! implementations on Ascent with PGI, we assume due to floating point - ! differences in the exp() function. These differences are small in the - ! RFMIP test case (10^-6). !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol ! - ! Optical path and transmission, used in source function and transport calculations + ! The wb and scaleTau terms are independent of propagation + ! angle D and could be pre-computed if several values of D are used + ! We re-compute them here to keep not have to localize memory use ! - tau_loc(icol,ilay,igpt) = tau(icol,ilay,igpt)*D(icol,igpt) - trans (icol,ilay,igpt) = exp(-tau_loc(icol,ilay,igpt)) - - call lw_source_noscat_stencil(ncol, nlay, ngpt, icol, ilay, igpt, & - lay_source, lev_source_up, lev_source_dn, & - tau_loc, trans, & - source_dn, source_up) + if(do_rescaling) then + ssal = ssa(icol, ilay, igpt) + wb = ssal*(1._wp - g(icol, ilay, igpt)) * 0.5_wp + scaleTau = (1._wp - ssal + wb) + ! here wb/scaleTau is parameter wb/(1-w(1-b)) of Eq.21 of the Tang paper + ! actually it is in line of parameter rescaling defined in Eq.7 + ! potentialy if g=ssa=1 then wb/scaleTau = NaN + ! it should not happen because g is never 1 in atmospheres + ! explanation of factor 0.4 note A of Table + Cn(icol,ilay,igpt) = 0.4_wp*wb/scaleTau + ! 
Eq.15 of the paper, multiplied by path length + tau_loc(icol,ilay,igpt) = tau(icol,ilay,igpt)*D(icol,igpt)*scaleTau + trans (icol,ilay,igpt) = exp(-tau_loc(icol,ilay,igpt)) + An (icol,ilay,igpt) = (1._wp-trans(icol,ilay,igpt)**2) + else + ! + ! Optical path and transmission, used in source function and transport calculations + ! + tau_loc(icol,ilay,igpt) = tau(icol,ilay,igpt)*D(icol,igpt) + trans (icol,ilay,igpt) = exp(-tau_loc(icol,ilay,igpt)) + end if + call lw_source_noscat(lay_source(icol,ilay,igpt), & + lev_source_up(icol,ilay,igpt), lev_source_dn(icol,ilay,igpt), & + tau_loc(icol,ilay,igpt), trans(icol,ilay,igpt), & + source_dn(icol,ilay,igpt), source_up(icol,ilay,igpt)) end do end do end do ! - ! Transport + ! Transport down ! - call lw_transport_noscat(ncol, nlay, ngpt, top_at_1, & - tau_loc, trans, sfc_albedo, source_dn, source_up, source_sfc, & - radn_up, radn_dn, source_sfcJac, radn_upJac) - + call lw_transport_noscat_dn(ncol, nlay, ngpt, top_at_1, trans, source_dn, radn_dn) + ! + ! Surface reflection and emission + ! + !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) + do igpt = 1, ngpt + do icol = 1, ncol + ! + ! Surface albedo, surface source function + ! + radn_up (icol,sfc_level,igpt) = radn_dn(icol,sfc_level,igpt)*(1._wp - sfc_emis(icol,igpt)) + & + sfc_src(icol, igpt)* sfc_emis(icol,igpt) + if(do_Jacobians) & + gpt_Jac(icol,sfc_level,igpt) = sfc_srcJac(icol, igpt)* sfc_emis(icol,igpt) + end do + end do + ! + ! Transport up, or up and down again if using rescaling + ! + if(do_rescaling) then + call lw_transport_1rescl(ncol, nlay, ngpt, top_at_1, trans, & + source_dn, source_up, & + radn_up, radn_dn, An, Cn, & + do_Jacobians, gpt_Jac) ! Standing in for Jacobian, i.e. rad_up_Jac, rad_dn_Jac) + else + call lw_transport_noscat_up(ncol, nlay, ngpt, top_at_1, trans, source_up, radn_up, & + do_Jacobians, gpt_Jac) + end if ! ! Convert intensity to flux assuming azimuthal isotropy and quadrature weight ! 
!$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igpt = 1, ngpt do ilev = 1, nlay+1 do icol = 1, ncol radn_dn (icol,ilev,igpt) = 2._wp * pi * weight * radn_dn (icol,ilev,igpt) radn_up (icol,ilev,igpt) = 2._wp * pi * weight * radn_up (icol,ilev,igpt) - radn_upJac(icol,ilev,igpt) = 2._wp * pi * weight * radn_upJac(icol,ilev,igpt) end do end do end do - !$acc exit data delete(sfc_srcJac, source_sfcJac) - !$acc exit data copyout(radn_upJac) + ! + ! Only broadband-integrated Jacobians are provided + ! + if (do_Jacobians) then + ! + ! Reduce by summing along g-point dimension + ! + !$acc parallel loop gang vector collapse(2) + !$omp target teams distribute parallel do simd collapse(2) + do ilev = 1, nlay+1 + do icol = 1, ncol + scalar_Jac = 0.0_wp + do igpt = 1, ngpt + scalar_Jac = scalar_Jac + gpt_Jac(icol, ilev, igpt) + end do + radn_upJac(icol, ilev) = 2._wp * pi * weight * scalar_Jac + end do + end do + end if - !$acc exit data copyout(radn_dn,radn_up) - !$acc exit data delete(d,tau,sfc_src,sfc_emis,lev_source_dec,lev_source_inc,lay_source,tau_loc,trans,source_dn,source_up,source_sfc,sfc_albedo) - !$acc exit data detach(lev_source_up,lev_source_dn) + !$acc exit data delete( sfc_srcJac, gpt_Jac) if(do_Jacobians) + !$omp target exit data map(release:sfc_srcJac, gpt_Jac) if(do_Jacobians) + !$acc exit data copyout( radn_upJac) if(do_Jacobians) + !$omp target exit data map(from:radn_upJac) if(do_Jacobians) + + !$acc exit data copyout( radn_dn,radn_up) + !$omp target exit data map(from:radn_dn,radn_up) + !$acc exit data delete( d,tau,sfc_src,sfc_emis,lev_source_dec,lev_source_inc,lay_source,tau_loc,trans,source_dn,source_up) + !$omp target exit data map(release:d, tau, sfc_src, sfc_emis, lev_source_dec, lev_source_inc, lay_source, tau_loc, trans, source_dn, source_up) + !$acc exit data detach( lev_source_up,lev_source_dn) + !$omp target exit data map(from:lev_source_up, lev_source_dn) + !$acc exit data delete( An, Cn) 
if(do_rescaling) + !$omp target exit data map(release:An, Cn) if(do_rescaling) end subroutine lw_solver_noscat ! --------------------------------------------------------------- @@ -200,7 +280,8 @@ end subroutine lw_solver_noscat ! --------------------------------------------------------------- subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weights, & tau, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, flux_up, flux_dn,& - sfc_srcJac, flux_upJac) & + do_Jacobians, sfc_srcJac, flux_upJac, & + do_rescaling, ssa, g) & bind (C, name="lw_solver_noscat_GaussQuad") integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points logical(wl), intent(in ) :: top_at_1 @@ -215,13 +296,22 @@ subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weig ! Planck source at layer edge for radiation in decreasing ilay direction [W/m2] real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_emis ! Surface emissivity [] real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_src ! Surface source function [W/m2] - real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: flux_dn ! Radiances [W/m2-str] + real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: flux_dn ! Radiances [W/m2-str], Top level must contain incident flux boundary condition real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: flux_up ! Radiances [W/m2-str] - ! Top level must contain incident flux boundary condition - real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_srcJac ! surface temperature Jacobian of surface source function [W/m2/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent(out) :: flux_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] + ! + ! Optional variables - arrays aren't referenced if corresponding logical == False + ! + logical(wl), intent(in ) :: do_Jacobians + real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_srcJac ! 
surface temperature Jacobian of surface source function [W/m2/K] + real(wp), dimension(ncol,nlay+1 ), intent(out ) :: flux_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] + logical(wl), intent(in ) :: do_rescaling + real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: ssa, g ! single-scattering albedo, asymmetry parameter + ! ------------------------------------ + ! ! Local variables - real(wp), dimension(ncol,nlay+1,ngpt) :: radn_dn, radn_up, radn_upJac ! Fluxes per quad angle + ! + real(wp), dimension(ncol,nlay+1,ngpt) :: radn_dn, radn_up + real(wp), dimension(ncol,nlay+1 ) :: radn_upJac ! Fluxes per quad angle real(wp), dimension(ncol, ngpt) :: Ds_ncol real(wp), dimension(ncol, ngpt) :: flux_top @@ -229,13 +319,19 @@ subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weig integer :: icol, ilev, igpt !$acc enter data copyin(Ds,weights,tau,lay_source,lev_source_inc,lev_source_dec,sfc_emis,sfc_src,flux_dn) + !$omp target enter data map(to:Ds, weights, tau, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, flux_dn) !$acc enter data create(flux_up,radn_dn,radn_up,Ds_ncol,flux_top) - !$acc enter data copyin(sfc_srcJac) - !$acc enter data create(flux_upJac, radn_upJac) + !$omp target enter data map(alloc:flux_up, radn_dn, radn_up, Ds_ncol, flux_top) + !$acc enter data copyin(sfc_srcJac) if(do_Jacobians) + !$omp target enter data map(to:sfc_srcJac) if(do_Jacobians) + !$acc enter data create( flux_upJac, radn_upJac) if(do_Jacobians) + !$omp target enter data map(alloc:flux_upJac, radn_upJac) if(do_Jacobians) + ! radn_upJac is needed only if do_Jacobians is true .and. nmus > 1 ! ------------------------------------ ! 
------------------------------------ !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol Ds_ncol(icol, igpt) = Ds(1) @@ -245,12 +341,15 @@ subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weig call lw_solver_noscat(ncol, nlay, ngpt, & top_at_1, Ds_ncol, weights(1), tau, & lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & - flux_up, flux_dn, sfc_srcJac, flux_upJac) + flux_up, flux_dn, & + do_Jacobians, sfc_srcJac, flux_upJac, & + do_rescaling, ssa, g) ! ! For more than one angle use local arrays ! top_level = MERGE(1, nlay+1, top_at_1) !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1,ngpt do icol = 1,ncol flux_top(icol,igpt) = flux_dn(icol,top_level,igpt) @@ -260,6 +359,7 @@ subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weig do imu = 2, nmus !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol Ds_ncol(icol, igpt) = Ds(imu) @@ -268,24 +368,32 @@ subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weig call lw_solver_noscat(ncol, nlay, ngpt, & top_at_1, Ds_ncol, weights(imu), tau, & lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & - radn_up, radn_dn, sfc_srcJac, radn_upJac) + radn_up, radn_dn, & + do_Jacobians, sfc_srcJac, radn_upJac, & + do_rescaling, ssa, g) !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igpt = 1, ngpt do ilev = 1, nlay+1 do icol = 1, ncol - flux_up (icol,ilev,igpt) = flux_up (icol,ilev,igpt) + radn_up (icol,ilev,igpt) - flux_dn (icol,ilev,igpt) = flux_dn (icol,ilev,igpt) + radn_dn (icol,ilev,igpt) - flux_upJac(icol,ilev,igpt) = flux_upJac(icol,ilev,igpt) + radn_upJac(icol,ilev,igpt) + flux_up(icol,ilev,igpt) = flux_up(icol,ilev,igpt) + radn_up(icol,ilev,igpt) + flux_dn(icol,ilev,igpt) 
= flux_dn(icol,ilev,igpt) + radn_dn(icol,ilev,igpt) + if(do_Jacobians .and. igpt == 1) & + flux_upJac(icol,ilev) = flux_upJac(icol,ilev ) + radn_upJac(icol,ilev ) end do end do end do end do ! imu loop - !$acc exit data delete(sfc_srcJac, radn_upJac) - !$acc exit data copyout(flux_upJac) + !$acc exit data delete( sfc_srcJac, radn_upJac) if(do_Jacobians) + !$omp target exit data map(release:sfc_srcJac, radn_upJac) if(do_Jacobians) + !$acc exit data copyout( flux_upJac) if(do_Jacobians) + !$omp target exit data map(from:flux_upJac) if(do_Jacobians) !$acc exit data copyout(flux_up,flux_dn) + !$omp target exit data map(from:flux_up, flux_dn) !$acc exit data delete(Ds,weights,tau,lay_source,lev_source_inc,lev_source_dec,sfc_emis,sfc_src,radn_dn,radn_up,Ds_ncol,flux_top) + !$omp target exit data map(release:Ds, weights, tau, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, radn_dn, radn_up, Ds_ncol, flux_top) end subroutine lw_solver_noscat_GaussQuad ! ------------------------------------------------------------------------------------------------- ! @@ -328,7 +436,9 @@ subroutine lw_solver_2stream (ncol, nlay, ngpt, top_at_1, & ! ------------------------------------ ! ------------------------------------ !$acc enter data copyin(tau, ssa, g, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, flux_dn) + !$omp target enter data map(to:tau, ssa, g, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, flux_dn) !$acc enter data create(flux_up, Rdif, Tdif, gamma1, gamma2, sfc_albedo, lev_source, source_dn, source_up, source_sfc) + !$omp target enter data map(alloc:flux_up, Rdif, Tdif, gamma1, gamma2, sfc_albedo, lev_source, source_dn, source_up, source_sfc) ! ! RRTMGP provides source functions at each level using the spectral mapping ! of each adjacent layer. 
Combine these for two-stream calculations @@ -354,6 +464,7 @@ subroutine lw_solver_2stream (ncol, nlay, ngpt, top_at_1, & source_dn, source_up, source_sfc) !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol sfc_albedo(icol,igpt) = 1._wp - sfc_emis(icol,igpt) @@ -368,8 +479,11 @@ subroutine lw_solver_2stream (ncol, nlay, ngpt, top_at_1, & source_dn, source_up, source_sfc, & flux_up, flux_dn) !$acc exit data delete(tau, ssa, g, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src) + !$omp target exit data map(release:tau, ssa, g, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src) !$acc exit data delete(Rdif, Tdif, gamma1, gamma2, sfc_albedo, lev_source, source_dn, source_up, source_sfc) + !$omp target exit data map(release:Rdif, Tdif, gamma1, gamma2, sfc_albedo, lev_source, source_dn, source_up, source_sfc) !$acc exit data copyout(flux_up, flux_dn) + !$omp target exit data map(from:flux_up, flux_dn) end subroutine lw_solver_2stream ! ------------------------------------------------------------------------------------------------- ! @@ -393,7 +507,9 @@ subroutine sw_solver_noscat(ncol, nlay, ngpt, & ! ------------------------------------ ! ------------------------------------ !$acc enter data copyin(tau, mu0) create(mu0_inv, flux_dir) + !$omp target enter data map(to:tau, mu0) map(alloc:mu0_inv, flux_dir) !$acc parallel loop + !$omp target teams distribute parallel do simd do icol = 1, ncol mu0_inv(icol) = 1._wp/mu0(icol) enddo @@ -408,6 +524,7 @@ subroutine sw_solver_noscat(ncol, nlay, ngpt, & ! layer index = level index - 1 ! previous level is up (-1) !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol do ilev = 2, nlay+1 @@ -419,6 +536,7 @@ subroutine sw_solver_noscat(ncol, nlay, ngpt, & ! layer index = level index ! 
previous level is up (+1) !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol do ilev = nlay, 1, -1 @@ -428,6 +546,7 @@ subroutine sw_solver_noscat(ncol, nlay, ngpt, & end do end if !$acc exit data delete(tau, mu0, mu0_inv) copyout(flux_dir) + !$omp target exit data map(release:tau, mu0, mu0_inv) map(from:flux_dir) end subroutine sw_solver_noscat ! ------------------------------------------------------------------------------------------------- ! @@ -463,7 +582,9 @@ subroutine sw_solver_2stream (ncol, nlay, ngpt, top_at_1, & ! Cell properties: transmittance and reflectance for direct and diffuse radiation ! !$acc enter data copyin(tau, ssa, g, mu0, sfc_alb_dir, sfc_alb_dif, flux_dn, flux_dir) + !$omp target enter data map(to:tau, ssa, g, mu0, sfc_alb_dir, sfc_alb_dif, flux_dn, flux_dir) !$acc enter data create(Rdif, Tdif, Rdir, Tdir, Tnoscat, source_up, source_dn, source_srf, flux_up) + !$omp target enter data map(alloc:Rdif, Tdif, Rdir, Tdir, Tnoscat, source_up, source_dn, source_srf, flux_up) call sw_two_stream(ncol, nlay, ngpt, mu0, & tau , ssa , g , & Rdif, Tdif, Rdir, Tdir, Tnoscat) @@ -477,6 +598,7 @@ subroutine sw_solver_2stream (ncol, nlay, ngpt, top_at_1, & ! adding computes only diffuse flux; flux_dn is total ! 
!$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igpt = 1, ngpt do ilay = 1, nlay+1 do icol = 1, ncol @@ -485,7 +607,9 @@ subroutine sw_solver_2stream (ncol, nlay, ngpt, top_at_1, & end do end do !$acc exit data copyout(flux_up, flux_dn, flux_dir) + !$omp target exit data map(from:flux_up, flux_dn, flux_dir) !$acc exit data delete (tau, ssa, g, mu0, sfc_alb_dir, sfc_alb_dif, Rdif, Tdif, Rdir, Tdir, Tnoscat, source_up, source_dn, source_srf) + !$omp target exit data map(release:tau, ssa, g, mu0, sfc_alb_dir, sfc_alb_dif, Rdif, Tdif, Rdir, Tdir, Tnoscat, source_up, source_dn, source_srf) end subroutine sw_solver_2stream @@ -500,75 +624,40 @@ end subroutine sw_solver_2stream ! This routine implements point-wise stencil, and has to be called in a loop ! ! --------------------------------------------------------------- - subroutine lw_source_noscat_stencil(ncol, nlay, ngpt, icol, ilay, igpt, & - lay_source, lev_source_up, lev_source_dn, tau, trans, & - source_dn, source_up) + subroutine lw_source_noscat(lay_source, lev_source_up, lev_source_dn, tau, trans, & + source_dn, source_up) !$acc routine seq + !$omp declare target ! - integer, intent(in) :: ncol, nlay, ngpt - integer, intent(in) :: icol, ilay, igpt ! Working point coordinates - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: lay_source, & ! Planck source at layer center - lev_source_up, & ! Planck source at levels (layer edges), - lev_source_dn, & ! increasing/decreasing layer index - tau, & ! Optical path (tau/mu) - trans ! Transmissivity (exp(-tau)) - real(wp), dimension(ncol, nlay, ngpt), intent(inout):: source_dn, source_up - ! Source function at layer edges - ! Down at the bottom of the layer, up at the top + real(wp), intent(in) :: lay_source, & ! Planck source at layer center + lev_source_up, & ! Planck source at levels (layer edges), + lev_source_dn, & ! increasing/decreasing layer index + tau, & ! Optical path (tau/mu) + trans ! 
Transmissivity (exp(-tau)) + real(wp), intent(inout):: source_dn, source_up + ! Source function at layer edges + ! Down at the bottom of the layer, up at the top ! -------------------------------- real(wp), parameter :: tau_thresh = sqrt(epsilon(tau)) real(wp) :: fact - ! --------------------------------------------------------------- ! ! Weighting factor. Use 2nd order series expansion when rounding error (~tau^2) ! is of order epsilon (smallest difference from 1. in working precision) ! Thanks to Peter Blossey ! - if(tau(icol,ilay,igpt) > tau_thresh) then - fact = (1._wp - trans(icol,ilay,igpt))/tau(icol,ilay,igpt) - trans(icol,ilay,igpt) + if(tau > tau_thresh) then + fact = (1._wp - trans)/tau - trans else - fact = tau(icol, ilay,igpt) * (0.5_wp - 1._wp/3._wp*tau(icol,ilay,igpt)) + fact = tau * (0.5_wp - 1._wp/3._wp*tau) end if ! ! Equation below is developed in Clough et al., 1992, doi:10.1029/92JD01419, Eq 13 ! - source_dn(icol,ilay,igpt) = (1._wp - trans(icol,ilay,igpt)) * lev_source_dn(icol,ilay,igpt) + & - 2._wp * fact * (lay_source(icol,ilay,igpt) - lev_source_dn(icol,ilay,igpt)) - source_up(icol,ilay,igpt) = (1._wp - trans(icol,ilay,igpt)) * lev_source_up(icol,ilay,igpt) + & - 2._wp * fact * (lay_source(icol,ilay,igpt) - lev_source_up(icol,ilay,igpt)) - - end subroutine lw_source_noscat_stencil - ! --------------------------------------------------------------- - ! - ! Driver function to compute LW source function for upward and downward emission - ! - ! --------------------------------------------------------------- - subroutine lw_source_noscat(ncol, nlay, ngpt, lay_source, lev_source_up, lev_source_dn, tau, trans, & - source_dn, source_up) bind(C, name="lw_source_noscat") - integer, intent(in) :: ncol, nlay, ngpt - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: lay_source, & ! Planck source at layer center - lev_source_up, & ! Planck source at levels (layer edges), - lev_source_dn, & ! increasing/decreasing layer index - tau, & ! 
Optical path (tau/mu) - trans ! Transmissivity (exp(-tau)) - real(wp), dimension(ncol, nlay, ngpt), intent(out):: source_dn, source_up - ! Source function at layer edges - ! Down at the bottom of the layer, up at the top - ! -------------------------------- - integer :: icol, ilay, igpt - ! --------------------------------------------------------------- - !$acc parallel loop collapse(3) - do igpt = 1, ngpt - do ilay = 1, nlay - do icol = 1, ncol - call lw_source_noscat_stencil(ncol, nlay, ngpt, icol, ilay, igpt, & - lay_source, lev_source_up, lev_source_dn, & - tau, trans, & - source_dn, source_up) - end do - end do - end do + source_dn = (1._wp - trans) * lev_source_dn + & + 2._wp * fact * (lay_source - lev_source_dn) + source_up = (1._wp - trans) * lev_source_up + & + 2._wp * fact * (lay_source - lev_source_up) end subroutine lw_source_noscat ! --------------------------------------------------------------- @@ -576,22 +665,15 @@ end subroutine lw_source_noscat ! Longwave no-scattering transport ! ! --------------------------------------------------------------- - subroutine lw_transport_noscat(ncol, nlay, ngpt, top_at_1, & - tau, trans, sfc_albedo, source_dn, source_up, source_sfc, & - radn_up, radn_dn, source_sfcJac, radn_upJac) bind(C, name="lw_transport_noscat") + subroutine lw_transport_noscat_dn(ncol, nlay, ngpt, top_at_1, & + trans, source_dn,radn_dn) bind(C, name="lw_transport_noscat_dn") + !dir$ optimize(-O0) integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points logical(wl), intent(in ) :: top_at_1 ! - real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: tau, & ! Absorption optical thickness, pre-divided by mu [] - trans ! transmissivity = exp(-tau) - real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_albedo ! Surface albedo - real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: source_dn, & - source_up ! Diffuse radiation emitted by the layer - real(wp), dimension(ncol ,ngpt), intent(in ) :: source_sfc ! 
Surface source function [W/m2] - real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_dn ! Radiances [W/m2-str] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: radn_up ! Radiances [W/m2-str] - ! Top level must contain incident flux boundary condition - real(wp), dimension(ncol ,ngpt), intent(in ) :: source_sfcJac ! surface temperature Jacobian of surface source function [W/m2/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent(out) :: radn_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] + real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: trans ! transmissivity = exp(-tau) + real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: source_dn ! Diffuse radiation emitted by the layer + real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_dn ! Radiances [W/m2-str] + ! Top level must contain incident flux boundary condition ! Local variables integer :: igpt, ilev, icol ! --------------------------------------------------- @@ -601,22 +683,12 @@ subroutine lw_transport_noscat(ncol, nlay, ngpt, top_at_1, & ! Top of domain is index 1 ! !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol - ! Downward propagation do ilev = 2, nlay+1 radn_dn(icol,ilev,igpt) = trans(icol,ilev-1,igpt)*radn_dn(icol,ilev-1,igpt) + source_dn(icol,ilev-1,igpt) end do - - ! Surface reflection and emission - radn_up (icol,nlay+1,igpt) = radn_dn(icol,nlay+1,igpt)*sfc_albedo(icol,igpt) + source_sfc (icol,igpt) - radn_upJac(icol,nlay+1,igpt) = source_sfcJac(icol,igpt) - - ! Upward propagation - do ilev = nlay, 1, -1 - radn_up (icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_up (icol,ilev+1,igpt) + source_up(icol,ilev,igpt) - radn_upJac(icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_upJac(icol,ilev+1,igpt) - end do end do end do else @@ -624,27 +696,72 @@ subroutine lw_transport_noscat(ncol, nlay, ngpt, top_at_1, & ! Top of domain is index nlay+1 ! 
!$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol - ! Downward propagation do ilev = nlay, 1, -1 radn_dn(icol,ilev,igpt) = trans(icol,ilev ,igpt)*radn_dn(icol,ilev+1,igpt) + source_dn(icol,ilev,igpt) end do + end do + end do + end if - ! Surface reflection and emission - radn_up (icol,1,igpt) = radn_dn(icol,1,igpt)*sfc_albedo(icol,igpt) + source_sfc (icol,igpt) - radn_upJac(icol,1,igpt) = source_sfcJac(icol,igpt) + end subroutine lw_transport_noscat_dn + ! ------------------------------------------------------------------------------------------------- + subroutine lw_transport_noscat_up(ncol, nlay, ngpt, & + top_at_1, trans, source_up, radn_up, do_Jacobians, radn_upJac) bind(C, name="lw_transport_noscat_up") + !dir$ optimize(-O0) + integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points + logical(wl), intent(in ) :: top_at_1 ! + real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: trans ! transmissivity = exp(-tau) + real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: source_up ! Diffuse radiation emitted by the layer + real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: radn_up ! Radiances [W/m2-str] + logical(wl), intent(in ) :: do_Jacobians + real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: radn_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] + ! Local variables + integer :: igpt, ilev, icol + ! --------------------------------------------------- + ! --------------------------------------------------- + if(top_at_1) then + ! + ! Top of domain is index 1 + ! 
+ !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) + do igpt = 1, ngpt + do icol = 1, ncol + do ilev = nlay, 1, -1 + radn_up (icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_up (icol,ilev+1,igpt) + source_up(icol,ilev,igpt) + end do + if(do_Jacobians) then + do ilev = nlay, 1, -1 + radn_upJac(icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_upJac(icol,ilev+1,igpt) + end do + end if + end do + end do - ! Upward propagation + else + ! + ! Top of domain is index nlay+1 + ! + !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) + do igpt = 1, ngpt + do icol = 1, ncol do ilev = 2, nlay+1 - radn_up (icol,ilev,igpt) = trans(icol,ilev-1,igpt) * radn_up (icol,ilev-1,igpt) + source_up(icol,ilev-1,igpt) - radn_upJac(icol,ilev,igpt) = trans(icol,ilev-1,igpt) * radn_upJac(icol,ilev-1,igpt) + radn_up (icol,ilev,igpt) = trans(icol,ilev-1,igpt) * radn_up (icol,ilev-1,igpt) + source_up(icol,ilev-1,igpt) end do + if(do_Jacobians) then + do ilev = nlay, 1, -1 + radn_upJac(icol,ilev,igpt) = trans(icol,ilev-1,igpt) * radn_upJac(icol,ilev-1,igpt) + end do + end if end do end do end if - end subroutine lw_transport_noscat + end subroutine lw_transport_noscat_up ! ------------------------------------------------------------------------------------------------- ! ! Longwave two-stream solutions to diffuse reflectance and transmittance for a layer @@ -673,9 +790,12 @@ subroutine lw_two_stream(ncol, nlay, ngpt, tau, w0, g, & ! --------------------------------- ! 
--------------------------------- !$acc enter data copyin(tau, w0, g) + !$omp target enter data map(to:tau, w0, g) !$acc enter data create(gamma1, gamma2, Rdif, Tdif) + !$omp target enter data map(alloc:gamma1, gamma2, Rdif, Tdif) !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -715,7 +835,9 @@ subroutine lw_two_stream(ncol, nlay, ngpt, tau, w0, g, & end do end do !$acc exit data delete (tau, w0, g) + !$omp target exit data map(release:tau, w0, g) !$acc exit data copyout(gamma1, gamma2, Rdif, Tdif) + !$omp target exit data map(from:gamma1, gamma2, Rdif, Tdif) end subroutine lw_two_stream ! ------------------------------------------------------------------------------------------------- ! @@ -736,9 +858,12 @@ subroutine lw_combine_sources(ncol, nlay, ngpt, top_at_1, & ! --------------------------------------------------------------- ! --------------------------------- !$acc enter data copyin(lev_src_inc, lev_src_dec) + !$omp target enter data map(to:lev_src_inc, lev_src_dec) !$acc enter data create(lev_source) + !$omp target enter data map(alloc:lev_source) !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igpt = 1, ngpt do ilay = 1, nlay+1 do icol = 1,ncol @@ -754,7 +879,9 @@ subroutine lw_combine_sources(ncol, nlay, ngpt, top_at_1, & end do end do !$acc exit data delete (lev_src_inc, lev_src_dec) + !$omp target exit data map(release:lev_src_inc, lev_src_dec) !$acc exit data copyout(lev_source) + !$omp target exit data map(from:lev_source) end subroutine lw_combine_sources ! --------------------------------------------------------------- ! @@ -786,9 +913,12 @@ subroutine lw_source_2str(ncol, nlay, ngpt, top_at_1, & ! --------------------------------------------------------------- ! 
--------------------------------- !$acc enter data copyin(sfc_emis, sfc_src, lay_source, tau, gamma1, gamma2, rdif, tdif, lev_source) + !$omp target enter data map(to:sfc_emis, sfc_src, lay_source, tau, gamma1, gamma2, rdif, tdif, lev_source) !$acc enter data create(source_dn, source_up, source_sfc) + !$omp target enter data map(alloc:source_dn, source_up, source_sfc) !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -819,7 +949,9 @@ subroutine lw_source_2str(ncol, nlay, ngpt, top_at_1, & end do end do !$acc exit data delete(sfc_emis, sfc_src, lay_source, tau, gamma1, gamma2, rdif, tdif, lev_source) + !$omp target exit data map(release:sfc_emis, sfc_src, lay_source, tau, gamma1, gamma2, rdif, tdif, lev_source) !$acc exit data copyout(source_dn, source_up, source_sfc) + !$omp target exit data map(from:source_dn, source_up, source_sfc) end subroutine lw_source_2str ! ------------------------------------------------------------------------------------------------- @@ -856,9 +988,12 @@ subroutine sw_two_stream(ncol, nlay, ngpt, mu0, tau, w0, g, & ! --------------------------------- ! --------------------------------- !$acc enter data copyin (mu0, tau, w0, g) + !$omp target enter data map(to:mu0, tau, w0, g) !$acc enter data create(Rdif, Tdif, Rdir, Tdir, Tnoscat, mu0_inv) + !$omp target enter data map(alloc:Rdif, Tdif, Rdir, Tdir, Tnoscat, mu0_inv) !$acc parallel loop + !$omp target teams distribute parallel do simd do icol = 1, ncol mu0_inv(icol) = 1._wp/mu0(icol) enddo @@ -867,6 +1002,7 @@ subroutine sw_two_stream(ncol, nlay, ngpt, mu0, tau, w0, g, & ! and CPU. This *might* be floating point differences in implementation of ! the exp function. 
!$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -944,7 +1080,9 @@ subroutine sw_two_stream(ncol, nlay, ngpt, mu0, tau, w0, g, & end do end do !$acc exit data delete (mu0, tau, w0, g, mu0_inv) + !$omp target exit data map(release:mu0, tau, w0, g, mu0_inv) !$acc exit data copyout(Rdif, Tdif, Rdir, Tdir, Tnoscat) + !$omp target exit data map(from:Rdif, Tdif, Rdir, Tdir, Tnoscat) end subroutine sw_two_stream ! --------------------------------------------------------------- @@ -967,10 +1105,13 @@ subroutine sw_source_2str(ncol, nlay, ngpt, top_at_1, Rdir, Tdir, Tnoscat, sfc_a ! --------------------------------- ! --------------------------------- !$acc enter data copyin (Rdir, Tdir, Tnoscat, sfc_albedo, flux_dn_dir) + !$omp target enter data map(to:Rdir, Tdir, Tnoscat, sfc_albedo, flux_dn_dir) !$acc enter data create(source_dn, source_up, source_sfc) + !$omp target enter data map(alloc:source_dn, source_up, source_sfc) if(top_at_1) then !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol do ilev = 1, nlay @@ -985,6 +1126,7 @@ subroutine sw_source_2str(ncol, nlay, ngpt, top_at_1, Rdir, Tdir, Tnoscat, sfc_a ! layer index = level index ! previous level is up (+1) !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol do ilev = nlay, 1, -1 @@ -997,7 +1139,9 @@ subroutine sw_source_2str(ncol, nlay, ngpt, top_at_1, Rdir, Tdir, Tnoscat, sfc_a end do end if !$acc exit data copyout(source_dn, source_up, source_sfc, flux_dn_dir) + !$omp target exit data map(from:source_dn, source_up, source_sfc, flux_dn_dir) !$acc exit data delete(Rdir, Tdir, Tnoscat, sfc_albedo) + !$omp target exit data map(release:Rdir, Tdir, Tnoscat, sfc_albedo) end subroutine sw_source_2str ! 
--------------------------------------------------------------- @@ -1012,6 +1156,7 @@ subroutine adding(ncol, nlay, ngpt, top_at_1, & rdif, tdif, & src_dn, src_up, src_sfc, & flux_up, flux_dn) bind(C, name="adding") + !dir$ optimize(-O0) integer, intent(in ) :: ncol, nlay, ngpt logical(wl), intent(in ) :: top_at_1 real(wp), dimension(ncol ,ngpt), intent(in ) :: albedo_sfc @@ -1042,10 +1187,13 @@ subroutine adding(ncol, nlay, ngpt, top_at_1, & ! We write the loops out explicitly so compilers will have no trouble optimizing them. ! !$acc enter data copyin(albedo_sfc, rdif, tdif, src_dn, src_up, src_sfc, flux_dn) + !$omp target enter data map(to:albedo_sfc, rdif, tdif, src_dn, src_up, src_sfc, flux_dn) !$acc enter data create(flux_up, albedo, src, denom) + !$omp target enter data map(alloc:flux_up, albedo, src, denom) if(top_at_1) then !$acc parallel loop gang vector collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol ilev = nlay + 1 @@ -1093,6 +1241,7 @@ subroutine adding(ncol, nlay, ngpt, top_at_1, & else !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol ilev = 1 @@ -1139,7 +1288,9 @@ subroutine adding(ncol, nlay, ngpt, top_at_1, & end do end if !$acc exit data delete(albedo_sfc, rdif, tdif, src_dn, src_up, src_sfc, albedo, src, denom) + !$omp target exit data map(release:albedo_sfc, rdif, tdif, src_dn, src_up, src_sfc, albedo, src, denom) !$acc exit data copyout(flux_up, flux_dn) + !$omp target exit data map(from:flux_up, flux_dn) end subroutine adding ! --------------------------------------------------------------- ! @@ -1158,6 +1309,7 @@ subroutine apply_BC_gpt(ncol, nlay, ngpt, top_at_1, inc_flux, flux_dn) bind (C, ! 
Upper boundary condition if(top_at_1) then !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol flux_dn(icol, 1, igpt) = inc_flux(icol,igpt) @@ -1165,6 +1317,7 @@ subroutine apply_BC_gpt(ncol, nlay, ngpt, top_at_1, inc_flux, flux_dn) bind (C, end do else !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol flux_dn(icol, nlay+1, igpt) = inc_flux(icol,igpt) @@ -1187,6 +1340,7 @@ subroutine apply_BC_factor(ncol, nlay, ngpt, top_at_1, inc_flux, factor, flux_dn ! Upper boundary condition if(top_at_1) then !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol flux_dn(icol, 1, igpt) = inc_flux(icol,igpt) * factor(icol) @@ -1194,6 +1348,7 @@ subroutine apply_BC_factor(ncol, nlay, ngpt, top_at_1, inc_flux, factor, flux_dn end do else !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol flux_dn(icol, nlay+1, igpt) = inc_flux(icol,igpt) * factor(icol) @@ -1214,6 +1369,7 @@ subroutine apply_BC_0(ncol, nlay, ngpt, top_at_1, flux_dn) bind (C, name="apply_ ! Upper boundary condition if(top_at_1) then !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol flux_dn(icol, 1, igpt) = 0._wp @@ -1221,6 +1377,7 @@ subroutine apply_BC_0(ncol, nlay, ngpt, top_at_1, flux_dn) bind (C, name="apply_ end do else !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol flux_dn(icol, nlay+1, igpt) = 0._wp @@ -1230,360 +1387,6 @@ subroutine apply_BC_0(ncol, nlay, ngpt, top_at_1, flux_dn) bind (C, name="apply_ end subroutine apply_BC_0 ! ------------------------------------------------------------------------------------------------- ! -! 
Similar to Longwave no-scattering (lw_solver_noscat) -! a) relies on rescaling of the optical parameters based on asymetry factor and single scattering albedo -! scaling can be computed by scaling_1rescl -! b) adds adustment term based on cloud properties (lw_transport_1rescl) -! adustment terms is computed based on solution of the Tang equations -! for "linear-in-tau" internal source (not in the paper) -! -! Attention: -! use must prceompute scaling before colling the function -! -! Implemented based on the paper -! Tang G, et al, 2018: https://doi.org/10.1175/JAS-D-18-0014.1 -! -! ------------------------------------------------------------------------------------------------- - subroutine lw_solver_1rescl(ncol, nlay, ngpt, top_at_1, D, & - tau, scaling, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & - radn_up, radn_dn, & - sfc_srcJac, rad_up_Jac, rad_dn_Jac) bind(C, name="lw_solver_1rescl") - integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points - logical(wl), intent(in ) :: top_at_1 - real(wp), dimension(ncol, ngpt), intent(in ) :: D ! secant of propagation angle [] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: tau ! Absorption optical thickness [] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: scaling ! single scattering albedo [] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: lay_source ! Planck source at layer average temperature [W/m2] - ! Planck source at layer edge for radiation in increasing/decreasing ilay direction - ! lev_source_dec applies the mapping in layer i to the Planck function at layer i - ! lev_source_inc applies the mapping in layer i to the Planck function at layer i+1 - real(wp), dimension(ncol,nlay, ngpt), target, & - intent(in ) :: lev_source_inc, lev_source_dec - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_emis ! Surface emissivity [] - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_src ! 
Surface source function [W/m2] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: radn_up ! Radiances [W/m2-str] - real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_dn ! Top level must contain incident flux boundary condition - - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_srcJac ! Surface Temperature Jacobian source function [W/m2/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: rad_up_Jac ! Surface Temperature Jacobians [W/m2-str/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: rad_dn_Jac ! Top level set to 0 - ! Local variables, WITH g-point dependency - real(wp), dimension(ncol,nlay,ngpt) :: tau_loc, & ! path length (tau/mu) - trans ! transmissivity = exp(-tau) - real(wp), dimension(ncol,nlay,ngpt) :: source_dn, source_up - real(wp), dimension(ncol, ngpt) :: source_sfc, sfc_albedo - real(wp), dimension(ncol, ngpt) :: source_sfcJac - - real(wp), dimension(:,:,:), pointer :: lev_source_up, lev_source_dn ! Mapping increasing/decreasing indicies to up/down - - real(wp), parameter :: pi = acos(-1._wp) - integer :: ilev, igpt, top_level - ! ------------------------------------ - real(wp) :: fact - real(wp), parameter :: tau_thresh = sqrt(epsilon(tau)) - integer :: icol - real(wp), dimension(ncol ) :: sfcSource - real(wp), dimension(ncol,nlay,ngpt) :: An, Cn - ! ------------------------------------ - - ! Which way is up? - ! Level Planck sources for upward and downward radiation - ! When top_at_1, lev_source_up => lev_source_dec - ! 
lev_source_dn => lev_source_inc, and vice-versa - if(top_at_1) then - top_level = 1 - lev_source_up => lev_source_dec - lev_source_dn => lev_source_inc - else - top_level = nlay+1 - lev_source_up => lev_source_inc - lev_source_dn => lev_source_dec - end if - - !$acc enter data copyin(d,tau,sfc_src,sfc_emis,lev_source_dec,lev_source_inc,lay_source,radn_dn) - !$acc enter data copyin(scaling) - !$acc enter data create(tau_loc,trans,source_dn,source_up,source_sfc,sfc_albedo,radn_up) - !$acc enter data create(sfcSource, An, Cn) - !$acc enter data attach(lev_source_up,lev_source_dn) - - !$acc enter data create(rad_up_Jac, rad_dn_Jac, source_sfcJac) - !$acc enter data copyin(sfc_srcJac) - - ! NOTE: This kernel produces small differences between GPU and CPU - ! implementations on Ascent with PGI, we assume due to floating point - ! differences in the exp() function. These differences are small in the - ! RFMIP test case (10^-6). - !$acc parallel loop collapse(3) - do igpt = 1, ngpt - do ilev = 1, nlay - do icol = 1, ncol - ! - ! Optical path and transmission, used in source function and transport calculations - ! - tau_loc(icol,ilev,igpt) = tau(icol,ilev,igpt)*D(icol,igpt) - trans (icol,ilev,igpt) = exp(-tau_loc(icol,ilev,igpt)) - ! here scaling is used to store parameter wb/[(]1-w(1-b)] of Eq.21 of the Tang's paper - ! explanation of factor 0.4 note A of Table - Cn(icol,ilev,igpt) = 0.4_wp*scaling(icol,ilev,igpt) - An(icol,ilev,igpt) = (1._wp-trans(icol,ilev,igpt)*trans(icol,ilev,igpt)) - - ! initialize radn_dn_Jac - rad_dn_Jac(icol,ilev,igpt) = 0._wp - end do - end do - end do - - !$acc parallel loop collapse(2) - do igpt = 1, ngpt - do icol = 1, ncol - ! - ! Surface albedo, surface source function - ! - sfc_albedo (icol,igpt) = 1._wp - sfc_emis(icol,igpt) - source_sfc (icol,igpt) = sfc_emis(icol,igpt) * sfc_src (icol,igpt) - source_sfcJac(icol,igpt) = sfc_emis(icol,igpt) * sfc_srcJac(icol,igpt) - end do - end do - - - ! - ! Source function for diffuse radiation - ! 
- call lw_source_noscat(ncol, nlay, ngpt, & - lay_source, lev_source_up, lev_source_dn, & - tau_loc, trans, source_dn, source_up) - - ! - ! Transport - ! - ! compute no-scattering fluxes - call lw_transport_noscat(ncol, nlay, ngpt, top_at_1, & - tau_loc, trans, sfc_albedo, source_dn, source_up, source_sfc, & - radn_up, radn_dn,& - source_sfcJac, rad_up_Jac) - ! make adjustment - call lw_transport_1rescl(ncol, nlay, ngpt, top_at_1, & - tau_loc, trans, & - sfc_albedo, source_dn, source_up, & - radn_up, radn_dn, An, Cn, rad_up_Jac, rad_dn_Jac) - - !$acc exit data copyout(rad_up_Jac, rad_dn_Jac) - !$acc exit data delete(source_sfcJac, sfc_srcJac) - - - !$acc exit data copyout(radn_dn,radn_up) - !$acc exit data delete(sfcSource, An, Cn) - !$acc exit data delete(scaling) - !$acc exit data delete(d,tau,sfc_src,sfc_emis,lev_source_dec,lev_source_inc,lay_source,tau_loc,trans,source_dn,source_up,source_sfc,sfc_albedo) - !$acc exit data detach(lev_source_up,lev_source_dn) - - end subroutine lw_solver_1rescl -! ------------------------------------------------------------------------------------------------- -! -! Similar to lw_solver_noscat_GaussQuad. -! It is main solver to use the Tang approximation for fluxes -! In addition to the no scattering input parameters the user must provide -! scattering related properties (ssa and g) that the solver uses to compute scaling -! -! --------------------------------------------------------------- - subroutine lw_solver_1rescl_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weights, & - tau, ssa, g, lay_source, lev_source_inc, lev_source_dec, & - sfc_emis, sfc_src,& - flux_up, flux_dn, & - sfc_src_Jac, flux_up_Jac, flux_dn_Jac) & - bind(C, name="lw_solver_1rescl_GaussQuad") - integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points - logical(wl), intent(in ) :: top_at_1 - integer, intent(in ) :: nmus ! number of quadrature angles - real(wp), dimension(nmus), intent(in ) :: Ds, weights ! 
quadrature secants, weights - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: tau ! Optical thickness, - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: ssa ! single-scattering albedo, - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: g ! asymmetry parameter [] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: lay_source ! Planck source at layer average temperature [W/m2] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: lev_source_inc - ! Planck source at layer edge for radiation in increasing ilay direction [W/m2] - ! Includes spectral weighting that accounts for state-dependent frequency to g-space mapping - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: lev_source_dec - ! Planck source at layer edge for radiation in decreasing ilay direction [W/m2] - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_emis ! Surface emissivity [] - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_src ! Surface source function [W/m2] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: flux_up ! Radiances [W/m2-str] - real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: flux_dn ! Top level must contain incident flux boundary condition - - real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_src_Jac ! surface temperature Jacobian of surface source function [W/m2/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent(out) :: flux_up_Jac ! surface temperature Jacobian of Radiances [W/m2-str / K] - real(wp), dimension(ncol,nlay+1,ngpt), intent(out) :: flux_dn_Jac ! surface temperature Jacobian of Radiances [W/m2-str / K] - - ! Local variables - real(wp), dimension(ncol,nlay+1,ngpt) :: radn_dn, radn_up ! Fluxes per quad angle - real(wp), dimension(ncol, ngpt) :: Ds_ncol - real(wp), dimension(ncol,nlay+1,ngpt) :: radn_dn_Jac, radn_up_Jac ! Surface temperature Jsacobians per quad angle - - integer :: imu, top_level,icol,ilev,igpt - real :: weight - - real(wp), dimension(ncol,ngpt) :: fluxTOA ! 
downward flux at TOA - real(wp), dimension(ncol,nlay, ngpt) :: tauLoc ! rescaled Tau - real(wp), dimension(ncol,nlay, ngpt) :: scaling ! scaling - real(wp), parameter :: tresh=1.0_wp - 1e-6_wp - - !$acc enter data copyin(Ds,weights,tau,ssa,g,lay_source,lev_source_inc,lev_source_dec,sfc_emis,sfc_src,flux_dn,sfc_src_Jac) - !$acc enter data create(flux_up,radn_dn,radn_up,Ds_ncol, scaling, tauLoc,flux_up_Jac,flux_dn_Jac,radn_dn_Jac, radn_up_Jac) - - - ! Tang rescaling - if (any(ssa*g >= tresh)) then - call scaling_1rescl_safe(ncol, nlay, ngpt, tauLoc, scaling, tau, ssa, g) - else - call scaling_1rescl(ncol, nlay, ngpt, tauLoc, scaling, tau, ssa, g) - endif - - ! ------------------------------------ - ! - ! For the first angle output arrays store total flux - ! - top_level = MERGE(1, nlay+1, top_at_1) - ! store TOA flux - fluxTOA = flux_dn(1:ncol, top_level, 1:ngpt) - - Ds_ncol(:,:) = Ds(1) - weight = 2._wp*pi*weights(1) - ! Transport is for intensity - ! convert flux at top of domain to intensity assuming azimuthal isotropy - ! 
- radn_dn(1:ncol, top_level, 1:ngpt) = fluxTOA(1:ncol, 1:ngpt) / weight - - call lw_solver_1rescl(ncol, nlay, ngpt, & - top_at_1, Ds_ncol, tauLoc, scaling, & - lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & - flux_up, flux_dn,sfc_src_Jac,flux_up_Jac,flux_dn_Jac) - !$acc parallel loop collapse(3) - do igpt = 1, ngpt - do ilev = 1, nlay+1 - do icol = 1, ncol - flux_up (icol,ilev,igpt) = weight*flux_up (icol,ilev,igpt) - flux_dn (icol,ilev,igpt) = weight*flux_dn (icol,ilev,igpt) - flux_up_Jac(icol,ilev,igpt) = weight*flux_up_Jac(icol,ilev,igpt) - flux_dn_Jac(icol,ilev,igpt) = weight*flux_dn_Jac(icol,ilev,igpt) - enddo - enddo - enddo - - do imu = 2, nmus - Ds_ncol(:,:) = Ds(imu) - weight = 2._wp*pi*weights(imu) - radn_dn(1:ncol, top_level, 1:ngpt) = fluxTOA(1:ncol, 1:ngpt) / weight - call lw_solver_1rescl(ncol, nlay, ngpt, & - top_at_1, Ds_ncol, tauLoc, scaling, & - lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & - radn_up, radn_dn,sfc_src_Jac,radn_up_Jac,radn_dn_Jac) - - !$acc parallel loop collapse(3) - do igpt = 1, ngpt - do ilev = 1, nlay+1 - do icol = 1, ncol - flux_up (icol,ilev,igpt) = flux_up (icol,ilev,igpt) + weight*radn_up (icol,ilev,igpt) - flux_dn (icol,ilev,igpt) = flux_dn (icol,ilev,igpt) + weight*radn_dn (icol,ilev,igpt) - flux_up_Jac(icol,ilev,igpt) = flux_up_Jac(icol,ilev,igpt) + weight*radn_up_Jac(icol,ilev,igpt) - flux_dn_Jac(icol,ilev,igpt) = flux_dn_Jac(icol,ilev,igpt) + weight*radn_dn_Jac(icol,ilev,igpt) - enddo - enddo - enddo - - end do - !$acc exit data delete(sfc_src_Jac,radn_dn_Jac, radn_up_Jac) - !$acc exit data copyout(flux_up_Jac,flux_dn_Jac) - !$acc exit data copyout(flux_up,flux_dn) - !$acc exit data delete(Ds,weights,tau,ssa,g,tauLoc,scaling,lay_source,lev_source_inc,lev_source_dec,sfc_emis,sfc_src,radn_dn,radn_up,Ds_ncol) - end subroutine lw_solver_1rescl_GaussQuad -! ------------------------------------------------------------------------------------------------- -! -! 
Computes Tang scaling of layer optical thickness and scaling parameter -! unsafe if ssa*g =1. -! -! --------------------------------------------------------------- - pure subroutine scaling_1rescl(ncol, nlay, ngpt, tauLoc, scaling, tau, ssa, g) - integer , intent(in) :: ncol - integer , intent(in) :: nlay - integer , intent(in) :: ngpt - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: tau - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: ssa - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: g - - real(wp), dimension(ncol, nlay, ngpt), intent(inout) :: tauLoc - real(wp), dimension(ncol, nlay, ngpt), intent(inout) :: scaling - - integer :: icol, ilay, igpt - real(wp) :: wb, ssal, scaleTau - !$acc enter data copyin(tau, ssa, g) - !$acc enter data create(tauLoc, scaling) - !$acc parallel loop collapse(3) - do igpt=1,ngpt - do ilay=1,nlay - do icol=1,ncol - ssal = ssa(icol, ilay, igpt) - wb = ssal*(1._wp - g(icol, ilay, igpt)) / 2._wp - scaleTau = (1._wp - ssal + wb ) - - tauLoc(icol, ilay, igpt) = scaleTau * tau(icol, ilay, igpt) ! Eq.15 of the paper - ! - ! here scaling is used to store parameter wb/[1-w(1-b)] of Eq.21 of the Tang's paper - ! actually it is in line of parameter rescaling defined in Eq.7 - ! potentialy if g=ssa=1 then wb/scaleTau = NaN - ! it should not happen - scaling(icol, ilay, igpt) = wb / scaleTau - enddo - enddo - enddo - !$acc exit data copyout(tauLoc, scaling) - !$acc exit data delete(tau, ssa, g) - end subroutine scaling_1rescl -! ------------------------------------------------------------------------------------------------- -! -! Computes Tang scaling of layer optical thickness and scaling parameter -! Safe implementation -! -! 
--------------------------------------------------------------- - pure subroutine scaling_1rescl_safe(ncol, nlay, ngpt, tauLoc, scaling, tau, ssa, g) - integer , intent(in) :: ncol - integer , intent(in) :: nlay - integer , intent(in) :: ngpt - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: tau - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: ssa - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: g - - real(wp), dimension(ncol, nlay, ngpt), intent(inout) :: tauLoc - real(wp), dimension(ncol, nlay, ngpt), intent(inout) :: scaling - - integer :: icol, ilay, igpt - real(wp) :: wb, ssal, scaleTau - !$acc enter data copyin(tau, ssa, g) - !$acc enter data create(tauLoc, scaling) - !$acc parallel loop collapse(3) - do igpt=1,ngpt - do ilay=1,nlay - do icol=1,ncol - ssal = ssa(icol, ilay, igpt) - wb = ssal*(1._wp - g(icol, ilay, igpt)) / 2._wp - scaleTau = (1._wp - ssal + wb ) - - tauLoc(icol, ilay, igpt) = scaleTau * tau(icol, ilay, igpt) ! Eq.15 of the paper - ! - ! here scaling is used to store parameter wb/[1-w(1-b)] of Eq.21 of the Tang's paper - ! actually it is in line of parameter rescaling defined in Eq.7 - if (scaleTau < 1e-6_wp) then - scaling(icol, ilay, igpt) = 1.0_wp - else - scaling(icol, ilay, igpt) = wb / scaleTau - endif - enddo - enddo - enddo - !$acc exit data copyout(tauLoc, scaling) - !$acc exit data delete(tau, ssa, g) - end subroutine scaling_1rescl_safe -! ------------------------------------------------------------------------------------------------- -! ! Similar to Longwave no-scattering tarnsport (lw_transport_noscat) ! a) adds adjustment factor based on cloud properties ! @@ -1591,86 +1394,96 @@ end subroutine scaling_1rescl_safe ! the adjustmentFactor computation can be skipped where Cn <= epsilon ! ! 
------------------------------------------------------------------------------------------------- - subroutine lw_transport_1rescl(ncol, nlay, ngpt, top_at_1, & - tau, trans, sfc_albedo, source_dn, source_up, & - radn_up, radn_dn, An, Cn,& - rad_up_Jac, rad_dn_Jac) bind(C, name="lw_transport_1rescl") +subroutine lw_transport_1rescl(ncol, nlay, ngpt, top_at_1, & + trans, source_dn, source_up, & + radn_up, radn_dn, An, Cn, & + do_Jacobians, radn_up_Jac) bind(C, name="lw_transport_1rescl") integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points logical(wl), intent(in ) :: top_at_1 ! - real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: tau, & ! Absorption optical thickness, pre-divided by mu [] - trans ! transmissivity = exp(-tau) - real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_albedo ! Surface albedo + real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: trans ! transmissivity = exp(-tau) real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: source_dn, & source_up ! Diffuse radiation emitted by the layer real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_up ! Radiances [W/m2-str] real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_dn !Top level must contain incident flux boundary condition real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: An, Cn - real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: rad_up_Jac ! Radiances [W/m2-str] - real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: rad_dn_Jac !Top level must contain incident flux boundary condition + logical(wl), intent(in ) :: do_Jacobians + real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_up_Jac ! Radiances [W/m2-str] + ! --------------------------------------------------- ! Local variables integer :: ilev, icol, igpt - ! --------------------------------------------------- real(wp) :: adjustmentFactor + ! --------------------------------------------------- if(top_at_1) then ! ! Top of domain is index 1 ! ! 
Downward propagation !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol - ! 1st Upward propagation + ! Upward propagation do ilev = nlay, 1, -1 - radn_up (icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_up (icol,ilev+1,igpt) + source_up(icol,ilev,igpt) - rad_up_Jac(icol,ilev,igpt) = trans(icol,ilev,igpt)*rad_up_Jac(icol,ilev+1,igpt) - - adjustmentFactor = Cn(icol,ilev,igpt)*& + adjustmentFactor = Cn(icol,ilev,igpt) * & ( An(icol,ilev,igpt)*radn_dn(icol,ilev,igpt) - & - source_dn(icol,ilev,igpt) *trans(icol,ilev,igpt ) - & + source_dn(icol,ilev,igpt)*trans(icol,ilev,igpt ) - & source_up(icol,ilev,igpt)) - radn_up(icol,ilev,igpt) = radn_up(icol,ilev,igpt) + adjustmentFactor + radn_up(icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_up (icol,ilev+1,igpt) + & + source_up(icol,ilev,igpt) + adjustmentFactor enddo + if(do_Jacobians) then + do ilev = nlay, 1, -1 + radn_up_Jac(icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_up_Jac(icol,ilev+1,igpt) + end do + end if + + ! radn_dn_Jac(icol,1,igpt) = 0._wp ! 2nd Downward propagation do ilev = 1, nlay - radn_dn (icol,ilev+1,igpt) = trans(icol,ilev,igpt)*radn_dn (icol,ilev,igpt) + source_dn(icol,ilev,igpt) - rad_dn_Jac(icol,ilev+1,igpt) = trans(icol,ilev,igpt)*rad_dn_Jac(icol,ilev,igpt) + ! radn_dn_Jac(icol,ilev+1,igpt) = trans(icol,ilev,igpt)*radn_dn_Jac(icol,ilev,igpt) adjustmentFactor = Cn(icol,ilev,igpt)*( & An(icol,ilev,igpt)*radn_up(icol,ilev,igpt) - & source_up(icol,ilev,igpt)*trans(icol,ilev,igpt) - & source_dn(icol,ilev,igpt) ) - radn_dn(icol,ilev+1,igpt) = radn_dn(icol,ilev+1,igpt) + adjustmentFactor - - adjustmentFactor = Cn(icol,ilev,igpt)*An(icol,ilev,igpt)*rad_up_Jac(icol,ilev,igpt) - rad_dn_Jac(icol,ilev+1,igpt) = rad_dn_Jac(icol,ilev+1,igpt) + adjustmentFactor + radn_dn(icol,ilev+1,igpt) = trans(icol,ilev,igpt)*radn_dn (icol,ilev, igpt) + & + source_dn(icol,ilev,igpt) + adjustmentFactor + ! 
adjustmentFactor = Cn(icol,ilev,igpt)*An(icol,ilev,igpt)*radn_up_Jac(icol,ilev,igpt) + ! radn_dn_Jac(icol,ilev+1,igpt) = radn_dn_Jac(icol,ilev+1,igpt) + adjustmentFactor enddo enddo enddo else !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol ! Upward propagation do ilev = 1, nlay - radn_up (icol,ilev+1,igpt) = trans(icol,ilev,igpt)*radn_up (icol,ilev,igpt) + source_up(icol,ilev,igpt) - rad_up_Jac(icol,ilev+1,igpt) = trans(icol,ilev,igpt)*rad_up_Jac(icol,ilev,igpt) adjustmentFactor = Cn(icol,ilev,igpt)*& ( An(icol,ilev,igpt)*radn_dn(icol,ilev+1,igpt) - & source_dn(icol,ilev,igpt) *trans(icol,ilev ,igpt) - & source_up(icol,ilev,igpt)) - radn_up(icol,ilev+1,igpt) = radn_up(icol,ilev+1,igpt) + adjustmentFactor + radn_up(icol,ilev+1,igpt) = trans(icol,ilev,igpt)*radn_up (icol,ilev,igpt) + & + source_up(icol,ilev,igpt) + adjustmentFactor end do + if(do_Jacobians) then + do ilev = 1, nlay + radn_up_Jac(icol,ilev+1,igpt) = trans(icol,ilev,igpt)*radn_up_Jac(icol,ilev,igpt) + end do + end if + ! 2st Downward propagation + ! radn_dn_Jac(icol,nlay+1,igpt) = 0._wp do ilev = nlay, 1, -1 - radn_dn (icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_dn (icol,ilev+1,igpt) + source_dn(icol,ilev,igpt) - rad_dn_Jac(icol,ilev,igpt) = trans(icol,ilev,igpt)*rad_dn_Jac(icol,ilev+1,igpt) + ! radn_dn_Jac(icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_dn_Jac(icol,ilev+1,igpt) adjustmentFactor = Cn(icol,ilev,igpt)*( & An(icol,ilev,igpt)*radn_up(icol,ilev,igpt) - & source_up(icol,ilev,igpt)*trans(icol,ilev ,igpt ) - & source_dn(icol,ilev,igpt) ) - radn_dn(icol,ilev,igpt) = radn_dn(icol,ilev,igpt) + adjustmentFactor - - adjustmentFactor = Cn(icol,ilev,igpt)*An(icol,ilev,igpt)*rad_up_Jac(icol,ilev,igpt) - rad_dn_Jac(icol,ilev,igpt) = rad_dn_Jac(icol,ilev,igpt) + adjustmentFactor + radn_dn(icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_dn (icol,ilev+1,igpt) + & + source_dn(icol,ilev,igpt) + adjustmentFactor + ! 
adjustmentFactor = Cn(icol,ilev,igpt)*An(icol,ilev,igpt)*radn_up_Jac(icol,ilev,igpt) + ! radn_dn_Jac(icol,ilev,igpt) = radn_dn_Jac(icol,ilev,igpt) + adjustmentFactor end do enddo enddo diff --git a/rte/kernels/mo_fluxes_broadband_kernels.F90 b/rte/kernels/mo_fluxes_broadband_kernels.F90 index b49c6db99..4dedac741 100644 --- a/rte/kernels/mo_fluxes_broadband_kernels.F90 +++ b/rte/kernels/mo_fluxes_broadband_kernels.F90 @@ -28,7 +28,7 @@ module mo_fluxes_broadband_kernels ! ! Spectral reduction over all points ! - pure subroutine sum_broadband(ncol, nlev, ngpt, spectral_flux, broadband_flux) bind(C, name="sum_broadband") + subroutine sum_broadband(ncol, nlev, ngpt, spectral_flux, broadband_flux) bind(C, name="sum_broadband") integer, intent(in ) :: ncol, nlev, ngpt real(wp), dimension(ncol, nlev, ngpt), intent(in ) :: spectral_flux real(wp), dimension(ncol, nlev), intent(out) :: broadband_flux @@ -37,7 +37,9 @@ pure subroutine sum_broadband(ncol, nlev, ngpt, spectral_flux, broadband_flux) b real(wp) :: bb_flux_s ! local scalar version !$acc enter data copyin(spectral_flux) create(broadband_flux) + !$omp target enter data map(to:spectral_flux) map(alloc:broadband_flux) !$acc parallel loop gang vector collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do ilev = 1, nlev do icol = 1, ncol @@ -51,12 +53,13 @@ pure subroutine sum_broadband(ncol, nlev, ngpt, spectral_flux, broadband_flux) b end do end do !$acc exit data delete(spectral_flux) copyout(broadband_flux) + !$omp target exit data map(release:spectral_flux) map(from:broadband_flux) end subroutine sum_broadband ! ---------------------------------------------------------------------------- ! ! Net flux: Spectral reduction over all points ! 
- pure subroutine net_broadband_full(ncol, nlev, ngpt, spectral_flux_dn, spectral_flux_up, broadband_flux_net) & + subroutine net_broadband_full(ncol, nlev, ngpt, spectral_flux_dn, spectral_flux_up, broadband_flux_net) & bind(C, name="net_broadband_full") integer, intent(in ) :: ncol, nlev, ngpt real(wp), dimension(ncol, nlev, ngpt), intent(in ) :: spectral_flux_dn, spectral_flux_up @@ -66,7 +69,9 @@ pure subroutine net_broadband_full(ncol, nlev, ngpt, spectral_flux_dn, spectral_ real(wp) :: diff !$acc enter data copyin(spectral_flux_dn, spectral_flux_up) create(broadband_flux_net) + !$omp target enter data map(to:spectral_flux_dn, spectral_flux_up) map(alloc:broadband_flux_net) !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do ilev = 1, nlev do icol = 1, ncol diff = spectral_flux_dn(icol, ilev, 1 ) - spectral_flux_up(icol, ilev, 1) @@ -74,22 +79,25 @@ pure subroutine net_broadband_full(ncol, nlev, ngpt, spectral_flux_dn, spectral_ end do end do !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igpt = 2, ngpt do ilev = 1, nlev do icol = 1, ncol diff = spectral_flux_dn(icol, ilev, igpt) - spectral_flux_up(icol, ilev, igpt) !$acc atomic update + !$omp atomic update broadband_flux_net(icol, ilev) = broadband_flux_net(icol, ilev) + diff end do end do end do !$acc exit data delete(spectral_flux_dn, spectral_flux_up) copyout(broadband_flux_net) + !$omp target exit data map(release:spectral_flux_dn, spectral_flux_up) map(from:broadband_flux_net) end subroutine net_broadband_full ! ---------------------------------------------------------------------------- ! ! Net flux when bradband flux up and down are already available ! 
- pure subroutine net_broadband_precalc(ncol, nlev, flux_dn, flux_up, broadband_flux_net) & + subroutine net_broadband_precalc(ncol, nlev, flux_dn, flux_up, broadband_flux_net) & bind(C, name="net_broadband_precalc") integer, intent(in ) :: ncol, nlev real(wp), dimension(ncol, nlev), intent(in ) :: flux_dn, flux_up @@ -97,13 +105,16 @@ pure subroutine net_broadband_precalc(ncol, nlev, flux_dn, flux_up, broadband_fl integer :: icol, ilev !$acc enter data copyin(flux_dn, flux_up) create(broadband_flux_net) + !$omp target enter data map(to:flux_dn, flux_up) map(alloc:broadband_flux_net) !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do ilev = 1, nlev do icol = 1, ncol broadband_flux_net(icol,ilev) = flux_dn(icol,ilev) - flux_up(icol,ilev) end do end do !$acc exit data delete(flux_dn, flux_up) copyout(broadband_flux_net) + !$omp target exit data map(release:flux_dn, flux_up) map(from:broadband_flux_net) end subroutine net_broadband_precalc ! ---------------------------------------------------------------------------- end module mo_fluxes_broadband_kernels diff --git a/rte/kernels/mo_rte_solver_kernels.F90 b/rte/kernels/mo_rte_solver_kernels.F90 index a88391536..f20557cda 100644 --- a/rte/kernels/mo_rte_solver_kernels.F90 +++ b/rte/kernels/mo_rte_solver_kernels.F90 @@ -39,8 +39,6 @@ module mo_rte_solver_kernels lw_solver_noscat, lw_solver_noscat_GaussQuad, lw_solver_2stream, & sw_solver_noscat, sw_solver_2stream - public :: lw_solver_1rescl_GaussQuad, lw_solver_1rescl - ! These routines don't really need to be visible but making them so is useful for testing. 
public :: lw_source_noscat, lw_combine_sources, & lw_source_2str, sw_source_2str, & @@ -63,7 +61,8 @@ module mo_rte_solver_kernels subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, & tau, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & radn_up, radn_dn, & - sfc_srcJac, radn_upJac) bind(C, name="lw_solver_noscat") + do_Jacobians, sfc_srcJac, radn_upJac, & + do_rescaling, ssa, g) bind(C, name="lw_solver_noscat") integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points logical(wl), intent(in ) :: top_at_1 real(wp), dimension(ncol, ngpt), intent(in ) :: D ! secant of propagation angle [] @@ -80,19 +79,41 @@ subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: radn_up ! Radiances [W/m2-str] real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_dn ! Top level must contain incident flux boundary condition - real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_srcJac ! surface temperature Jacobian of surface source function [W/m2/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent(out) :: radn_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] - + ! + ! Optional variables - arrays aren't referenced if corresponding logical == False + ! + logical(wl), intent(in ) :: do_Jacobians + real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_srcJac ! surface temperature Jacobian of surface source function [W/m2/K] + real(wp), dimension(ncol,nlay+1 ), intent( out) :: radn_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] + logical(wl), intent(in ) :: do_rescaling + real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: ssa, g ! single-scattering albedo, asymmetry parameter + ! ------------------------------------ ! Local variables, no g-point dependency + ! real(wp), dimension(ncol,nlay) :: tau_loc, & ! path length (tau/mu) - trans ! transmissivity = exp(-tau) + trans ! 
transmissivity = exp(-tau) real(wp), dimension(ncol,nlay) :: source_dn, source_up - real(wp), dimension(ncol ) :: source_sfc, sfc_albedo, source_sfcJac + real(wp), dimension(ncol ) :: sfc_albedo real(wp), dimension(:,:,:), pointer :: lev_source_up, lev_source_dn ! Mapping increasing/decreasing indicies to up/down real(wp), parameter :: pi = acos(-1._wp) - integer :: ilev, igpt, top_level + integer :: icol, ilay, igpt, top_level, sfc_level + ! ------------------------------------------------------------------------------------------------- + ! Optionally, use an approximate treatment of scattering using rescaling + ! Implemented based on the paper + ! Tang G, et al, 2018: https://doi.org/10.1175/JAS-D-18-0014.1 + ! a) relies on rescaling of the optical parameters based on asymetry factor and single scattering albedo + ! scaling can be computed by scaling_1rescl + ! b) adds adustment term based on cloud properties (lw_transport_1rescl) + ! adustment terms is computed based on solution of the Tang equations + ! for "linear-in-tau" internal source (not in the paper) + ! + ! Used when approximating scattering + ! + real(wp) :: ssal, wb, scaleTau + real(wp), dimension(ncol,nlay ) :: An, Cn + real(wp), dimension(ncol,nlay+1) :: gpt_flux_Jac ! ------------------------------------ ! Which way is up? ! Level Planck sources for upward and downward radiation @@ -100,10 +121,12 @@ subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, ! lev_source_dn => lev_source_inc, and vice-versa if(top_at_1) then top_level = 1 + sfc_level = nlay+1 lev_source_up => lev_source_dec lev_source_dn => lev_source_inc else top_level = nlay+1 + sfc_level = 1 lev_source_up => lev_source_inc lev_source_dn => lev_source_dec end if @@ -118,10 +141,35 @@ subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, ! ! Optical path and transmission, used in source function and transport calculations ! 
- do ilev = 1, nlay - tau_loc(:,ilev) = tau(:,ilev,igpt)*D(:,igpt) - trans (:,ilev) = exp(-tau_loc(:,ilev)) - end do + if (do_rescaling) then + ! + ! The scaling and scaleTau terms are independent of propagation + ! angle D and could be pre-computed if several values of D are used + ! We re-compute them here to keep not have to localize memory use + ! + do ilay = 1, nlay + do icol = 1, ncol + ssal = ssa(icol, ilay, igpt) + wb = ssal*(1._wp - g(icol, ilay, igpt)) * 0.5_wp + scaleTau = (1._wp - ssal + wb) + ! here wb/scaleTau is parameter wb/(1-w(1-b)) of Eq.21 of the Tang paper + ! actually it is in line of parameter rescaling defined in Eq.7 + ! potentialy if g=ssa=1 then wb/scaleTau = NaN + ! it should not happen because g is never 1 in atmospheres + ! explanation of factor 0.4 note A of Table + Cn(icol,ilay) = 0.4_wp*wb/scaleTau + ! Eq.15 of the paper, multiplied by path length + tau_loc(icol,ilay) = tau(icol,ilay,igpt)*D(icol,igpt)*scaleTau + end do + trans (:,ilay) = exp(-tau_loc(:,ilay)) + An(:,ilay) = (1._wp-trans(:,ilay)**2) + end do + else + do ilay = 1, nlay + tau_loc(:,ilay) = tau(:,ilay,igpt)*D(:,igpt) + trans (:,ilay) = exp(-tau_loc(:,ilay)) + end do + end if ! ! Source function for diffuse radiation ! @@ -129,25 +177,47 @@ subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, lay_source(:,:,igpt), lev_source_up(:,:,igpt), lev_source_dn(:,:,igpt), & tau_loc, trans, source_dn, source_up) ! - ! Surface albedo, surface source function + ! Transport down ! - sfc_albedo(:) = 1._wp - sfc_emis(:,igpt) - source_sfc(:) = sfc_emis(:,igpt) * sfc_src(:,igpt) - source_sfcJac(:) = sfc_emis(:,igpt) * sfc_srcJac(:,igpt) + call lw_transport_noscat_dn(ncol, nlay, top_at_1, trans, source_dn, radn_dn(:,:,igpt)) ! - ! Transport + ! Surface albedo, surface source function, reflection and emission + ! 
+ sfc_albedo(:) = 1._wp - sfc_emis(:,igpt) + radn_up (:,sfc_level,igpt) = radn_dn(:,sfc_level,igpt)*sfc_albedo(:) + & + sfc_emis(:,igpt) * sfc_src(:,igpt) + if(do_Jacobians) & + gpt_flux_Jac(:,sfc_level) = sfc_emis(:,igpt) * sfc_srcJac(:,igpt) ! - call lw_transport_noscat(ncol, nlay, top_at_1, & - tau_loc, trans, sfc_albedo, source_dn, source_up, source_sfc, & - radn_up(:,:,igpt), radn_dn(:,:,igpt), & - source_sfcJac, radn_upJac(:,:,igpt)) + ! Transport up, or up and down again if using rescaling + ! + if(do_rescaling) then + call lw_transport_1rescl(ncol, nlay, top_at_1, trans, & + source_dn, source_up, & + radn_up(:,:,igpt), radn_dn(:,:,igpt), An, Cn, & + do_Jacobians, gpt_flux_Jac) ! Standing in for Jacobian, i.e. rad_up_Jac(:,:,igpt), rad_dn_Jac(:,:,igpt)) + else + call lw_transport_noscat_up(ncol, nlay, top_at_1, trans, source_up, radn_up(:,:,igpt), & + do_Jacobians, gpt_flux_Jac) + end if + ! ! Convert intensity to flux assuming azimuthal isotropy and quadrature weight ! radn_dn (:,:,igpt) = 2._wp * pi * weight * radn_dn (:,:,igpt) radn_up (:,:,igpt) = 2._wp * pi * weight * radn_up (:,:,igpt) - radn_upJac(:,:,igpt) = 2._wp * pi * weight * radn_upJac(:,:,igpt) + ! + ! Only broadband-integrated Jacobians are provided + ! + if(do_Jacobians) then + if(igpt == 1) then + radn_upJac(:,:) = gpt_flux_Jac(:,:) + else + radn_upJac(:,:) = radn_upJac(:,:) + gpt_flux_Jac(:,:) + end if + end if end do ! g point loop + if(do_Jacobians) radn_upJac(:,:) = 2._wp * pi * weight * radn_upJac(:,:) end subroutine lw_solver_noscat ! ------------------------------------------------------------------------------------------------- @@ -159,7 +229,8 @@ end subroutine lw_solver_noscat ! 
--------------------------------------------------------------- subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weights, & tau, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, flux_up, flux_dn,& - sfc_srcJac, flux_upJac) & + do_Jacobians, sfc_srcJac, flux_upJac, & + do_rescaling, ssa, g) & bind(C, name="lw_solver_noscat_GaussQuad") integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points logical(wl), intent(in ) :: top_at_1 @@ -177,12 +248,21 @@ subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weig real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: flux_up ! Radiances [W/m2-str] real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: flux_dn ! Top level must contain incident flux boundary condition - real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_srcJac ! surface temperature Jacobian of surface source function [W/m2/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent(out) :: flux_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] + ! + ! Optional variables - arrays aren't referenced if corresponding logical == False + ! + logical(wl), intent(in ) :: do_Jacobians + real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_srcJac ! surface temperature Jacobian of surface source function [W/m2/K] + real(wp), dimension(ncol,nlay+1 ), intent(out ) :: flux_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] + logical(wl), intent(in ) :: do_rescaling + real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: ssa, g ! single-scattering albedo, asymmetry parameter + ! ------------------------------------ + ! ! Local variables + ! real(wp), dimension(ncol,nlay+1,ngpt) :: radn_dn, radn_up ! Fluxes per quad angle real(wp), dimension(ncol, ngpt) :: Ds_ncol - real(wp), dimension(ncol,nlay+1,ngpt) :: radn_upJac ! perturbed Fluxes per quad angle + real(wp), dimension(ncol,nlay+1 ) :: radn_upJac ! perturbed Fluxes per quad angle integer :: imu, top_level ! 
------------------------------------ @@ -194,23 +274,25 @@ subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weig top_at_1, Ds_ncol, weights(1), tau, & lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & flux_up, flux_dn, & - sfc_srcJac, flux_upJac) + do_Jacobians, sfc_srcJac, flux_upJac, & + do_rescaling, ssa, g) ! ! For more than one angle use local arrays ! top_level = MERGE(1, nlay+1, top_at_1) call apply_BC(ncol, nlay, ngpt, top_at_1, flux_dn(:,top_level,:), radn_dn) - do imu = 2, nmus Ds_ncol(:,:) = Ds(imu) call lw_solver_noscat(ncol, nlay, ngpt, & top_at_1, Ds_ncol, weights(imu), tau, & lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & radn_up, radn_dn, & - sfc_srcJac, radn_upJac) + do_Jacobians, sfc_srcJac, radn_upJac, & + do_rescaling, ssa, g) flux_up (:,:,:) = flux_up (:,:,:) + radn_up (:,:,:) flux_dn (:,:,:) = flux_dn (:,:,:) + radn_dn (:,:,:) - flux_upJac(:,:,:) = flux_upJac(:,:,:) + radn_upJac(:,:,:) + if (do_Jacobians) & + flux_upJac(:,:) = flux_upJac(:,: ) + radn_upJac(:,: ) end do end subroutine lw_solver_noscat_GaussQuad @@ -440,26 +522,18 @@ subroutine lw_source_noscat(ncol, nlay, lay_source, lev_source_up, lev_source_dn end subroutine lw_source_noscat ! ------------------------------------------------------------------------------------------------- ! - ! Longwave no-scattering transport + ! Longwave no-scattering transport - separate routines for up and down ! ! ------------------------------------------------------------------------------------------------- - subroutine lw_transport_noscat(ncol, nlay, top_at_1, & - tau, trans, sfc_albedo, source_dn, source_up, source_sfc, & - radn_up, radn_dn,& - source_sfcJac, radn_upJac) bind(C, name="lw_transport_noscat") + subroutine lw_transport_noscat_dn(ncol, nlay, top_at_1, & + trans, source_dn, radn_dn) bind(C, name="lw_transport_noscat_dn") integer, intent(in ) :: ncol, nlay ! 
Number of columns, layers, g-points logical(wl), intent(in ) :: top_at_1 ! - real(wp), dimension(ncol,nlay ), intent(in ) :: tau, & ! Absorption optical thickness, pre-divided by mu [] - trans ! transmissivity = exp(-tau) - real(wp), dimension(ncol ), intent(in ) :: sfc_albedo ! Surface albedo - real(wp), dimension(ncol,nlay ), intent(in ) :: source_dn, & - source_up ! Diffuse radiation emitted by the layer - real(wp), dimension(ncol ), intent(in ) :: source_sfc ! Surface source function [W/m2] - real(wp), dimension(ncol,nlay+1), intent( out) :: radn_up ! Radiances [W/m2-str] - real(wp), dimension(ncol,nlay+1), intent(inout) :: radn_dn !Top level must contain incident flux boundary condition + real(wp), dimension(ncol,nlay ), intent(in ) :: trans ! transmissivity = exp(-tau) + real(wp), dimension(ncol,nlay ), intent(in ) :: source_dn ! Diffuse radiation emitted by the layer + real(wp), dimension(ncol,nlay+1), intent(inout) :: radn_dn ! Radiances [W/m2-str] Top level must contain incident flux boundary condition - real(wp), dimension(ncol ), intent(in ) :: source_sfcJac ! surface temperature Jacobian of surface source function [W/m2/K] - real(wp), dimension(ncol,nlay+1), intent(out) :: radn_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] + ! --------------------------------------------------- ! Local variables integer :: ilev ! --------------------------------------------------- @@ -467,40 +541,55 @@ subroutine lw_transport_noscat(ncol, nlay, top_at_1, & ! ! Top of domain is index 1 ! - ! Downward propagation do ilev = 2, nlay+1 radn_dn(:,ilev) = trans(:,ilev-1)*radn_dn(:,ilev-1) + source_dn(:,ilev-1) end do - - ! Surface reflection and emission - radn_up (:,nlay+1) = radn_dn(:,nlay+1)*sfc_albedo(:) + source_sfc (:) - radn_upJac(:,nlay+1) = source_sfcJac(:) - - ! 
Upward propagation - do ilev = nlay, 1, -1 - radn_up (:,ilev) = trans(:,ilev )*radn_up (:,ilev+1) + source_up(:,ilev) - radn_upJac(:,ilev) = trans(:,ilev )*radn_upJac(:,ilev+1) - end do else ! ! Top of domain is index nlay+1 ! - ! Downward propagation do ilev = nlay, 1, -1 radn_dn(:,ilev) = trans(:,ilev )*radn_dn(:,ilev+1) + source_dn(:,ilev) end do + end if + end subroutine lw_transport_noscat_dn + ! ------------------------------------------------------------------------------------------------- + subroutine lw_transport_noscat_up(ncol, nlay, top_at_1, & + trans, source_up, radn_up, do_Jacobians, radn_upJac) bind(C, name="lw_transport_noscat_up") + integer, intent(in ) :: ncol, nlay ! Number of columns, layers, g-points + logical(wl), intent(in ) :: top_at_1 ! + real(wp), dimension(ncol,nlay ), intent(in ) :: trans ! transmissivity = exp(-tau) + real(wp), dimension(ncol,nlay ), intent(in ) :: source_up ! Diffuse radiation emitted by the layer + real(wp), dimension(ncol,nlay+1), intent(inout) :: radn_up ! Radiances [W/m2-str] Top level must contain incident flux boundary condition + logical(wl), intent(in ) :: do_Jacobians + real(wp), dimension(ncol,nlay+1), intent( out) :: radn_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] - ! Surface reflection and emission - radn_up (:, 1) = radn_dn(:,1)*sfc_albedo(:) + source_sfc (:) - radn_upJac(:, 1) = source_sfcJac(:) - + ! --------------------------------------------------- + ! Local variables + integer :: ilev + ! --------------------------------------------------- + if(top_at_1) then + ! + ! Top of domain is index 1 + ! + ! Upward propagation + do ilev = nlay, 1, -1 + radn_up (:,ilev) = trans(:,ilev )*radn_up (:,ilev+1) + source_up(:,ilev) + if(do_Jacobians) & + radn_upJac(:,ilev) = trans(:,ilev )*radn_upJac(:,ilev+1) + end do + else + ! + ! Top of domain is index nlay+1 + ! ! 
Upward propagation do ilev = 2, nlay+1 - radn_up (:,ilev) = trans(:,ilev-1) * radn_up (:,ilev-1) + source_up(:,ilev-1) - radn_upJac(:,ilev) = trans(:,ilev-1) * radn_upJac(:,ilev-1) + radn_up (:,ilev) = trans(:,ilev-1) * radn_up (:,ilev-1) + source_up(:,ilev-1) + if(do_Jacobians) & + radn_upJac(:,ilev) = trans(:,ilev-1) * radn_upJac(:,ilev-1) end do end if - end subroutine lw_transport_noscat + end subroutine lw_transport_noscat_up ! ------------------------------------------------------------------------------------------------- ! ! Longwave two-stream solutions to diffuse reflectance and transmittance for a layer @@ -537,16 +626,14 @@ pure subroutine lw_two_stream(ncol, nlay, tau, w0, g, & ! gamma1(i,j)= LW_diff_sec * (1._wp - 0.5_wp * w0(i,j) * (1._wp + g(i,j))) ! Fu et al. Eq 2.9 gamma2(i,j)= LW_diff_sec * 0.5_wp * w0(i,j) * (1._wp - g(i,j)) ! Fu et al. Eq 2.10 + ! Eq 18; k = SQRT(gamma1**2 - gamma2**2), limited below to avoid div by 0. + ! k = 0 for isotropic, conservative scattering; this lower limit on k + ! gives relative error with respect to conservative solution + ! of < 0.1% in Rdif down to tau = 10^-9 + k(i) = sqrt(max((gamma1(i,j) - gamma2(i,j)) * (gamma1(i,j) + gamma2(i,j)), 1.e-12_wp)) end do - ! Written to encourage vectorization of exponential, square root - ! Eq 18; k = SQRT(gamma1**2 - gamma2**2), limited below to avoid div by 0. - ! k = 0 for isotropic, conservative scattering; this lower limit on k - ! gives relative error with respect to conservative solution - ! of < 0.1% in Rdif down to tau = 10^-9 - k(1:ncol) = sqrt(max((gamma1(1:ncol,j) - gamma2(1:ncol,j)) * & - (gamma1(1:ncol,j) + gamma2(1:ncol,j)), & - 1.e-12_wp)) + ! Written to encourage vectorization of exponential exp_minusktau(1:ncol) = exp(-tau(1:ncol,j)*k(1:ncol)) ! @@ -705,16 +792,14 @@ pure subroutine sw_two_stream(ncol, nlay, mu0, tau, w0, g, & alpha1(i) = gamma1(i) * gamma4(i) + gamma2(i) * gamma3(i) ! Eq. 16 alpha2(i) = gamma1(i) * gamma3(i) + gamma2(i) * gamma4(i) ! Eq. 
17 + ! Eq 18; k = SQRT(gamma1**2 - gamma2**2), limited below to avoid div by 0. + ! k = 0 for isotropic, conservative scattering; this lower limit on k + ! gives relative error with respect to conservative solution + ! of < 0.1% in Rdif down to tau = 10^-9 + k(i) = sqrt(max((gamma1(i) - gamma2(i)) * (gamma1(i) + gamma2(i)), 1.e-12_wp)) end do - ! Written to encourage vectorization of exponential, square root - ! Eq 18; k = SQRT(gamma1**2 - gamma2**2), limited below to avoid div by 0. - ! k = 0 for isotropic, conservative scattering; this lower limit on k - ! gives relative error with respect to conservative solution - ! of < 0.1% in Rdif down to tau = 10^-9 - k(1:ncol) = sqrt(max((gamma1(1:ncol) - gamma2(1:ncol)) * & - (gamma1(1:ncol) + gamma2(1:ncol)), & - 1.e-12_wp)) + ! Written to encourage vectorization of exponential exp_minusktau(1:ncol) = exp(-tau(1:ncol,j)*k(1:ncol)) ! @@ -977,293 +1062,6 @@ pure subroutine apply_BC_0(ncol, nlay, ngpt, top_at_1, flux_dn) bind (C, name="a end subroutine apply_BC_0 ! ------------------------------------------------------------------------------------------------- ! -! Similar to Longwave no-scattering (lw_solver_noscat) -! a) relies on rescaling of the optical parameters based on asymetry factor and single scattering albedo -! scaling can be computed by scaling_1rescl -! b) adds adustment term based on cloud properties (lw_transport_1rescl) -! adustment terms is computed based on solution of the Tang equations -! for "linear-in-tau" internal source (not in the paper) -! -! Attention: -! use must prceompute scaling before colling the function -! -! Implemented based on the paper -! Tang G, et al, 2018: https://doi.org/10.1175/JAS-D-18-0014.1 -! -! 
------------------------------------------------------------------------------------------------- -subroutine lw_solver_1rescl(ncol, nlay, ngpt, top_at_1, D, & - tau, scaling, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & - radn_up, radn_dn, & - sfc_srcJac, rad_up_Jac, rad_dn_Jac) bind(C, name="lw_solver_1rescl") - integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points - logical(wl), intent(in ) :: top_at_1 - real(wp), dimension(ncol, ngpt), intent(in ) :: D ! secant of propagation angle [] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: tau ! Absorption optical thickness [] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: scaling ! single scattering albedo [] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: lay_source ! Planck source at layer average temperature [W/m2] - ! Planck source at layer edge for radiation in increasing/decreasing ilay direction - ! lev_source_dec applies the mapping in layer i to the Planck function at layer i - ! lev_source_inc applies the mapping in layer i to the Planck function at layer i+1 - real(wp), dimension(ncol,nlay, ngpt), target, & - intent(in ) :: lev_source_inc, lev_source_dec - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_emis ! Surface emissivity [] - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_src ! Surface source function [W/m2] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: radn_up ! Radiances [W/m2-str] - real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_dn ! Top level must contain incident flux boundary condition - - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_srcJac ! Surface Temperature Jacobian source function [W/m2/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: rad_up_Jac ! Surface Temperature Jacobians [W/m2-str/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: rad_dn_Jac ! Top level set to 0 - - ! Local variables, no g-point dependency - real(wp), dimension(ncol,nlay) :: tau_loc, & ! 
path length (tau/mu) - trans ! transmissivity = exp(-tau) - real(wp), dimension(ncol,nlay) :: source_dn, source_up - real(wp), dimension(ncol ) :: source_sfc, sfc_albedo - real(wp), dimension(ncol,nlay) :: An, Cn - real(wp), dimension(ncol,nlay+1) :: dummyRadn_upJac - - real(wp), dimension(ncol,nlay+1) :: Radn_upJac - real(wp), dimension(ncol,nlay+1) :: Radn_dnJac - real(wp), dimension(ncol) :: source_sfcJac - - real(wp), dimension(:,:,:), pointer :: lev_source_up, lev_source_dn ! Mapping increasing/decreasing indicies to up/down - - real(wp), parameter :: pi = acos(-1._wp) - integer :: ilev, igpt, top_level - ! ------------------------------------ - real(wp), parameter :: tau_thresh = sqrt(epsilon(tau)) - ! ------------------------------------ - - ! Which way is up? - ! Level Planck sources for upward and downward radiation - ! When top_at_1, lev_source_up => lev_source_dec - ! lev_source_dn => lev_source_inc, and vice-versa - if(top_at_1) then - top_level = 1 - lev_source_up => lev_source_dec - lev_source_dn => lev_source_inc - else - top_level = nlay+1 - lev_source_up => lev_source_inc - lev_source_dn => lev_source_dec - end if - - do igpt = 1, ngpt - ! - ! Optical path and transmission, used in source function and transport calculations - ! - do ilev = 1, nlay - tau_loc(:,ilev) = tau(:,ilev,igpt)*D(:,igpt) - trans (:,ilev) = exp(-tau_loc(:,ilev)) - ! - ! here scaling is used to store parameter wb/(1-w(1-b)) of Eq.21 of the Tang's paper - ! explanation of factor 0.4 note A of Table - ! - Cn(:,ilev) = 0.4_wp*scaling(:,ilev,igpt) - An(:,ilev) = (1._wp-trans(:,ilev)*trans(:,ilev)) - end do - - ! Source function for diffuse radiation - ! - call lw_source_noscat(ncol, nlay, & - lay_source(:,:,igpt), lev_source_up(:,:,igpt), lev_source_dn(:,:,igpt), & - tau_loc, trans, source_dn, source_up) - - ! - ! Surface albedo, surface source function - ! 
- sfc_albedo(:) = 1._wp - sfc_emis(:,igpt) - source_sfc(:) = sfc_emis(:,igpt) * sfc_src (:,igpt) - source_sfcJac(:) = sfc_emis(:,igpt) * sfc_srcJac(:,igpt) - ! - ! Transport - ! - ! compute no-scattering fluxes - call lw_transport_noscat(ncol, nlay, top_at_1, & - tau_loc, trans, sfc_albedo, source_dn, source_up, source_sfc, & - radn_up(:,:,igpt), radn_dn(:,:,igpt), & - source_sfcJac, rad_up_Jac(:,:,igpt)) - - rad_dn_Jac(:,:,igpt) = 0._wp - ! make adjustment - call lw_transport_1rescl(ncol, nlay, top_at_1, trans, & - source_dn, source_up, & - radn_up(:,:,igpt), radn_dn(:,:,igpt), An, Cn,& - rad_up_Jac(:,:,igpt), rad_dn_Jac(:,:,igpt)) - - end do ! g point loop -end subroutine lw_solver_1rescl -! ------------------------------------------------------------------------------------------------- -! -! Similar to lw_solver_noscat_GaussQuad. -! It is main solver to use the rescaled-for-scattering approximation for fluxes -! In addition to the no scattering input parameters the user must provide -! scattering related properties (ssa and g) that the solver uses to compute scaling -! -! --------------------------------------------------------------- -subroutine lw_solver_1rescl_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weights, & - tau, ssa, g, lay_source, lev_source_inc, lev_source_dec, & - sfc_emis, sfc_src, & - flux_up, flux_dn, & - sfc_src_Jac, flux_up_Jac, flux_dn_Jac) & - bind(C, name="lw_solver_1rescl_GaussQuad") - integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points - logical(wl), intent(in ) :: top_at_1 - integer, intent(in ) :: nmus ! number of quadrature angles - real(wp), dimension(nmus), intent(in ) :: Ds, weights ! quadrature secants, weights - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: tau ! Optical thickness, - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: ssa ! single-scattering albedo, - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: g ! 
asymmetry parameter [] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: lay_source ! Planck source at layer average temperature [W/m2] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: lev_source_inc - ! Planck source at layer edge for radiation in increasing ilay direction [W/m2] - ! Includes spectral weighting that accounts for state-dependent frequency to g-space mapping - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: lev_source_dec - ! Planck source at layer edge for radiation in decreasing ilay direction [W/m2] - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_emis ! Surface emissivity [] - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_src ! Surface source function [W/m2] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: flux_up ! Radiances [W/m2-str] - real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: flux_dn ! Top level must contain incident flux boundary condition - - real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_src_Jac ! surface temperature Jacobian of surface source function [W/m2/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent(out) :: flux_up_Jac ! surface temperature Jacobian of Radiances [W/m2-str / K] - real(wp), dimension(ncol,nlay+1,ngpt), intent(out) :: flux_dn_Jac ! surface temperature Jacobian of Radiances [W/m2-str / K] - ! Local variables - real(wp), dimension(ncol,nlay+1,ngpt) :: radn_dn, radn_up ! Fluxes per quad angle - real(wp), dimension(ncol, ngpt) :: Ds_ncol - real(wp), dimension(ncol,nlay+1,ngpt) :: radn_dn_Jac, radn_up_Jac ! Fluxes per quad angle - - real(wp), dimension(ncol,nlay, ngpt) :: tauLoc ! rescaled Tau - real(wp), dimension(ncol,nlay, ngpt) :: scaling ! scaling - real(wp), dimension(ncol,ngpt) :: fluxTOA ! downward flux at TOA - - integer :: imu, top_level - real :: weight - real(wp), parameter :: tresh=1.0_wp - 1e-6_wp - - ! 
Tang rescaling - if (any(ssa*g >= tresh)) then - call scaling_1rescl_safe(ncol, nlay, ngpt, tauLoc, scaling, tau, ssa, g) - else - call scaling_1rescl(ncol, nlay, ngpt, tauLoc, scaling, tau, ssa, g) - endif - ! ------------------------------------ - ! - ! For the first angle output arrays store total flux - ! - top_level = MERGE(1, nlay+1, top_at_1) - fluxTOA = flux_dn(1:ncol, top_level, 1:ngpt) - Ds_ncol(:,:) = Ds(1) - weight = 2._wp*pi*weights(1) - ! Transport is for intensity - ! convert flux at top of domain to intensity assuming azimuthal isotropy - ! - radn_dn(1:ncol, top_level, 1:ngpt) = fluxTOA(1:ncol, 1:ngpt) / weight - call lw_solver_1rescl(ncol, nlay, ngpt, & - top_at_1, Ds_ncol, tauLoc, scaling, & - lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & - flux_up, flux_dn, sfc_src_Jac, flux_up_Jac, flux_dn_Jac) - - flux_up = flux_up * weight - flux_dn = flux_dn * weight - flux_up_Jac = flux_up_Jac * weight - flux_dn_Jac = flux_dn_Jac * weight - do imu = 2, nmus - Ds_ncol(:,:) = Ds(imu) - weight = 2._wp*pi*weights(imu) - ! Transport is for intensity - ! convert flux at top of domain to intensity assuming azimuthal isotropy - ! - radn_dn(1:ncol, top_level, 1:ngpt) = fluxTOA(1:ncol, 1:ngpt) / weight - call lw_solver_1rescl(ncol, nlay, ngpt, & - top_at_1, Ds_ncol, tauLoc, scaling, & - lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & - radn_up, radn_dn,sfc_src_Jac,radn_up_Jac,radn_dn_Jac) - - flux_up (:,:,:) = flux_up (:,:,:) + weight*radn_up (:,:,:) - flux_dn (:,:,:) = flux_dn (:,:,:) + weight*radn_dn (:,:,:) - flux_up_Jac(:,:,:) = flux_up_Jac(:,:,:) + weight*radn_up_Jac(:,:,:) - flux_dn_Jac(:,:,:) = flux_dn_Jac(:,:,:) + weight*radn_dn_Jac(:,:,:) - end do - end subroutine lw_solver_1rescl_GaussQuad -! ------------------------------------------------------------------------------------------------- -! -! Computes re-scaled layer optical thickness and scaling parameter -! unsafe if ssa*g =1. -! -! 
--------------------------------------------------------------- - pure subroutine scaling_1rescl(ncol, nlay, ngpt, tauLoc, scaling, tau, ssa, g) - integer , intent(in) :: ncol - integer , intent(in) :: nlay - integer , intent(in) :: ngpt - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: tau - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: ssa - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: g - - real(wp), dimension(ncol, nlay, ngpt), intent(inout) :: tauLoc - real(wp), dimension(ncol, nlay, ngpt), intent(inout) :: scaling - - - integer :: icol, ilay, igpt - real(wp) :: wb, ssal, scaleTau - do igpt=1,ngpt - do ilay=1,nlay - do icol=1,ncol - ssal = ssa(icol, ilay, igpt) - wb = ssal*(1._wp - g(icol, ilay, igpt)) / 2._wp - scaleTau = (1._wp - ssal + wb ) - tauLoc(icol, ilay, igpt) = scaleTau * tau(icol, ilay, igpt) ! Eq.15 of the paper - ! - ! here scaling is used to store parameter wb/(1-w(1-b)) of Eq.21 of the Tang paper - ! actually it is in line of parameter rescaling defined in Eq.7 - ! potentialy if g=ssa=1 then wb/scaleTau = NaN - ! it should not happen - scaling(icol, ilay, igpt) = wb / scaleTau - enddo - enddo - enddo - end subroutine scaling_1rescl -! ------------------------------------------------------------------------------------------------- -! -! Computes re-scaled layer optical thickness and scaling parameter -! safe implementation -! -! 
--------------------------------------------------------------- - pure subroutine scaling_1rescl_safe(ncol, nlay, ngpt, tauLoc, scaling, tau, ssa, g) - integer , intent(in) :: ncol - integer , intent(in) :: nlay - integer , intent(in) :: ngpt - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: tau - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: ssa - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: g - - real(wp), dimension(ncol, nlay, ngpt), intent(inout) :: tauLoc - real(wp), dimension(ncol, nlay, ngpt), intent(inout) :: scaling - - integer :: icol, ilay, igpt - real(wp) :: wb, ssal, scaleTau - do igpt=1,ngpt - do ilay=1,nlay - do icol=1,ncol - ssal = ssa(icol, ilay, igpt) - wb = ssal*(1._wp - g(icol, ilay, igpt)) / 2._wp - scaleTau = (1._wp - ssal + wb ) - tauLoc(icol, ilay, igpt) = scaleTau * tau(icol, ilay, igpt) ! Eq.15 of the paper - ! - ! here scaling is used to store parameter wb/(1-w(1-b)) of Eq.21 of the Tang paper - ! actually it is in line of parameter rescaling defined in Eq.7 - if (scaleTau < 1e-6_wp) then - scaling(icol, ilay, igpt) = 1.0_wp - else - scaling(icol, ilay, igpt) = wb / scaleTau - endif - enddo - enddo - enddo - end subroutine scaling_1rescl_safe -! ------------------------------------------------------------------------------------------------- -! ! Similar to Longwave no-scattering tarnsport (lw_transport_noscat) ! a) adds adjustment factor based on cloud properties ! @@ -1274,7 +1072,7 @@ end subroutine scaling_1rescl_safe subroutine lw_transport_1rescl(ncol, nlay, top_at_1, & trans, source_dn, source_up, & radn_up, radn_dn, An, Cn,& - radn_up_Jac, radn_dn_Jac) bind(C, name="lw_transport_1rescl") + do_Jacobians, radn_up_Jac) bind(C, name="lw_transport_1rescl") integer, intent(in ) :: ncol, nlay ! Number of columns, layers, g-points logical(wl), intent(in ) :: top_at_1 ! real(wp), dimension(ncol,nlay ), intent(in ) :: trans ! 
transmissivity = exp(-tau) @@ -1283,8 +1081,13 @@ subroutine lw_transport_1rescl(ncol, nlay, top_at_1, & real(wp), dimension(ncol,nlay+1), intent(inout) :: radn_up ! Radiances [W/m2-str] real(wp), dimension(ncol,nlay+1), intent(inout) :: radn_dn !Top level must contain incident flux boundary condition real(wp), dimension(ncol,nlay), intent(in ) :: An, Cn + logical(wl), intent(in ) :: do_Jacobians real(wp), dimension(ncol,nlay+1), intent(inout) :: radn_up_Jac ! Surface temperature Jacobians [W/m2-str/K] - real(wp), dimension(ncol,nlay+1), intent(inout) :: radn_dn_Jac !Top level must set to 0 + ! + ! We could in principle compute a downwelling Jacobian too, but it's small + ! (only a small proportion of LW is scattered) and it complicates code and the API, + ! so we will not + ! ! Local variables integer :: ilev, icol @@ -1296,26 +1099,26 @@ subroutine lw_transport_1rescl(ncol, nlay, top_at_1, & ! ! 1st Upward propagation do ilev = nlay, 1, -1 - radn_up (:,ilev) = trans(:,ilev)*radn_up (:,ilev+1) + source_up(:,ilev) - radn_up_Jac(:,ilev) = trans(:,ilev)*radn_up_Jac(:,ilev+1) do icol=1,ncol - adjustmentFactor = Cn(icol,ilev)*( An(icol,ilev)*radn_dn(icol,ilev) - & - trans(icol,ilev)*source_dn(icol,ilev) - source_up(icol,ilev) ) - radn_up (icol,ilev) = radn_up(icol,ilev) + adjustmentFactor - enddo + adjustmentFactor = Cn(icol,ilev)*( An(icol,ilev)*radn_dn(icol,ilev) - & + trans(icol,ilev)*source_dn(icol,ilev) - source_up(icol,ilev) ) + radn_up (icol,ilev) = trans(icol,ilev)*radn_up(icol,ilev+1) + source_up(icol,ilev) + & + adjustmentFactor + end do + if(do_Jacobians) & + radn_up_Jac(:,ilev) = trans(:,ilev)*radn_up_Jac(:,ilev+1) end do ! 2nd Downward propagation + ! radn_dn_Jac(:,1) = 0._wp do ilev = 1, nlay - radn_dn (:,ilev+1) = trans(:,ilev)*radn_dn (:,ilev) + source_dn(:,ilev) - radn_dn_Jac(:,ilev+1) = trans(:,ilev)*radn_dn_Jac(:,ilev) + ! 
radn_dn_Jac(:,ilev+1) = trans(:,ilev)*radn_dn_Jac(:,ilev) do icol=1,ncol adjustmentFactor = Cn(icol,ilev)*( An(icol,ilev)*radn_up(icol,ilev) - & trans(icol,ilev)*source_up(icol,ilev) - source_dn(icol,ilev) ) - - radn_dn (icol,ilev+1) = radn_dn(icol,ilev+1) + adjustmentFactor - - adjustmentFactor = Cn(icol,ilev)*An(icol,ilev)*radn_up_Jac(icol,ilev) - radn_dn_Jac(icol,ilev+1) = radn_dn_Jac(icol,ilev+1) + adjustmentFactor + radn_dn(icol,ilev+1) = trans(icol,ilev)*radn_dn(icol,ilev) + source_dn(icol,ilev) + & + adjustmentFactor + ! adjustmentFactor = Cn(icol,ilev)*An(icol,ilev)*radn_up_Jac(icol,ilev) + ! radn_dn_Jac(icol,ilev+1) = radn_dn_Jac(icol,ilev+1) + adjustmentFactor enddo end do else @@ -1324,26 +1127,28 @@ subroutine lw_transport_1rescl(ncol, nlay, top_at_1, & ! ! Upward propagation do ilev = 1, nlay - radn_up (:,ilev+1) = trans(:,ilev) * radn_up (:,ilev) + source_up(:,ilev) - radn_up_Jac(:,ilev+1) = trans(:,ilev) * radn_up_Jac(:,ilev) + radn_up (:,ilev+1) = trans(:,ilev) * radn_up (:,ilev) + source_up(:,ilev) do icol=1,ncol adjustmentFactor = Cn(icol,ilev)*( An(icol,ilev)*radn_dn(icol,ilev+1) - & trans(icol,ilev)*source_dn(icol,ilev) - source_up(icol,ilev) ) - radn_up(icol,ilev+1) = radn_up(icol,ilev+1) + adjustmentFactor + radn_up(icol,ilev+1) = trans(icol,ilev)*radn_up(icol,ilev) + source_up(icol,ilev) + & + adjustmentFactor enddo + if(do_Jacobians) & + radn_up_Jac(:,ilev+1) = trans(:,ilev) * radn_up_Jac(:,ilev) end do ! 2st Downward propagation + ! radn_dn_Jac(:,nlay+1) = 0._wp do ilev = nlay, 1, -1 - radn_dn (:,ilev) = trans(:,ilev)*radn_dn (:,ilev+1) + source_dn(:,ilev) - radn_dn_Jac(:,ilev) = trans(:,ilev)*radn_dn_Jac(:,ilev+1) + ! 
radn_dn_Jac(:,ilev) = trans(:,ilev)*radn_dn_Jac(:,ilev+1) do icol=1,ncol adjustmentFactor = Cn(icol,ilev)*( An(icol,ilev)*radn_up(icol,ilev) - & trans(icol,ilev)*source_up(icol,ilev) - source_dn(icol,ilev) ) - radn_dn(icol,ilev) = radn_dn(icol,ilev) + adjustmentFactor - - adjustmentFactor = Cn(icol,ilev)*An(icol,ilev)*radn_up_Jac(icol,ilev) - radn_dn_Jac(icol,ilev) = radn_dn_Jac(icol,ilev) + adjustmentFactor + radn_dn(icol,ilev) = trans(icol,ilev)*radn_dn(icol,ilev+1) + source_dn(icol,ilev) + & + adjustmentFactor + ! adjustmentFactor = Cn(icol,ilev)*An(icol,ilev)*radn_up_Jac(icol,ilev) + ! radn_dn_Jac(icol,ilev) = radn_dn_Jac(icol,ilev) + adjustmentFactor enddo end do end if diff --git a/rte/mo_optical_props.F90 b/rte/mo_optical_props.F90 index 3aa2bf548..d254b2f49 100644 --- a/rte/mo_optical_props.F90 +++ b/rte/mo_optical_props.F90 @@ -163,6 +163,7 @@ end function subset_range_abstract procedure, public :: validate => validate_1scalar procedure, public :: get_subset => subset_1scl_range procedure, public :: delta_scale => delta_scale_1scl + procedure, public :: finalize_1scl procedure, private :: alloc_only_1scl procedure, private :: init_and_alloc_1scl @@ -178,6 +179,7 @@ end function subset_range_abstract procedure, public :: validate => validate_2stream procedure, public :: get_subset => subset_2str_range procedure, public :: delta_scale => delta_scale_2str + procedure, public :: finalize_2str procedure, private :: alloc_only_2str procedure, private :: init_and_alloc_2str @@ -194,6 +196,7 @@ end function subset_range_abstract procedure, public :: get_subset => subset_nstr_range procedure, public :: delta_scale => delta_scale_nstr procedure, public :: get_nmom + procedure, public :: finalize_nstr procedure, private :: alloc_only_nstr procedure, private :: init_and_alloc_nstr @@ -343,16 +346,14 @@ function alloc_only_2str(this, ncol, nlay) result(err_message) character(len=128) :: err_message err_message = "" - if(.not. this%is_initialized()) then + if(.not. 
this%is_initialized()) & err_message = "optical_props%alloc: spectral discretization hasn't been provided" - return - end if - if(any([ncol, nlay] <= 0)) then + if(any([ncol, nlay] <= 0)) & err_message = "optical_props%alloc: must provide positive extents for ncol, nlay" - else - if(allocated(this%tau)) deallocate(this%tau) - allocate(this%tau(ncol,nlay,this%get_ngpt())) - end if + if(err_message /= "") return + + if(allocated(this%tau)) deallocate(this%tau) + allocate(this%tau(ncol,nlay,this%get_ngpt())) if(allocated(this%ssa)) deallocate(this%ssa) if(allocated(this%g )) deallocate(this%g ) allocate(this%ssa(ncol,nlay,this%get_ngpt()), this%g(ncol,nlay,this%get_ngpt())) @@ -366,16 +367,14 @@ function alloc_only_nstr(this, nmom, ncol, nlay) result(err_message) character(len=128) :: err_message err_message = "" - if(.not. this%is_initialized()) then + if(.not. this%is_initialized()) & err_message = "optical_props%alloc: spectral discretization hasn't been provided" - return - end if - if(any([ncol, nlay] <= 0)) then + if(any([ncol, nlay] <= 0)) & err_message = "optical_props%alloc: must provide positive extents for ncol, nlay" - else - if(allocated(this%tau)) deallocate(this%tau) - allocate(this%tau(ncol,nlay,this%get_ngpt())) - end if + if(err_message /= "") return + + if(allocated(this%tau)) deallocate(this%tau) + allocate(this%tau(ncol,nlay,this%get_ngpt())) if(allocated(this%ssa)) deallocate(this%ssa) if(allocated(this%p )) deallocate(this%p ) allocate(this%ssa(ncol,nlay,this%get_ngpt()), this%p(nmom,ncol,nlay,this%get_ngpt())) @@ -484,6 +483,38 @@ function copy_and_alloc_nstr(this, nmom, ncol, nlay, spectral_desc, name) result end function copy_and_alloc_nstr ! ------------------------------------------------------------------------------------------ ! + ! Finalize routines + ! + ! 
------------------------------------------------------------------------------------------ + function finalize_1scl(this) result(err_message) + class(ty_optical_props_1scl) :: this + character(len=128) :: err_message + + if(allocated(this%tau)) deallocate(this%tau) + err_message = "" + end function finalize_1scl + ! --------------------------------------------------------------------------- + function finalize_2str(this) result(err_message) + class(ty_optical_props_2str) :: this + character(len=128) :: err_message + + if(allocated(this%tau)) deallocate(this%tau) + if(allocated(this%ssa)) deallocate(this%ssa) + if(allocated(this%g )) deallocate(this%g ) + err_message = "" + end function finalize_2str + ! --------------------------------------------------------------------------- + function finalize_nstr(this) result(err_message) + class(ty_optical_props_nstr) :: this + character(len=128) :: err_message + + if(allocated(this%tau)) deallocate(this%tau) + if(allocated(this%ssa)) deallocate(this%ssa) + if(allocated(this%p )) deallocate(this%p ) + err_message = "" + end function finalize_nstr + ! ------------------------------------------------------------------------------------------ + ! ! Routines for array classes: delta-scaling, validation (ensuring all values can be used ) ! ! ------------------------------------------------------------------------------------------ @@ -809,6 +840,12 @@ function increment(op_in, op_io) result(err_message) integer :: ncol, nlay, ngpt, nmom ! ----- err_message = "" + if(.not. op_in%is_initialized()) & + err_message = "ty_optical_props%increment: Incrementing optical properties aren't initialized" + if(.not. 
op_in%is_initialized()) & + err_message = "ty_optical_props%increment: optical properties to be incremented aren't initialized" + if(err_message /= "") return + ncol = op_io%get_ncol() nlay = op_io%get_nlay() ngpt = op_io%get_ngpt() diff --git a/rte/mo_rte_lw.F90 b/rte/mo_rte_lw.F90 index 488f5ec34..940387bdd 100644 --- a/rte/mo_rte_lw.F90 +++ b/rte/mo_rte_lw.F90 @@ -43,8 +43,7 @@ module mo_rte_lw only: ty_source_func_lw use mo_fluxes, only: ty_fluxes, ty_fluxes_broadband use mo_rte_solver_kernels, & - only: apply_BC, lw_solver_noscat, lw_solver_noscat_GaussQuad, lw_solver_2stream, & - lw_solver_1rescl_GaussQuad + only: apply_BC, lw_solver_noscat, lw_solver_noscat_GaussQuad, lw_solver_2stream implicit none private @@ -59,28 +58,25 @@ function rte_lw(optical_props, top_at_1, & sources, sfc_emis, & fluxes, & inc_flux, n_gauss_angles, use_2stream, & - lw_Ds, flux_up_Jac, flux_dn_Jac) result(error_msg) - class(ty_optical_props_arry), intent(in ) :: optical_props ! Array of ty_optical_props. This type is abstract - ! and needs to be made concrete, either as an array - ! (class ty_optical_props_arry) or in some user-defined way + lw_Ds, flux_up_Jac) result(error_msg) + class(ty_optical_props_arry), intent(in ) :: optical_props ! Set of optical properties as one or more arrays logical, intent(in ) :: top_at_1 ! Is the top of the domain at index 1? ! (if not, ordering is bottom-to-top) - type(ty_source_func_lw), intent(in ) :: sources + type(ty_source_func_lw), intent(in ) :: sources ! Derived type with Planck source functions real(wp), dimension(:,:), intent(in ) :: sfc_emis ! emissivity at surface [] (nband, ncol) - class(ty_fluxes), intent(inout) :: fluxes ! Array of ty_fluxes. Default computes broadband fluxes at all levels + class(ty_fluxes), intent(inout) :: fluxes ! Dervied type for computing spectral integrals from g-point fluxes. + ! Default computes broadband fluxes at all levels ! if output arrays are defined. Can be extended per user desires. 
real(wp), dimension(:,:), & target, optional, intent(in ) :: inc_flux ! incident flux at domain top [W/m2] (ncol, ngpts) - integer, optional, intent(in ) :: n_gauss_angles ! Number of angles used in Gaussian quadrature + integer, optional, intent(in ) :: n_gauss_angles ! Number of angles used in Gaussian quadrature (max 3) ! (no-scattering solution) logical, optional, intent(in ) :: use_2stream ! When 2-stream parameters (tau/ssa/g) are provided, use 2-stream methods ! Default is to use re-scaled longwave transport real(wp), dimension(:,:), & - optional, intent(in ) :: lw_Ds ! linear fit to column transmissivity (ncol,ngpt) - real(wp), dimension(:,:), & - target, optional, intent(inout) :: flux_up_Jac ! surface temperature flux Jacobian [W/m2/K] (ncol, nlay+1) - real(wp), dimension(:,:), & - target, optional, intent(inout) :: flux_dn_Jac ! surface temperature flux Jacobian [W/m2/K] (ncol, nlay+1) + optional, intent(in ) :: lw_Ds ! User-specifed 1/cos of transport angle per col, g-point + real(wp), dimension(:,:), target, & + optional, intent(inout) :: flux_up_Jac ! surface temperature flux Jacobian [W/m2/K] (ncol, nlay+1) character(len=128) :: error_msg ! If empty, calculation was successful ! -------------------------------- ! @@ -90,11 +86,11 @@ function rte_lw(optical_props, top_at_1, & integer :: n_quad_angs integer :: icol, iband, igpt real(wp) :: lw_Ds_wt - logical :: using_2stream + logical :: using_2stream, do_Jacobians real(wp), dimension(:,:,:), allocatable :: gpt_flux_up, gpt_flux_dn real(wp), dimension(:,:), allocatable :: sfc_emis_gpt - real(wp), dimension(:,:,:), allocatable :: gpt_flux_upJac, gpt_flux_dnJac - type(ty_fluxes_broadband) :: Jac_fluxes + real(wp), dimension(1,1), target :: decoy + real(wp), dimension(:,:), pointer :: jacobian ! -------------------------------------------------- ! ! Weights and angle secants for first order (k=1) Gaussian quadrature. 
@@ -124,8 +120,13 @@ function rte_lw(optical_props, top_at_1, & nlay = optical_props%get_nlay() ngpt = optical_props%get_ngpt() nband = optical_props%get_nband() + do_Jacobians = present(flux_up_Jac) + if(do_Jacobians) then + jacobian => flux_up_Jac + else + jacobian => decoy + end if error_msg = "" - ! ------------------------------------------------------------------------------------ ! ! Error checking -- input consistency of sizes and validity of values @@ -135,7 +136,7 @@ function rte_lw(optical_props, top_at_1, & if(.not. fluxes%are_desired()) & error_msg = "rte_lw: no space allocated for fluxes" - if (present(flux_up_Jac) .and. check_extents) then + if (do_Jacobians .and. check_extents) then if( .not. extents_are(flux_up_Jac, ncol, nlay+1)) & error_msg = "rte_lw: flux Jacobian inconsistently sized" endif @@ -208,10 +209,10 @@ function rte_lw(optical_props, top_at_1, & end if class is (ty_optical_props_2str) if (present(lw_Ds)) & - error_msg = "rte_lw: lw_Ds not valid input for _2str class" + error_msg = "rte_lw: lw_Ds not valid when providing scattering optical properties" if (using_2stream .and. n_quad_angs /= 1) & error_msg = "rte_lw: using_2stream=true incompatible with specifying n_gauss_angles" - if (using_2stream .and. (present(flux_up_Jac) .or. present(flux_up_Jac))) & + if (using_2stream .and. do_Jacobians) & error_msg = "rte_lw: can't provide Jacobian of fluxes w.r.t surface temperature with 2-stream" class default error_msg = "rte_lw: lw_solver(...ty_optical_props_nstr...) not yet implemented" @@ -228,28 +229,31 @@ function rte_lw(optical_props, top_at_1, & error_msg = trim(optical_props%get_name()) // ': ' // trim(error_msg) return end if - ! ------------------------------------------------------------------------------------ ! ! Lower boundary condition -- expand surface emissivity by band to gpoints ! 
allocate(gpt_flux_up (ncol, nlay+1, ngpt), gpt_flux_dn(ncol, nlay+1, ngpt)) - allocate(gpt_flux_upJac(ncol, nlay+1, ngpt)) allocate(sfc_emis_gpt(ncol, ngpt)) !!$acc enter data copyin(sources, sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source) !$acc enter data copyin(optical_props) - !$acc enter data create(gpt_flux_dn, gpt_flux_up) - !$acc enter data create(gpt_flux_upJac) - !$acc enter data create(sfc_emis_gpt) + !$acc enter data create( gpt_flux_dn, gpt_flux_up) + !$omp target enter data map(alloc:gpt_flux_dn, gpt_flux_up) + !$acc enter data create( sfc_emis_gpt) + !$omp target enter data map(alloc:sfc_emis_gpt) + !$omp enter data create( flux_up_Jac) if(do_Jacobians) + !$omp target enter data map(alloc:flux_up_Jac) if(do_Jacobians) call expand_and_transpose(optical_props, sfc_emis, sfc_emis_gpt) ! ! Upper boundary condition ! if(present(inc_flux)) then - !$acc enter data copyin(inc_flux) + !$acc enter data copyin(inc_flux) + !$omp target enter data map(to:inc_flux) call apply_BC(ncol, nlay, ngpt, logical(top_at_1, wl), inc_flux, gpt_flux_dn) - !$acc exit data delete(inc_flux) + !$acc exit data delete( inc_flux) + !$omp target exit data map(release:inc_flux) else ! ! Default is zero incident diffuse flux @@ -266,7 +270,8 @@ function rte_lw(optical_props, top_at_1, & ! ! No scattering two-stream calculation ! 
- !$acc enter data copyin(optical_props%tau) + !$acc enter data copyin(optical_props%tau) + !$omp target enter data map(to:optical_props%tau) error_msg = optical_props%validate() if(len_trim(error_msg) > 0) return @@ -277,7 +282,9 @@ function rte_lw(optical_props, top_at_1, & optical_props%tau, & sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, & sfc_emis_gpt, sources%sfc_source, & - gpt_flux_up, gpt_flux_dn, sources%sfc_source_Jac, gpt_flux_upJac) + gpt_flux_up, gpt_flux_dn, & + logical(do_Jacobians, wl), sources%sfc_source_Jac, jacobian, & + logical(.false., wl), optical_props%tau, optical_props%tau) else call lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, & logical(top_at_1, wl), & @@ -288,22 +295,19 @@ function rte_lw(optical_props, top_at_1, & sources%lay_source, sources%lev_source_inc, & sources%lev_source_dec, & sfc_emis_gpt, sources%sfc_source, & - gpt_flux_up, gpt_flux_dn, sources%sfc_source_Jac, gpt_flux_upJac) + gpt_flux_up, gpt_flux_dn, & + logical(do_Jacobians, wl), sources%sfc_source_Jac, jacobian, & + logical(.false., wl), optical_props%tau, optical_props%tau) end if - !$acc exit data delete(optical_props%tau) + !$acc exit data delete( optical_props%tau) + !$omp target exit data map(release:optical_props%tau) class is (ty_optical_props_2str) - - if (present(flux_dn_Jac) .and. check_extents) then - if( .not. extents_are(flux_dn_Jac, ncol, nlay+1)) then - error_msg = "rte_lw: flux_dn_Jac inconsistently sized" - return - end if - endif if (using_2stream) then ! ! two-stream calculation with scattering ! 
- !$acc enter data copyin(optical_props%tau, optical_props%ssa, optical_props%g) + !$acc enter data copyin(optical_props%tau, optical_props%ssa, optical_props%g) + !$omp target enter data map(to:optical_props%tau, optical_props%ssa, optical_props%g) error_msg = optical_props%validate() if(len_trim(error_msg) > 0) return call lw_solver_2stream(ncol, nlay, ngpt, logical(top_at_1, wl), & @@ -311,24 +315,28 @@ function rte_lw(optical_props, top_at_1, & sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, & sfc_emis_gpt, sources%sfc_source, & gpt_flux_up, gpt_flux_dn) - !$acc exit data delete(optical_props%tau, optical_props%ssa, optical_props%g) + !$acc exit data delete( optical_props%tau, optical_props%ssa, optical_props%g) + !$omp target exit data map(release:optical_props%tau, optical_props%ssa, optical_props%g) else ! ! Re-scaled solution to account for scattering ! - allocate(gpt_flux_dnJac (ncol, nlay+1, ngpt)) - !$acc enter data create(gpt_flux_dnJac) - !$acc enter data copyin(optical_props%tau, optical_props%ssa, optical_props%g) - call lw_solver_1rescl_GaussQuad(ncol, nlay, ngpt, logical(top_at_1, wl), & - n_quad_angs, gauss_Ds(1:n_quad_angs,n_quad_angs), & - gauss_wts(1:n_quad_angs,n_quad_angs), & - optical_props%tau, optical_props%ssa, optical_props%g, & - sources%lay_source, sources%lev_source_inc, & - sources%lev_source_dec, & - sfc_emis_gpt, sources%sfc_source,& - gpt_flux_up, gpt_flux_dn, & - sources%sfc_source_Jac, gpt_flux_upJac, gpt_flux_dnJac) - !$acc exit data delete(optical_props%tau, optical_props%ssa, optical_props%g) + !$acc enter data copyin(optical_props%tau, optical_props%ssa, optical_props%g) + !$omp target enter data map(to:optical_props%tau, optical_props%ssa, optical_props%g) + call lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, & + logical(top_at_1, wl), & + n_quad_angs, & + gauss_Ds(1:n_quad_angs,n_quad_angs), & + gauss_wts(1:n_quad_angs,n_quad_angs), & + optical_props%tau, & + sources%lay_source, sources%lev_source_inc, & 
+ sources%lev_source_dec, & + sfc_emis_gpt, sources%sfc_source, & + gpt_flux_up, gpt_flux_dn, & + logical(do_Jacobians, wl), sources%sfc_source_Jac, jacobian, & + logical(.true., wl), optical_props%ssa, optical_props%g) + !$acc exit data delete( optical_props%tau, optical_props%ssa, optical_props%g) + !$omp target exit data map(release:optical_props%tau, optical_props%ssa, optical_props%g) endif class is (ty_optical_props_nstr) ! @@ -342,34 +350,14 @@ function rte_lw(optical_props, top_at_1, & ! ...and reduce spectral fluxes to desired output quantities ! error_msg = fluxes%reduce(gpt_flux_up, gpt_flux_dn, optical_props, top_at_1) - if (error_msg /= '') return - - if (present(flux_up_Jac)) Jac_fluxes%flux_up => flux_up_Jac - select type (optical_props) - class is (ty_optical_props_1scl) - ! - ! gpoint Jacobian fluxes aren't defined for _1scl - ! - error_msg = Jac_fluxes%reduce(gpt_flux_upJac, gpt_flux_upJac, optical_props, top_at_1) - class is (ty_optical_props_2str) - ! - ! Compute Jacobians when using rescaling approach for scattering - ! - if(.not. 
using_2stream) then - if (present(flux_dn_Jac)) Jac_fluxes%flux_dn => flux_dn_Jac - error_msg = Jac_fluxes%reduce(gpt_flux_upJac, gpt_flux_dnJac, optical_props, top_at_1) - !$acc exit data delete(gpt_flux_dnJac) - deallocate(gpt_flux_dnJac) - end if - end select - - !$acc exit data delete(gpt_flux_upJac) - deallocate(gpt_flux_upJac) - !$acc exit data delete(sfc_emis_gpt) - !$acc exit data delete(gpt_flux_up,gpt_flux_dn) - !$acc exit data delete(optical_props) + !$acc exit data delete( gpt_flux_up, gpt_flux_dn, sfc_emis_gpt) + !$omp target exit data map(release:gpt_flux_up, gpt_flux_dn, sfc_emis_gpt) + !$acc exit data delete(optical_props) !!$acc exit data delete(sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source,sources) + !$omp exit data copyout( flux_up_Jac) if(do_Jacobians) + !$omp target exit data map(from:flux_up_Jac) if(do_Jacobians) + end function rte_lw !-------------------------------------------------------------------------------------------------------------------- ! @@ -389,6 +377,7 @@ subroutine expand_and_transpose(ops,arr_in,arr_out) ngpt = ops%get_ngpt() limits = ops%get_band_lims_gpoint() !$acc parallel loop collapse(2) copyin(arr_in, limits) + !$omp target teams distribute parallel do simd collapse(2) map(to:arr_in, limits) do iband = 1, nband do icol = 1, ncol do igpt = limits(1, iband), limits(2, iband) diff --git a/rte/mo_rte_sw.F90 b/rte/mo_rte_sw.F90 index 18169b3e3..c698fd538 100644 --- a/rte/mo_rte_sw.F90 +++ b/rte/mo_rte_sw.F90 @@ -135,6 +135,7 @@ function rte_sw(atmos, top_at_1, & ! and switch dimension ordering !$acc enter data create(sfc_alb_dir_gpt, sfc_alb_dif_gpt) + !$omp target enter data map(alloc:sfc_alb_dir_gpt, sfc_alb_dif_gpt) call expand_and_transpose(atmos, sfc_alb_dir, sfc_alb_dir_gpt) call expand_and_transpose(atmos, sfc_alb_dif, sfc_alb_dif_gpt) ! 
------------------------------------------------------------------------------------ @@ -147,15 +148,21 @@ function rte_sw(atmos, top_at_1, & ! direct and diffuse to represent the total, consistent with the LW ! !$acc enter data copyin(mu0) + !$omp target enter data map(to:mu0) !$acc enter data create(gpt_flux_up, gpt_flux_dn, gpt_flux_dir) + !$omp target enter data map(alloc:gpt_flux_up, gpt_flux_dn, gpt_flux_dir) !$acc enter data copyin(inc_flux) + !$omp target enter data map(to:inc_flux) call apply_BC(ncol, nlay, ngpt, logical(top_at_1, wl), inc_flux, mu0, gpt_flux_dir) !$acc exit data delete(inc_flux) + !$omp target exit data map(release:inc_flux) if(present(inc_flux_dif)) then !$acc enter data copyin(inc_flux_dif) + !$omp target enter data map(to:inc_flux_dif) call apply_BC(ncol, nlay, ngpt, logical(top_at_1, wl), inc_flux_dif, gpt_flux_dn ) !$acc exit data delete(inc_flux_dif) + !$omp target exit data map(release:inc_flux_dif) else call apply_BC(ncol, nlay, ngpt, logical(top_at_1, wl), gpt_flux_dn ) end if @@ -166,6 +173,7 @@ function rte_sw(atmos, top_at_1, & ! Direct beam only ! !$acc enter data copyin(atmos, atmos%tau) + !$omp target enter data map(to:atmos%tau) error_msg = atmos%validate() if(len_trim(error_msg) > 0) return call sw_solver_noscat(ncol, nlay, ngpt, logical(top_at_1, wl), & @@ -177,11 +185,13 @@ function rte_sw(atmos, top_at_1, & !gpt_flux_up = 0._wp !gpt_flux_dn = 0._wp !$acc exit data delete(atmos%tau, atmos) + !$omp target exit data map(release:atmos%tau) class is (ty_optical_props_2str) ! ! two-stream calculation with scattering ! 
!$acc enter data copyin(atmos, atmos%tau, atmos%ssa, atmos%g) + !$omp target enter data map(to:atmos%tau, atmos%ssa, atmos%g) error_msg = atmos%validate() if(len_trim(error_msg) > 0) return call sw_solver_2stream(ncol, nlay, ngpt, logical(top_at_1, wl), & @@ -189,7 +199,9 @@ function rte_sw(atmos, top_at_1, & sfc_alb_dir_gpt, sfc_alb_dif_gpt, & gpt_flux_up, gpt_flux_dn, gpt_flux_dir) !$acc exit data delete(atmos%tau, atmos%ssa, atmos%g, atmos) + !$omp target exit data map(release:atmos%tau, atmos%ssa, atmos%g) !$acc exit data delete(sfc_alb_dir_gpt, sfc_alb_dif_gpt) + !$omp target exit data map(release:sfc_alb_dir_gpt, sfc_alb_dif_gpt) class is (ty_optical_props_nstr) ! ! n-stream calculation @@ -208,7 +220,9 @@ function rte_sw(atmos, top_at_1, & ! error_msg = fluxes%reduce(gpt_flux_up, gpt_flux_dn, atmos, top_at_1, gpt_flux_dir) !$acc exit data delete(mu0) + !$omp target exit data map(release:mu0) !$acc exit data delete(gpt_flux_up, gpt_flux_dn, gpt_flux_dir) + !$omp target exit data map(release:gpt_flux_up, gpt_flux_dn, gpt_flux_dir) end function rte_sw !-------------------------------------------------------------------------------------------------------------------- ! 
@@ -228,6 +242,7 @@ subroutine expand_and_transpose(ops,arr_in,arr_out) ngpt = ops%get_ngpt() limits = ops%get_band_lims_gpoint() !$acc parallel loop collapse(2) copyin(arr_in, limits) + !$omp target teams distribute parallel do simd collapse(2) map(to:arr_in, limits) do iband = 1, nband do icol = 1, ncol do igpt = limits(1, iband), limits(2, iband) diff --git a/rte/mo_rte_util_array.F90 b/rte/mo_rte_util_array.F90 index c946cdb78..9ef28380e 100644 --- a/rte/mo_rte_util_array.F90 +++ b/rte/mo_rte_util_array.F90 @@ -48,8 +48,10 @@ logical function any_vals_less_than_1D(array, check_value) real(wp) :: minValue !$acc kernels copyin(array) + !$omp target map(to:array) map(from:minValue) minValue = minval(array) !$acc end kernels + !$omp end target any_vals_less_than_1D = (minValue < check_value) @@ -62,8 +64,10 @@ logical function any_vals_less_than_2D(array, check_value) real(wp) :: minValue !$acc kernels copyin(array) + !$omp target map(to:array) map(from:minValue) minValue = minval(array) !$acc end kernels + !$omp end target any_vals_less_than_2D = (minValue < check_value) @@ -75,9 +79,28 @@ logical function any_vals_less_than_3D(array, check_value) real(wp) :: minValue +#ifdef _OPENMP + integer :: dim1, dim2, dim3, i, j, k + dim1 = size(array,1) + dim2 = size(array,2) + dim3 = size(array,3) + minValue = array(1,1,1) ! 
initialize to some value + !$omp target teams map(to:array) & + !$omp defaultmap(tofrom:scalar) reduction(min:minValue) + !$omp distribute parallel do simd reduction(min:minValue) + do i = 1, dim1 + do j = 1, dim2 + do k = 1, dim3 + minValue = min(minValue,array(i,j,k)) + enddo + enddo + enddo + !$omp end target teams +#else !$acc kernels copyin(array) minValue = minval(array) !$acc end kernels +#endif any_vals_less_than_3D = (minValue < check_value) @@ -93,8 +116,10 @@ logical function any_vals_less_than_1D_masked(array, mask, check_value) real(wp) :: minValue !$acc kernels copyin(array) + !$omp target map(to:array, mask) map(from:minValue) minValue = minval(array, mask=mask) !$acc end kernels + !$omp end target any_vals_less_than_1D_masked = (minValue < check_value) @@ -108,8 +133,10 @@ logical function any_vals_less_than_2D_masked(array, mask, check_value) real(wp) :: minValue !$acc kernels copyin(array) + !$omp target map(to:array, mask) map(from:minValue) minValue = minval(array, mask=mask) !$acc end kernels + !$omp end target any_vals_less_than_2D_masked = (minValue < check_value) @@ -123,8 +150,10 @@ logical function any_vals_less_than_3D_masked(array, mask, check_value) real(wp) :: minValue !$acc kernels copyin(array) + !$omp target map(to:array, mask) map(from:minValue) minValue = minval(array, mask=mask) !$acc end kernels + !$omp end target any_vals_less_than_3D_masked = (minValue < check_value) @@ -139,9 +168,11 @@ logical function any_vals_outside_1D(array, checkMin, checkMax) real(wp) :: minValue, maxValue !$acc kernels copyin(array) + !$omp target map(to:array) map(from:minValue, maxValue) minValue = minval(array) maxValue = maxval(array) !$acc end kernels + !$omp end target any_vals_outside_1D = minValue < checkMin .or. 
maxValue > checkMax end function any_vals_outside_1D @@ -153,9 +184,11 @@ logical function any_vals_outside_2D(array, checkMin, checkMax) real(wp) :: minValue, maxValue !$acc kernels copyin(array) + !$omp target map(to:array) map(from:minValue, maxValue) minValue = minval(array) maxValue = maxval(array) !$acc end kernels + !$omp end target any_vals_outside_2D = minValue < checkMin .or. maxValue > checkMax end function any_vals_outside_2D @@ -168,10 +201,33 @@ logical function any_vals_outside_3D(array, checkMin, checkMax) ! but an explicit loop is the only current solution on GPUs real(wp) :: minValue, maxValue + +#ifdef _OPENMP + integer :: dim1, dim2, dim3, i, j, k + dim1 = size(array,1) + dim2 = size(array,2) + dim3 = size(array,3) + minValue = array(1,1,1) ! initialize to some value + maxValue = array(1,1,1) ! initialize to some value + !$omp target teams map(to:array) & + !$omp defaultmap(tofrom:scalar) reduction(min:minValue) reduction(max:maxValue) + !$omp distribute parallel do simd reduction(min:minValue) reduction(max:maxValue) + do i= 1, dim1 + do j = 1, dim2 + do k = 1, dim3 + minValue = min(minValue,array(i,j,k)) + maxValue = max(maxValue,array(i,j,k)) + enddo + enddo + enddo + !$omp end target teams +#else !$acc kernels copyin(array) minValue = minval(array) maxValue = maxval(array) !$acc end kernels +#endif + any_vals_outside_3D = minValue < checkMin .or. maxValue > checkMax end function any_vals_outside_3D @@ -186,9 +242,11 @@ logical function any_vals_outside_1D_masked(array, mask, checkMin, checkMax) real(wp) :: minValue, maxValue !$acc kernels copyin(array) + !$omp target map(to:array, mask) map(from:minValue, maxValue) minValue = minval(array, mask=mask) maxValue = maxval(array, mask=mask) !$acc end kernels + !$omp end target any_vals_outside_1D_masked = minValue < checkMin .or. 
maxValue > checkMax end function any_vals_outside_1D_masked @@ -201,9 +259,11 @@ logical function any_vals_outside_2D_masked(array, mask, checkMin, checkMax) real(wp) :: minValue, maxValue !$acc kernels copyin(array) + !$omp target map(to:array, mask) map(from:minValue, maxValue) minValue = minval(array, mask=mask) maxValue = maxval(array, mask=mask) !$acc end kernels + !$omp end target any_vals_outside_2D_masked = minValue < checkMin .or. maxValue > checkMax end function any_vals_outside_2D_masked @@ -216,9 +276,11 @@ logical function any_vals_outside_3D_masked(array, mask, checkMin, checkMax) real(wp) :: minValue, maxValue !$acc kernels copyin(array) + !$omp target map(to:array, mask) map(from:minValue, maxValue) minValue = minval(array, mask=mask) maxValue = maxval(array, mask=mask) !$acc end kernels + !$omp end target any_vals_outside_3D_masked = minValue < checkMin .or. maxValue > checkMax end function any_vals_outside_3D_masked @@ -308,6 +370,7 @@ subroutine zero_array_1D(ni, array) bind(C, name="zero_array_1D") integer :: i ! ----------------------- !$acc parallel loop copyout(array) + !$omp target teams distribute parallel do simd map(from:array) do i = 1, ni array(i) = 0.0_wp end do @@ -320,6 +383,7 @@ subroutine zero_array_3D(ni, nj, nk, array) bind(C, name="zero_array_3D") integer :: i,j,k ! ----------------------- !$acc parallel loop collapse(3) copyout(array) + !$omp target teams distribute parallel do simd collapse(3) map(from:array) do k = 1, nk do j = 1, nj do i = 1, ni @@ -337,6 +401,7 @@ subroutine zero_array_4D(ni, nj, nk, nl, array) bind(C, name="zero_array_4D") integer :: i,j,k,l ! 
----------------------- !$acc parallel loop collapse(4) copyout(array) + !$omp target teams distribute parallel do simd collapse(4) map(from:array) do l = 1, nl do k = 1, nk do j = 1, nj diff --git a/tests/Makefile b/tests/Makefile index dbc0906df..ed64c65b0 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -1,26 +1,22 @@ +# # Location of RTE+RRTMGP libraries, module files. +# RRTMGP_BUILD = $(RRTMGP_ROOT)/build -# Sets macros FC, FCFLAGS consistent with RTE+RRTMGP --include $(RRTMGP_BUILD)/Makefile.conf - -# Location of netcdf C and Fortran libraries. Could specify with environment variables if file doesn't exist --include $(RRTMGP_ROOT)/examples/rfmip-clear-sky/Makefile.libs # # RRTMGP library, module files # -LDFLAGS += -L$(RRTMGP_BUILD) -LIBS += -lrrtmgp -lrte +# LDFLAGS += -L$(RRTMGP_BUILD) +# LIBS += -lrrtmgp -lrte FCINCLUDE += -I$(RRTMGP_BUILD) - # # netcdf library, module files -# C and Fortran interfaces respectively +# Environment variables NCHOME and NFHOME point to root of C and Fortran interfaces respectively - # FCINCLUDE += -I$(NFHOME)/include LDFLAGS += -L$(NFHOME)/lib -L$(NCHOME)/lib LIBS += -lnetcdff -lnetcdf -VPATH = .:$(RRTMGP_ROOT)/examples:$(RRTMGP_ROOT)/examples/rfmip-clear-sky +VPATH = .:$(RRTMGP_ROOT)/examples:$(RRTMGP_ROOT)/examples/rfmip-clear-sky:$(RRTMGP_ROOT)/examples/all-sky VPATH += $(RRTMGP_ROOT)/extensions:$(RRTMGP_ROOT)/extensions/cloud_optics:$(RRTMGP_ROOT)/extensions/solar_variability # Compilation rules @@ -38,7 +34,7 @@ ADDITIONS = mo_heating_rates.o mo_compute_bc.o mo_rrtmgp_clr_all_sky.o ADDITIONS += mo_load_coefficients.o mo_simple_netcdf.o mo_rfmip_io.o ADDITIONS += mo_testing_io.o # Cloud optics -CLOUDS += mo_cloud_sampling.o mo_cloud_optics.o mo_load_cloud_coefficients.o +CLOUDS += mo_cloud_sampling.o mo_cloud_optics.o mo_load_cloud_coefficients.o mo_garand_atmos_io.o # Solar variability ADDITIONS += mo_solar_variability.o @@ -62,5 +58,13 @@ mo_load_coefficients.o: $(LIB_DEPS) mo_simple_netcdf.o 
mo_load_coefficients.F90 mo_rfmip_io.o.o: $(LIB_DEPS) mo_simple_netcdf.o mo_rfmip_io.F90 mo_simple_netcdf.o: $(LIB_DEPS) mo_simple_netcdf.F90 +tests: + cp ${RRTMGP_ROOT}/examples/rfmip-clear-sky/multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc ./test_atmospheres.nc + $(RUN_CMD) ./clear_sky_regression test_atmospheres.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-lw-g256-2018-12-04.nc + $(RUN_CMD) ./clear_sky_regression test_atmospheres.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-sw-g224-2018-12-04.nc + +check: + python verification.py + clean: -rm clear_sky_regression *.o *.optrpt diff --git a/tests/validation-plots.py b/tests/validation-plots.py index 0b223cb0b..9f77dcd63 100644 --- a/tests/validation-plots.py +++ b/tests/validation-plots.py @@ -36,11 +36,12 @@ def make_comparison_plot(variants, labels, reference, vscale, col_dim="site", la # Reverse vertical ordering plt.ylim(vscale.max(), vscale.min()) -def construct_lbl_esgf_name(var): +def construct_lbl_esgf_name(var, esgf_node="llnl"): # # For a given variable name, provide the OpenDAP URL for the LBLRM line-by-line results # prefix = "http://esgf3.dkrz.de/thredds/dodsC/cmip6/RFMIP/AER/LBLRTM-12-8/rad-irf/r1i1p1f1/Efx/" + if(esgf_node == "llnl"): prefix = "http://esgf-data1.llnl.gov/thredds/dodsC/css03_data/CMIP6/RFMIP/AER/LBLRTM-12-8/rad-irf/r1i1p1f1/Efx/" return(prefix + var + "/gn/v20190514/" + var + "_Efx_LBLRTM-12-8_rad-irf_r1i1p1f1_gn.nc") ######################################################################## @@ -68,6 +69,9 @@ def construct_lbl_esgf_name(var): reset_coords().swap_dims({"level":"plev"}).interp(plev=plev) for i in np.arange(0, gp.site.size)], dim = 'site') cols = cc.glasbey_dark + plev.load() + gpi.load() + lbli.load() with PdfPages('validation-figures.pdf') as pdf: ######################################################################## # Longwave