From aca56b723e8c35d0fc0166c25ead3c906530cf72 Mon Sep 17 00:00:00 2001 From: Robert Pincus Date: Wed, 14 Apr 2021 23:22:50 -0400 Subject: [PATCH] Streamlined LW calculations, OpenMP GPU support, small efficiency changes, updates to continuous integration (#110) OpenMP GPU offload, tested with CCE 11.0.0. Longwave Jacobians aren't computed if the optional arguments aren't provided. Finalization methods for optical properties arrays are introduced. Make is streamlined to use environment variables and omit config files. Small changes for efficiency and robustness. Continuous integration is evolved; includes gfortran 9 and 10 on Github actions; current Nvidia and Intel HPC compilers in containers; Cray CPU and OpenMP GPU and PGI 20 OpenACC. Co-authored-by: Nichols Romero Co-authored-by: Dustin Swales Co-authored-by: Peter Ukkonen Co-authored-by: Chiel van Heerwaarden --- .github/workflows/containerized-ci.yml | 77 ++ .github/workflows/continuous-integration.yml | 142 +++ .github/workflows/main.yml | 110 --- Compiler-flags.md | 30 + Makefile | 27 + README.md | 14 +- azure-pipelines.yml | 72 +- build/Makefile | 4 +- build/Makefile.conf.gfortran | 17 - build/Makefile.conf.ifort | 18 - build/Makefile.conf.nagfor | 12 - build/Makefile.conf.olcf_pgi | 19 - build/Makefile.conf.pgfortran | 12 - build/Makefile.conf.pgfortran-cscs | 36 - build/Makefile.conf.pgfortran-cscs-gpu | 36 - examples/all-sky/Makefile | 18 +- examples/all-sky/README.md | 5 +- examples/all-sky/mo_garand_atmos_io.F90 | 0 examples/all-sky/rrtmgp_allsky.F90 | 812 ++++++++-------- examples/rfmip-clear-sky/Makefile | 44 +- examples/rfmip-clear-sky/Makefile.libs.macos | 10 - examples/rfmip-clear-sky/Makefile.libs.olcf | 10 - .../Makefile.libs.pgfortran-cscs | 20 - examples/rfmip-clear-sky/Readme.md | 6 +- .../rfmip-clear-sky/compare-to-reference.py | 12 + examples/rfmip-clear-sky/rrtmgp_rfmip_lw.F90 | 11 +- examples/rfmip-clear-sky/rrtmgp_rfmip_sw.F90 | 17 +- examples/rfmip-clear-sky/stage_files.sh | 5 + 
extensions/cloud_optics/mo_cloud_optics.F90 | 35 +- extensions/mo_fluxes_byband.F90 | 2 + extensions/mo_fluxes_byband_kernels.F90 | 8 +- rrtmgp/Make.depends | 2 +- .../kernels-openacc/mo_gas_optics_kernels.F90 | 110 ++- rrtmgp/kernels/mo_gas_optics_kernels.F90 | 17 +- .../mo_rrtmgp_util_reorder_kernels.F90 | 2 + rrtmgp/mo_gas_concentrations.F90 | 86 ++ rrtmgp/mo_gas_optics_rrtmgp.F90 | 72 +- .../mo_optical_props_kernels.F90 | 111 ++- rte/kernels-openacc/mo_rte_solver_kernels.F90 | 917 +++++++----------- rte/kernels/mo_fluxes_broadband_kernels.F90 | 17 +- rte/kernels/mo_rte_solver_kernels.F90 | 589 ++++------- rte/mo_optical_props.F90 | 69 +- rte/mo_rte_lw.F90 | 149 ++- rte/mo_rte_sw.F90 | 15 + rte/mo_rte_util_array.F90 | 65 ++ tests/Makefile | 26 +- tests/validation-plots.py | 6 +- 47 files changed, 2021 insertions(+), 1873 deletions(-) create mode 100644 .github/workflows/containerized-ci.yml create mode 100644 .github/workflows/continuous-integration.yml delete mode 100644 .github/workflows/main.yml create mode 100644 Compiler-flags.md create mode 100644 Makefile delete mode 100644 build/Makefile.conf.gfortran delete mode 100644 build/Makefile.conf.ifort delete mode 100644 build/Makefile.conf.nagfor delete mode 100755 build/Makefile.conf.olcf_pgi delete mode 100644 build/Makefile.conf.pgfortran delete mode 100644 build/Makefile.conf.pgfortran-cscs delete mode 100644 build/Makefile.conf.pgfortran-cscs-gpu mode change 100755 => 100644 examples/all-sky/Makefile mode change 100755 => 100644 examples/all-sky/mo_garand_atmos_io.F90 mode change 100755 => 100644 examples/all-sky/rrtmgp_allsky.F90 delete mode 100644 examples/rfmip-clear-sky/Makefile.libs.macos delete mode 100644 examples/rfmip-clear-sky/Makefile.libs.olcf delete mode 100644 examples/rfmip-clear-sky/Makefile.libs.pgfortran-cscs create mode 100644 examples/rfmip-clear-sky/stage_files.sh mode change 100755 => 100644 rrtmgp/mo_gas_concentrations.F90 diff --git a/.github/workflows/containerized-ci.yml 
b/.github/workflows/containerized-ci.yml new file mode 100644 index 000000000..ec473d15d --- /dev/null +++ b/.github/workflows/containerized-ci.yml @@ -0,0 +1,77 @@ +name: Continuous integration in a box +on: [push, pull_request] + +jobs: + Containerized-CI: + runs-on: ubuntu-20.04 + strategy: + matrix: + rte-kernels: [default, openacc] + container: ["earthsystemradiation/rte-rrtmgp-ci:ifort","earthsystemradiation/rte-rrtmgp-ci:nvfortran"] + container: + image: ${{ matrix.container }} + env: + NCHOME: /home/runner/netcdf-c + NFHOME: /home/runner/netcdf-fortran + RFMIP_DIR: /home/runner/rfmip-files + steps: + ############################################################################ + # Checks out repository under $GITHUB_WORKSPACE + - name: Check out code + uses: actions/checkout@v2 + - name: Environmental variables + # This might be able to be set in the ENV section above + run: echo "RRTMGP_ROOT=${GITHUB_WORKSPACE}" >> $GITHUB_ENV + - name: Environmental variables - ifort + if: contains(matrix.container, 'ifort') + run: echo "FCFLAGS=-m64 -g -traceback -heap-arrays -assume realloc_lhs -extend-source 132 -check bounds,uninit,pointers,stack -stand f08" >> $GITHUB_ENV + - name: Environmental variables - nvfortran + if: contains(matrix.container, 'nvfortran') + run: echo "FCFLAGS=-Mallocatable=03 -Mstandard -Mbounds -Mchkptr -Kieee -Mchkstk" >> $GITHUB_ENV + + - name: Make library, examples, tests + shell: bash + env: + RTE_KERNELS: ${{ matrix.rte-kernels }} + run: | + source /opt/intel/oneapi/setvars.sh || true + cd ${RRTMGP_ROOT} + ${FC} --version + make libs + ############################################################################ + - name: Cache RFMIP files + id: cache-rfmip-files + uses: actions/cache@v2 + with: + path: /home/runner/rfmip-files # Same as #{RFMIP_DIR} + key: rfmip-files + + - name: Stage RFMIP files + if: steps.cache-rfmip-files.outputs.cache-hit != 'true' + run: | + mkdir -p ${RFMIP_DIR} + cd ${RFMIP_DIR} + python 
${RRTMGP_ROOT}/examples/rfmip-clear-sky/stage_files.py + ############################################################################ + - name: Run examples, tests + shell: bash + env: + LD_LIBRARY_PATH: /home/runner/netcdf-c/lib + run: | + source /opt/intel/oneapi/setvars.sh || true + export LD_LIBRARY_PATH=${NFHOME}/lib:${LD_LIBRARY_PATH} + make tests + - name: Comparison + run: make check + ############################################################################ + - name: Validation plots + if: contains(matrix.container, 'ifort') && contains(matrix.rte-kernels, 'default') + run: | + cd ${RRTMGP_ROOT}/tests + python validation-plots.py + - name: Upload plots + if: contains(matrix.container, 'ifort') && contains(matrix.rte-kernels, 'default') + uses: actions/upload-artifact@v2 + with: + name: validation-plot + path: tests/validation-figures.pdf diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml new file mode 100644 index 000000000..674e217ff --- /dev/null +++ b/.github/workflows/continuous-integration.yml @@ -0,0 +1,142 @@ +name: Continuous Integration +on: [push, pull_request] + +jobs: + CI: + runs-on: ubuntu-20.04 + strategy: + matrix: + fortran-compiler: [gfortran-9, gfortran-10] + rte-kernels: [default, openacc] + env: + FC: ${{ matrix.fortran-compiler }} + FCFLAGS: "-ffree-line-length-none -m64 -std=f2008 -march=native -fbounds-check -finit-real=nan -g -DUSE_CBOOL" + CC: gcc + NCHOME: /home/runner/netcdf-c + NFHOME: /home/runner/netcdf-fortran + RFMIP_DIR: /home/runner/rfmip-files + steps: + - name: Update system packages + run: sudo apt-get update + ############################################################################ + # + # Compilers.... 
+ # + # Gfortran 10 not available in Github CI stack, so install + # + - name: gfortran-10 setup compiler + if: contains(matrix.fortran-compiler, 'gfortran-10') + run: | + sudo apt-get install gfortran-10 gcc-10 + echo "CC=gcc-10" >> $GITHUB_ENV + + ############################################################################ + # + # Netcdf C and Fortran + # + - name: Install HDF5 library + run: | + sudo apt-get install libhdf5-dev libcurl4-gnutls-dev hdf5-helpers + dpkg -L libhdf5-dev + + # Skipping this for now - netCDF configure doesn't see the HDF libararies + - name: cache-netcdf-c + id: cache-netcdf-c + uses: actions/cache@v2 + with: + path: /home/runner/netcdf-c + key: netcdf-c-4.7.4a-${{ runner.os }}-${{ matrix.fortran-compiler }} + + - name: Install netcdf C library from source + if: steps.cache-netcdf-c.outputs.cache-hit != 'true' + env: + CPPFLAGS: -I/usr/include/hdf5/serial + LDFLAGS: -L/usr/lib/x86_64-linux-gnu/hdf5/serial/ + run: | + ${CC} --version + git clone https://github.com/Unidata/netcdf-c.git --branch v4.7.4 + cd netcdf-c + ls /usr/include + ./configure --prefix=${NCHOME} + make -j + sudo make install + + # Would be great to encode version info + - name: cache-netcdf-fortran + id: cache-netcdf-fortran + uses: actions/cache@v2 + with: + path: /home/runner/netcdf-fortran + key: netcdf-fortran-4.5.3-${{ runner.os }}-${{ matrix.fortran-compiler }} + + - name: Build NetCDF Fortran library + # Here too it would be nice to use the environment to specify netcdf-c location + env: + CPPFLAGS: -I/home/runner/netcdf-c/include + LDFLAGS: -L/home/runner/netcdf-c/lib + LD_LIBRARY_PATH: /home/runner/netcdf-c/lib + FCFLAGS: -fPIC + if: steps.cache-netcdf-fortran.outputs.cache-hit != 'true' + run: | + echo ${TEST} + ${FC} --version + git clone https://github.com/Unidata/netcdf-fortran.git --branch v4.5.3 + cd netcdf-fortran + echo ${CPPFLAGS} + ./configure --prefix=${NFHOME} + make -j + sudo make install + 
############################################################################ + # Checks out repository under $GITHUB_WORKSPACE + - name: Check out code + uses: actions/checkout@v2 + + - name: Environmental variables + run: echo "RRTMGP_ROOT=${GITHUB_WORKSPACE}" >> $GITHUB_ENV + + - name: Make library, examples, tests + env: + RTE_KERNELS: ${{ matrix.rte-kernels }} + run: | + cd ${RRTMGP_ROOT} + ${FC} --version + make libs + + ############################################################################ + # Set up Python and packages + # + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.7 + - name: Setup conda + uses: s-weigand/setup-conda@v1 + with: + python-version: 3.7 + - name: Install python packages + run: conda install --yes urllib3 netcdf4 xarray dask scipy matplotlib seaborn colorcet + ############################################################################ + - name: Cache RFMIP files + id: cache-rfmip-files + uses: actions/cache@v2 + with: + path: /home/runner/rfmip-files # Same as #{RFMIP_DIR} + key: rfmip-files + + - name: Stage RFMIP files + if: steps.cache-rfmip-files.outputs.cache-hit != 'true' + run: | + mkdir -p ${RFMIP_DIR} + cd ${RFMIP_DIR} + python ${RRTMGP_ROOT}/examples/rfmip-clear-sky/stage_files.py + ############################################################################ + # Would be great to encode version info + - name: Run examples, tests + env: + LD_LIBRARY_PATH: /home/runner/netcdf-c/lib + run: | + export LD_LIBRARY_PATH=${NFHOME}/lib:${LD_LIBRARY_PATH} + make tests + - name: Comparison + run: | + make check diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index 7dcdfce66..000000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,110 +0,0 @@ -name: Continuous Integration -on: [push, pull_request] - -jobs: - CI: - runs-on: ubuntu-18.04 - strategy: - matrix: - fortran-compiler: [gfortran-8, gfortran-9, ifort] - rte-kernels: [default, openacc] - env: 
- FC: ${{ matrix.fortran-compiler }} - NCHOME: /usr - NFHOME: /usr - # I would like to use this variable within the run scripts - # i.e. to source files and change FCFLAGS, but can't figure out how - USING_IFORT: ${{ contains(matrix.fortran-compiler, 'ifort') }} - steps: - # - # Checks-out repository under $GITHUB_WORKSPACE - # - - uses: actions/checkout@v2 - # - # Set up Python and dependencies - # - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: 3.7 - - name: Setup conda - uses: s-weigand/setup-conda@v1 - with: - python-version: 3.7 - - name: Install python dependencies - run: conda install --yes urllib3 netcdf4 xarray dask scipy - # - # Install NetCDF library - # - - name: Install netcdf C library - run: sudo apt-get install libnetcdf-dev - # - # Intel compilers and libraries if needed - # https://software.intel.com/content/www/us/en/develop/articles/oneapi-repo-instructions.html#aptpkg - # - - name: Install Intel compilers libraries - if: contains(matrix.fortran-compiler, 'ifort') - run: | - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB - sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB - sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" - sudo apt-get update - sudo apt-get install intel-oneapi-common-licensing - sudo apt-get install intel-oneapi-common-vars - sudo apt-get install intel-oneapi-dev-utilities - sudo apt-get install intel-oneapi-ifort - # - # NetCDF FORTRAN library - # - - name: Build NetCDF Fortran library - run: | - if [[ -e /opt/intel/inteloneapi/setvars.sh ]]; then source /opt/intel/inteloneapi/setvars.sh; fi - git clone https://github.com/Unidata/netcdf-fortran.git --branch v4.4.4 - cd netcdf-fortran - ./configure --prefix=${NFHOME} F77=${FC} - make - sudo make install - - name: Stage RFMIP files - run: | - export RRTMGP_ROOT=${GITHUB_WORKSPACE} - cd ${RRTMGP_ROOT}/examples/rfmip-clear-sky - python ./stage_files.py - - name: Make library, 
examples, tests - # Compiler flags for gfortran 8 and 9. Over-ridden in run script if using ifort - env: - FCFLAGS: "-ffree-line-length-none -m64 -std=f2008 -march=native -fbounds-check -finit-real=nan -DUSE_CBOOL" - run: | - export RRTMGP_ROOT=${GITHUB_WORKSPACE} - export RTE_KERNELS=${{ matrix.rte-kernels }} - if [[ -e /opt/intel/inteloneapi/setvars.sh ]]; then - source /opt/intel/inteloneapi/setvars.sh; - export FCFLAGS="-m64 -g -traceback -heap-arrays -assume realloc_lhs -extend-source 132 -check bounds,uninit,pointers,stack -stand f08"; - fi - cd ${RRTMGP_ROOT} - ${FC} --version - make -C build -j 2 - make -C tests -j 1 - make -C examples/all-sky -j 2 - export RRTMGP_BUILD=${RRTMGP_ROOT}/build - make -C examples/rfmip-clear-sky -j 2 - - name: Run examples, tests - run: | - export RRTMGP_ROOT=${GITHUB_WORKSPACE} - if [[ -e /opt/intel/inteloneapi/setvars.sh ]]; then source /opt/intel/inteloneapi/setvars.sh; fi - cd ${RRTMGP_ROOT}/examples/rfmip-clear-sky - python ./run-rfmip-examples.py --block_size 8 - cd ${RRTMGP_ROOT}/examples/all-sky - python ./run-allsky-example.py - cd ${RRTMGP_ROOT}/tests - cp ${RRTMGP_ROOT}/examples/rfmip-clear-sky/multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc test_atmospheres.nc - ./clear_sky_regression test_atmospheres.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-lw-g256-2018-12-04.nc - ./clear_sky_regression test_atmospheres.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-sw-g224-2018-12-04.nc - - name: Comparison - run: | - export RRTMGP_ROOT=${GITHUB_WORKSPACE} - cd ${RRTMGP_ROOT}/examples/rfmip-clear-sky - python ./compare-to-reference.py --fail=7.e-4 - cd ${RRTMGP_ROOT}/examples/all-sky - python ./compare-to-reference.py - cd ${RRTMGP_ROOT}/tests - python verification.py diff --git a/Compiler-flags.md b/Compiler-flags.md new file mode 100644 index 000000000..cef9c3f42 --- /dev/null +++ b/Compiler-flags.md @@ -0,0 +1,30 @@ +# Compiler flag Examples + +Before using the Makefiles supplied with the `RTE+RRTMGP` repository, 
the environment variables `FC` and +`FCFLAGS`, identifying the Fortran compiler and flags passed to it, need to be set. Here are some examples +used during development and testing. + +To build any of the executables in `examples/` or `tests` the locations of the C and Fortran netCDF libraries +need to be set via environment variables `NCHOME` and `NFHOME`, and the variable `RRTMGP_ROOT` must be set to the +root of the RTE+RRTMGP installation. + +## Gnu Fortran +`FC: gfortran-8` or `gfortran-9` or `gfortran-10` +### Debugging flags +`FCFLAGS: "-ffree-line-length-none -m64 -std=f2008 -march=native -fbounds-check -finit-real=nan -DUSE_CBOOL"` +### Even stricter debugging flags +`FCFLAGS: "-ffree-line-length-none -m64 -std=f2008 -march=native -fbounds-check -fbacktrace -finit-real=nan -DUSE_CBOOL -pedantic -g -Wall"` + +## Intel Fortran +`FC: ifort` +### Debugging flags +`FCFLAGS: "-m64 -g -traceback -heap-arrays -assume realloc_lhs -extend-source 132 -check bounds,uninit,pointers,stack -stand f08"` +### Optimization flags: +`FCFLAGS:"-m64 -O3 -g -traceback -heap-arrays -assume realloc_lhs -extend-source 132"` + +## PGI Fortran +`FC: pgfortran` or `FC: nvfortran` (if using the Nvidia HPC SDK) +### Debugging flags +`FCFLAGS: "-g -Minfo -Mbounds -Mchkptr -Mstandard -Kieee -Mchkstk -Mallocatable=03 -Mpreprocess"` +### Optimization flags: +`FCFLAGS: "-m64 -O3 -g -traceback -heap-arrays -assume realloc_lhs -extend-source 132"` diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..9f57b43d8 --- /dev/null +++ b/Makefile @@ -0,0 +1,27 @@ +# +# Top-level Makefile +# +.PHONY: libs tests check +all: libs tests check + +libs: + make -C build -j + make -C tests -j 1 + make -C examples/all-sky -j + make -C examples/rfmip-clear-sky -j + +tests: + make -C examples/rfmip-clear-sky tests + make -C examples/all-sky tests + make -C tests tests + +check: + make -C examples/rfmip-clear-sky check + make -C examples/all-sky check + make -C tests check + +clean: + make -C 
build clean + make -C examples/rfmip-clear-sky clean + make -C examples/all-sky clean + make -C tests clean diff --git a/README.md b/README.md index 13c42f8ea..9b618f210 100644 --- a/README.md +++ b/README.md @@ -18,13 +18,15 @@ Example programs and documentation are evolving - please see examples/ in the re Relative to commit `69d36c9` to `master` on Apr 20, 2020, the required arguments to both the longwave and shortwave versions of `ty_gas_optics_rrtmgp%load()`have changed. -## Building the libraries. +## Building the libraries, examples, and unit-testing codes. -1. `cd build` -2. Set environment variables `FC` (the Fortran 2003 compiler) and `FCFLAGS` (compiler flags). Alternately create a Makefile.conf that sets these variables. You could also link to an existing file. -3. Set environment variable `RTE_KERNELS` to `openacc` if you want the OpenACC kernels rather than the default. -4. `make` +1. Set environment variables `FC` (the Fortran 2003 compiler) and `FCFLAGS` (compiler flags). Examples are provided in the `Compiler-flags.md` file. +2. Set environment variables `RRTMGP_ROOT` to the top-level RTE+RRTMGP directory and `RTE_KERNELS` to `openacc` if you want the OpenACC/OpenMP kernels rather than the default. +3. `make libs` in the top-level directory will make the RTE and RRTMGP libraries. +4. The examples and testing codes use netCDF. Set the variables `NCHOME` and `NFHOME` to the roots of the C and Fortran netCDF installations, then `make tests` to build and run these. (A few files need to be downloaded for `examples/rfmip-clear-sky`. The default is to download these with `wget` but a Python script is also available.) +5. Evaluating the results of the tests requires `Python` with the `xarray` package and its dependencies installed. Comparisons can be made with `make check` in the top level directory. +6. `make` invoked without a target in the top level attempts all three steps. 
## Examples -Two examples are provided, one for clear skies and one including clouds. See the README file and codes in each directory for further information. +Two examples are provided in `examples/`, one for clear skies and one including clouds. Directory `tests/` contains regression testing (e.g. to ensure that answers are independent of orientation) and unit testing (to be sure all the code paths are tested). See the README file and codes in each directory for further information. diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 166ff69e8..624c3af20 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -3,13 +3,6 @@ jobs: pool: CSCS strategy: matrix: - pgi_19_9_gpu: - compiler_base: pgi - compiler_module: PGI/19.9 - accel_module: cudatoolkit - FCFLAGS: "-O3 -ta=tesla:cc60,cuda10.1 -Mallocatable=03 -gopt -Minline,reshape,maxsize:40" - RTE_KERNELS: openacc - RUN_CMD: "srun -C gpu -A c15 -p cscsci" pgi_default_gpu: compiler_base: pgi compiler_module: pgi @@ -17,27 +10,21 @@ jobs: # Generic accelerator flag FCFLAGS: "-O3 -acc -Mallocatable=03 -gopt" RTE_KERNELS: openacc - RUN_CMD: "srun -C gpu -A c15 -p cscsci" - pgi_19_10_cpu: - compiler_base: pgi - compiler_module: PGI/19.10 - accel_module: - # Error checking flags - FCFLAGS: "-Mallocatable=03 -Mstandard -Mbounds -Mchkptr -Kieee -Mchkstk" - RUN_CMD: - pgi_19_9_cpu: - compiler_base: pgi - compiler_module: PGI/19.9 - accel_module: - # Error checking flags - FCFLAGS: "-Mallocatable=03 -Mstandard -Mbounds -Mchkptr -Kieee -Mchkstk" - RUN_CMD: + RUN_CMD: "srun -C gpu -A pr55 -p cscsci" cce-cpu-icon-production: compiler_base: cray - compiler_module: ccce-icon/9.0.2-classic + compiler_module: cce-icon/11.0.0 accel_module: # Production flags for Icon model FCFLAGS: "-hadd_paren -r am -Ktrap=divz,ovf,inv -hflex_mp=intolerant -hfp1 -hnoacc -O1,cache0" + cce-openmp: + compiler_base: cray + compiler_module: cce/11.0.0 + accel_module: craype-accel-nvidia60 + # OpenMP flags from Nichols Romero (Argonne) + 
FCFLAGS: "-hnoacc -homp -O0" + RTE_KERNELS: openacc + RUN_CMD: "srun -C gpu -A pr55 -p cscsci" maxParallel: 2 workspace: @@ -47,7 +34,7 @@ jobs: - script: | set -e - echo " + echo ' module load daint-gpu export PATH=$CRAY_BINUTILS_BIN:$PATH module swap PrgEnv-cray PrgEnv-$(compiler_base) @@ -55,49 +42,33 @@ jobs: module load $(accel_module) module load cray-netcdf cray-hdf5 export LD_LIBRARY_PATH=$CRAY_LD_LIBRARY_PATH:$LD_LIBRARY_PATH + export CUDA_HOME=$CUDATOOLKIT_HOME echo Compiler Environment: module list echo LD_LIBRARY_PATH is: echo $LD_LIBRARY_PATH - " > compiler_modules + ' > compiler_modules displayName: 'Create module environment' - script: | set -e - module load daint-gpu - module load netcdf-python cd examples/rfmip-clear-sky - python ./stage_files.py + source ./stage_files.sh displayName: 'Stage files' - script: | set -e source compiler_modules export RRTMGP_ROOT=$PWD - export RRTMGP_BUILD=$PWD/build export FC=ftn - make -C build/ clean - - make -C build/ -j 8 - make -C tests clean - make -C tests -j 1 - make -C examples/all-sky clean - make -C examples/all-sky -j 8 - make -C examples/rfmip-clear-sky clean - make -C examples/rfmip-clear-sky -j 8 + make clean + make libs displayName: 'Make' - script: | set -e source compiler_modules - module load cray-python/3.6.5.7 + module load cray-python export RRTMGP_ROOT=$PWD - cd ${RRTMGP_ROOT}/examples/rfmip-clear-sky - ${RUN_CMD} python ./run-rfmip-examples.py --block_size 1800 - cd ${RRTMGP_ROOT}/examples/all-sky - ${RUN_CMD} python ./run-allsky-example.py - cd ${RRTMGP_ROOT}/tests - cp ${RRTMGP_ROOT}/examples/rfmip-clear-sky/multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc test_atmospheres.nc - ${RUN_CMD} ./clear_sky_regression test_atmospheres.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-lw-g256-2018-12-04.nc - ${RUN_CMD} ./clear_sky_regression test_atmospheres.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-sw-g224-2018-12-04.nc + make tests displayName: 'Run' - script: | set -e @@ -105,10 +76,5 @@ 
jobs: export RRTMGP_ROOT=$PWD # This module will unload some of the build modules, so do the checks separately module load netcdf-python - cd ${RRTMGP_ROOT}/examples/rfmip-clear-sky - python ./compare-to-reference.py --fail=7.e-4 - cd ${RRTMGP_ROOT}/examples/all-sky - python ./compare-to-reference.py - cd ${RRTMGP_ROOT}/tests - python verification.py + make check displayName: 'Check results' diff --git a/build/Makefile b/build/Makefile index d18d52159..ec79c985f 100644 --- a/build/Makefile +++ b/build/Makefile @@ -13,9 +13,7 @@ RTE_KERNEL_DIR += ../rte/kernels RRTMGP_KERNEL_DIR += ../rrtmgp/kernels VPATH = $(RTE_DIR):$(RTE_KERNEL_DIR):$(RRTMGP_DIR):$(RRTMGP_KERNEL_DIR) # -# Compiler variables FC, FCFLAGS can be set in the environment or in Makefile.conf -# --include Makefile.conf +# Compiler variables FC, FCFLAGS must be set in the environment all: librte.a librrtmgp.a diff --git a/build/Makefile.conf.gfortran b/build/Makefile.conf.gfortran deleted file mode 100644 index ff8deddf2..000000000 --- a/build/Makefile.conf.gfortran +++ /dev/null @@ -1,17 +0,0 @@ -# -# Gfortran 7 -# - -# Fortran compiler command -export FC = gfortran - -# Fortran compiler flags -# -# Optimized -# -export FCFLAGS = -ffree-line-length-none -m64 -std=f2003 -march=native -O3 -DUSE_CBOOL -# -# Debugging - -mno-avx is particular to (broken) gcc setups using MacPorts -# -#export FCFLAGS = -ffree-line-length-none -m64 -std=f2003 -march=native -mno-avx -pedantic -g -fbounds-check -Wall -fbacktrace -finit-real=nan -DUSE_CBOOL -#export FCFLAGS = -ffree-line-length-none -m64 -std=f2003 -march=native -pedantic -g -fbounds-check -Wall -fbacktrace -finit-real=nan -DUSE_CBOOL diff --git a/build/Makefile.conf.ifort b/build/Makefile.conf.ifort deleted file mode 100644 index 325892b63..000000000 --- a/build/Makefile.conf.ifort +++ /dev/null @@ -1,18 +0,0 @@ -# -# Intel Fortran 18 -# -# Fortran compiler command -export FC = ifort -# -# Fortran compiler flags -# -# Optimized -# -export FCFLAGS += -m64 -O3 -g 
-traceback -heap-arrays -assume realloc_lhs -extend-source 132 -export F77FLAGS += -m64 -O3 -g -traceback -# can add -qopt-report-phase=vec -# -# Debugging -# -# export FCFLAGS = -m64 -g -traceback -heap-arrays -assume realloc_lhs -extend-source 132 -check bounds,uninit,pointers,stack -stand f08 -# export F77FLAGS = -m64 -g -traceback -check bounds,uninit,pointers,stack diff --git a/build/Makefile.conf.nagfor b/build/Makefile.conf.nagfor deleted file mode 100644 index 9e130c569..000000000 --- a/build/Makefile.conf.nagfor +++ /dev/null @@ -1,12 +0,0 @@ -# -# NAG Fortran -# -# Fortran compiler command -export FC = nagfor - -# Fortran compiler flags -# -# Debugging -# -export FCFLAGS = -g -f2003 -ieee=nonstd -gline -colour -C -C=dangling -export F77FLAGS = -g -ieee=nonstd -gline -colour -C -C=dangling -dusty diff --git a/build/Makefile.conf.olcf_pgi b/build/Makefile.conf.olcf_pgi deleted file mode 100755 index dbb69b694..000000000 --- a/build/Makefile.conf.olcf_pgi +++ /dev/null @@ -1,19 +0,0 @@ -# -# PGI Fortran on Ascent/Summit -# -# Fortran compiler command -export FC = mpif90 - -# Fortran compiler flags -# -# NOTE: There is a bug in the PGI OpenACC implementation related to optimization -# and C-compatible logical types (c_bool). The temporary workaround (until the -# compiler bug is fixed) is to compile without optimization. Alternatively, we -# can change all variables of type logical(wl) or logical(kind=c_bool) with -# intent(in) to intent(inout), which access memory differently. Requiring -# building without optimization seemed like the less intrusive approach for the -# time being though. This allows the tests to build and run, and gives the right -# answers against baselines, but will need to be fixed before running big -# expensive jobs probably. 
-# -export FCFLAGS = -O0 -g -ta=nvidia,cc70,ptxinfo -Minfo=accel diff --git a/build/Makefile.conf.pgfortran b/build/Makefile.conf.pgfortran deleted file mode 100644 index 33b3c065b..000000000 --- a/build/Makefile.conf.pgfortran +++ /dev/null @@ -1,12 +0,0 @@ -# -# PG Fortran -# -# Fortran compiler command -export FC = ftn - -# Fortran compiler flags -# -# Debugging -# -export FCFLAGS = -g -Mallocatable=03 -Mbounds -Mchkptr -Mstandard -Kieee -Mchkstk -Mpreprocess -export F77FLAGS = $(FCFLAGS) diff --git a/build/Makefile.conf.pgfortran-cscs b/build/Makefile.conf.pgfortran-cscs deleted file mode 100644 index 1e3f28ef1..000000000 --- a/build/Makefile.conf.pgfortran-cscs +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/make - -# Load the following modules to compile with PGI for CPU -# -# module load cdt/19.06 -# module swap PrgEnv-cray PrgEnv-pgi -# module load cray-netcdf cray-hdf5 -# module load craype-accel-nvidia60 -# module unload cray-libsci_acc -# -# -# Fortran compiler command -FC = ftn -NCHOME = - -# Fortran compiler flags -#FCFLAGS = -g -Minfo -Mbounds -Mchkptr -Mstandard -Kieee -Mchkstk -Mipa=fast,inline -Mallocatable=03 -FCFLAGS = -g -Minfo -Mbounds -Mchkptr -Mstandard -Kieee -Mchkstk -Mallocatable=03 -Mpreprocess - -# Fortran .mod files, e.g. -I if you have headers in a nonstandard directory -FCINCLUDE = - -# linker flags, e.g. -L if you have libraries in a nonstandard directory -#LDFLAGS = -L/opt/nvidia/cudatoolkit6.5/6.5.14-1.0502.9836.8.1/lib64 -ta=tesla6.5 -lcudart_static -Bdynamic -#LDFLAGS = -ta=tesla -LDFLAGS = - -# -L/sw/squeeze-x64/netcdf_fortran-4.2.0-static-pgi15/lib -lnetcdff -L/sw/squeeze-x64/szip-2.1-static/lib -L/sw/squeeze-x64/hdf5-1.8.12-static/lib -L/sw/squeeze-x64/netcdf-4.3.1.1-static/lib -lnetcdf -lnetcdf -lhdf5_hl -lhdf5 -lsz -lz -lm -ldl -# libraries to pass to the linker, e.g. 
-l -LIBS = - -# build directory -BUILDDIR = build - -# these files will not be compiled -#BLACKLIST = mo_gas_desc.F90 diff --git a/build/Makefile.conf.pgfortran-cscs-gpu b/build/Makefile.conf.pgfortran-cscs-gpu deleted file mode 100644 index 8042ad70a..000000000 --- a/build/Makefile.conf.pgfortran-cscs-gpu +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/make - -# Load the following modules -# -# module load cdt/19.06 -# module swap PrgEnv-cray PrgEnv-pgi -# module load cray-netcdf cray-hdf5 -# module load craype-accel-nvidia60 -# module unload cray-libsci_acc -# -# -# Fortran compiler command -FC = ftn -NCHOME = - -# Fortran compiler flags -#FCFLAGS = -g -Minfo -Mbounds -Mchkptr -Mstandard -Kieee -Mchkstk -Mipa=fast,inline -acc -ta=tesla:6.5 -FCFLAGS = -g -ta=tesla:cc60,cuda9.2 -Minfo -Mbounds -Mchkptr -Mstandard -Kieee -Mchkstk -Mallocatable=03 -Mpreprocess - -# Fortran .mod files, e.g. -I if you have headers in a nonstandard directory -FCINCLUDE = - -# linker flags, e.g. -L if you have libraries in a nonstandard directory -#LDFLAGS = -L/opt/nvidia/cudatoolkit6.5/6.5.14-1.0502.9836.8.1/lib64 -ta=tesla6.5 -lcudart_static -Bdynamic -#LDFLAGS = -ta=tesla -LDFLAGS = - -# -L/sw/squeeze-x64/netcdf_fortran-4.2.0-static-pgi15/lib -lnetcdff -L/sw/squeeze-x64/szip-2.1-static/lib -L/sw/squeeze-x64/hdf5-1.8.12-static/lib -L/sw/squeeze-x64/netcdf-4.3.1.1-static/lib -lnetcdf -lnetcdf -lhdf5_hl -lhdf5 -lsz -lz -lm -ldl -# libraries to pass to the linker, e.g. -l -LIBS = - -# build directory -BUILDDIR = build - -# these files will not be compiled -#BLACKLIST = mo_gas_desc.F90 diff --git a/examples/all-sky/Makefile b/examples/all-sky/Makefile old mode 100755 new mode 100644 index 919d6b5b2..1d58ddc76 --- a/examples/all-sky/Makefile +++ b/examples/all-sky/Makefile @@ -1,10 +1,7 @@ +# # Location of RTE+RRTMGP libraries, module files. 
+# RRTMGP_BUILD = $(RRTMGP_ROOT)/build -# Sets macros FC, FCFLAGS consistent with RTE+RRTMGP --include $(RRTMGP_BUILD)/Makefile.conf - -# Location of netcdf C and Fortran libraries. Could specify with environment variables if file doesn't exist --include $(RRTMGP_ROOT)/examples/rfmip-clear-sky/Makefile.libs # # RRTMGP library, module files # @@ -14,7 +11,7 @@ FCINCLUDE += -I$(RRTMGP_BUILD) # # netcdf library, module files -# C and Fortran interfaces respectively +# Environment variables NCHOME and NFHOME point to root of C and Fortran interfaces respectively - # FCINCLUDE += -I$(NFHOME)/include LDFLAGS += -L$(NFHOME)/lib -L$(NCHOME)/lib @@ -28,7 +25,6 @@ VPATH = ../:$(RRTMGP_ROOT)/extensions/cloud_optics %: %.o $(FC) $(FCFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) - # # Extra sources -- extensions to RRTMGP classes, shared infrastructure, local sources # @@ -49,5 +45,13 @@ mo_load_coefficients.o: mo_simple_netcdf.o mo_load_coefficients.F90 mo_garand_atmos_io.o: mo_simple_netcdf.o mo_garand_atmos_io.F90 mo_load_cloud_coefficients.o: mo_simple_netcdf.o mo_cloud_optics.o mo_load_cloud_coefficients.F90 +tests: + cp garand-atmos-1.nc rrtmgp-allsky.nc + $(RUN_CMD) ./rrtmgp_allsky rrtmgp-allsky.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-lw-g256-2018-12-04.nc ${RRTMGP_ROOT}/extensions/cloud_optics/rrtmgp-cloud-optics-coeffs-lw.nc 128 + $(RUN_CMD) ./rrtmgp_allsky rrtmgp-allsky.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-sw-g224-2018-12-04.nc ${RRTMGP_ROOT}/extensions/cloud_optics/rrtmgp-cloud-optics-coeffs-sw.nc 128 + +check: + python ./compare-to-reference.py + clean: -rm rrtmgp_allsky *.o *.optrpt ../*.optrpt *.mod diff --git a/examples/all-sky/README.md b/examples/all-sky/README.md index 62916f9e7..ff894bbd4 100644 --- a/examples/all-sky/README.md +++ b/examples/all-sky/README.md @@ -4,7 +4,8 @@ This example provides a modestly more realistic setting the clear-sky problem do The example uses the first of the Garand atmosphere used for developing RRTMGP, as described in the 
[paper](https://doi.org/10.1029/2019MS001621) documenting the code, repeats the column a user-specified number of times, computes the optical properties of an arbitrary cloud in each column, and computes the broadband fluxes. Fractional cloudiness is not considered, and the clouds are extensive but quite boring, with uniform condensate amount and particle size everywhere (though with different values for liquid and ice). -1. Build the RTE+RRTMGP libraries in `../../build/`. This will require setting environmental variables `FC` for the Fortran compiler and `FCFLAGS`, or creating `../../build/Makefile.conf` with that information. -2. Build the executables in this directory, which will require providing the locations of the netCDF C and Fortran libraries and module files as environmental variables (NCHOME and NFHOME) or via file `Makefile.libs` +1. Build the RTE+RRTMGP libraries in `../../build/`. This will require setting environmental variables `FC` for the Fortran compiler and `FCFLAGS`. +2. Build the executables in this directory, which will require providing the locations of the netCDF C and Fortran libraries and module files as environmental +variables `NCHOME` and `NFHOME`, as well a variable `RRTMGP_ROOT` pointing to the root of the installation (the absolute path to `../../`). 4. Use Python script `run-rfmip-examples.py` to run the examples. The script takes some optional arguments, see `run-rfmip-examples.py -h` 5. Python script `compare-to-reference.py` will compare the results to reference answers for 128 columns, produced on a Mac with Intel 19 Fortran compiler. 
diff --git a/examples/all-sky/mo_garand_atmos_io.F90 b/examples/all-sky/mo_garand_atmos_io.F90 old mode 100755 new mode 100644 diff --git a/examples/all-sky/rrtmgp_allsky.F90 b/examples/all-sky/rrtmgp_allsky.F90 old mode 100755 new mode 100644 index a1c3ea666..3279a191c --- a/examples/all-sky/rrtmgp_allsky.F90 +++ b/examples/all-sky/rrtmgp_allsky.F90 @@ -1,390 +1,422 @@ -subroutine stop_on_err(error_msg) - use iso_fortran_env, only : error_unit - character(len=*), intent(in) :: error_msg - - if(error_msg /= "") then - write (error_unit,*) trim(error_msg) - write (error_unit,*) "rte_rrtmgp_clouds stopping" - stop - end if -end subroutine stop_on_err - -subroutine vmr_2d_to_1d(gas_concs, gas_concs_garand, name, sz1, sz2) - use mo_gas_concentrations, only: ty_gas_concs - use mo_rte_kind, only: wp - - type(ty_gas_concs), intent(in) :: gas_concs_garand - type(ty_gas_concs), intent(inout) :: gas_concs - character(len=*), intent(in) :: name - integer, intent(in) :: sz1, sz2 - - real(wp) :: tmp(sz1, sz2), tmp_col(sz2) - - !$acc data create(tmp, tmp_col) - call stop_on_err(gas_concs_garand%get_vmr(name, tmp)) - !$acc kernels - tmp_col(:) = tmp(1, :) - !$acc end kernels - - call stop_on_err(gas_concs%set_vmr (name, tmp_col)) - !$acc end data -end subroutine vmr_2d_to_1d -! 
---------------------------------------------------------------------------------- -program rte_rrtmgp_clouds - use mo_rte_kind, only: wp, i8 - use mo_optical_props, only: ty_optical_props, & - ty_optical_props_arry, ty_optical_props_1scl, ty_optical_props_2str - use mo_gas_optics_rrtmgp, only: ty_gas_optics_rrtmgp - use mo_cloud_optics, only: ty_cloud_optics - use mo_gas_concentrations, only: ty_gas_concs - use mo_source_functions, only: ty_source_func_lw - use mo_fluxes, only: ty_fluxes_broadband - use mo_rte_lw, only: rte_lw - use mo_rte_sw, only: rte_sw - use mo_load_coefficients, only: load_and_init - use mo_load_cloud_coefficients, & - only: load_cld_lutcoeff, load_cld_padecoeff - use mo_garand_atmos_io, only: read_atmos, write_lw_fluxes, write_sw_fluxes - implicit none - ! ---------------------------------------------------------------------------------- - ! Variables - ! ---------------------------------------------------------------------------------- - ! Arrays: dimensions (col, lay) - real(wp), dimension(:,:), allocatable :: p_lay, t_lay, p_lev - real(wp), dimension(:,:), allocatable :: col_dry - real(wp), dimension(:,:), allocatable :: temp_array - - ! - ! Longwave only - ! - real(wp), dimension(:,:), allocatable :: t_lev - real(wp), dimension(:), allocatable :: t_sfc - real(wp), dimension(:,:), allocatable :: emis_sfc ! First dimension is band - ! - ! Shortwave only - ! - real(wp), dimension(:), allocatable :: mu0 - real(wp), dimension(:,:), allocatable :: sfc_alb_dir, sfc_alb_dif ! First dimension is band - ! - ! Source functions - ! - ! Longwave - type(ty_source_func_lw), save :: lw_sources - ! Shortwave - real(wp), dimension(:,:), allocatable, save :: toa_flux - ! - ! Clouds - ! - real(wp), allocatable, dimension(:,:) :: lwp, iwp, rel, rei - logical, allocatable, dimension(:,:) :: cloud_mask - ! - ! Output variables - ! - real(wp), dimension(:,:), target, & - allocatable :: flux_up, flux_dn, flux_dir - ! - ! 
Derived types from the RTE and RRTMGP libraries - ! - type(ty_gas_optics_rrtmgp) :: k_dist - type(ty_cloud_optics) :: cloud_optics - type(ty_gas_concs) :: gas_concs, gas_concs_garand, gas_concs_1col - class(ty_optical_props_arry), & - allocatable :: atmos, clouds - type(ty_fluxes_broadband) :: fluxes - - ! - ! Inputs to RRTMGP - ! - logical :: top_at_1, is_sw, is_lw - - integer :: ncol, nlay, nbnd, ngpt - integer :: icol, ilay, ibnd, iloop, igas - real(wp) :: rel_val, rei_val - - character(len=8) :: char_input - integer :: nUserArgs=0, nloops - logical :: use_luts = .true., write_fluxes = .true. - integer, parameter :: ngas = 8 - character(len=3), dimension(ngas) & - :: gas_names = ['h2o', 'co2', 'o3 ', 'n2o', 'co ', 'ch4', 'o2 ', 'n2 '] - - character(len=256) :: input_file, k_dist_file, cloud_optics_file - ! - ! Timing variables - ! - integer(kind=i8) :: start, finish, start_all, finish_all, clock_rate - real(wp) :: avg - integer(kind=i8), allocatable :: elapsed(:) - !$omp threadprivate( lw_sources, toa_flux, flux_up, flux_dn, flux_dir ) - ! ---------------------------------------------------------------------------------- - ! Code - ! ---------------------------------------------------------------------------------- - ! - ! Parse command line for any file names, block size - ! - ! rrtmgp_clouds rrtmgp-clouds.nc $RRTMGP_ROOT/rrtmgp/data/rrtmgp-data-lw-g256-2018-12-04.nc $RRTMGP_ROOT/extensions/cloud_optics/rrtmgp-cloud-optics-coeffs-lw.nc 128 1 - ! 
rrtmgp_clouds rrtmgp-clouds.nc $RRTMGP_ROOT/rrtmgp/data/rrtmgp-data-sw-g224-2018-12-04.nc $RRTMGP_ROOT/extensions/cloud_optics/rrtmgp-cloud-optics-coeffs-sw.nc 128 1 - nUserArgs = command_argument_count() - nloops = 1 - if (nUserArgs < 4) call stop_on_err("Need to supply input_file k_distribution_file ncol.") - if (nUserArgs >= 1) call get_command_argument(1,input_file) - if (nUserArgs >= 2) call get_command_argument(2,k_dist_file) - if (nUserArgs >= 3) call get_command_argument(3,cloud_optics_file) - if (nUserArgs >= 4) then - call get_command_argument(4, char_input) - read(char_input, '(i8)') ncol - if(ncol <= 0) call stop_on_err("Specify positive ncol.") - end if - if (nUserArgs >= 5) then - call get_command_argument(5, char_input) - read(char_input, '(i8)') nloops - if(nloops <= 0) call stop_on_err("Specify positive nloops.") - end if - if (nUserArgs > 6) print *, "Ignoring command line arguments beyond the first five..." - if(trim(input_file) == '-h' .or. trim(input_file) == "--help") then - call stop_on_err("rrtmgp_clouds input_file absorption_coefficients_file cloud_optics_file ncol") - end if - ! - ! Read temperature, pressure, gas concentrations. - ! Arrays are allocated as they are read - ! - call read_atmos(input_file, & - p_lay, t_lay, p_lev, t_lev, & - gas_concs_garand, col_dry) - deallocate(col_dry) - nlay = size(p_lay, 2) - ! For clouds we'll use the first column, repeated over and over - call stop_on_err(gas_concs%init(gas_names)) - do igas = 1, ngas - call vmr_2d_to_1d(gas_concs, gas_concs_garand, gas_names(igas), size(p_lay, 1), nlay) - end do - ! 
If we trusted in Fortran allocate-on-assign we could skip the temp_array here - allocate(temp_array(ncol, nlay)) - temp_array = spread(p_lay(1,:), dim = 1, ncopies=ncol) - call move_alloc(temp_array, p_lay) - allocate(temp_array(ncol, nlay)) - temp_array = spread(t_lay(1,:), dim = 1, ncopies=ncol) - call move_alloc(temp_array, t_lay) - allocate(temp_array(ncol, nlay+1)) - temp_array = spread(p_lev(1,:), dim = 1, ncopies=ncol) - call move_alloc(temp_array, p_lev) - allocate(temp_array(ncol, nlay+1)) - temp_array = spread(t_lev(1,:), dim = 1, ncopies=ncol) - call move_alloc(temp_array, t_lev) - ! This puts pressure and temperature arrays on the GPU - !$acc enter data copyin(p_lay, p_lev, t_lay, t_lev) - ! ---------------------------------------------------------------------------- - ! load data into classes - call load_and_init(k_dist, k_dist_file, gas_concs) - is_sw = k_dist%source_is_external() - is_lw = .not. is_sw - ! - ! Should also try with Pade calculations - ! call load_cld_padecoeff(cloud_optics, cloud_optics_file) - ! - if(use_luts) then - call load_cld_lutcoeff (cloud_optics, cloud_optics_file) - else - call load_cld_padecoeff(cloud_optics, cloud_optics_file) - end if - call stop_on_err(cloud_optics%set_ice_roughness(2)) - ! ---------------------------------------------------------------------------- - ! - ! Problem sizes - ! - nbnd = k_dist%get_nband() - ngpt = k_dist%get_ngpt() - top_at_1 = p_lay(1, 1) < p_lay(1, nlay) - - ! ---------------------------------------------------------------------------- - ! LW calculations neglect scattering; SW calculations use the 2-stream approximation - ! Here we choose the right variant of optical_props. - ! - if(is_sw) then - allocate(ty_optical_props_2str::atmos) - allocate(ty_optical_props_2str::clouds) - else - allocate(ty_optical_props_1scl::atmos) - allocate(ty_optical_props_1scl::clouds) - end if - ! Clouds optical props are defined by band - call stop_on_err(clouds%init(k_dist%get_band_lims_wavenumber())) - ! 
- ! Allocate arrays for the optical properties themselves. - ! - select type(atmos) - class is (ty_optical_props_1scl) - !$acc enter data copyin(atmos) - call stop_on_err(atmos%alloc_1scl(ncol, nlay, k_dist)) - !$acc enter data copyin(atmos) create(atmos%tau) - class is (ty_optical_props_2str) - call stop_on_err(atmos%alloc_2str( ncol, nlay, k_dist)) - !$acc enter data copyin(atmos) create(atmos%tau, atmos%ssa, atmos%g) - class default - call stop_on_err("rte_rrtmgp_clouds: Don't recognize the kind of optical properties ") - end select - select type(clouds) - class is (ty_optical_props_1scl) - call stop_on_err(clouds%alloc_1scl(ncol, nlay)) - !$acc enter data copyin(clouds) create(clouds%tau) - class is (ty_optical_props_2str) - call stop_on_err(clouds%alloc_2str(ncol, nlay)) - !$acc enter data copyin(clouds) create(clouds%tau, clouds%ssa, clouds%g) - class default - call stop_on_err("rte_rrtmgp_clouds: Don't recognize the kind of optical properties ") - end select - ! ---------------------------------------------------------------------------- - ! Boundary conditions depending on whether the k-distribution being supplied - ! is LW or SW - if(is_sw) then - ! toa_flux is threadprivate - !$omp parallel - allocate(toa_flux(ncol, ngpt)) - !$omp end parallel - ! - allocate(sfc_alb_dir(nbnd, ncol), sfc_alb_dif(nbnd, ncol), mu0(ncol)) - !$acc enter data create(sfc_alb_dir, sfc_alb_dif, mu0) - ! Ocean-ish values for no particular reason - !$acc kernels - sfc_alb_dir = 0.06_wp - sfc_alb_dif = 0.06_wp - mu0 = .86_wp - !$acc end kernels - else - ! lw_sorces is threadprivate - !$omp parallel - call stop_on_err(lw_sources%alloc(ncol, nlay, k_dist)) - !$omp end parallel - - allocate(t_sfc(ncol), emis_sfc(nbnd, ncol)) - !$acc enter data create(t_sfc, emis_sfc) - ! Surface temperature - !$acc kernels - t_sfc = t_lev(1, merge(nlay+1, 1, top_at_1)) - emis_sfc = 0.98_wp - !$acc end kernels - end if - ! ---------------------------------------------------------------------------- - ! 
- ! Fluxes - ! - !$omp parallel - allocate(flux_up(ncol,nlay+1), flux_dn(ncol,nlay+1)) - !$omp end parallel - - !$acc enter data create(flux_up, flux_dn) - if(is_sw) then - allocate(flux_dir(ncol,nlay+1)) - !$acc enter data create(flux_dir) - end if - ! - ! Clouds - ! - allocate(lwp(ncol,nlay), iwp(ncol,nlay), & - rel(ncol,nlay), rei(ncol,nlay), cloud_mask(ncol,nlay)) - !$acc enter data create(cloud_mask, lwp, iwp, rel, rei) - - ! Restrict clouds to troposphere (> 100 hPa = 100*100 Pa) - ! and not very close to the ground (< 900 hPa), and - ! put them in 2/3 of the columns since that's roughly the - ! total cloudiness of earth - rel_val = 0.5 * (cloud_optics%get_min_radius_liq() + cloud_optics%get_max_radius_liq()) - rei_val = 0.5 * (cloud_optics%get_min_radius_ice() + cloud_optics%get_max_radius_ice()) - !$acc parallel loop collapse(2) copyin(t_lay) copyout(lwp, iwp, rel, rei) - do ilay=1,nlay - do icol=1,ncol - cloud_mask(icol,ilay) = p_lay(icol,ilay) > 100._wp * 100._wp .and. & - p_lay(icol,ilay) < 900._wp * 100._wp .and. & - mod(icol, 3) /= 0 - ! - ! Ice and liquid will overlap in a few layers - ! - lwp(icol,ilay) = merge(10._wp, 0._wp, cloud_mask(icol,ilay) .and. t_lay(icol,ilay) > 263._wp) - iwp(icol,ilay) = merge(10._wp, 0._wp, cloud_mask(icol,ilay) .and. t_lay(icol,ilay) < 273._wp) - rel(icol,ilay) = merge(rel_val, 0._wp, lwp(icol,ilay) > 0._wp) - rei(icol,ilay) = merge(rei_val, 0._wp, iwp(icol,ilay) > 0._wp) - end do - end do - !$acc exit data delete(cloud_mask) - ! ---------------------------------------------------------------------------- - ! - ! Multiple iterations for big problem sizes, and to help identify data movement - ! For CPUs we can introduce OpenMP threading over loop iterations - ! - allocate(elapsed(nloops)) - ! - call system_clock(start_all) - ! - !$omp parallel do firstprivate(fluxes) - do iloop = 1, nloops - call system_clock(start) - call stop_on_err( & - cloud_optics%cloud_optics(lwp, iwp, rel, rei, clouds)) - ! - ! Solvers - ! 
- fluxes%flux_up => flux_up(:,:) - fluxes%flux_dn => flux_dn(:,:) - if(is_lw) then - !$acc enter data create(lw_sources, lw_sources%lay_source, lw_sources%lev_source_inc, lw_sources%lev_source_dec, lw_sources%sfc_source) - call stop_on_err(k_dist%gas_optics(p_lay, p_lev, & - t_lay, t_sfc, & - gas_concs, & - atmos, & - lw_sources, & - tlev = t_lev)) - call stop_on_err(clouds%increment(atmos)) - call stop_on_err(rte_lw(atmos, top_at_1, & - lw_sources, & - emis_sfc, & - fluxes)) - !$acc exit data delete(lw_sources%lay_source, lw_sources%lev_source_inc, lw_sources%lev_source_dec, lw_sources%sfc_source, lw_sources) - else - !$acc enter data create(toa_flux) - fluxes%flux_dn_dir => flux_dir(:,:) - - call stop_on_err(k_dist%gas_optics(p_lay, p_lev, & - t_lay, & - gas_concs, & - atmos, & - toa_flux)) - call stop_on_err(clouds%delta_scale()) - call stop_on_err(clouds%increment(atmos)) - call stop_on_err(rte_sw(atmos, top_at_1, & - mu0, toa_flux, & - sfc_alb_dir, sfc_alb_dif, & - fluxes)) - !$acc exit data delete(toa_flux) - end if - !print *, "******************************************************************" - call system_clock(finish, clock_rate) - elapsed(iloop) = finish - start - end do - ! - call system_clock(finish_all, clock_rate) - ! 
- !$acc exit data delete(lwp, iwp, rel, rei) - !$acc exit data delete(p_lay, p_lev, t_lay, t_lev) - -#ifdef _OPENACC - avg = sum( elapsed(merge(2,1,nloops>1):) ) / real(merge(nloops-1,nloops,nloops>1)) - - print *, "Execution times - min(s) :", minval(elapsed) / real(clock_rate) - print *, " - avg(s) :", avg / real(clock_rate) - print *, " - per column(ms):", avg / real(ncol) / (1.0e-3*clock_rate) -#else - print *, "Execution times - total(s) :", (finish_all-start_all) / real(clock_rate) - print *, " - per column(ms):", (finish_all-start_all) / real(ncol*nloops) / (1.0e-3*clock_rate) -#endif - - if(is_lw) then - !$acc exit data copyout(flux_up, flux_dn) - if(write_fluxes) call write_lw_fluxes(input_file, flux_up, flux_dn) - !$acc exit data delete(t_sfc, emis_sfc) - else - !$acc exit data copyout(flux_up, flux_dn, flux_dir) - if(write_fluxes) call write_sw_fluxes(input_file, flux_up, flux_dn, flux_dir) - !$acc exit data delete(sfc_alb_dir, sfc_alb_dif, mu0) - end if - !$acc enter data create(lwp, iwp, rel, rei) -end program rte_rrtmgp_clouds +subroutine stop_on_err(error_msg) + use iso_fortran_env, only : error_unit + character(len=*), intent(in) :: error_msg + + if(error_msg /= "") then + write (error_unit,*) trim(error_msg) + write (error_unit,*) "rte_rrtmgp_clouds stopping" + error stop 1 + end if +end subroutine stop_on_err + +subroutine vmr_2d_to_1d(gas_concs, gas_concs_garand, name, sz1, sz2) + use mo_gas_concentrations, only: ty_gas_concs + use mo_rte_kind, only: wp + + type(ty_gas_concs), intent(in) :: gas_concs_garand + type(ty_gas_concs), intent(inout) :: gas_concs + character(len=*), intent(in) :: name + integer, intent(in) :: sz1, sz2 + + real(wp) :: tmp(sz1, sz2), tmp_col(sz2) + + !$acc data create(tmp, tmp_col) + !$omp target data map(alloc:tmp, tmp_col) + call stop_on_err(gas_concs_garand%get_vmr(name, tmp)) + !$acc kernels + !$omp target + tmp_col(:) = tmp(1, :) + !$acc end kernels + !$omp end target + + call stop_on_err(gas_concs%set_vmr (name, 
tmp_col)) + !$acc end data + !$omp end target data +end subroutine vmr_2d_to_1d +! ---------------------------------------------------------------------------------- +program rte_rrtmgp_clouds + use mo_rte_kind, only: wp, i8 + use mo_optical_props, only: ty_optical_props, & + ty_optical_props_arry, ty_optical_props_1scl, ty_optical_props_2str + use mo_gas_optics_rrtmgp, only: ty_gas_optics_rrtmgp + use mo_cloud_optics, only: ty_cloud_optics + use mo_gas_concentrations, only: ty_gas_concs + use mo_source_functions, only: ty_source_func_lw + use mo_fluxes, only: ty_fluxes_broadband + use mo_rte_lw, only: rte_lw + use mo_rte_sw, only: rte_sw + use mo_load_coefficients, only: load_and_init + use mo_load_cloud_coefficients, & + only: load_cld_lutcoeff, load_cld_padecoeff + use mo_garand_atmos_io, only: read_atmos, write_lw_fluxes, write_sw_fluxes + implicit none + ! ---------------------------------------------------------------------------------- + ! Variables + ! ---------------------------------------------------------------------------------- + ! Arrays: dimensions (col, lay) + real(wp), dimension(:,:), allocatable :: p_lay, t_lay, p_lev + real(wp), dimension(:,:), allocatable :: col_dry + real(wp), dimension(:,:), allocatable :: temp_array + + ! + ! Longwave only + ! + real(wp), dimension(:,:), allocatable :: t_lev + real(wp), dimension(:), allocatable :: t_sfc + real(wp), dimension(:,:), allocatable :: emis_sfc ! First dimension is band + ! + ! Shortwave only + ! + real(wp), dimension(:), allocatable :: mu0 + real(wp), dimension(:,:), allocatable :: sfc_alb_dir, sfc_alb_dif ! First dimension is band + ! + ! Source functions + ! + ! Longwave + type(ty_source_func_lw), save :: lw_sources + ! Shortwave + real(wp), dimension(:,:), allocatable, save :: toa_flux + ! + ! Clouds + ! + real(wp), allocatable, dimension(:,:) :: lwp, iwp, rel, rei + logical, allocatable, dimension(:,:) :: cloud_mask + ! + ! Output variables + ! 
+ real(wp), dimension(:,:), target, & + allocatable :: flux_up, flux_dn, flux_dir + ! + ! Derived types from the RTE and RRTMGP libraries + ! + type(ty_gas_optics_rrtmgp) :: k_dist + type(ty_cloud_optics) :: cloud_optics + type(ty_gas_concs) :: gas_concs, gas_concs_garand, gas_concs_1col + class(ty_optical_props_arry), & + allocatable :: atmos, clouds + type(ty_fluxes_broadband) :: fluxes + + ! + ! Inputs to RRTMGP + ! + logical :: top_at_1, is_sw, is_lw + + integer :: ncol, nlay, nbnd, ngpt + integer :: icol, ilay, ibnd, iloop, igas + real(wp) :: rel_val, rei_val + + character(len=8) :: char_input + integer :: nUserArgs=0, nloops + logical :: use_luts = .true., write_fluxes = .true. + integer, parameter :: ngas = 8 + character(len=3), dimension(ngas) & + :: gas_names = ['h2o', 'co2', 'o3 ', 'n2o', 'co ', 'ch4', 'o2 ', 'n2 '] + + character(len=256) :: input_file, k_dist_file, cloud_optics_file + ! + ! Timing variables + ! + integer(kind=i8) :: start, finish, start_all, finish_all, clock_rate + real(wp) :: avg + integer(kind=i8), allocatable :: elapsed(:) + ! NAR OpenMP CPU directives in compatible with OpenMP GPU directives + !!$omp threadprivate( lw_sources, toa_flux, flux_up, flux_dn, flux_dir ) + ! ---------------------------------------------------------------------------------- + ! Code + ! ---------------------------------------------------------------------------------- + ! + ! Parse command line for any file names, block size + ! + ! rrtmgp_clouds rrtmgp-clouds.nc $RRTMGP_ROOT/rrtmgp/data/rrtmgp-data-lw-g256-2018-12-04.nc $RRTMGP_ROOT/extensions/cloud_optics/rrtmgp-cloud-optics-coeffs-lw.nc 128 1 + ! 
rrtmgp_clouds rrtmgp-clouds.nc $RRTMGP_ROOT/rrtmgp/data/rrtmgp-data-sw-g224-2018-12-04.nc $RRTMGP_ROOT/extensions/cloud_optics/rrtmgp-cloud-optics-coeffs-sw.nc 128 1 + nUserArgs = command_argument_count() + nloops = 1 + if (nUserArgs < 4) call stop_on_err("Need to supply input_file k_distribution_file ncol.") + if (nUserArgs >= 1) call get_command_argument(1,input_file) + if (nUserArgs >= 2) call get_command_argument(2,k_dist_file) + if (nUserArgs >= 3) call get_command_argument(3,cloud_optics_file) + if (nUserArgs >= 4) then + call get_command_argument(4, char_input) + read(char_input, '(i8)') ncol + if(ncol <= 0) call stop_on_err("Specify positive ncol.") + end if + if (nUserArgs >= 5) then + call get_command_argument(5, char_input) + read(char_input, '(i8)') nloops + if(nloops <= 0) call stop_on_err("Specify positive nloops.") + end if + if (nUserArgs > 6) print *, "Ignoring command line arguments beyond the first five..." + if(trim(input_file) == '-h' .or. trim(input_file) == "--help") then + call stop_on_err("rrtmgp_clouds input_file absorption_coefficients_file cloud_optics_file ncol") + end if + ! + ! Read temperature, pressure, gas concentrations. + ! Arrays are allocated as they are read + ! + call read_atmos(input_file, & + p_lay, t_lay, p_lev, t_lev, & + gas_concs_garand, col_dry) + deallocate(col_dry) + nlay = size(p_lay, 2) + ! For clouds we'll use the first column, repeated over and over + call stop_on_err(gas_concs%init(gas_names)) + do igas = 1, ngas + call vmr_2d_to_1d(gas_concs, gas_concs_garand, gas_names(igas), size(p_lay, 1), nlay) + end do + ! 
If we trusted in Fortran allocate-on-assign we could skip the temp_array here + allocate(temp_array(ncol, nlay)) + temp_array = spread(p_lay(1,:), dim = 1, ncopies=ncol) + call move_alloc(temp_array, p_lay) + allocate(temp_array(ncol, nlay)) + temp_array = spread(t_lay(1,:), dim = 1, ncopies=ncol) + call move_alloc(temp_array, t_lay) + allocate(temp_array(ncol, nlay+1)) + temp_array = spread(p_lev(1,:), dim = 1, ncopies=ncol) + call move_alloc(temp_array, p_lev) + allocate(temp_array(ncol, nlay+1)) + temp_array = spread(t_lev(1,:), dim = 1, ncopies=ncol) + call move_alloc(temp_array, t_lev) + ! This puts pressure and temperature arrays on the GPU + !$acc enter data copyin(p_lay, p_lev, t_lay, t_lev) + !$omp target enter data map(to:p_lay, p_lev, t_lay, t_lev) + ! ---------------------------------------------------------------------------- + ! load data into classes + call load_and_init(k_dist, k_dist_file, gas_concs) + is_sw = k_dist%source_is_external() + is_lw = .not. is_sw + ! + ! Should also try with Pade calculations + ! call load_cld_padecoeff(cloud_optics, cloud_optics_file) + ! + if(use_luts) then + call load_cld_lutcoeff (cloud_optics, cloud_optics_file) + else + call load_cld_padecoeff(cloud_optics, cloud_optics_file) + end if + call stop_on_err(cloud_optics%set_ice_roughness(2)) + ! ---------------------------------------------------------------------------- + ! + ! Problem sizes + ! + nbnd = k_dist%get_nband() + ngpt = k_dist%get_ngpt() + top_at_1 = p_lay(1, 1) < p_lay(1, nlay) + + ! ---------------------------------------------------------------------------- + ! LW calculations neglect scattering; SW calculations use the 2-stream approximation + ! Here we choose the right variant of optical_props. + ! + if(is_sw) then + allocate(ty_optical_props_2str::atmos) + allocate(ty_optical_props_2str::clouds) + else + allocate(ty_optical_props_1scl::atmos) + allocate(ty_optical_props_1scl::clouds) + end if + ! 
Clouds optical props are defined by band + call stop_on_err(clouds%init(k_dist%get_band_lims_wavenumber())) + ! + ! Allocate arrays for the optical properties themselves. + ! + select type(atmos) + class is (ty_optical_props_1scl) + !$acc enter data copyin(atmos) + call stop_on_err(atmos%alloc_1scl(ncol, nlay, k_dist)) + !$acc enter data copyin(atmos) create(atmos%tau) + !$omp target enter data map(alloc:atmos%tau) + class is (ty_optical_props_2str) + call stop_on_err(atmos%alloc_2str( ncol, nlay, k_dist)) + !$acc enter data copyin(atmos) create(atmos%tau, atmos%ssa, atmos%g) + !$omp target enter data map(alloc:atmos%tau, atmos%ssa, atmos%g) + class default + call stop_on_err("rte_rrtmgp_clouds: Don't recognize the kind of optical properties ") + end select + select type(clouds) + class is (ty_optical_props_1scl) + call stop_on_err(clouds%alloc_1scl(ncol, nlay)) + !$acc enter data copyin(clouds) create(clouds%tau) + !$omp target enter data map(alloc:clouds%tau) + class is (ty_optical_props_2str) + call stop_on_err(clouds%alloc_2str(ncol, nlay)) + !$acc enter data copyin(clouds) create(clouds%tau, clouds%ssa, clouds%g) + !$omp target enter data map(alloc:clouds%tau, clouds%ssa, clouds%g) + class default + call stop_on_err("rte_rrtmgp_clouds: Don't recognize the kind of optical properties ") + end select + ! ---------------------------------------------------------------------------- + ! Boundary conditions depending on whether the k-distribution being supplied + ! is LW or SW + if(is_sw) then + ! toa_flux is threadprivate + !!$omp parallel + allocate(toa_flux(ncol, ngpt)) + !!$omp end parallel + ! + allocate(sfc_alb_dir(nbnd, ncol), sfc_alb_dif(nbnd, ncol), mu0(ncol)) + !$acc enter data create(sfc_alb_dir, sfc_alb_dif, mu0) + !$omp target enter data map(alloc:sfc_alb_dir, sfc_alb_dif, mu0) + ! 
Ocean-ish values for no particular reason + !$acc kernels + !$omp target + sfc_alb_dir = 0.06_wp + sfc_alb_dif = 0.06_wp + mu0 = .86_wp + !$acc end kernels + !$omp end target + else + ! lw_sorces is threadprivate + !!$omp parallel + call stop_on_err(lw_sources%alloc(ncol, nlay, k_dist)) + !!$omp end parallel + + allocate(t_sfc(ncol), emis_sfc(nbnd, ncol)) + !$acc enter data create(t_sfc, emis_sfc) + !$omp target enter data map(alloc:t_sfc, emis_sfc) + ! Surface temperature + !$acc kernels + !$omp target + t_sfc = t_lev(1, merge(nlay+1, 1, top_at_1)) + emis_sfc = 0.98_wp + !$acc end kernels + !$omp end target + end if + ! ---------------------------------------------------------------------------- + ! + ! Fluxes + ! + !!$omp parallel + allocate(flux_up(ncol,nlay+1), flux_dn(ncol,nlay+1)) + !!$omp end parallel + + !$acc enter data create(flux_up, flux_dn) + !$omp target enter data map(alloc:flux_up, flux_dn) + if(is_sw) then + allocate(flux_dir(ncol,nlay+1)) + !$acc enter data create(flux_dir) + !$omp target enter data map(alloc:flux_dir) + end if + ! + ! Clouds + ! + allocate(lwp(ncol,nlay), iwp(ncol,nlay), & + rel(ncol,nlay), rei(ncol,nlay), cloud_mask(ncol,nlay)) + !$acc enter data create(cloud_mask, lwp, iwp, rel, rei) + !$omp target enter data map(alloc:cloud_mask, lwp, iwp, rel, rei) + + ! Restrict clouds to troposphere (> 100 hPa = 100*100 Pa) + ! and not very close to the ground (< 900 hPa), and + ! put them in 2/3 of the columns since that's roughly the + ! total cloudiness of earth + rel_val = 0.5 * (cloud_optics%get_min_radius_liq() + cloud_optics%get_max_radius_liq()) + rei_val = 0.5 * (cloud_optics%get_min_radius_ice() + cloud_optics%get_max_radius_ice()) + !$acc parallel loop collapse(2) copyin(t_lay) copyout(lwp, iwp, rel, rei) + !$omp target teams distribute parallel do simd collapse(2) map(to:t_lay) map(from:lwp, iwp, rel, rei) + do ilay=1,nlay + do icol=1,ncol + cloud_mask(icol,ilay) = p_lay(icol,ilay) > 100._wp * 100._wp .and. 
& + p_lay(icol,ilay) < 900._wp * 100._wp .and. & + mod(icol, 3) /= 0 + ! + ! Ice and liquid will overlap in a few layers + ! + lwp(icol,ilay) = merge(10._wp, 0._wp, cloud_mask(icol,ilay) .and. t_lay(icol,ilay) > 263._wp) + iwp(icol,ilay) = merge(10._wp, 0._wp, cloud_mask(icol,ilay) .and. t_lay(icol,ilay) < 273._wp) + rel(icol,ilay) = merge(rel_val, 0._wp, lwp(icol,ilay) > 0._wp) + rei(icol,ilay) = merge(rei_val, 0._wp, iwp(icol,ilay) > 0._wp) + end do + end do + !$acc exit data delete(cloud_mask) + !$omp target exit data map(release:cloud_mask) + ! ---------------------------------------------------------------------------- + ! + ! Multiple iterations for big problem sizes, and to help identify data movement + ! For CPUs we can introduce OpenMP threading over loop iterations + ! + allocate(elapsed(nloops)) + ! + call system_clock(start_all) + ! + !!$omp parallel do firstprivate(fluxes) + do iloop = 1, nloops + call system_clock(start) + call stop_on_err( & + cloud_optics%cloud_optics(lwp, iwp, rel, rei, clouds)) + ! + ! Solvers + ! 
+ fluxes%flux_up => flux_up(:,:) + fluxes%flux_dn => flux_dn(:,:) + if(is_lw) then + !$acc enter data create(lw_sources, lw_sources%lay_source, lw_sources%lev_source_inc, lw_sources%lev_source_dec, lw_sources%sfc_source) + !$omp target enter data map(alloc:lw_sources%lay_source, lw_sources%lev_source_inc, lw_sources%lev_source_dec, lw_sources%sfc_source) + call stop_on_err(k_dist%gas_optics(p_lay, p_lev, & + t_lay, t_sfc, & + gas_concs, & + atmos, & + lw_sources, & + tlev = t_lev)) + call stop_on_err(clouds%increment(atmos)) + call stop_on_err(rte_lw(atmos, top_at_1, & + lw_sources, & + emis_sfc, & + fluxes)) + !$acc exit data delete(lw_sources%lay_source, lw_sources%lev_source_inc, lw_sources%lev_source_dec, lw_sources%sfc_source, lw_sources) + !$omp target exit data map(release:lw_sources%lay_source, lw_sources%lev_source_inc, lw_sources%lev_source_dec, lw_sources%sfc_source) + else + !$acc enter data create(toa_flux) + !$omp target enter data map(alloc:toa_flux) + fluxes%flux_dn_dir => flux_dir(:,:) + + call stop_on_err(k_dist%gas_optics(p_lay, p_lev, & + t_lay, & + gas_concs, & + atmos, & + toa_flux)) + call stop_on_err(clouds%delta_scale()) + call stop_on_err(clouds%increment(atmos)) + call stop_on_err(rte_sw(atmos, top_at_1, & + mu0, toa_flux, & + sfc_alb_dir, sfc_alb_dif, & + fluxes)) + !$acc exit data delete(toa_flux) + !$omp target exit data map(release:toa_flux) + end if + !print *, "******************************************************************" + call system_clock(finish, clock_rate) + elapsed(iloop) = finish - start + end do + ! + call system_clock(finish_all, clock_rate) + ! 
+ !$acc exit data delete(lwp, iwp, rel, rei) + !$omp target exit data map(release:lwp, iwp, rel, rei) + !$acc exit data delete(p_lay, p_lev, t_lay, t_lev) + !$omp target exit data map(release:p_lay, p_lev, t_lay, t_lev) + +#if defined(_OPENACC) || defined(_OPENMP) + avg = sum( elapsed(merge(2,1,nloops>1):) ) / real(merge(nloops-1,nloops,nloops>1)) + + print *, "Execution times - min(s) :", minval(elapsed) / real(clock_rate) + print *, " - avg(s) :", avg / real(clock_rate) + print *, " - per column(ms):", avg / real(ncol) / (1.0e-3*clock_rate) +#else + print *, "Execution times - total(s) :", (finish_all-start_all) / real(clock_rate) + print *, " - per column(ms):", (finish_all-start_all) / real(ncol*nloops) / (1.0e-3*clock_rate) +#endif + + if(is_lw) then + !$acc exit data copyout(flux_up, flux_dn) + !$omp target exit data map(from:flux_up, flux_dn) + if(write_fluxes) call write_lw_fluxes(input_file, flux_up, flux_dn) + !$acc exit data delete(t_sfc, emis_sfc) + !$omp target exit data map(release:t_sfc, emis_sfc) + else + !$acc exit data copyout(flux_up, flux_dn, flux_dir) + !$omp target exit data map(from:flux_up, flux_dn, flux_dir) + if(write_fluxes) call write_sw_fluxes(input_file, flux_up, flux_dn, flux_dir) + !$acc exit data delete(sfc_alb_dir, sfc_alb_dif, mu0) + !$omp target exit data map(release:sfc_alb_dir, sfc_alb_dif, mu0) + end if + !$acc enter data create(lwp, iwp, rel, rei) + !$omp target enter data map(alloc:lwp, iwp, rel, rei) +end program rte_rrtmgp_clouds diff --git a/examples/rfmip-clear-sky/Makefile b/examples/rfmip-clear-sky/Makefile index 74d59c44c..500f29966 100644 --- a/examples/rfmip-clear-sky/Makefile +++ b/examples/rfmip-clear-sky/Makefile @@ -1,40 +1,35 @@ # -# Here set variables RRTMGP_BUILD, NCHOME, NFHOME, TIME_DIR (for GPTL) -# or have those variables set in the environment +# Location of RTE+RRTMGP libraries, module files. 
# -RRTMGP_BUILD=$(RRTMGP_ROOT)/build --include Makefile.libs --include $(RRTMGP_BUILD)/Makefile.conf +RRTMGP_BUILD = $(RRTMGP_ROOT)/build # # RRTMGP library, module files # -LDFLAGS += -L$(RRTMGP_BUILD) -LIBS += -lrrtmgp -lrte +# LDFLAGS += -L$(RRTMGP_BUILD) +# LIBS += -lrrtmgp -lrte FCINCLUDE += -I$(RRTMGP_BUILD) # # netcdf library, module files -# C and Fortran interfaces respectively +# Environment variables NCHOME and NFHOME point to root of C and Fortran interfaces respectively - # FCINCLUDE += -I$(NFHOME)/include LDFLAGS += -L$(NFHOME)/lib -L$(NCHOME)/lib LIBS += -lnetcdff -lnetcdf # -# Setting macro TIMING=yes uses routines from the General Purpose Timing Library -# https://jmrosinski.github.io/GPTL/ +# General Purpose Timing Library https://jmrosinski.github.io/GPTL/ +# Set environment variable GPTL_DIR to the root of a GPTL installation to build +# the RFMIP example with timers # -TIMING=no -# Compiler specific - e.g. turn off for pgfortran, set to -cpp for gfortran -#FCFLAGS += -fpp -ifeq ($(TIMING),yes) +ifneq ($(origin GPTL_DIR),undefined) # # Timing library # - FCINCLUDE += -I$(TIME_DIR)/include + FCINCLUDE += -I$(GPTL_DIR)/include # Compiler specific - FCFLAGS += -DUSE_TIMING - LDFLAGS += -L$(TIME_DIR)/lib + FCFLAGS += -DUSE_TIMING + LDFLAGS += -L$(GPTL_DIR)/lib LIBS += -lgptl endif @@ -58,7 +53,7 @@ rrtmgp_rfmip_lw: rrtmgp_rfmip_lw.o $(ADDITIONS) $(RRTMGP_BUILD)/librte.a $(R rrtmgp_rfmip_lw.o: rrtmgp_rfmip_lw.F90 $(ADDITIONS) -rrtmgp_rfmip_sw: rrtmgp_rfmip_sw.o $(ADDITIONS) $(RRTMGP_BUILD)/librte.a $(RRTMGP_BUILD)/librrtmgp.a +rrtmgp_rfmip_sw: rrtmgp_rfmip_sw.o $(ADDITIONS) $(RRTMGP_BUILD)/librrtmgp.a $(RRTMGP_BUILD)/librte.a rrtmgp_rfmip_sw.o: rrtmgp_rfmip_sw.F90 $(ADDITIONS) @@ -66,5 +61,16 @@ mo_rfmip_io.o: mo_rfmip_io.F90 mo_simple_netcdf.o mo_load_coefficients.o: mo_load_coefficients.F90 mo_simple_netcdf.o +tests: multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc # Also the template files + # Files need to have been 
generated/downloaded before + $(RUN_CMD) ./rrtmgp_rfmip_lw 8 multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-lw-g256-2018-12-04.nc + $(RUN_CMD) ./rrtmgp_rfmip_sw 8 multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-sw-g224-2018-12-04.nc + +check: + cd ${RRTMGP_ROOT}/examples/rfmip-clear-sky && python ./compare-to-reference.py --fail=7.e-4 + +multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc: + bash ./stage_files.sh + clean: - -rm rrtmgp_rfmip_sw rrtmgp_rfmip_lw *.o *.mod *.optrpt + -rm rrtmgp_rfmip_sw rrtmgp_rfmip_lw *.o *.mod *.optrpt *.nc diff --git a/examples/rfmip-clear-sky/Makefile.libs.macos b/examples/rfmip-clear-sky/Makefile.libs.macos deleted file mode 100644 index 465cdd3aa..000000000 --- a/examples/rfmip-clear-sky/Makefile.libs.macos +++ /dev/null @@ -1,10 +0,0 @@ -# Location of RTE+RRTMGP libraries, module files. -export RRTMGP_BUILD = $(RRTMGP_ROOT)/build -# Sets macros FC, FCFLAGS consistent with RTE+RRTMGP - -# NetCDF C and Fortran libraries, module files -export NCHOME = /opt/local -export NFHOME = $(HOME)/Applications/$(FC) - -# GPTL libraries and module files if desired (https://jmrosinski.github.io/GPTL/) -export TIME_DIR = $(HOME)/Codes/GPTL/gptl-v5.5.3/macos diff --git a/examples/rfmip-clear-sky/Makefile.libs.olcf b/examples/rfmip-clear-sky/Makefile.libs.olcf deleted file mode 100644 index 38ebdc75c..000000000 --- a/examples/rfmip-clear-sky/Makefile.libs.olcf +++ /dev/null @@ -1,10 +0,0 @@ -# Location of RTE+RRTMGP libraries, module files. 
-export RRTMGP_BUILD = ../../build -# Sets macros FC, FCFLAGS consistent with RTE+RRTMGP - -# NetCDF C and Fortran libraries, module files -NCHOME = $(OLCF_NETCDF_ROOT) -NFHOME = $(OLCF_NETCDF_FORTRAN_ROOT) - -# GPTL libraries and module files if desired (https://jmrosinski.github.io/GPTL/) -# export TIME_DIR diff --git a/examples/rfmip-clear-sky/Makefile.libs.pgfortran-cscs b/examples/rfmip-clear-sky/Makefile.libs.pgfortran-cscs deleted file mode 100644 index 3bd5a213b..000000000 --- a/examples/rfmip-clear-sky/Makefile.libs.pgfortran-cscs +++ /dev/null @@ -1,20 +0,0 @@ -# Load the following modules and set the library path -# -# module load cdt/19.06 -# module swap PrgEnv-cray PrgEnv-pgi -# module load cray-netcdf cray-hdf5 -# module load craype-accel-nvidia60 -# module unload cray-libsci_acc -# export LD_LIBRARY_PATH=$CRAY_LD_LIBRARY_PATH:$LD_LIBRARY_PATH - -export FC = ftn -export FCFLAGS = -g -Minfo -Mbounds -Mchkptr -Mstandard -Kieee -Mchkstk -Mipa=fast,inline -Mallocatable=03 - -# Where to find Fortran .mod files -export FCINCLUDE = - -# Libraries including netCDF and Fortran interface to netCDF -export LDFLAGS = - -#export LIBS = -lnetcdff -lnetcdf # Not necessary if ftn wrapper is used -export LIBS = diff --git a/examples/rfmip-clear-sky/Readme.md b/examples/rfmip-clear-sky/Readme.md index a2d2323e6..511bb315c 100644 --- a/examples/rfmip-clear-sky/Readme.md +++ b/examples/rfmip-clear-sky/Readme.md @@ -4,11 +4,11 @@ the [RTE+RRTMGP](https://github.com/RobertPincus/rte-rrtmgp) radiation parameter [RFMIP](https://www.earthsystemcog.org/projects/rfmip/) cases. 1. Build the RTE+RRTMGP libraries in `../../build/`. This will require setting -environmental variables `FC` for the Fortran compiler and `FCFLAGS`, or creating -`../../build/Makefile.conf` with that information. +environmental variables `FC` for the Fortran compiler and `FCFLAGS`. 2. 
Build the executables in this directory, which will require providing the locations of the netCDF C and Fortran libraries and module files as environmental -variables (NCHOME and NFHOME) or via file `Makefile.libs` +variables `NCHOME` and `NFHOME`, as well a variable `RRTMGP_ROOT` pointing to the root of the installation +(the absolute path to `../../`). 3. Use Python script `stage_files.py` to download relevant files from the [RFMIP web site](https://www.earthsystemcog.org/projects/rfmip/resources/).This script invokes another Python script to create empty output files. 4. Use Python script `run-rfmip-examples.py` to run the examples. The script takes diff --git a/examples/rfmip-clear-sky/compare-to-reference.py b/examples/rfmip-clear-sky/compare-to-reference.py index f1b87e66c..a8f104db0 100755 --- a/examples/rfmip-clear-sky/compare-to-reference.py +++ b/examples/rfmip-clear-sky/compare-to-reference.py @@ -12,11 +12,23 @@ tst_dir = "." rrtmgp_suffix = "_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc" +def construct_esgf_remote_name(var): + # + # For a given variable name, provide the OpenDAP URL for the RTE+RRTMGP RFMIP results + # This doesn't seem to work on CSCS Piz Daint within the netcdf-python module + # + esgf_url_base = "http://esgf3.dkrz.de/thredds/dodsC/cmip6/RFMIP/RTE-RRTMGP-Consortium/RTE-RRTMGP-181204/rad-irf/r1i1p1f1/Efx/" + # DKRZ server has been unstable - better to try the other if one fails + esgf_url_base = "http://esgf-data1.llnl.gov/thredds/dodsC/css03_data/CMIP6/RFMIP/RTE-RRTMGP-Consortium/RTE-RRTMGP-181204/rad-irf/r1i1p1f1/Efx/" + esgf_url_ver = "gn/v20191007/" + return(os.path.join(esgf_url_base, var, esgf_url_ver, var+rrtmgp_suffix)) # # Construct URL for RTE+RRTMGP results for RFMIP from ESGF # def construct_esgf_file(var): esgf_url_base = "http://esgf3.dkrz.de/thredds/fileServer/cmip6/RFMIP/RTE-RRTMGP-Consortium/RTE-RRTMGP-181204/rad-irf/r1i1p1f1/Efx/" + # DKRZ node goes down frequently + esgf_url_base = 
"http://esgf-data1.llnl.gov/thredds/fileServer/css03_data/CMIP6/RFMIP/RTE-RRTMGP-Consortium/RTE-RRTMGP-181204/rad-irf/r1i1p1f1/Efx/" esgf_url_ver = "gn/v20191007/" return (os.path.join(esgf_url_base, var, esgf_url_ver, var+rrtmgp_suffix)) # diff --git a/examples/rfmip-clear-sky/rrtmgp_rfmip_lw.F90 b/examples/rfmip-clear-sky/rrtmgp_rfmip_lw.F90 index 5c43e5d1d..b41fd82fc 100644 --- a/examples/rfmip-clear-sky/rrtmgp_rfmip_lw.F90 +++ b/examples/rfmip-clear-sky/rrtmgp_rfmip_lw.F90 @@ -29,7 +29,7 @@ subroutine stop_on_err(error_msg) if(error_msg /= "") then write (error_unit,*) trim(error_msg) write (error_unit,*) "rrtmgp_rfmip_lw stopping" - stop + error stop 1 end if end subroutine stop_on_err ! ------------------------------------------------------------------------------------------------- @@ -226,8 +226,11 @@ program rrtmgp_rfmip_lw ! bug related to the use of Fortran classes on the GPU. ! !$acc enter data create(sfc_emis_spec) + !$omp target enter data map(alloc:sfc_emis_spec) !$acc enter data create(optical_props, optical_props%tau) + !$omp target enter data map(alloc:optical_props%tau) !$acc enter data create(source, source%lay_source, source%lev_source_inc, source%lev_source_dec, source%sfc_source) + !$omp target enter data map(alloc:source%lay_source, source%lev_source_inc, source%lev_source_dec, source%sfc_source) ! -------------------------------------------------- #ifdef USE_TIMING ! @@ -241,7 +244,7 @@ program rrtmgp_rfmip_lw ! Loop over blocks ! #ifdef USE_TIMING - do i = 1, 32 + do i = 1, 4 #endif do b = 1, nblocks fluxes%flux_up => flux_up(:,:,b) @@ -251,6 +254,7 @@ program rrtmgp_rfmip_lw ! (This is partly to show how to keep work on GPUs using OpenACC) ! 
!$acc parallel loop collapse(2) copyin(sfc_emis) + !$omp target teams distribute parallel do simd collapse(2) map(to:sfc_emis) do icol = 1, block_size do ibnd = 1, nbnd sfc_emis_spec(ibnd,icol) = sfc_emis(icol,b) @@ -299,8 +303,11 @@ program rrtmgp_rfmip_lw ret = gptlfinalize() #endif !$acc exit data delete(sfc_emis_spec) + !$omp target exit data map(release:sfc_emis_spec) !$acc exit data delete(optical_props%tau, optical_props) + !$omp target exit data map(release:optical_props%tau) !$acc exit data delete(source%lay_source, source%lev_source_inc, source%lev_source_dec, source%sfc_source) + !$omp target exit data map(release:source%lay_source, source%lev_source_inc, source%lev_source_dec, source%sfc_source) !$acc exit data delete(source) ! --------------------------------------------------m call unblock_and_write(trim(flxup_file), 'rlu', flux_up) diff --git a/examples/rfmip-clear-sky/rrtmgp_rfmip_sw.F90 b/examples/rfmip-clear-sky/rrtmgp_rfmip_sw.F90 index 3c01f8dda..52215ade3 100644 --- a/examples/rfmip-clear-sky/rrtmgp_rfmip_sw.F90 +++ b/examples/rfmip-clear-sky/rrtmgp_rfmip_sw.F90 @@ -29,7 +29,7 @@ subroutine stop_on_err(error_msg) if(error_msg /= "") then write (error_unit,*) trim(error_msg) write (error_unit,*) "rrtmgp_rfmip_sw stopping" - stop + error stop 1 end if end subroutine stop_on_err ! ------------------------------------------------------------------------------------------------- @@ -229,8 +229,11 @@ program rrtmgp_rfmip_sw allocate(mu0(block_size), sfc_alb_spec(nbnd,block_size)) call stop_on_err(optical_props%alloc_2str(block_size, nlay, k_dist)) !$acc enter data create(optical_props, optical_props%tau, optical_props%ssa, optical_props%g) + !$omp target enter data map(alloc:optical_props%tau, optical_props%ssa, optical_props%g) !$acc enter data create (toa_flux, def_tsi) + !$omp target enter data map(alloc:toa_flux, def_tsi) !$acc enter data create (sfc_alb_spec, mu0) + !$omp target enter data map(alloc:sfc_alb_spec, mu0) ! 
-------------------------------------------------- #ifdef USE_TIMING ! @@ -244,7 +247,7 @@ program rrtmgp_rfmip_sw ! Loop over blocks ! #ifdef USE_TIMING - do i = 1, 32 + do i = 1, 4 #endif do b = 1, nblocks fluxes%flux_up => flux_up(:,:,b) @@ -269,12 +272,14 @@ program rrtmgp_rfmip_sw ! (This is partly to show how to keep work on GPUs using OpenACC in a host application) ! What's the total solar irradiance assumed by RRTMGP? ! -#ifdef _OPENACC +#if defined(_OPENACC) || defined(_OPENMP) call zero_array(block_size, def_tsi) !$acc parallel loop collapse(2) copy(def_tsi) copyin(toa_flux) + !$omp target teams distribute parallel do simd collapse(2) map(tofrom:def_tsi) map(to:toa_flux) do igpt = 1, ngpt do icol = 1, block_size !$acc atomic update + !$omp atomic update def_tsi(icol) = def_tsi(icol) + toa_flux(icol, igpt) end do end do @@ -288,6 +293,7 @@ program rrtmgp_rfmip_sw ! Normalize incoming solar flux to match RFMIP specification ! !$acc parallel loop collapse(2) copyin(total_solar_irradiance, def_tsi) copy(toa_flux) + !$omp target teams distribute parallel do simd collapse(2) map(to:total_solar_irradiance, def_tsi) map(tofrom:toa_flux) do igpt = 1, ngpt do icol = 1, block_size toa_flux(icol,igpt) = toa_flux(icol,igpt) * total_solar_irradiance(icol,b)/def_tsi(icol) @@ -297,6 +303,7 @@ program rrtmgp_rfmip_sw ! Expand the spectrally-constant surface albedo to a per-band albedo for each column ! !$acc parallel loop collapse(2) copyin(surface_albedo) + !$omp target teams distribute parallel do simd collapse(2) map(to:surface_albedo) do icol = 1, block_size do ibnd = 1, nbnd sfc_alb_spec(ibnd,icol) = surface_albedo(icol,b) @@ -306,6 +313,7 @@ program rrtmgp_rfmip_sw ! Cosine of the solar zenith angle ! 
!$acc parallel loop copyin(solar_zenith_angle, usecol) + !$omp target teams distribute parallel do simd map(to:solar_zenith_angle, usecol) do icol = 1, block_size mu0(icol) = merge(cos(solar_zenith_angle(icol,b)*deg_to_rad), 1._wp, usecol(icol,b)) end do @@ -346,8 +354,11 @@ program rrtmgp_rfmip_sw ret = gptlfinalize() #endif !$acc exit data delete(optical_props%tau, optical_props%ssa, optical_props%g, optical_props) + !$omp target exit data map(release:optical_props%tau, optical_props%ssa, optical_props%g) !$acc exit data delete(sfc_alb_spec, mu0) + !$omp target exit data map(release:sfc_alb_spec, mu0) !$acc exit data delete(toa_flux, def_tsi) + !$omp target exit data map(release:toa_flux, def_tsi) ! -------------------------------------------------- call unblock_and_write(trim(flxup_file), 'rsu', flux_up) call unblock_and_write(trim(flxdn_file), 'rsd', flux_dn) diff --git a/examples/rfmip-clear-sky/stage_files.sh b/examples/rfmip-clear-sky/stage_files.sh new file mode 100644 index 000000000..53ed9318e --- /dev/null +++ b/examples/rfmip-clear-sky/stage_files.sh @@ -0,0 +1,5 @@ +wget https://psl.noaa.gov/thredds/fileServer/Datasets/rte-rrtmgp/continuous-integration/multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc +wget https://psl.noaa.gov/thredds/fileServer/Datasets/rte-rrtmgp/continuous-integration/rld_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc +wget https://psl.noaa.gov/thredds/fileServer/Datasets/rte-rrtmgp/continuous-integration/rlu_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc +wget https://psl.noaa.gov/thredds/fileServer/Datasets/rte-rrtmgp/continuous-integration/rsd_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc +wget https://psl.noaa.gov/thredds/fileServer/Datasets/rte-rrtmgp/continuous-integration/rsu_Efx_RTE-RRTMGP-181204_rad-irf_r1i1p1f1_gn.nc diff --git a/extensions/cloud_optics/mo_cloud_optics.F90 b/extensions/cloud_optics/mo_cloud_optics.F90 index bd812ed88..3e4309eb7 100644 --- a/extensions/cloud_optics/mo_cloud_optics.F90 +++ 
b/extensions/cloud_optics/mo_cloud_optics.F90 @@ -151,6 +151,9 @@ function load_lut(this, band_lims_wvn, & !$acc enter data create(this) & !$acc create(this%lut_extliq, this%lut_ssaliq, this%lut_asyliq) & !$acc create(this%lut_extice, this%lut_ssaice, this%lut_asyice) + !$omp target enter data & + !$omp map(alloc:this%lut_extliq, this%lut_ssaliq, this%lut_asyliq) & + !$omp map(alloc:this%lut_extice, this%lut_ssaice, this%lut_asyice) ! Load LUT constants this%radliq_lwr = radliq_lwr this%radliq_upr = radliq_upr @@ -159,6 +162,7 @@ function load_lut(this, band_lims_wvn, & ! Load LUT coefficients !$acc kernels + !$omp target this%lut_extliq = lut_extliq this%lut_ssaliq = lut_ssaliq this%lut_asyliq = lut_asyliq @@ -166,6 +170,7 @@ function load_lut(this, band_lims_wvn, & this%lut_ssaice = lut_ssaice this%lut_asyice = lut_asyice !$acc end kernels + !$omp end target ! ! Set default ice roughness - min values ! @@ -277,10 +282,16 @@ function load_pade(this, band_lims_wvn, & !$acc create(this%pade_extice, this%pade_ssaice, this%pade_asyice) & !$acc create(this%pade_sizreg_extliq, this%pade_sizreg_ssaliq, this%pade_sizreg_asyliq) & !$acc create(this%pade_sizreg_extice, this%pade_sizreg_ssaice, this%pade_sizreg_asyice) + !$omp target enter data & + !$omp map(alloc:this%pade_extliq, this%pade_ssaliq, this%pade_asyliq) & + !$omp map(alloc:this%pade_extice, this%pade_ssaice, this%pade_asyice) & + !$omp map(alloc:this%pade_sizreg_extliq, this%pade_sizreg_ssaliq, this%pade_sizreg_asyliq) & + !$omp map(alloc:this%pade_sizreg_extice, this%pade_sizreg_ssaice, this%pade_sizreg_asyice) ! ! Load data ! !$acc kernels + !$omp target this%pade_extliq = pade_extliq this%pade_ssaliq = pade_ssaliq this%pade_asyliq = pade_asyliq @@ -294,6 +305,7 @@ function load_pade(this, band_lims_wvn, & this%pade_sizreg_ssaice = pade_sizreg_ssaice this%pade_sizreg_asyice = pade_sizreg_asyice !$acc end kernels + !$omp end target ! ! Set default ice roughness - min values ! 
@@ -318,6 +330,8 @@ subroutine finalize(this) !$acc exit data delete(this%lut_extliq, this%lut_ssaliq, this%lut_asyliq) & !$acc delete(this%lut_extice, this%lut_ssaice, this%lut_asyice) & !$acc delete(this) + !$omp target exit data map(release:this%lut_extliq, this%lut_ssaliq, this%lut_asyliq) & + !$omp map(release:this%lut_extice, this%lut_ssaice, this%lut_asyice) deallocate(this%lut_extliq, this%lut_ssaliq, this%lut_asyliq, & @@ -336,6 +350,10 @@ subroutine finalize(this) !$acc delete(this%pade_sizreg_extliq, this%pade_sizreg_ssaliq, this%pade_sizreg_asyliq) & !$acc delete(this%pade_sizreg_extice, this%pade_sizreg_ssaice, this%pade_sizreg_asyice) & !$acc delete(this) + !$omp target exit data map(release:this%pade_extliq, this%pade_ssaliq, this%pade_asyliq) & + !$omp map(release:this%pade_extice, this%pade_ssaice, this%pade_asyice) & + !$omp map(release:this%pade_sizreg_extliq, this%pade_sizreg_ssaliq, this%pade_sizreg_asyliq) & + !$omp map(release:this%pade_sizreg_extice, this%pade_sizreg_ssaice, this%pade_sizreg_asyice) deallocate(this%pade_extliq, this%pade_ssaliq, this%pade_asyliq, & this%pade_extice, this%pade_ssaice, this%pade_asyice, & @@ -420,10 +438,14 @@ function cloud_optics(this, & !$acc data copyin(clwp, ciwp, reliq, reice) & !$acc create(ltau, ltaussa, ltaussag, itau, itaussa, itaussag) & !$acc create(liqmsk,icemsk) + !$omp target data map(to:clwp, ciwp, reliq, reice) & + !$omp map(alloc:ltau, ltaussa, ltaussag, itau, itaussa, itaussag) & + !$omp map(alloc:liqmsk, icemsk) ! ! Cloud masks; don't need value re values if there's no cloud ! !$acc parallel loop gang vector default(none) collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do ilay = 1, nlay do icol = 1, ncol liqmsk(icol,ilay) = clwp(icol,ilay) > 0._wp @@ -441,8 +463,8 @@ function cloud_optics(this, & error_msg = 'cloud optics: ice effective radius is out of bounds' if(any_vals_less_than(clwp, liqmsk, 0._wp) .or. 
any_vals_less_than(ciwp, icemsk, 0._wp)) & error_msg = 'cloud optics: negative clwp or ciwp where clouds are supposed to be' - if(error_msg == "") then end if + if(error_msg == "") then ! ! ! ---------------------------------------- @@ -500,6 +522,8 @@ function cloud_optics(this, & type is (ty_optical_props_1scl) !$acc parallel loop gang vector default(none) collapse(3) & !$acc copyin(optical_props) copyout(optical_props%tau) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp map(from:optical_props%tau) do ibnd = 1, nbnd do ilay = 1, nlay @@ -513,6 +537,8 @@ function cloud_optics(this, & type is (ty_optical_props_2str) !$acc parallel loop gang vector default(none) collapse(3) & !$acc copyin(optical_props) copyout(optical_props%tau, optical_props%ssa, optical_props%g) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp map(from:optical_props%tau, optical_props%ssa, optical_props%g) do ibnd = 1, nbnd do ilay = 1, nlay do icol = 1,ncol @@ -528,9 +554,9 @@ function cloud_optics(this, & type is (ty_optical_props_nstr) error_msg = "cloud optics: n-stream calculations not yet supported" end select - - end if ! error_msg == "" + end if !$acc end data + !$omp end target data end function cloud_optics !-------------------------------------------------------------------------------------------------------------------- ! @@ -616,6 +642,7 @@ subroutine compute_all_from_table(ncol, nlay, nbnd, mask, lwp, re, & real(wp) :: t, ts, tsg ! tau, tau*ssa, tau*ssa*g ! 
--------------------------- !$acc parallel loop gang vector default(present) collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do ibnd = 1, nbnd do ilay = 1,nlay do icol = 1, ncol @@ -670,6 +697,7 @@ subroutine compute_all_from_pade(ncol, nlay, nbnd, nsizes, & real(wp) :: t, ts !$acc parallel loop gang vector default(present) collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do ibnd = 1, nbnd do ilay = 1, nlay do icol = 1, ncol @@ -742,6 +770,7 @@ end function pade_eval_nbnd ! function pade_eval_1(iband, nbnd, nrads, m, n, irad, re, pade_coeffs) !$acc routine seq + !$omp declare target ! integer, intent(in) :: iband, nbnd, nrads, m, n, irad real(wp), dimension(nbnd, nrads, 0:m+n), & diff --git a/extensions/mo_fluxes_byband.F90 b/extensions/mo_fluxes_byband.F90 index c330f63a6..a83261e0e 100644 --- a/extensions/mo_fluxes_byband.F90 +++ b/extensions/mo_fluxes_byband.F90 @@ -98,6 +98,7 @@ function reduce_byband(this, gpt_flux_up, gpt_flux_dn, spectral_disc, top_at_1, ! ------- !$acc enter data copyin(band_lims) + !$omp target enter data map(to:band_lims) ! Band-by-band fluxes ! Up flux if(associated(this%bnd_flux_up)) then @@ -128,6 +129,7 @@ function reduce_byband(this, gpt_flux_up, gpt_flux_dn, spectral_disc, top_at_1, end if end if !$acc exit data delete(band_lims) + !$omp target exit data map(release:band_lims) end function reduce_byband ! -------------------------------------------------------------------------------------- ! Are any fluxes desired from this set of g-point fluxes? We can tell because memory will diff --git a/extensions/mo_fluxes_byband_kernels.F90 b/extensions/mo_fluxes_byband_kernels.F90 index 0e69e46e5..708853aa4 100644 --- a/extensions/mo_fluxes_byband_kernels.F90 +++ b/extensions/mo_fluxes_byband_kernels.F90 @@ -28,7 +28,7 @@ module mo_fluxes_byband_kernels ! ! Spectral reduction over all points ! 
- pure subroutine sum_byband(ncol, nlev, ngpt, nbnd, band_lims, spectral_flux, byband_flux) bind (C) + subroutine sum_byband(ncol, nlev, ngpt, nbnd, band_lims, spectral_flux, byband_flux) bind (C) integer, intent(in ) :: ncol, nlev, ngpt, nbnd integer, dimension(2, nbnd), intent(in ) :: band_lims real(wp), dimension(ncol, nlev, ngpt), intent(in ) :: spectral_flux @@ -36,6 +36,7 @@ pure subroutine sum_byband(ncol, nlev, ngpt, nbnd, band_lims, spectral_flux, byb integer :: icol, ilev, igpt, ibnd !$acc parallel loop collapse(3) copyin(spectral_flux, band_lims) copyout(byband_flux) + !$omp target teams distribute parallel do collapse(3) map(to:spectral_flux, band_lims) map(from:byband_flux) do ibnd = 1, nbnd do ilev = 1, nlev do icol = 1, ncol @@ -52,7 +53,7 @@ end subroutine sum_byband ! ! Net flux: Spectral reduction over all points ! - pure subroutine net_byband_full(ncol, nlev, ngpt, nbnd, band_lims, spectral_flux_dn, spectral_flux_up, byband_flux_net) bind (C) + subroutine net_byband_full(ncol, nlev, ngpt, nbnd, band_lims, spectral_flux_dn, spectral_flux_up, byband_flux_net) bind (C) integer, intent(in ) :: ncol, nlev, ngpt, nbnd integer, dimension(2, nbnd), intent(in ) :: band_lims real(wp), dimension(ncol, nlev, ngpt), intent(in ) :: spectral_flux_dn, spectral_flux_up @@ -61,6 +62,7 @@ pure subroutine net_byband_full(ncol, nlev, ngpt, nbnd, band_lims, spectral_flux integer :: icol, ilev, igpt, ibnd !$acc parallel loop collapse(3) copyin(spectral_flux_dn, spectral_flux_up, band_lims) copyout(byband_flux_net) + !$omp target teams distribute parallel do collapse(3) map(to:spectral_flux_dn, spectral_flux_up, band_lims) map(from:byband_flux_net) do ibnd = 1, nbnd do ilev = 1, nlev do icol = 1, ncol @@ -77,7 +79,7 @@ pure subroutine net_byband_full(ncol, nlev, ngpt, nbnd, band_lims, spectral_flux end do end subroutine net_byband_full ! 
---------------------------------------------------------------------------- - pure subroutine net_byband_precalc(ncol, nlev, nbnd, byband_flux_dn, byband_flux_up, byband_flux_net) bind (C) + subroutine net_byband_precalc(ncol, nlev, nbnd, byband_flux_dn, byband_flux_up, byband_flux_net) bind (C) integer, intent(in ) :: ncol, nlev, nbnd real(wp), dimension(ncol, nlev, nbnd), intent(in ) :: byband_flux_dn, byband_flux_up real(wp), dimension(ncol, nlev, nbnd), intent(out) :: byband_flux_net diff --git a/rrtmgp/Make.depends b/rrtmgp/Make.depends index 1395b71c5..26733df03 100644 --- a/rrtmgp/Make.depends +++ b/rrtmgp/Make.depends @@ -28,7 +28,7 @@ mo_gas_concentrations.o: mo_rte_kind.o mo_rte_config.o mo_rte_util_array.o mo_rr # # Gas optics # -mo_gas_optics_kernels.o: mo_rte_kind.o mo_gas_optics_kernels.F90 +mo_gas_optics_kernels.o: mo_rte_kind.o mo_rte_util_array.o mo_gas_optics_kernels.F90 mo_gas_optics.o: mo_rte_kind.o mo_rte_config.o mo_gas_concentrations.o \ mo_optical_props.o mo_source_functions.o \ diff --git a/rrtmgp/kernels-openacc/mo_gas_optics_kernels.F90 b/rrtmgp/kernels-openacc/mo_gas_optics_kernels.F90 index bbfc2ba36..75cc0a5dd 100644 --- a/rrtmgp/kernels-openacc/mo_gas_optics_kernels.F90 +++ b/rrtmgp/kernels-openacc/mo_gas_optics_kernels.F90 @@ -16,6 +16,7 @@ module mo_gas_optics_kernels use mo_rte_kind, only: wp, wl + use mo_rte_util_array,only: zero_array implicit none public contains @@ -67,10 +68,14 @@ subroutine interpolation( & integer :: icol, ilay, iflav, igases(2), itropo, itemp !$acc enter data copyin(flavor,press_ref_log,temp_ref,vmr_ref,play,tlay,col_gas) + !$omp target enter data map(to:flavor, press_ref_log, temp_ref, vmr_ref, play, tlay, col_gas) !$acc enter data create(jtemp,jpress,tropo,jeta,col_mix,fmajor,fminor) + !$omp target enter data map(alloc:jtemp, jpress, tropo, jeta, col_mix, fmajor, fminor) !$acc enter data create(ftemp,fpress) + !$omp target enter data map(alloc:ftemp, fpress) !$acc parallel loop gang vector collapse(2) + 
!$omp target teams distribute parallel do simd collapse(2) do ilay = 1, nlay do icol = 1, ncol ! index and factor for temperature interpolation @@ -92,6 +97,7 @@ subroutine interpolation( & ! PGI BUG WORKAROUND: if present(vmr_ref) isn't there, OpenACC runtime ! thinks it isn't present. !$acc parallel loop gang vector collapse(4) private(igases) present(vmr_ref) + !$omp target teams distribute parallel do simd collapse(4) private(igases) do ilay = 1, nlay do icol = 1, ncol ! loop over implemented combinations of major species @@ -127,8 +133,11 @@ subroutine interpolation( & end do !$acc exit data delete(flavor,press_ref_log,temp_ref,vmr_ref,play,tlay,col_gas) + !$omp target exit data map(release:flavor, press_ref_log, temp_ref, vmr_ref, play, tlay, col_gas) !$acc exit data copyout(jtemp,jpress,tropo,jeta,col_mix,fmajor,fminor) + !$omp target exit data map(from:jtemp, jpress, tropo, jeta, col_mix, fmajor, fminor) !$acc exit data delete(ftemp,fpress) + !$omp target exit data map(release:ftemp, fpress) end subroutine interpolation ! -------------------------------------------------------------------------------------- @@ -213,7 +222,9 @@ subroutine compute_tau_absorption( & ! ---------------------------------------------------------------- !$acc enter data create(itropo_lower, itropo_upper) + !$omp target enter data map(alloc:itropo_lower, itropo_upper) !$acc enter data copyin(play, tlay, tropo, gpoint_flavor, jeta, jtemp, col_gas, fminor, tau) + !$omp target enter data map(to:play, tlay, tropo, gpoint_flavor, jeta, jtemp, col_gas, fminor, tau) ! --------------------- ! 
Layer limits of upper, lower atmospheres @@ -221,19 +232,31 @@ subroutine compute_tau_absorption( & top_at_1 = play(1,1) < play(1, nlay) if(top_at_1) then !$acc parallel loop + !$omp target teams distribute parallel do simd do icol = 1,ncol itropo_lower(icol,2) = nlay +#ifdef _CRAYFTN + itropo_upper(icol,1) = 1 + call minmaxloc(icol, tropo, play, itropo_lower(icol,1), itropo_upper(icol,2)) +#else itropo_lower(icol,1) = minloc(play(icol,:), dim=1, mask=tropo(icol,:)) itropo_upper(icol,1) = 1 itropo_upper(icol,2) = maxloc(play(icol,:), dim=1, mask=(.not. tropo(icol,:))) +#endif end do else !$acc parallel loop + !$omp target teams distribute parallel do simd do icol = 1,ncol itropo_lower(icol,1) = 1 +#ifdef _CRAYFTN + itropo_upper(icol,2) = nlay + call minmaxloc(icol, tropo, play, itropo_lower(icol,2), itropo_upper(icol,1)) +#else itropo_lower(icol,2) = minloc(play(icol,:), dim=1, mask=tropo(icol,:)) itropo_upper(icol,2) = nlay itropo_upper(icol,1) = maxloc(play(icol,:), dim=1, mask=(.not.tropo(icol,:))) +#endif end do end if ! --------------------- @@ -292,8 +315,11 @@ subroutine compute_tau_absorption( & tau) !$acc exit data delete(itropo_lower,itropo_upper) + !$omp target exit data map(release:itropo_lower, itropo_upper) !$acc exit data delete(play, tlay, tropo, gpoint_flavor, jeta, jtemp, col_gas, fminor) + !$omp target exit data map(release:play, tlay, tropo, gpoint_flavor, jeta, jtemp, col_gas, fminor) !$acc exit data copyout(tau) + !$omp target exit data map(from:tau) end subroutine compute_tau_absorption ! -------------------------------------------------------------------------------------- @@ -338,6 +364,7 @@ subroutine gas_optical_depths_major(ncol,nlay,nbnd,ngpt,& ! optical depth calculation for major species !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do ilay = 1, nlay do icol = 1, ncol ! 
optical depth calculation for major species @@ -415,6 +442,7 @@ subroutine gas_optical_depths_minor(ncol,nlay,ngpt, & max_gpt_diff = maxval( minor_limits_gpt(2,:) - minor_limits_gpt(1,:) ) !$acc parallel loop gang vector collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do ilay = 1 , nlay do icol = 1, ncol do igpt0 = 0, max_gpt_diff @@ -446,7 +474,7 @@ subroutine gas_optical_depths_minor(ncol,nlay,ngpt, & if (scale_by_complement(imnr)) then ! scale by densities of all gases but the special one scaling = scaling * (1._wp - mycol_gas_imnr * vmr_fact * dry_fact) else - scaling = scaling * mycol_gas_imnr * vmr_fact * dry_fact + scaling = scaling * (mycol_gas_imnr * vmr_fact * dry_fact) endif endif endif @@ -474,6 +502,7 @@ subroutine gas_optical_depths_minor(ncol,nlay,ngpt, & tau_minor = kminor_loc * scaling !$acc atomic update + !$omp atomic update tau(igpt,ilay,icol) = tau(igpt,ilay,icol) + tau_minor endif @@ -517,6 +546,7 @@ subroutine compute_tau_rayleigh(ncol,nlay,nbnd,ngpt, & ! ----------------- !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do ilay = 1, nlay do icol = 1, ncol do igpt = 1, ngpt @@ -576,12 +606,17 @@ subroutine compute_Planck_source( & ! ----------------- !$acc enter data copyin(tlay,tlev,tsfc,fmajor,jeta,tropo,jtemp,jpress,gpoint_bands,pfracin,totplnk,gpoint_flavor) + !$omp target enter data map(to:tlay, tlev, tsfc, fmajor, jeta, tropo, jtemp, jpress, gpoint_bands, pfracin, totplnk, gpoint_flavor) !$acc enter data create(sfc_src,lay_src,lev_src_inc,lev_src_dec) + !$omp target enter data map(alloc:sfc_src, lay_src, lev_src_inc, lev_src_dec) !$acc enter data create(pfrac,planck_function) + !$omp target enter data map(alloc:pfrac, planck_function) !$acc enter data create(sfc_source_Jac) + !$omp target enter data map(alloc:sfc_source_Jac) ! 
Calculation of fraction of band's Planck irradiance associated with each g-point !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do icol = 1, ncol do ilay = 1, nlay do igpt = 1, ngpt @@ -601,6 +636,7 @@ subroutine compute_Planck_source( & ! Compute surface source irradiance for g-point, equals band irradiance x fraction for g-point ! !$acc parallel loop + !$omp target teams distribute parallel do simd do icol = 1, ncol call interpolate1D(tsfc(icol) , temp_ref_min, totplnk_delta, totplnk, planck_function(1:nbnd,1,icol)) call interpolate1D(tsfc(icol) + delta_Tsurf, temp_ref_min, totplnk_delta, totplnk, planck_function(1:nbnd,2,icol)) @@ -609,6 +645,7 @@ subroutine compute_Planck_source( & ! Map to g-points ! !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol sfc_src (igpt,icol) = pfrac(igpt,sfc_lay,icol) * planck_function(gpoint_bands(igpt),1,icol) @@ -618,6 +655,7 @@ subroutine compute_Planck_source( & end do ! icol !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do icol = 1, ncol do ilay = 1, nlay ! Compute layer source irradiance for g-point, equals band irradiance x fraction for g-point @@ -631,6 +669,7 @@ subroutine compute_Planck_source( & ! Helps to achieve higher bandwidth ! !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do icol = 1, ncol, 2 do ilay = 1, nlay do igpt = 1, ngpt @@ -643,11 +682,13 @@ subroutine compute_Planck_source( & ! 
compute level source irradiances for each g-point, one each for upward and downward paths !$acc parallel loop + !$omp target teams distribute parallel do simd do icol = 1, ncol call interpolate1D(tlev(icol, 1), temp_ref_min, totplnk_delta, totplnk, planck_function(1:nbnd, 1,icol)) end do !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do icol = 1, ncol do ilay = 2, nlay+1 call interpolate1D(tlev(icol,ilay), temp_ref_min, totplnk_delta, totplnk, planck_function(1:nbnd,ilay,icol)) @@ -660,6 +701,7 @@ subroutine compute_Planck_source( & ! Same unrolling as mentioned before ! !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do icol = 1, ncol, 2 do ilay = 1, nlay do igpt = 1, ngpt @@ -674,9 +716,13 @@ subroutine compute_Planck_source( & end do ! icol !$acc exit data delete(tlay,tlev,tsfc,fmajor,jeta,tropo,jtemp,jpress,gpoint_bands,pfracin,totplnk,gpoint_flavor) + !$omp target exit data map(release:tlay, tlev, tsfc, fmajor, jeta, tropo, jtemp, jpress, gpoint_bands, pfracin, totplnk, gpoint_flavor) !$acc exit data delete(pfrac,planck_function) + !$omp target exit data map(release:pfrac, planck_function) !$acc exit data copyout(sfc_src,lay_src,lev_src_inc,lev_src_dec) + !$omp target exit data map(from:sfc_src, lay_src, lev_src_inc, lev_src_dec) !$acc exit data copyout(sfc_source_Jac) + !$omp target exit data map(from:sfc_source_Jac) end subroutine compute_Planck_source ! ---------------------------------------------------------- @@ -685,6 +731,7 @@ end subroutine compute_Planck_source ! subroutine interpolate1D(val, offset, delta, table, res) !$acc routine seq + !$omp declare target ! input real(wp), intent(in) :: val, & ! axis value at which to evaluate table offset, & ! minimum of table axis @@ -709,6 +756,7 @@ end subroutine interpolate1D ! 
function interpolate2D(fminor, k, igpt, jeta, jtemp) result(res) !$acc routine seq + !$omp declare target real(wp), dimension(2,2), intent(in) :: fminor ! interpolation fractions for minor species ! index(1) : reference eta level (temperature dependent) ! index(2) : reference temperature level @@ -728,6 +776,7 @@ end function interpolate2D ! interpolation in temperature, pressure, and eta function interpolate3D(scaling, fmajor, k, igpt, jeta, jtemp, jpress) result(res) !$acc routine seq + !$omp declare target real(wp), dimension(2), intent(in) :: scaling real(wp), dimension(2,2,2), intent(in) :: fmajor ! interpolation fractions for major species ! index(1) : reference eta level (temperature dependent) @@ -770,12 +819,15 @@ subroutine combine_and_reorder_2str(ncol, nlay, ngpt, tau_abs, tau_rayleigh, tau !$acc data copy(tau, ssa, g) & !$acc copyin(tau_rayleigh, tau_abs) + call zero_array(ncol, nlay, ngpt, g) ! We are using blocking memory accesses here to improve performance ! of the transpositions. See also comments in mo_rrtmgp_util_reorder_kernels.F90 ! !$acc parallel default(none) vector_length(tile*tile) !$acc loop gang collapse(3) - do ilay = 1, nlay + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:tau, ssa, g) map(to:tau_rayleigh, tau_abs) + do ilay = 1, nlay do icol0 = 1, ncol, tile do igpt0 = 1, ngpt, tile @@ -785,16 +837,13 @@ subroutine combine_and_reorder_2str(ncol, nlay, ngpt, tau_abs, tau_rayleigh, tau icol = icol0 + icdiff igpt = igpt0 + igdiff if (icol > ncol .or. 
igpt > ngpt) cycle - - t = tau_abs(igpt,ilay,icol) + tau_rayleigh(igpt,ilay,icol) - tau(icol,ilay,igpt) = t - g (icol,ilay,igpt) = 0._wp - if(t > 2._wp * tiny(t)) then - ssa(icol,ilay,igpt) = tau_rayleigh(igpt,ilay,icol) / t - else - ssa(icol,ilay,igpt) = 0._wp - end if - + t = tau_abs(igpt,ilay,icol) + tau_rayleigh(igpt,ilay,icol) + tau(icol,ilay,igpt) = t + if(t > 2._wp * tiny(t)) then + ssa(icol,ilay,igpt) = tau_rayleigh(igpt,ilay,icol) / t + else + ssa(icol,ilay,igpt) = 0._wp + end if end do end do @@ -823,6 +872,9 @@ subroutine combine_and_reorder_nstr(ncol, nlay, ngpt, nmom, tau_abs, tau_rayleig !$acc parallel loop collapse(3) & !$acc& copy(tau, ssa, p) & !$acc& copyin(tau_rayleigh(:ngpt,:nlay,:ncol),tau_abs(:ngpt,:nlay,:ncol)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:tau, ssa, p) & + !$omp& map(to:tau_rayleigh(:ngpt, :nlay, :ncol), tau_abs(:ngpt, :nlay, :ncol)) do icol = 1, ncol do ilay = 1, nlay do igpt = 1, ngpt @@ -841,5 +893,39 @@ subroutine combine_and_reorder_nstr(ncol, nlay, ngpt, nmom, tau_abs, tau_rayleig end do end do end subroutine combine_and_reorder_nstr + + ! ---------------------------------------------------------- + ! + ! In-house subroutine for handling minloc and maxloc for + ! compilers which do not support GPU versions + ! + subroutine minmaxloc(i, mask, a, minl, maxl) + !$acc routine seq + !$omp declare target + implicit none + integer :: i, minl, maxl + logical(wl) :: mask(:,:) + real(wp) :: a(:,:) + integer :: j, n + real(wp) :: aij, amax, amin + n = size(a,2) + amax = -huge(amax) + amin = huge(amin) + do j = 1, n + aij = a(i,j) + if (mask(i,j)) then + if (aij.lt.amin) then + amin = aij + minl = j + end if + else + if (aij.gt.amax) then + amax = aij + maxl = j + end if + end if + end do + end subroutine ! 
---------------------------------------------------------- + end module mo_gas_optics_kernels diff --git a/rrtmgp/kernels/mo_gas_optics_kernels.F90 b/rrtmgp/kernels/mo_gas_optics_kernels.F90 index 4f064aa6c..857377a42 100644 --- a/rrtmgp/kernels/mo_gas_optics_kernels.F90 +++ b/rrtmgp/kernels/mo_gas_optics_kernels.F90 @@ -16,6 +16,7 @@ module mo_gas_optics_kernels use mo_rte_kind, only : wp, wl + use mo_rte_util_array,only : zero_array implicit none public contains @@ -364,7 +365,7 @@ subroutine gas_optical_depths_minor(ncol,nlay,ngpt, & real(wp), dimension(ngpt,nlay,ncol), intent(inout) :: tau ! ----------------- ! local variables - real(wp), parameter :: PaTohPa = 0.01 + real(wp), parameter :: PaTohPa = 0.01_wp real(wp) :: vmr_fact, dry_fact ! conversion from column abundance to dry vol. mixing ratio; real(wp) :: scaling, kminor_loc ! minor species absorption coefficient, optical depth integer :: icol, ilay, iflav, igpt, imnr @@ -403,7 +404,7 @@ subroutine gas_optical_depths_minor(ncol,nlay,ngpt, & if (scale_by_complement(imnr)) then ! scale by densities of all gases but the special one scaling = scaling * (1._wp - col_gas(icol,ilay,idx_minor_scaling(imnr)) * vmr_fact * dry_fact) else - scaling = scaling * col_gas(icol,ilay,idx_minor_scaling(imnr)) * vmr_fact * dry_fact + scaling = scaling * (col_gas(icol,ilay,idx_minor_scaling(imnr)) * vmr_fact * dry_fact) endif endif endif @@ -542,7 +543,7 @@ subroutine compute_Planck_source( & ! do icol = 1, ncol planck_function(1:nbnd,1,icol) = interpolate1D(tsfc(icol) , temp_ref_min, totplnk_delta, totplnk) - planck_function(1:nbnd,2,icol) = interpolate1D(tsfc(icol) + delta_Tsurf, temp_ref_min, totplnk_delta, totplnk) + planck_function(1:nbnd,2,icol) = interpolate1D(tsfc(icol) + delta_Tsurf, temp_ref_min, totplnk_delta, totplnk) ! ! Map to g-points ! @@ -720,7 +721,7 @@ end function interpolate3D_byflav ! ! Combine absorption and Rayleigh optical depths for total tau, ssa, g !
- pure subroutine combine_and_reorder_2str(ncol, nlay, ngpt, tau_abs, tau_rayleigh, tau, ssa, g) & + subroutine combine_and_reorder_2str(ncol, nlay, ngpt, tau_abs, tau_rayleigh, tau, ssa, g) & bind(C, name="combine_and_reorder_2str") integer, intent(in) :: ncol, nlay, ngpt real(wp), dimension(ngpt,nlay,ncol), intent(in ) :: tau_abs, tau_rayleigh @@ -729,12 +730,12 @@ pure subroutine combine_and_reorder_2str(ncol, nlay, ngpt, tau_abs, tau_rayleigh integer :: icol, ilay, igpt real(wp) :: t ! ----------------------- - do icol = 1, ncol - do ilay = 1, nlay - do igpt = 1, ngpt + call zero_array(ncol, nlay, ngpt, g) + do ilay = 1, nlay + do igpt = 1, ngpt + do icol = 1, ncol t = tau_abs(igpt,ilay,icol) + tau_rayleigh(igpt,ilay,icol) tau(icol,ilay,igpt) = t - g (icol,ilay,igpt) = 0._wp if(t > 2._wp * tiny(t)) then ssa(icol,ilay,igpt) = tau_rayleigh(igpt,ilay,icol) / t else diff --git a/rrtmgp/kernels/mo_rrtmgp_util_reorder_kernels.F90 b/rrtmgp/kernels/mo_rrtmgp_util_reorder_kernels.F90 index 9d5d850c2..8d876a907 100644 --- a/rrtmgp/kernels/mo_rrtmgp_util_reorder_kernels.F90 +++ b/rrtmgp/kernels/mo_rrtmgp_util_reorder_kernels.F90 @@ -41,6 +41,7 @@ subroutine reorder_123x312_kernel(d1, d2, d3, array_in, array_out) & !$acc& copyout(array_out) & !$acc& copyin(array_in) !$acc loop gang collapse(3) + !$omp target teams distribute parallel do simd collapse(3) map(to:array_in) map(from:array_out) do i2 = 1, d2 do i10 = 1, d1, tile do i30 = 1, d3, tile @@ -79,6 +80,7 @@ subroutine reorder_123x321_kernel(d1, d2, d3, array_in, array_out) & !$acc& copyin(array_in) !$acc loop gang collapse(3) ! 
private(cache(:,:)) + !$omp target teams distribute parallel do simd collapse(3) map(to:array_in) map(from:array_out) do i2 = 1, d2 do i10 = 1, d1, tile do i30 = 1, d3, tile diff --git a/rrtmgp/mo_gas_concentrations.F90 b/rrtmgp/mo_gas_concentrations.F90 old mode 100755 new mode 100644 index 2c6e14879..46de19fd4 --- a/rrtmgp/mo_gas_concentrations.F90 +++ b/rrtmgp/mo_gas_concentrations.F90 @@ -107,6 +107,7 @@ function init(this, gas_names) result(error_msg) allocate(this%gas_name(ngas), this%concs(ngas)) !$acc enter data copyin(this) !$acc enter data copyin(this%concs) + !$omp target enter data map(to:this%concs) this%gas_name(:) = gas_names(:) end function @@ -122,6 +123,7 @@ function set_vmr_scalar(this, gas, w) result(error_msg) real(wp), intent(in ) :: w character(len=128) :: error_msg ! --------- + real(wp), dimension(:,:), pointer :: p integer :: igas ! --------- error_msg = '' @@ -142,6 +144,7 @@ function set_vmr_scalar(this, gas, w) result(error_msg) if (associated(this%concs(igas)%conc)) then if ( any(shape(this%concs(igas)%conc) /= [1, 1]) ) then !$acc exit data delete(this%concs(igas)%conc) + !$omp target exit data map(release:this%concs(igas)%conc) deallocate(this%concs(igas)%conc) nullify (this%concs(igas)%conc) end if @@ -149,11 +152,19 @@ function set_vmr_scalar(this, gas, w) result(error_msg) if (.not. associated(this%concs(igas)%conc)) then allocate(this%concs(igas)%conc(1,1)) !$acc enter data create(this%concs(igas)%conc) + !$omp target enter data map(alloc:this%concs(igas)%conc) end if + p => this%concs(igas)%conc(:,:) !$acc kernels + !$omp target map(to:w) +#ifdef _CRAYFTN + p(:,:) = w +#else this%concs(igas)%conc(:,:) = w +#endif !$acc end kernels + !$omp end target end function set_vmr_scalar ! 
------------------------------------------------------------------------------------- function set_vmr_1d(this, gas, w) result(error_msg) @@ -164,6 +175,7 @@ function set_vmr_1d(this, gas, w) result(error_msg) intent(in ) :: w character(len=128) :: error_msg ! --------- + real(wp), dimension(:,:), pointer :: p integer :: igas ! --------- error_msg = '' @@ -191,6 +203,7 @@ function set_vmr_1d(this, gas, w) result(error_msg) if (associated(this%concs(igas)%conc)) then if ( any(shape(this%concs(igas)%conc) /= [1, this%nlay]) ) then !$acc exit data delete(this%concs(igas)%conc) + !$omp target exit data map(release:this%concs(igas)%conc) deallocate(this%concs(igas)%conc) nullify (this%concs(igas)%conc) end if @@ -198,11 +211,19 @@ function set_vmr_1d(this, gas, w) result(error_msg) if (.not. associated(this%concs(igas)%conc)) then allocate(this%concs(igas)%conc(1,this%nlay)) !$acc enter data create(this%concs(igas)%conc) + !$omp target enter data map(alloc:this%concs(igas)%conc) end if + p => this%concs(igas)%conc(:,:) !$acc kernels copyin(w) + !$omp target map(to:w) +#ifdef _CRAYFTN + p(1,:) = w +#else this%concs(igas)%conc(1,:) = w +#endif !$acc end kernels + !$omp end target !$acc exit data delete(w) end function set_vmr_1d @@ -215,6 +236,7 @@ function set_vmr_2d(this, gas, w) result(error_msg) intent(in ) :: w character(len=128) :: error_msg ! --------- + real(wp), dimension(:,:), pointer :: p integer :: igas ! --------- error_msg = '' @@ -249,6 +271,7 @@ function set_vmr_2d(this, gas, w) result(error_msg) if (associated(this%concs(igas)%conc)) then if ( any(shape(this%concs(igas)%conc) /= [this%ncol,this%nlay]) ) then !$acc exit data delete(this%concs(igas)%conc) + !$omp target exit data map(release:this%concs(igas)%conc) deallocate(this%concs(igas)%conc) nullify (this%concs(igas)%conc) end if @@ -256,11 +279,19 @@ function set_vmr_2d(this, gas, w) result(error_msg) if (.not. 
associated(this%concs(igas)%conc)) then allocate(this%concs(igas)%conc(this%ncol,this%nlay)) !$acc enter data create(this%concs(igas)%conc) + !$omp target enter data map(alloc:this%concs(igas)%conc) end if + p => this%concs(igas)%conc(:,:) !$acc kernels copyin(w) + !$omp target map(to:w) +#ifdef _CRAYFTN + p(:,:) = w(:,:) +#else this%concs(igas)%conc(:,:) = w(:,:) +#endif !$acc end kernels + !$omp end target end function set_vmr_2d ! ------------------------------------------------------------------------------------- ! @@ -276,6 +307,7 @@ function get_vmr_1d(this, gas, array) result(error_msg) real(wp), dimension(:), intent(out) :: array character(len=128) :: error_msg ! --------------------- + real(wp), dimension(:,:), pointer :: p integer :: igas ! --------------------- error_msg = '' @@ -294,17 +326,32 @@ function get_vmr_1d(this, gas, array) result(error_msg) end if if(error_msg /= "") return + p => this%concs(igas)%conc(:,:) !$acc data copyout (array) present(this) + !$omp target data map(from:array) if(size(this%concs(igas)%conc, 2) > 1) then !$acc kernels default(none) + !$omp target +#ifdef _CRAYFTN + array(:) = p(1,:) +#else array(:) = this%concs(igas)%conc(1,:) +#endif !$acc end kernels + !$omp end target else !$acc kernels default(none) + !$omp target +#ifdef _CRAYFTN + array(:) = p(1,1) +#else array(:) = this%concs(igas)%conc(1,1) +#endif !$acc end kernels + !$omp end target end if !$acc end data + !$omp end target data end function get_vmr_1d ! ------------------------------------------------------------------------------------- @@ -317,6 +364,7 @@ function get_vmr_2d(this, gas, array) result(error_msg) real(wp), dimension(:,:), intent(out) :: array character(len=128) :: error_msg ! --------------------- + real(wp), dimension(:,:), pointer :: p integer :: icol, ilay, igas ! 
--------------------- error_msg = '' @@ -338,31 +386,49 @@ function get_vmr_2d(this, gas, array) result(error_msg) end if if(error_msg /= "") return + p => this%concs(igas)%conc(:,:) !$acc data copyout (array) present(this, this%concs) + !$omp target data map(from:array) if(size(this%concs(igas)%conc, 1) > 1) then ! Concentration stored as 2D !$acc parallel loop collapse(2) default(none) + !$omp target teams distribute parallel do simd do ilay = 1, size(array,2) do icol = 1, size(array,1) !print *, (size(this%concs)) +#ifdef _CRAYFTN + array(icol,ilay) = p(icol,ilay) +#else array(icol,ilay) = this%concs(igas)%conc(icol,ilay) +#endif end do end do else if(size(this%concs(igas)%conc, 2) > 1) then ! Concentration stored as 1D !$acc parallel loop collapse(2) default(none) + !$omp target teams distribute parallel do simd do ilay = 1, size(array,2) do icol = 1, size(array,1) +#ifdef _CRAYFTN + array(icol,ilay) = p(1,ilay) +#else array(icol, ilay) = this%concs(igas)%conc(1,ilay) +#endif end do end do else ! Concentration stored as scalar !$acc parallel loop collapse(2) default(none) + !$omp target teams distribute parallel do simd do ilay = 1, size(array,2) do icol = 1, size(array,1) +#ifdef _CRAYFTN + array(icol,ilay) = p(1,1) +#else array(icol,ilay) = this%concs(igas)%conc(1,1) +#endif end do end do end if !$acc end data + !$omp end target data end function get_vmr_2d ! ------------------------------------------------------------------------------------- @@ -376,6 +442,7 @@ function get_subset_range(this, start, n, subset) result(error_msg) class(ty_gas_concs), intent(inout) :: subset character(len=128) :: error_msg ! --------------------- + real(wp), dimension(:,:), pointer :: p1, p2 integer :: i ! --------------------- error_msg = '' @@ -391,6 +458,7 @@ function get_subset_range(this, start, n, subset) result(error_msg) allocate(subset%gas_name(size(this%gas_name)), & subset%concs (size(this%concs))) ! 
These two arrays should be the same length !$acc enter data create(subset, subset%concs) + !$omp target enter data map(alloc:subset%concs) subset%nlay = this%nlay subset%ncol = merge(n, 0, this%ncol > 0) subset%gas_name(:) = this%gas_name(:) @@ -402,15 +470,30 @@ function get_subset_range(this, start, n, subset) result(error_msg) ! allocate(subset%concs(i)%conc(min(max(subset%ncol,1), size(this%concs(i)%conc, 1)), & min( subset%nlay, size(this%concs(i)%conc, 2)))) + p1 => subset%concs(i)%conc(:,:) + p2 => this%concs(i)%conc(:,:) !$acc enter data create(subset%concs(i)%conc) + !$omp target enter data map(alloc:subset%concs(i)%conc) if(size(this%concs(i)%conc, 1) > 1) then ! Concentration stored as 2D !$acc kernels + !$omp target +#ifdef _CRAYFTN + p1(:,:) = p2(start:(start+n-1),:) +#else subset%concs(i)%conc(:,:) = this%concs(i)%conc(start:(start+n-1),:) +#endif !$acc end kernels + !$omp end target else !$acc kernels + !$omp target +#ifdef _CRAYFTN + p1(:,:) = p2(:,:) +#else subset%concs(i)%conc(:,:) = this%concs(i)%conc(:,:) +#endif !$acc end kernels + !$omp end target end if end do @@ -432,11 +515,13 @@ subroutine reset(this) do i = 1, size(this%concs) if(associated(this%concs(i)%conc)) then !$acc exit data delete(this%concs(i)%conc) + !$omp target exit data map(release:this%concs(i)%conc) deallocate(this%concs(i)%conc) nullify(this%concs(i)%conc) end if end do !$acc exit data delete(this%concs) + !$omp target exit data map(release:this%concs) deallocate(this%concs) end if end subroutine reset @@ -477,6 +562,7 @@ function find_gas(this, gas) ! ----------------- find_gas = GAS_NOT_IN_LIST if(.not. allocated(this%gas_name)) return + ! search gases using a loop. 
Fortran intrinsic findloc would be faster, but only supported since gfortran 9 do igas = 1, size(this%gas_name) if (lower_case(trim(this%gas_name(igas))) == lower_case(trim(gas))) then find_gas = igas diff --git a/rrtmgp/mo_gas_optics_rrtmgp.F90 b/rrtmgp/mo_gas_optics_rrtmgp.F90 index 3982f0978..944e04a9a 100644 --- a/rrtmgp/mo_gas_optics_rrtmgp.F90 +++ b/rrtmgp/mo_gas_optics_rrtmgp.F90 @@ -257,6 +257,7 @@ function gas_optics_int(this, & ! Gas optics ! !$acc enter data create(jtemp, jpress, tropo, fmajor, jeta) + !$omp target enter data map(alloc:jtemp, jpress, tropo, fmajor, jeta) error_msg = compute_gas_taus(this, & ncol, nlay, ngpt, nband, & play, plev, tlay, gas_desc, & @@ -271,6 +272,7 @@ function gas_optics_int(this, & ! input data sizes and values ! !$acc enter data copyin(tsfc, tlev) ! Should be fine even if tlev is not supplied + !$omp target enter data map(to:tsfc, tlev) if(check_extents) then if(.not. extents_are(tsfc, ncol)) & @@ -312,6 +314,7 @@ function gas_optics_int(this, & sources, & tlev) !$acc exit data delete(tlev) + !$omp target exit data map(release:tlev) else error_msg = source(this, & ncol, nlay, nband, ngpt, & @@ -320,7 +323,9 @@ function gas_optics_int(this, & sources) end if !$acc exit data delete(tsfc) + !$omp target exit data map(release:tsfc) !$acc exit data delete(jtemp, jpress, tropo, fmajor, jeta) + !$omp target exit data map(release:jtemp, jpress, tropo, fmajor, jeta) end function gas_optics_int !------------------------------------------------------------------------------------------ ! @@ -366,6 +371,7 @@ function gas_optics_ext(this, & ! Gas optics ! 
!$acc enter data create(jtemp, jpress, tropo, fmajor, jeta) + !$omp target enter data map(alloc:jtemp, jpress, tropo, fmajor, jeta) error_msg = compute_gas_taus(this, & ncol, nlay, ngpt, nband, & play, plev, tlay, gas_desc, & @@ -373,6 +379,7 @@ function gas_optics_ext(this, & jtemp, jpress, jeta, tropo, fmajor, & col_dry) !$acc exit data delete(jtemp, jpress, tropo, fmajor, jeta) + !$omp target exit data map(release:jtemp, jpress, tropo, fmajor, jeta) if(error_msg /= '') return ! ---------------------------------------------------------- @@ -380,6 +387,7 @@ function gas_optics_ext(this, & ! External source function is constant ! !$acc enter data create(toa_src) + !$omp target enter data map(alloc:toa_src) if(check_extents) then if(.not. extents_are(toa_src, ncol, ngpt)) & error_msg = "gas_optics(): array toa_src has wrong size" @@ -387,12 +395,14 @@ function gas_optics_ext(this, & if(error_msg /= '') return !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1,ngpt do icol = 1,ncol toa_src(icol,igpt) = this%solar_source(igpt) end do end do !$acc exit data copyout(toa_src) + !$omp target exit data map(from:toa_src) end function gas_optics_ext !------------------------------------------------------------------------------------------ ! @@ -469,6 +479,7 @@ function compute_gas_taus(this, & ! Check input data sizes and values ! !$acc enter data copyin(play,plev,tlay) + !$omp target enter data map(to:play, plev, tlay) if(check_extents) then if(.not. 
extents_are(play, ncol, nlay )) & error_msg = "gas_optics(): array play has wrong size" @@ -490,7 +501,7 @@ function compute_gas_taus(this, & if(check_values) then if(any_vals_outside(play, this%press_ref_min,this%press_ref_max)) & error_msg = "gas_optics(): array play has values outside range" - if(any_vals_outside(plev, this%press_ref_min,this%press_ref_max)) & + if(any_vals_less_than(plev, 0._wp)) & error_msg = "gas_optics(): array plev has values outside range" if(any_vals_outside(tlay, this%temp_ref_min, this%temp_ref_max)) & error_msg = "gas_optics(): array tlay has values outside range" @@ -517,6 +528,7 @@ function compute_gas_taus(this, & ! Fill out the array of volume mixing ratios ! !$acc enter data create(vmr) + !$omp target enter data map(alloc:vmr) do igas = 1, ngas ! ! Get vmr if gas is provided in ty_gas_concs @@ -533,6 +545,7 @@ function compute_gas_taus(this, & idx_h2o = string_loc_in_array('h2o', this%gas_names) col_dry_wk => col_dry_arr !$acc enter data create(col_dry_wk, col_dry_arr, col_gas) + !$omp target enter data map(alloc:col_dry_wk, col_dry_arr, col_gas) if (present(col_dry)) then col_dry_wk => col_dry else @@ -543,12 +556,14 @@ function compute_gas_taus(this, & ! compute column gas amounts [molec/cm^2] ! !$acc parallel loop gang vector collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do ilay = 1, nlay do icol = 1, ncol col_gas(icol,ilay,0) = col_dry_wk(icol,ilay) end do end do !$acc parallel loop gang vector collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igas = 1, ngas do ilay = 1, nlay do icol = 1, ncol @@ -557,15 +572,20 @@ function compute_gas_taus(this, & end do end do !$acc exit data delete(vmr) + !$omp target exit data map(release:vmr) ! ! ---- calculate gas optical depths ---- ! 
!$acc enter data create(jtemp, jpress, jeta, tropo, fmajor) + !$omp target enter data map(alloc:jtemp, jpress, jeta, tropo, fmajor) !$acc enter data create(tau, tau_rayleigh) + !$omp target enter data map(alloc:tau, tau_rayleigh) !$acc enter data create(col_mix, fminor) + !$omp target enter data map(alloc:col_mix, fminor) !$acc enter data copyin(this) !$acc enter data copyin(this%gpoint_flavor) + !$omp target enter data map(to:this%gpoint_flavor) call zero_array(ngpt, nlay, ncol, tau) call interpolation( & ncol,nlay, & ! problem dimensions @@ -616,6 +636,7 @@ function compute_gas_taus(this, & tau) if (allocated(this%krayl)) then !$acc enter data attach(col_dry_wk) copyin(this%krayl) + !$omp target enter data map(to:col_dry_wk) map(to:this%krayl) call compute_tau_rayleigh( & !Rayleigh scattering optical depths ncol,nlay,nband,ngpt, & ngas,nflav,neta,npres,ntemp, & ! dimensions @@ -626,16 +647,22 @@ function compute_gas_taus(this, & fminor,jeta,tropo,jtemp, & ! local input tau_rayleigh) !$acc exit data detach(col_dry_wk) delete(this%krayl) + !$omp target exit data map(from:col_dry_wk) map(release:this%krayl) end if if (error_msg /= '') return ! Combine optical depths and reorder for radiative transfer solver. 
call combine_and_reorder(tau, tau_rayleigh, allocated(this%krayl), optical_props) !$acc exit data delete(play, tlay, plev) + !$omp target exit data map(release:play, tlay, plev) !$acc exit data delete(tau, tau_rayleigh) + !$omp target exit data map(release:tau, tau_rayleigh) !$acc exit data delete(col_dry_wk, col_dry_arr, col_gas, col_mix, fminor) + !$omp target exit data map(release:col_dry_wk, col_dry_arr, col_gas, col_mix, fminor) !$acc exit data delete(this%gpoint_flavor) + !$omp target exit data map(release:this%gpoint_flavor) !$acc exit data copyout(jtemp, jpress, jeta, tropo, fmajor) + !$omp target exit data map(from:jtemp, jpress, jeta, tropo, fmajor) end function compute_gas_taus !------------------------------------------------------------------------------------------ ! @@ -678,6 +705,7 @@ function set_solar_variability(this, & ! Calculate solar source function for provided facular and sunspot indices ! !$acc parallel loop + !$omp target teams distribute parallel do simd do igpt = 1, size(this%solar_source_quiet) this%solar_source(igpt) = this%solar_source_quiet(igpt) + & (mg_index - a_offset) * this%solar_source_facular(igpt) + & @@ -708,9 +736,11 @@ function set_tsi(this, tsi) result(error_msg) ! Scale the solar source function to the input tsi ! !$acc kernels + !$omp target norm = 1._wp/sum(this%solar_source(:)) this%solar_source(:) = this%solar_source(:) * tsi * norm !$acc end kernels + !$omp end target end if end function set_tsi @@ -790,9 +820,13 @@ function source(this, & ! which depend on mapping from spectral space that creates k-distribution. 
!$acc enter data copyin(sources) !$acc enter data create(sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source) + !$omp target enter data map(alloc:sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source) !$acc enter data create(sfc_source_t, lay_source_t, lev_source_inc_t, lev_source_dec_t) attach(tlev_wk) + !$omp target enter data map(alloc:sfc_source_t, lay_source_t, lev_source_inc_t, lev_source_dec_t) map(to:tlev_wk) !$acc enter data create(sfc_source_Jac) + !$omp target enter data map(alloc:sfc_source_Jac) !$acc enter data create(sources%sfc_source_Jac) + !$omp target enter data map(alloc:sources%sfc_source_Jac) call compute_Planck_source(ncol, nlay, nbnd, ngpt, & get_nflav(this), this%get_neta(), this%get_npres(), this%get_ntemp(), this%get_nPlanckTemp(), & tlay, tlev_wk, tsfc, merge(1,nlay,play(1,1) > play(1,nlay)), & @@ -802,6 +836,7 @@ function source(this, & sfc_source_t, lay_source_t, lev_source_inc_t, lev_source_dec_t, & sfc_source_Jac) !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol sources%sfc_source (icol,igpt) = sfc_source_t (igpt,icol) @@ -815,9 +850,13 @@ function source(this, & ! Transposition of a 2D array, for which we don't have a routine in mo_rrtmgp_util_reorder. ! 
!$acc exit data delete(sfc_source_Jac) + !$omp target exit data map(release:sfc_source_Jac) !$acc exit data delete(sfc_source_t, lay_source_t, lev_source_inc_t, lev_source_dec_t) detach(tlev_wk) + !$omp target exit data map(release:sfc_source_t, lay_source_t, lev_source_inc_t, lev_source_dec_t) map(from:tlev_wk) !$acc exit data copyout(sources%sfc_source_Jac) + !$omp target exit data map(from:sources%sfc_source_Jac) !$acc exit data copyout(sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source) + !$omp target exit data map(from:sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source) !$acc exit data copyout(sources) end function source !-------------------------------------------------------------------------------------------------------------------- @@ -905,11 +944,14 @@ function load_int(this, available_gases, gas_names, key_species, & this%planck_frac (size(planck_frac,1), size(planck_frac,2), size(planck_frac,3), size(planck_frac,4)), & this%optimal_angle_fit(size(optimal_angle_fit, 1), size(optimal_angle_fit, 2))) !$acc enter data create(this%totplnk, this%planck_frac, this%optimal_angle_fit) + !$omp target enter data map(alloc:this%totplnk, this%planck_frac, this%optimal_angle_fit) !$acc kernels + !$omp target this%totplnk = totplnk this%planck_frac = planck_frac this%optimal_angle_fit = optimal_angle_fit !$acc end kernels + !$omp end target ! Temperature steps for Planck function interpolation ! 
Assumes that temperature minimum and max are the same for the absorption coefficient grid and the @@ -1014,11 +1056,14 @@ function load_ext(this, available_gases, gas_names, key_species, & allocate(this%solar_source_quiet(ngpt), this%solar_source_facular(ngpt), & this%solar_source_sunspot(ngpt), this%solar_source(ngpt)) !$acc enter data create(this%solar_source_quiet, this%solar_source_facular, this%solar_source_sunspot, this%solar_source) + !$omp target enter data map(alloc:this%solar_source_quiet, this%solar_source_facular, this%solar_source_sunspot, this%solar_source) !$acc kernels + !$omp target this%solar_source_quiet = solar_quiet this%solar_source_facular = solar_facular this%solar_source_sunspot = solar_sunspot !$acc end kernels + !$omp end target err_message = this%set_solar_variability(mg_default, sb_default) end function load_ext !-------------------------------------------------------------------------------------------------------------------- @@ -1168,6 +1213,7 @@ function init_abs_coeffs(this, & this%temp_ref = temp_ref this%kmajor = kmajor !$acc enter data copyin(this%kmajor) + !$omp target enter data map(to:this%kmajor) if(allocated(rayl_lower) .neqv. allocated(rayl_upper)) then @@ -1185,6 +1231,7 @@ function init_abs_coeffs(this, & allocate(this%press_ref_log(size(this%press_ref))) this%press_ref_log(:) = log(this%press_ref(:)) !$acc enter data copyin(this%press_ref_log) + !$omp target enter data map(to:this%press_ref_log) ! log scale of reference pressure @@ -1377,20 +1424,24 @@ function get_col_dry(vmr_h2o, plev, latitude) result(col_dry) ncol = size(plev, dim=1) nlev = size(plev, dim=2) !$acc enter data create(g0) + !$omp target enter data map(alloc:g0) if(present(latitude)) then ! A purely OpenACC implementation would probably compute g0 within the kernel below !$acc parallel loop + !$omp target teams distribute parallel do simd do icol = 1, ncol g0(icol) = helmert1 - helmert2 * cos(2.0_wp * pi * latitude(icol) / 180.0_wp) ! 
acceleration due to gravity [m/s^2] end do else !$acc parallel loop + !$omp target teams distribute parallel do simd do icol = 1, ncol g0(icol) = grav end do end if !$acc parallel loop gang vector collapse(2) copyin(plev,vmr_h2o) copyout(col_dry) + !$omp target teams distribute parallel do simd collapse(2) map(to:plev, vmr_h2o) map(from:col_dry) do ilev = 1, nlev-1 do icol = 1, ncol delta_plev = abs(plev(icol,ilev) - plev(icol,ilev+1)) @@ -1401,6 +1452,7 @@ function get_col_dry(vmr_h2o, plev, latitude) result(col_dry) end do end do !$acc exit data delete (g0) + !$omp target exit data map(release:g0) end function get_col_dry !-------------------------------------------------------------------------------------------------------------------- ! @@ -1432,6 +1484,7 @@ function compute_optimal_angles(this, optical_props, optimal_angles) result(err_ ! column transmissivity ! !$acc parallel loop gang vector collapse(2) copyin(optical_props, optical_props%tau, optical_props%gpt2band) copyout(optimal_angles) + !$omp target teams distribute parallel do simd collapse(2) map(to: optical_props%tau, optical_props%gpt2band) map(from:optimal_angles) do icol = 1, ncol do igpt = 1, ngpt ! @@ -1720,6 +1773,7 @@ subroutine reduce_minor_arrays(available_gases, & enddo endif !$acc enter data copyin(kminor_atm_red) + !$omp target enter data map(to:kminor_atm_red) end subroutine reduce_minor_arrays @@ -1782,45 +1836,61 @@ subroutine combine_and_reorder(tau, tau_rayleigh, has_rayleigh, optical_props) if (.not. has_rayleigh) then ! index reorder (ngpt, nlay, ncol) -> (ncol,nlay,gpt) !$acc enter data copyin(tau) + !$omp target enter data map(to:tau) !$acc enter data create(optical_props%tau) + !$omp target enter data map(alloc:optical_props%tau) call reorder123x321(tau, optical_props%tau) select type(optical_props) type is (ty_optical_props_2str) !$acc enter data create(optical_props%ssa, optical_props%g) + !!$omp target enter data map(alloc:optical_props%ssa, optical_props%g) ! 
Not needed with Cray compiler call zero_array( ncol,nlay,ngpt,optical_props%ssa) call zero_array( ncol,nlay,ngpt,optical_props%g ) !$acc exit data copyout(optical_props%ssa, optical_props%g) + !!$omp target exit data map(from:optical_props%ssa, optical_props%g) ! Not needed with Cray compiler type is (ty_optical_props_nstr) ! We ought to be able to combine this with above nmom = size(optical_props%p, 1) !$acc enter data create(optical_props%ssa, optical_props%p) + !$omp target enter data map(alloc:optical_props%ssa, optical_props%p) call zero_array( ncol,nlay,ngpt,optical_props%ssa) call zero_array(nmom,ncol,nlay,ngpt,optical_props%p ) !$acc exit data copyout(optical_props%ssa, optical_props%p) + !$omp target exit data map(from:optical_props%ssa, optical_props%p) end select !$acc exit data copyout(optical_props%tau) + !$omp target exit data map(from:optical_props%tau) !$acc exit data delete(tau) + !$omp target exit data map(release:tau) else ! combine optical depth and rayleigh scattering !$acc enter data copyin(tau, tau_rayleigh) + !$omp target enter data map(to:tau, tau_rayleigh) select type(optical_props) type is (ty_optical_props_1scl) ! User is asking for absorption optical depth !$acc enter data create(optical_props%tau) + !$omp target enter data map(alloc:optical_props%tau) call reorder123x321(tau, optical_props%tau) !$acc exit data copyout(optical_props%tau) + !$omp target exit data map(from:optical_props%tau) type is (ty_optical_props_2str) !$acc enter data create(optical_props%tau, optical_props%ssa, optical_props%g) + !$omp target enter data map(alloc:optical_props%tau, optical_props%ssa, optical_props%g) call combine_and_reorder_2str(ncol, nlay, ngpt, tau, tau_rayleigh, & optical_props%tau, optical_props%ssa, optical_props%g) !$acc exit data copyout(optical_props%tau, optical_props%ssa, optical_props%g) + !$omp target exit data map(from:optical_props%tau, optical_props%ssa, optical_props%g) type is (ty_optical_props_nstr) ! 
We ought to be able to combine this with above nmom = size(optical_props%p, 1) !$acc enter data create(optical_props%tau, optical_props%ssa, optical_props%p) + !$omp target enter data map(alloc:optical_props%tau, optical_props%ssa, optical_props%p) call combine_and_reorder_nstr(ncol, nlay, ngpt, nmom, tau, tau_rayleigh, & optical_props%tau, optical_props%ssa, optical_props%p) !$acc exit data copyout(optical_props%tau, optical_props%ssa, optical_props%p) + !$omp target exit data map(from:optical_props%tau, optical_props%ssa, optical_props%p) end select !$acc exit data delete(tau, tau_rayleigh) + !$omp target exit data map(release:tau, tau_rayleigh) end if !$acc exit data copyout(optical_props) end subroutine combine_and_reorder diff --git a/rte/kernels-openacc/mo_optical_props_kernels.F90 b/rte/kernels-openacc/mo_optical_props_kernels.F90 index 65c91eae4..711a64778 100644 --- a/rte/kernels-openacc/mo_optical_props_kernels.F90 +++ b/rte/kernels-openacc/mo_optical_props_kernels.F90 @@ -63,6 +63,10 @@ subroutine delta_scale_2str_f_k(ncol, nlay, ngpt, tau, ssa, g, f) & !$acc& copy(ssa(:ncol,:nlay,:ngpt),tau(:ncol,:nlay,:ngpt)) & !$acc& copyin(f(:ncol,:nlay,:ngpt)) & !$acc& copy(g(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:ssa(:ncol, :nlay, :ngpt), tau(:ncol, :nlay, :ngpt)) & + !$omp& map(to:f(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:g(:ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -92,6 +96,8 @@ subroutine delta_scale_2str_k(ncol, nlay, ngpt, tau, ssa, g) & !$acc parallel loop collapse(3) & !$acc& copy(tau(:ncol,:nlay,:ngpt),ssa(:ncol,:nlay,:ngpt),g(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:tau(:ncol, :nlay, :ngpt), ssa(:ncol, :nlay, :ngpt), g(:ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -137,6 +143,9 @@ subroutine increment_1scalar_by_1scalar(ncol, nlay, ngpt, & !$acc parallel loop 
collapse(3) & !$acc& copyin(tau2(:ncol,:nlay,:ngpt)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:tau2(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -162,6 +171,10 @@ subroutine increment_1scalar_by_2stream(ncol, nlay, ngpt, & !$acc& copyin(tau2(:ncol,:nlay,:ngpt)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt)) & !$acc& copyin(ssa2(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:tau2(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:ssa2(:ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -188,6 +201,10 @@ subroutine increment_1scalar_by_nstream(ncol, nlay, ngpt, & !$acc& copyin(tau2(:ncol,:nlay,:ngpt)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt)) & !$acc& copyin(ssa2(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:tau2(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:ssa2(:ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -216,6 +233,10 @@ subroutine increment_2stream_by_1scalar(ncol, nlay, ngpt, & !$acc& copy(ssa1(:ncol,:nlay,:ngpt)) & !$acc& copyin(tau2(:ncol,:nlay,:ngpt)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:tau2(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -248,6 +269,11 @@ subroutine increment_2stream_by_2stream(ncol, nlay, ngpt, & !$acc& copy(ssa1(:ncol,:nlay,:ngpt)) & !$acc& copyin(ssa2(:ncol,:nlay,:ngpt),tau2(:ncol,:nlay,:ngpt)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt),g1(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:g2(:ncol, 
:nlay, :ngpt)) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:ssa2(:ncol, :nlay, :ngpt), tau2(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt), g1(:ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -289,6 +315,11 @@ subroutine increment_2stream_by_nstream(ncol, nlay, ngpt, nmom2, & !$acc& copy(ssa1(:ncol,:nlay,:ngpt)) & !$acc& copyin(ssa2(:ncol,:nlay,:ngpt),tau2(:ncol,:nlay,:ngpt)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt),g1(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:p2(:1, :ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:ssa2(:ncol, :nlay, :ngpt), tau2(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt), g1(:ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -328,6 +359,10 @@ subroutine increment_nstream_by_1scalar(ncol, nlay, ngpt, & !$acc& copy(ssa1(:ncol,:nlay,:ngpt)) & !$acc& copyin(tau2(:ncol,:nlay,:ngpt)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:tau2(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -364,8 +399,15 @@ subroutine increment_nstream_by_2stream(ncol, nlay, ngpt, nmom1, & !$acc& copyin(ssa2(:ncol,:nlay,:ngpt)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt)) & !$acc& copyin(g2(:ncol,:nlay,:ngpt)) & - !$acc& copy(temp_moms(:nmom1)) & + !$acc& private(temp_moms) & !$acc& copyin(tau2(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:p1(:nmom1, :ncol, :nlay, :ngpt), ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:ssa2(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:g2(:ncol, :nlay, :ngpt)) & + !$omp& private(temp_moms) & + !$omp& map(to:tau2(:ncol, :nlay, :ngpt)) do 
igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -415,6 +457,11 @@ subroutine increment_nstream_by_nstream(ncol, nlay, ngpt, nmom1, nmom2, & !$acc& copy(ssa1(:ncol,:nlay,:ngpt)) & !$acc& copyin(ssa2(:ncol,:nlay,:ngpt),tau2(:ncol,:nlay,:ngpt)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt),p1(:mom_lim,:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:p2(:mom_lim, :ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:ssa2(:ncol, :nlay, :ngpt), tau2(:ncol, :nlay, :ngpt)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt), p1(:mom_lim, :ncol, :nlay, :ngpt)) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -457,6 +504,10 @@ subroutine inc_1scalar_by_1scalar_bybnd(ncol, nlay, ngpt, & !$acc& copyin(tau2(:ncol,:nlay,:nbnd)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt)) & !$acc& copyin(gpt_lims(:,:nbnd)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:tau2(:ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:gpt_lims(:, :nbnd)) do igpt = 1 , ngpt do ilay = 1 , nlay do icol = 1 , ncol @@ -485,6 +536,10 @@ subroutine inc_1scalar_by_2stream_bybnd(ncol, nlay, ngpt, & !$acc& copyin(tau2(:ncol,:nlay,:nbnd),ssa2(:ncol,:nlay,:nbnd)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt)) & !$acc& copyin(gpt_lims(:,:nbnd)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:tau2(:ncol, :nlay, :nbnd), ssa2(:ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:gpt_lims(:, :nbnd)) do igpt = 1 , ngpt do ilay = 1 , nlay do icol = 1 , ncol @@ -513,6 +568,10 @@ subroutine inc_1scalar_by_nstream_bybnd(ncol, nlay, ngpt, & !$acc& copyin(gpt_lims(:,:nbnd),tau2(:ncol,:nlay,:nbnd)) & !$acc& copy(tau1(:ncol,:nlay,:ngpt)) & !$acc& copyin(ssa2(:ncol,:nlay,:nbnd)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:gpt_lims(:, :nbnd), tau2(:ncol, :nlay, :nbnd)) & + !$omp& 
map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:ssa2(:ncol, :nlay, :nbnd)) do igpt = 1 , ngpt do ilay = 1 , nlay do icol = 1 , ncol @@ -545,6 +604,11 @@ subroutine inc_2stream_by_1scalar_bybnd(ncol, nlay, ngpt, & !$acc& copyin(tau2(:ncol,:nlay,:nbnd)) & !$acc& copy(ssa1(:ncol,:nlay,:ngpt)) & !$acc& copyin(gpt_lims(:,:nbnd)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:tau2(:ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:gpt_lims(:, :nbnd)) do igpt = 1 , ngpt do ilay = 1, nlay do icol = 1, ncol @@ -580,6 +644,13 @@ subroutine inc_2stream_by_2stream_bybnd(ncol, nlay, ngpt, & !$acc& copyin(gpt_lims(:,:nbnd)) & !$acc& copy(g1(:ncol,:nlay,:ngpt)) & !$acc& copyin(g2(:ncol,:nlay,:nbnd)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:tau2(:ncol, :nlay, :nbnd), ssa2(:ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:gpt_lims(:, :nbnd)) & + !$omp& map(tofrom:g1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:g2(:ncol, :nlay, :nbnd)) do igpt = 1 , ngpt do ilay = 1, nlay do icol = 1, ncol @@ -624,6 +695,12 @@ subroutine inc_2stream_by_nstream_bybnd(ncol, nlay, ngpt, nmom2, & !$acc& copy(ssa1(:ncol,:nlay,:ngpt)) & !$acc& copyin(p2(:1,:ncol,:nlay,:nbnd),gpt_lims(:,:nbnd)) & !$acc& copy(g1(:ncol,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:tau2(:ncol, :nlay, :nbnd), ssa2(:ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:p2(:1, :ncol, :nlay, :nbnd), gpt_lims(:, :nbnd)) & + !$omp& map(tofrom:g1(:ncol, :nlay, :ngpt)) do igpt = 1 , ngpt do ilay = 1, nlay do icol = 1, ncol @@ -666,6 +743,11 @@ subroutine inc_nstream_by_1scalar_bybnd(ncol, nlay, ngpt, & !$acc& copyin(tau2(:ncol,:nlay,:nbnd)) & !$acc& 
copy(ssa1(:ncol,:nlay,:ngpt)) & !$acc& copyin(gpt_lims(:,:nbnd)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:tau2(:ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:gpt_lims(:, :nbnd)) do igpt = 1 , ngpt do ilay = 1, nlay do icol = 1, ncol @@ -704,8 +786,15 @@ subroutine inc_nstream_by_2stream_bybnd(ncol, nlay, ngpt, nmom1, & !$acc& copyin(ssa2(:ncol,:nlay,:nbnd)) & !$acc& copy(ssa1(:ncol,:nlay,:ngpt),p1(:nmom1,:ncol,:nlay,:ngpt)) & !$acc& copyin(tau2(:ncol,:nlay,:nbnd)) & - !$acc& copy(temp_moms(:nmom1)) & + !$acc& private(temp_moms) & !$acc& copyin(gpt_lims(:,:nbnd),g2(:ncol,:nlay,:nbnd)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:ssa2(:ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt), p1(:nmom1, :ncol, :nlay, :ngpt)) & + !$omp& map(to:tau2(:ncol, :nlay, :nbnd)) & + !$omp& private(temp_moms) & + !$omp& map(to:gpt_lims(:, :nbnd), g2(:ncol, :nlay, :nbnd)) do igpt = 1 , ngpt do ilay = 1, nlay do icol = 1, ncol @@ -760,6 +849,14 @@ subroutine inc_nstream_by_nstream_bybnd(ncol, nlay, ngpt, nmom1, nmom2, & !$acc& copyin(tau2(:ncol,:nlay,:nbnd)) & !$acc& copy(p1(:mom_lim,:ncol,:nlay,:ngpt)) & !$acc& copyin(gpt_lims(:,:nbnd)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:p2(:mom_lim, :ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:ssa1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:ssa2(:ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:tau1(:ncol, :nlay, :ngpt)) & + !$omp& map(to:tau2(:ncol, :nlay, :nbnd)) & + !$omp& map(tofrom:p1(:mom_lim, :ncol, :nlay, :ngpt)) & + !$omp& map(to:gpt_lims(:, :nbnd)) do igpt = 1 , ngpt do ilay = 1, nlay do icol = 1, ncol @@ -802,6 +899,9 @@ subroutine extract_subset_dim1_3d(ncol, nlay, ngpt, array_in, colS, colE, array_ !$acc parallel loop collapse(3) & !$acc& 
copyout(array_out(:cole-cols+1,:nlay,:ngpt)) & !$acc& copyin(array_in(cols:cole,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(from:array_out) & + !$omp& map(to:array_in) do igpt = 1, ngpt do ilay = 1, nlay do icol = colS, colE @@ -825,6 +925,9 @@ subroutine extract_subset_dim2_4d(nmom, ncol, nlay, ngpt, array_in, colS, colE, !$acc parallel loop collapse(4) & !$acc& copyout(array_out(:nmom,:cole-cols+1,:nlay,:ngpt)) & !$acc& copyin(array_in(:nmom,cols:cole,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(4) & + !$omp& map(from:array_out) & + !$omp& map(to:array_in) do igpt = 1, ngpt do ilay = 1, nlay do icol = colS, colE @@ -855,6 +958,10 @@ subroutine extract_subset_absorption_tau(ncol, nlay, ngpt, tau_in, ssa_in, & !$acc& copyin(ssa_in(cols:cole,:nlay,:ngpt)) & !$acc& copyout(tau_out(:cole-cols+1,:nlay,:ngpt)) & !$acc& copyin(tau_in(cols:cole,:nlay,:ngpt)) + !$omp target teams distribute parallel do simd collapse(3) & + !$omp& map(to:ssa_in) & + !$omp& map(from:tau_out) & + !$omp& map(to:tau_in) do igpt = 1, ngpt do ilay = 1, nlay do icol = colS, colE diff --git a/rte/kernels-openacc/mo_rte_solver_kernels.F90 b/rte/kernels-openacc/mo_rte_solver_kernels.F90 index 775c57520..719922a92 100644 --- a/rte/kernels-openacc/mo_rte_solver_kernels.F90 +++ b/rte/kernels-openacc/mo_rte_solver_kernels.F90 @@ -39,11 +39,8 @@ module mo_rte_solver_kernels lw_solver_noscat, lw_solver_noscat_GaussQuad, lw_solver_2stream, & sw_solver_noscat, sw_solver_2stream - public :: lw_solver_1rescl_GaussQuad, lw_solver_1rescl ! These routines don't really need to be visible but making them so is useful for testing. 
- public :: lw_source_noscat, lw_combine_sources, & - lw_source_2str, sw_source_2str, & - lw_two_stream, sw_two_stream, & + public :: lw_two_stream, sw_two_stream, & adding real(wp), parameter :: pi = acos(-1._wp) @@ -62,42 +59,55 @@ module mo_rte_solver_kernels subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, & tau, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & radn_up, radn_dn, & - sfc_srcJac, radn_upJac) bind(C, name="lw_solver_noscat") - integer, intent( in) :: ncol, nlay, ngpt ! Number of columns, layers, g-points - logical(wl), intent( in) :: top_at_1 - real(wp), dimension(ncol, ngpt), intent( in) :: D ! secant of propagation angle [] - real(wp), intent( in) :: weight ! quadrature weight - real(wp), dimension(ncol,nlay, ngpt), intent( in) :: tau ! Absorption optical thickness [] - real(wp), dimension(ncol,nlay, ngpt), intent( in) :: lay_source ! Planck source at layer average temperature [W/m2] + do_Jacobians, sfc_srcJac, radn_upJac, & + do_rescaling, ssa, g) bind(C, name="lw_solver_noscat") + integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points + logical(wl), intent(in ) :: top_at_1 + real(wp), dimension(ncol, ngpt), intent(in ) :: D ! secant of propagation angle [] + real(wp), intent(in ) :: weight ! quadrature weight + real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: tau ! Absorption optical thickness [] + real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: lay_source ! Planck source at layer average temperature [W/m2] ! Planck source at layer edge for radiation in increasing/decreasing ilay direction ! lev_source_dec applies the mapping in layer i to the Planck function at layer i ! lev_source_inc applies the mapping in layer i to the Planck function at layer i+1 real(wp), dimension(ncol,nlay, ngpt), target, & - intent( in) :: lev_source_inc, lev_source_dec - real(wp), dimension(ncol, ngpt), intent( in) :: sfc_emis ! 
Surface emissivity [] - real(wp), dimension(ncol, ngpt), intent( in) :: sfc_src ! Surface source function [W/m2] - real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_dn ! Radiances [W/m2-str] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: radn_up ! Radiances [W/m2-str] - ! Top level must contain incident flux boundary condition - - real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_srcJac ! surface temperature Jacobian of surface source function [W/m2/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent(out) :: radn_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] - ! Local variables, no g-point dependency + intent(in ) :: lev_source_inc, lev_source_dec + real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_emis ! Surface emissivity [] + real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_src ! Surface source function [W/m2] + real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: radn_up ! Radiances [W/m2-str] + real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_dn ! Top level must contain incident flux boundary condition + + ! + ! Optional variables - arrays aren't referenced if corresponding logical == False + ! + logical(wl), intent(in ) :: do_Jacobians + real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_srcJac ! surface temperature Jacobian of surface source function [W/m2/K] + real(wp), dimension(ncol,nlay+1 ), intent( out) :: radn_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] + logical(wl), intent(in ) :: do_rescaling + real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: ssa, g ! single-scattering albedo, asymmetry parameter + ! ------------------------------------------------------------------------------------------------- + ! Local variables real(wp), dimension(ncol,nlay,ngpt) :: tau_loc, & ! path length (tau/mu) trans ! 
transmissivity = exp(-tau) real(wp), dimension(ncol,nlay,ngpt) :: source_dn, source_up - real(wp), dimension(ncol, ngpt) :: source_sfc, sfc_albedo, source_sfcJac real(wp), dimension(:,:,:), pointer :: lev_source_up, lev_source_dn ! Mapping increasing/decreasing indicies to up/down real(wp), parameter :: pi = acos(-1._wp) - integer :: icol, ilev, igpt, top_level, ilay + integer :: icol, ilay, ilev, igpt, top_level, sfc_level real(wp) :: fact real(wp), parameter :: tau_thresh = sqrt(epsilon(tau_loc)) - ! ------------------------------------ - - + ! + ! For Jacobians + ! + real(wp), dimension(ncol,nlay+1,ngpt) :: gpt_Jac + real(wp) :: scalar_Jac ! Temp variable for reduction + ! + ! Used when approximating scattering + ! + real(wp) :: ssal, wb, scaleTau, scaling + real(wp), dimension(ncol,nlay, ngpt) :: An, Cn ! ------------------------------------ ! Which way is up? ! Level Planck sources for upward and downward radiation @@ -105,19 +115,33 @@ subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, ! 
lev_source_dn => lev_source_inc, and vice-versa if(top_at_1) then top_level = 1 + sfc_level = nlay+1 lev_source_up => lev_source_dec lev_source_dn => lev_source_inc else top_level = nlay+1 + sfc_level = 1 lev_source_up => lev_source_inc lev_source_dn => lev_source_dec end if - !$acc enter data copyin(d,tau,sfc_src,sfc_emis,lev_source_dec,lev_source_inc,lay_source,radn_dn) - !$acc enter data create(tau_loc,trans,source_dn,source_up,source_sfc,sfc_albedo,radn_up) - !$acc enter data attach(lev_source_up,lev_source_dn) + !$acc enter data copyin(d,tau,sfc_src,sfc_emis,lev_source_dec,lev_source_inc,lay_source,radn_dn) + !$omp target enter data map(to:d, tau, sfc_src, sfc_emis, lev_source_dec, lev_source_inc, lay_source, radn_dn) + !$acc enter data attach(lev_source_up,lev_source_dn) + !$omp target enter data map(to:lev_source_up, lev_source_dn) + !$acc enter data create( tau_loc,trans,source_dn,source_up,radn_up) + !$omp target enter data map(alloc:tau_loc,trans,source_dn,source_up,radn_up) + + !$acc enter data copyin(sfc_srcJac) if(do_Jacobians) + !$omp target enter data map(to:sfc_srcJac) if(do_Jacobians) + !$acc enter data create( radn_upJac, gpt_Jac) if(do_Jacobians) + !$omp target enter data map(alloc:radn_upJac, gpt_Jac) if(do_Jacobians) + + !$acc enter data create( An, Cn) if(do_rescaling) + !$omp target enter data map(alloc:An, Cn) if(do_rescaling) !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol ! @@ -125,70 +149,126 @@ subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, ! convert flux at top of domain to intensity assuming azimuthal isotropy ! radn_dn(icol,top_level,igpt) = radn_dn(icol,top_level,igpt)/(2._wp * pi * weight) - ! - ! Surface albedo, surface source function - ! 
- sfc_albedo(icol,igpt) = 1._wp - sfc_emis(icol,igpt) - source_sfc(icol,igpt) = sfc_emis(icol,igpt) * sfc_src(icol,igpt) end do end do - !$acc enter data copyin(sfc_srcJac) - !$acc enter data create(source_sfcJac, radn_upJac) - !$acc parallel loop collapse(2) - do igpt = 1, ngpt - do icol = 1, ncol - source_sfcJac(icol,igpt) = sfc_emis(icol,igpt) * sfc_srcJac(icol,igpt) - end do - end do - ! NOTE: This kernel produces small differences between GPU and CPU - ! implementations on Ascent with PGI, we assume due to floating point - ! differences in the exp() function. These differences are small in the - ! RFMIP test case (10^-6). !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol ! - ! Optical path and transmission, used in source function and transport calculations + ! The wb and scaleTau terms are independent of propagation + ! angle D and could be pre-computed if several values of D are used + ! We re-compute them here to keep not have to localize memory use ! - tau_loc(icol,ilay,igpt) = tau(icol,ilay,igpt)*D(icol,igpt) - trans (icol,ilay,igpt) = exp(-tau_loc(icol,ilay,igpt)) - - call lw_source_noscat_stencil(ncol, nlay, ngpt, icol, ilay, igpt, & - lay_source, lev_source_up, lev_source_dn, & - tau_loc, trans, & - source_dn, source_up) + if(do_rescaling) then + ssal = ssa(icol, ilay, igpt) + wb = ssal*(1._wp - g(icol, ilay, igpt)) * 0.5_wp + scaleTau = (1._wp - ssal + wb) + ! here wb/scaleTau is parameter wb/(1-w(1-b)) of Eq.21 of the Tang paper + ! actually it is in line of parameter rescaling defined in Eq.7 + ! potentialy if g=ssa=1 then wb/scaleTau = NaN + ! it should not happen because g is never 1 in atmospheres + ! explanation of factor 0.4 note A of Table + Cn(icol,ilay,igpt) = 0.4_wp*wb/scaleTau + ! 
Eq.15 of the paper, multiplied by path length + tau_loc(icol,ilay,igpt) = tau(icol,ilay,igpt)*D(icol,igpt)*scaleTau + trans (icol,ilay,igpt) = exp(-tau_loc(icol,ilay,igpt)) + An (icol,ilay,igpt) = (1._wp-trans(icol,ilay,igpt)**2) + else + ! + ! Optical path and transmission, used in source function and transport calculations + ! + tau_loc(icol,ilay,igpt) = tau(icol,ilay,igpt)*D(icol,igpt) + trans (icol,ilay,igpt) = exp(-tau_loc(icol,ilay,igpt)) + end if + call lw_source_noscat(lay_source(icol,ilay,igpt), & + lev_source_up(icol,ilay,igpt), lev_source_dn(icol,ilay,igpt), & + tau_loc(icol,ilay,igpt), trans(icol,ilay,igpt), & + source_dn(icol,ilay,igpt), source_up(icol,ilay,igpt)) end do end do end do ! - ! Transport + ! Transport down ! - call lw_transport_noscat(ncol, nlay, ngpt, top_at_1, & - tau_loc, trans, sfc_albedo, source_dn, source_up, source_sfc, & - radn_up, radn_dn, source_sfcJac, radn_upJac) - + call lw_transport_noscat_dn(ncol, nlay, ngpt, top_at_1, trans, source_dn, radn_dn) + ! + ! Surface reflection and emission + ! + !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) + do igpt = 1, ngpt + do icol = 1, ncol + ! + ! Surface albedo, surface source function + ! + radn_up (icol,sfc_level,igpt) = radn_dn(icol,sfc_level,igpt)*(1._wp - sfc_emis(icol,igpt)) + & + sfc_src(icol, igpt)* sfc_emis(icol,igpt) + if(do_Jacobians) & + gpt_Jac(icol,sfc_level,igpt) = sfc_srcJac(icol, igpt)* sfc_emis(icol,igpt) + end do + end do + ! + ! Transport up, or up and down again if using rescaling + ! + if(do_rescaling) then + call lw_transport_1rescl(ncol, nlay, ngpt, top_at_1, trans, & + source_dn, source_up, & + radn_up, radn_dn, An, Cn, & + do_Jacobians, gpt_Jac) ! Standing in for Jacobian, i.e. rad_up_Jac, rad_dn_Jac) + else + call lw_transport_noscat_up(ncol, nlay, ngpt, top_at_1, trans, source_up, radn_up, & + do_Jacobians, gpt_Jac) + end if ! ! Convert intensity to flux assuming azimuthal isotropy and quadrature weight ! 
!$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igpt = 1, ngpt do ilev = 1, nlay+1 do icol = 1, ncol radn_dn (icol,ilev,igpt) = 2._wp * pi * weight * radn_dn (icol,ilev,igpt) radn_up (icol,ilev,igpt) = 2._wp * pi * weight * radn_up (icol,ilev,igpt) - radn_upJac(icol,ilev,igpt) = 2._wp * pi * weight * radn_upJac(icol,ilev,igpt) end do end do end do - !$acc exit data delete(sfc_srcJac, source_sfcJac) - !$acc exit data copyout(radn_upJac) + ! + ! Only broadband-integrated Jacobians are provided + ! + if (do_Jacobians) then + ! + ! Reduce by summing along g-point dimension + ! + !$acc parallel loop gang vector collapse(2) + !$omp target teams distribute parallel do simd collapse(2) + do ilev = 1, nlay+1 + do icol = 1, ncol + scalar_Jac = 0.0_wp + do igpt = 1, ngpt + scalar_Jac = scalar_Jac + gpt_Jac(icol, ilev, igpt) + end do + radn_upJac(icol, ilev) = 2._wp * pi * weight * scalar_Jac + end do + end do + end if - !$acc exit data copyout(radn_dn,radn_up) - !$acc exit data delete(d,tau,sfc_src,sfc_emis,lev_source_dec,lev_source_inc,lay_source,tau_loc,trans,source_dn,source_up,source_sfc,sfc_albedo) - !$acc exit data detach(lev_source_up,lev_source_dn) + !$acc exit data delete( sfc_srcJac, gpt_Jac) if(do_Jacobians) + !$omp target exit data map(release:sfc_srcJac, gpt_Jac) if(do_Jacobians) + !$acc exit data copyout( radn_upJac) if(do_Jacobians) + !$omp target exit data map(from:radn_upJac) if(do_Jacobians) + + !$acc exit data copyout( radn_dn,radn_up) + !$omp target exit data map(from:radn_dn,radn_up) + !$acc exit data delete( d,tau,sfc_src,sfc_emis,lev_source_dec,lev_source_inc,lay_source,tau_loc,trans,source_dn,source_up) + !$omp target exit data map(release:d, tau, sfc_src, sfc_emis, lev_source_dec, lev_source_inc, lay_source, tau_loc, trans, source_dn, source_up) + !$acc exit data detach( lev_source_up,lev_source_dn) + !$omp target exit data map(from:lev_source_up, lev_source_dn) + !$acc exit data delete( An, Cn) 
if(do_rescaling) + !$omp target exit data map(release:An, Cn) if(do_rescaling) end subroutine lw_solver_noscat ! --------------------------------------------------------------- @@ -200,7 +280,8 @@ end subroutine lw_solver_noscat ! --------------------------------------------------------------- subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weights, & tau, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, flux_up, flux_dn,& - sfc_srcJac, flux_upJac) & + do_Jacobians, sfc_srcJac, flux_upJac, & + do_rescaling, ssa, g) & bind (C, name="lw_solver_noscat_GaussQuad") integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points logical(wl), intent(in ) :: top_at_1 @@ -215,13 +296,22 @@ subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weig ! Planck source at layer edge for radiation in decreasing ilay direction [W/m2] real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_emis ! Surface emissivity [] real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_src ! Surface source function [W/m2] - real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: flux_dn ! Radiances [W/m2-str] + real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: flux_dn ! Radiances [W/m2-str], Top level must contain incident flux boundary condition real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: flux_up ! Radiances [W/m2-str] - ! Top level must contain incident flux boundary condition - real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_srcJac ! surface temperature Jacobian of surface source function [W/m2/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent(out) :: flux_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] + ! + ! Optional variables - arrays aren't referenced if corresponding logical == False + ! + logical(wl), intent(in ) :: do_Jacobians + real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_srcJac ! 
surface temperature Jacobian of surface source function [W/m2/K] + real(wp), dimension(ncol,nlay+1 ), intent(out ) :: flux_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] + logical(wl), intent(in ) :: do_rescaling + real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: ssa, g ! single-scattering albedo, asymmetry parameter + ! ------------------------------------ + ! ! Local variables - real(wp), dimension(ncol,nlay+1,ngpt) :: radn_dn, radn_up, radn_upJac ! Fluxes per quad angle + ! + real(wp), dimension(ncol,nlay+1,ngpt) :: radn_dn, radn_up + real(wp), dimension(ncol,nlay+1 ) :: radn_upJac ! Fluxes per quad angle real(wp), dimension(ncol, ngpt) :: Ds_ncol real(wp), dimension(ncol, ngpt) :: flux_top @@ -229,13 +319,19 @@ subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weig integer :: icol, ilev, igpt !$acc enter data copyin(Ds,weights,tau,lay_source,lev_source_inc,lev_source_dec,sfc_emis,sfc_src,flux_dn) + !$omp target enter data map(to:Ds, weights, tau, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, flux_dn) !$acc enter data create(flux_up,radn_dn,radn_up,Ds_ncol,flux_top) - !$acc enter data copyin(sfc_srcJac) - !$acc enter data create(flux_upJac, radn_upJac) + !$omp target enter data map(alloc:flux_up, radn_dn, radn_up, Ds_ncol, flux_top) + !$acc enter data copyin(sfc_srcJac) if(do_Jacobians) + !$omp target enter data map(to:sfc_srcJac) if(do_Jacobians) + !$acc enter data create( flux_upJac, radn_upJac) if(do_Jacobians) + !$omp target enter data map(alloc:flux_upJac, radn_upJac) if(do_Jacobians) + ! radn_upJac is needed only if do_Jacobians is true .and. nmus > 1 ! ------------------------------------ ! 
------------------------------------ !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol Ds_ncol(icol, igpt) = Ds(1) @@ -245,12 +341,15 @@ subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weig call lw_solver_noscat(ncol, nlay, ngpt, & top_at_1, Ds_ncol, weights(1), tau, & lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & - flux_up, flux_dn, sfc_srcJac, flux_upJac) + flux_up, flux_dn, & + do_Jacobians, sfc_srcJac, flux_upJac, & + do_rescaling, ssa, g) ! ! For more than one angle use local arrays ! top_level = MERGE(1, nlay+1, top_at_1) !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1,ngpt do icol = 1,ncol flux_top(icol,igpt) = flux_dn(icol,top_level,igpt) @@ -260,6 +359,7 @@ subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weig do imu = 2, nmus !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol Ds_ncol(icol, igpt) = Ds(imu) @@ -268,24 +368,32 @@ subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weig call lw_solver_noscat(ncol, nlay, ngpt, & top_at_1, Ds_ncol, weights(imu), tau, & lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & - radn_up, radn_dn, sfc_srcJac, radn_upJac) + radn_up, radn_dn, & + do_Jacobians, sfc_srcJac, radn_upJac, & + do_rescaling, ssa, g) !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igpt = 1, ngpt do ilev = 1, nlay+1 do icol = 1, ncol - flux_up (icol,ilev,igpt) = flux_up (icol,ilev,igpt) + radn_up (icol,ilev,igpt) - flux_dn (icol,ilev,igpt) = flux_dn (icol,ilev,igpt) + radn_dn (icol,ilev,igpt) - flux_upJac(icol,ilev,igpt) = flux_upJac(icol,ilev,igpt) + radn_upJac(icol,ilev,igpt) + flux_up(icol,ilev,igpt) = flux_up(icol,ilev,igpt) + radn_up(icol,ilev,igpt) + flux_dn(icol,ilev,igpt) 
= flux_dn(icol,ilev,igpt) + radn_dn(icol,ilev,igpt) + if(do_Jacobians .and. igpt == 1) & + flux_upJac(icol,ilev) = flux_upJac(icol,ilev ) + radn_upJac(icol,ilev ) end do end do end do end do ! imu loop - !$acc exit data delete(sfc_srcJac, radn_upJac) - !$acc exit data copyout(flux_upJac) + !$acc exit data delete( sfc_srcJac, radn_upJac) if(do_Jacobians) + !$omp target exit data map(release:sfc_srcJac, radn_upJac) if(do_Jacobians) + !$acc exit data copyout( flux_upJac) if(do_Jacobians) + !$omp target exit data map(from:flux_upJac) if(do_Jacobians) !$acc exit data copyout(flux_up,flux_dn) + !$omp target exit data map(from:flux_up, flux_dn) !$acc exit data delete(Ds,weights,tau,lay_source,lev_source_inc,lev_source_dec,sfc_emis,sfc_src,radn_dn,radn_up,Ds_ncol,flux_top) + !$omp target exit data map(release:Ds, weights, tau, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, radn_dn, radn_up, Ds_ncol, flux_top) end subroutine lw_solver_noscat_GaussQuad ! ------------------------------------------------------------------------------------------------- ! @@ -328,7 +436,9 @@ subroutine lw_solver_2stream (ncol, nlay, ngpt, top_at_1, & ! ------------------------------------ ! ------------------------------------ !$acc enter data copyin(tau, ssa, g, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, flux_dn) + !$omp target enter data map(to:tau, ssa, g, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, flux_dn) !$acc enter data create(flux_up, Rdif, Tdif, gamma1, gamma2, sfc_albedo, lev_source, source_dn, source_up, source_sfc) + !$omp target enter data map(alloc:flux_up, Rdif, Tdif, gamma1, gamma2, sfc_albedo, lev_source, source_dn, source_up, source_sfc) ! ! RRTMGP provides source functions at each level using the spectral mapping ! of each adjacent layer. 
Combine these for two-stream calculations @@ -354,6 +464,7 @@ subroutine lw_solver_2stream (ncol, nlay, ngpt, top_at_1, & source_dn, source_up, source_sfc) !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol sfc_albedo(icol,igpt) = 1._wp - sfc_emis(icol,igpt) @@ -368,8 +479,11 @@ subroutine lw_solver_2stream (ncol, nlay, ngpt, top_at_1, & source_dn, source_up, source_sfc, & flux_up, flux_dn) !$acc exit data delete(tau, ssa, g, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src) + !$omp target exit data map(release:tau, ssa, g, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src) !$acc exit data delete(Rdif, Tdif, gamma1, gamma2, sfc_albedo, lev_source, source_dn, source_up, source_sfc) + !$omp target exit data map(release:Rdif, Tdif, gamma1, gamma2, sfc_albedo, lev_source, source_dn, source_up, source_sfc) !$acc exit data copyout(flux_up, flux_dn) + !$omp target exit data map(from:flux_up, flux_dn) end subroutine lw_solver_2stream ! ------------------------------------------------------------------------------------------------- ! @@ -393,7 +507,9 @@ subroutine sw_solver_noscat(ncol, nlay, ngpt, & ! ------------------------------------ ! ------------------------------------ !$acc enter data copyin(tau, mu0) create(mu0_inv, flux_dir) + !$omp target enter data map(to:tau, mu0) map(alloc:mu0_inv, flux_dir) !$acc parallel loop + !$omp target teams distribute parallel do simd do icol = 1, ncol mu0_inv(icol) = 1._wp/mu0(icol) enddo @@ -408,6 +524,7 @@ subroutine sw_solver_noscat(ncol, nlay, ngpt, & ! layer index = level index - 1 ! previous level is up (-1) !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol do ilev = 2, nlay+1 @@ -419,6 +536,7 @@ subroutine sw_solver_noscat(ncol, nlay, ngpt, & ! layer index = level index ! 
previous level is up (+1) !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol do ilev = nlay, 1, -1 @@ -428,6 +546,7 @@ subroutine sw_solver_noscat(ncol, nlay, ngpt, & end do end if !$acc exit data delete(tau, mu0, mu0_inv) copyout(flux_dir) + !$omp target exit data map(release:tau, mu0, mu0_inv) map(from:flux_dir) end subroutine sw_solver_noscat ! ------------------------------------------------------------------------------------------------- ! @@ -463,7 +582,9 @@ subroutine sw_solver_2stream (ncol, nlay, ngpt, top_at_1, & ! Cell properties: transmittance and reflectance for direct and diffuse radiation ! !$acc enter data copyin(tau, ssa, g, mu0, sfc_alb_dir, sfc_alb_dif, flux_dn, flux_dir) + !$omp target enter data map(to:tau, ssa, g, mu0, sfc_alb_dir, sfc_alb_dif, flux_dn, flux_dir) !$acc enter data create(Rdif, Tdif, Rdir, Tdir, Tnoscat, source_up, source_dn, source_srf, flux_up) + !$omp target enter data map(alloc:Rdif, Tdif, Rdir, Tdir, Tnoscat, source_up, source_dn, source_srf, flux_up) call sw_two_stream(ncol, nlay, ngpt, mu0, & tau , ssa , g , & Rdif, Tdif, Rdir, Tdir, Tnoscat) @@ -477,6 +598,7 @@ subroutine sw_solver_2stream (ncol, nlay, ngpt, top_at_1, & ! adding computes only diffuse flux; flux_dn is total ! 
!$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igpt = 1, ngpt do ilay = 1, nlay+1 do icol = 1, ncol @@ -485,7 +607,9 @@ subroutine sw_solver_2stream (ncol, nlay, ngpt, top_at_1, & end do end do !$acc exit data copyout(flux_up, flux_dn, flux_dir) + !$omp target exit data map(from:flux_up, flux_dn, flux_dir) !$acc exit data delete (tau, ssa, g, mu0, sfc_alb_dir, sfc_alb_dif, Rdif, Tdif, Rdir, Tdir, Tnoscat, source_up, source_dn, source_srf) + !$omp target exit data map(release:tau, ssa, g, mu0, sfc_alb_dir, sfc_alb_dif, Rdif, Tdif, Rdir, Tdir, Tnoscat, source_up, source_dn, source_srf) end subroutine sw_solver_2stream @@ -500,75 +624,40 @@ end subroutine sw_solver_2stream ! This routine implements point-wise stencil, and has to be called in a loop ! ! --------------------------------------------------------------- - subroutine lw_source_noscat_stencil(ncol, nlay, ngpt, icol, ilay, igpt, & - lay_source, lev_source_up, lev_source_dn, tau, trans, & - source_dn, source_up) + subroutine lw_source_noscat(lay_source, lev_source_up, lev_source_dn, tau, trans, & + source_dn, source_up) !$acc routine seq + !$omp declare target ! - integer, intent(in) :: ncol, nlay, ngpt - integer, intent(in) :: icol, ilay, igpt ! Working point coordinates - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: lay_source, & ! Planck source at layer center - lev_source_up, & ! Planck source at levels (layer edges), - lev_source_dn, & ! increasing/decreasing layer index - tau, & ! Optical path (tau/mu) - trans ! Transmissivity (exp(-tau)) - real(wp), dimension(ncol, nlay, ngpt), intent(inout):: source_dn, source_up - ! Source function at layer edges - ! Down at the bottom of the layer, up at the top + real(wp), intent(in) :: lay_source, & ! Planck source at layer center + lev_source_up, & ! Planck source at levels (layer edges), + lev_source_dn, & ! increasing/decreasing layer index + tau, & ! Optical path (tau/mu) + trans ! 
Transmissivity (exp(-tau)) + real(wp), intent(inout):: source_dn, source_up + ! Source function at layer edges + ! Down at the bottom of the layer, up at the top ! -------------------------------- real(wp), parameter :: tau_thresh = sqrt(epsilon(tau)) real(wp) :: fact - ! --------------------------------------------------------------- ! ! Weighting factor. Use 2nd order series expansion when rounding error (~tau^2) ! is of order epsilon (smallest difference from 1. in working precision) ! Thanks to Peter Blossey ! - if(tau(icol,ilay,igpt) > tau_thresh) then - fact = (1._wp - trans(icol,ilay,igpt))/tau(icol,ilay,igpt) - trans(icol,ilay,igpt) + if(tau > tau_thresh) then + fact = (1._wp - trans)/tau - trans else - fact = tau(icol, ilay,igpt) * (0.5_wp - 1._wp/3._wp*tau(icol,ilay,igpt)) + fact = tau * (0.5_wp - 1._wp/3._wp*tau) end if ! ! Equation below is developed in Clough et al., 1992, doi:10.1029/92JD01419, Eq 13 ! - source_dn(icol,ilay,igpt) = (1._wp - trans(icol,ilay,igpt)) * lev_source_dn(icol,ilay,igpt) + & - 2._wp * fact * (lay_source(icol,ilay,igpt) - lev_source_dn(icol,ilay,igpt)) - source_up(icol,ilay,igpt) = (1._wp - trans(icol,ilay,igpt)) * lev_source_up(icol,ilay,igpt) + & - 2._wp * fact * (lay_source(icol,ilay,igpt) - lev_source_up(icol,ilay,igpt)) - - end subroutine lw_source_noscat_stencil - ! --------------------------------------------------------------- - ! - ! Driver function to compute LW source function for upward and downward emission - ! - ! --------------------------------------------------------------- - subroutine lw_source_noscat(ncol, nlay, ngpt, lay_source, lev_source_up, lev_source_dn, tau, trans, & - source_dn, source_up) bind(C, name="lw_source_noscat") - integer, intent(in) :: ncol, nlay, ngpt - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: lay_source, & ! Planck source at layer center - lev_source_up, & ! Planck source at levels (layer edges), - lev_source_dn, & ! increasing/decreasing layer index - tau, & ! 
Optical path (tau/mu) - trans ! Transmissivity (exp(-tau)) - real(wp), dimension(ncol, nlay, ngpt), intent(out):: source_dn, source_up - ! Source function at layer edges - ! Down at the bottom of the layer, up at the top - ! -------------------------------- - integer :: icol, ilay, igpt - ! --------------------------------------------------------------- - !$acc parallel loop collapse(3) - do igpt = 1, ngpt - do ilay = 1, nlay - do icol = 1, ncol - call lw_source_noscat_stencil(ncol, nlay, ngpt, icol, ilay, igpt, & - lay_source, lev_source_up, lev_source_dn, & - tau, trans, & - source_dn, source_up) - end do - end do - end do + source_dn = (1._wp - trans) * lev_source_dn + & + 2._wp * fact * (lay_source - lev_source_dn) + source_up = (1._wp - trans) * lev_source_up + & + 2._wp * fact * (lay_source - lev_source_up) end subroutine lw_source_noscat ! --------------------------------------------------------------- @@ -576,22 +665,15 @@ end subroutine lw_source_noscat ! Longwave no-scattering transport ! ! --------------------------------------------------------------- - subroutine lw_transport_noscat(ncol, nlay, ngpt, top_at_1, & - tau, trans, sfc_albedo, source_dn, source_up, source_sfc, & - radn_up, radn_dn, source_sfcJac, radn_upJac) bind(C, name="lw_transport_noscat") + subroutine lw_transport_noscat_dn(ncol, nlay, ngpt, top_at_1, & + trans, source_dn,radn_dn) bind(C, name="lw_transport_noscat_dn") + !dir$ optimize(-O0) integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points logical(wl), intent(in ) :: top_at_1 ! - real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: tau, & ! Absorption optical thickness, pre-divided by mu [] - trans ! transmissivity = exp(-tau) - real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_albedo ! Surface albedo - real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: source_dn, & - source_up ! Diffuse radiation emitted by the layer - real(wp), dimension(ncol ,ngpt), intent(in ) :: source_sfc ! 
Surface source function [W/m2] - real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_dn ! Radiances [W/m2-str] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: radn_up ! Radiances [W/m2-str] - ! Top level must contain incident flux boundary condition - real(wp), dimension(ncol ,ngpt), intent(in ) :: source_sfcJac ! surface temperature Jacobian of surface source function [W/m2/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent(out) :: radn_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] + real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: trans ! transmissivity = exp(-tau) + real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: source_dn ! Diffuse radiation emitted by the layer + real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_dn ! Radiances [W/m2-str] + ! Top level must contain incident flux boundary condition ! Local variables integer :: igpt, ilev, icol ! --------------------------------------------------- @@ -601,22 +683,12 @@ subroutine lw_transport_noscat(ncol, nlay, ngpt, top_at_1, & ! Top of domain is index 1 ! !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol - ! Downward propagation do ilev = 2, nlay+1 radn_dn(icol,ilev,igpt) = trans(icol,ilev-1,igpt)*radn_dn(icol,ilev-1,igpt) + source_dn(icol,ilev-1,igpt) end do - - ! Surface reflection and emission - radn_up (icol,nlay+1,igpt) = radn_dn(icol,nlay+1,igpt)*sfc_albedo(icol,igpt) + source_sfc (icol,igpt) - radn_upJac(icol,nlay+1,igpt) = source_sfcJac(icol,igpt) - - ! Upward propagation - do ilev = nlay, 1, -1 - radn_up (icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_up (icol,ilev+1,igpt) + source_up(icol,ilev,igpt) - radn_upJac(icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_upJac(icol,ilev+1,igpt) - end do end do end do else @@ -624,27 +696,72 @@ subroutine lw_transport_noscat(ncol, nlay, ngpt, top_at_1, & ! Top of domain is index nlay+1 ! 
!$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol - ! Downward propagation do ilev = nlay, 1, -1 radn_dn(icol,ilev,igpt) = trans(icol,ilev ,igpt)*radn_dn(icol,ilev+1,igpt) + source_dn(icol,ilev,igpt) end do + end do + end do + end if - ! Surface reflection and emission - radn_up (icol,1,igpt) = radn_dn(icol,1,igpt)*sfc_albedo(icol,igpt) + source_sfc (icol,igpt) - radn_upJac(icol,1,igpt) = source_sfcJac(icol,igpt) + end subroutine lw_transport_noscat_dn + ! ------------------------------------------------------------------------------------------------- + subroutine lw_transport_noscat_up(ncol, nlay, ngpt, & + top_at_1, trans, source_up, radn_up, do_Jacobians, radn_upJac) bind(C, name="lw_transport_noscat_up") + !dir$ optimize(-O0) + integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points + logical(wl), intent(in ) :: top_at_1 ! + real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: trans ! transmissivity = exp(-tau) + real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: source_up ! Diffuse radiation emitted by the layer + real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: radn_up ! Radiances [W/m2-str] + logical(wl), intent(in ) :: do_Jacobians + real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: radn_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] + ! Local variables + integer :: igpt, ilev, icol + ! --------------------------------------------------- + ! --------------------------------------------------- + if(top_at_1) then + ! + ! Top of domain is index 1 + ! 
+ !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) + do igpt = 1, ngpt + do icol = 1, ncol + do ilev = nlay, 1, -1 + radn_up (icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_up (icol,ilev+1,igpt) + source_up(icol,ilev,igpt) + end do + if(do_Jacobians) then + do ilev = nlay, 1, -1 + radn_upJac(icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_upJac(icol,ilev+1,igpt) + end do + end if + end do + end do - ! Upward propagation + else + ! + ! Top of domain is index nlay+1 + ! + !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) + do igpt = 1, ngpt + do icol = 1, ncol do ilev = 2, nlay+1 - radn_up (icol,ilev,igpt) = trans(icol,ilev-1,igpt) * radn_up (icol,ilev-1,igpt) + source_up(icol,ilev-1,igpt) - radn_upJac(icol,ilev,igpt) = trans(icol,ilev-1,igpt) * radn_upJac(icol,ilev-1,igpt) + radn_up (icol,ilev,igpt) = trans(icol,ilev-1,igpt) * radn_up (icol,ilev-1,igpt) + source_up(icol,ilev-1,igpt) end do + if(do_Jacobians) then + do ilev = nlay, 1, -1 + radn_upJac(icol,ilev,igpt) = trans(icol,ilev-1,igpt) * radn_upJac(icol,ilev-1,igpt) + end do + end if end do end do end if - end subroutine lw_transport_noscat + end subroutine lw_transport_noscat_up ! ------------------------------------------------------------------------------------------------- ! ! Longwave two-stream solutions to diffuse reflectance and transmittance for a layer @@ -673,9 +790,12 @@ subroutine lw_two_stream(ncol, nlay, ngpt, tau, w0, g, & ! --------------------------------- ! 
--------------------------------- !$acc enter data copyin(tau, w0, g) + !$omp target enter data map(to:tau, w0, g) !$acc enter data create(gamma1, gamma2, Rdif, Tdif) + !$omp target enter data map(alloc:gamma1, gamma2, Rdif, Tdif) !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -715,7 +835,9 @@ subroutine lw_two_stream(ncol, nlay, ngpt, tau, w0, g, & end do end do !$acc exit data delete (tau, w0, g) + !$omp target exit data map(release:tau, w0, g) !$acc exit data copyout(gamma1, gamma2, Rdif, Tdif) + !$omp target exit data map(from:gamma1, gamma2, Rdif, Tdif) end subroutine lw_two_stream ! ------------------------------------------------------------------------------------------------- ! @@ -736,9 +858,12 @@ subroutine lw_combine_sources(ncol, nlay, ngpt, top_at_1, & ! --------------------------------------------------------------- ! --------------------------------- !$acc enter data copyin(lev_src_inc, lev_src_dec) + !$omp target enter data map(to:lev_src_inc, lev_src_dec) !$acc enter data create(lev_source) + !$omp target enter data map(alloc:lev_source) !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igpt = 1, ngpt do ilay = 1, nlay+1 do icol = 1,ncol @@ -754,7 +879,9 @@ subroutine lw_combine_sources(ncol, nlay, ngpt, top_at_1, & end do end do !$acc exit data delete (lev_src_inc, lev_src_dec) + !$omp target exit data map(release:lev_src_inc, lev_src_dec) !$acc exit data copyout(lev_source) + !$omp target exit data map(from:lev_source) end subroutine lw_combine_sources ! --------------------------------------------------------------- ! @@ -786,9 +913,12 @@ subroutine lw_source_2str(ncol, nlay, ngpt, top_at_1, & ! --------------------------------------------------------------- ! 
--------------------------------- !$acc enter data copyin(sfc_emis, sfc_src, lay_source, tau, gamma1, gamma2, rdif, tdif, lev_source) + !$omp target enter data map(to:sfc_emis, sfc_src, lay_source, tau, gamma1, gamma2, rdif, tdif, lev_source) !$acc enter data create(source_dn, source_up, source_sfc) + !$omp target enter data map(alloc:source_dn, source_up, source_sfc) !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -819,7 +949,9 @@ subroutine lw_source_2str(ncol, nlay, ngpt, top_at_1, & end do end do !$acc exit data delete(sfc_emis, sfc_src, lay_source, tau, gamma1, gamma2, rdif, tdif, lev_source) + !$omp target exit data map(release:sfc_emis, sfc_src, lay_source, tau, gamma1, gamma2, rdif, tdif, lev_source) !$acc exit data copyout(source_dn, source_up, source_sfc) + !$omp target exit data map(from:source_dn, source_up, source_sfc) end subroutine lw_source_2str ! ------------------------------------------------------------------------------------------------- @@ -856,9 +988,12 @@ subroutine sw_two_stream(ncol, nlay, ngpt, mu0, tau, w0, g, & ! --------------------------------- ! --------------------------------- !$acc enter data copyin (mu0, tau, w0, g) + !$omp target enter data map(to:mu0, tau, w0, g) !$acc enter data create(Rdif, Tdif, Rdir, Tdir, Tnoscat, mu0_inv) + !$omp target enter data map(alloc:Rdif, Tdif, Rdir, Tdir, Tnoscat, mu0_inv) !$acc parallel loop + !$omp target teams distribute parallel do simd do icol = 1, ncol mu0_inv(icol) = 1._wp/mu0(icol) enddo @@ -867,6 +1002,7 @@ subroutine sw_two_stream(ncol, nlay, ngpt, mu0, tau, w0, g, & ! and CPU. This *might* be floating point differences in implementation of ! the exp function. 
!$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igpt = 1, ngpt do ilay = 1, nlay do icol = 1, ncol @@ -944,7 +1080,9 @@ subroutine sw_two_stream(ncol, nlay, ngpt, mu0, tau, w0, g, & end do end do !$acc exit data delete (mu0, tau, w0, g, mu0_inv) + !$omp target exit data map(release:mu0, tau, w0, g, mu0_inv) !$acc exit data copyout(Rdif, Tdif, Rdir, Tdir, Tnoscat) + !$omp target exit data map(from:Rdif, Tdif, Rdir, Tdir, Tnoscat) end subroutine sw_two_stream ! --------------------------------------------------------------- @@ -967,10 +1105,13 @@ subroutine sw_source_2str(ncol, nlay, ngpt, top_at_1, Rdir, Tdir, Tnoscat, sfc_a ! --------------------------------- ! --------------------------------- !$acc enter data copyin (Rdir, Tdir, Tnoscat, sfc_albedo, flux_dn_dir) + !$omp target enter data map(to:Rdir, Tdir, Tnoscat, sfc_albedo, flux_dn_dir) !$acc enter data create(source_dn, source_up, source_sfc) + !$omp target enter data map(alloc:source_dn, source_up, source_sfc) if(top_at_1) then !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol do ilev = 1, nlay @@ -985,6 +1126,7 @@ subroutine sw_source_2str(ncol, nlay, ngpt, top_at_1, Rdir, Tdir, Tnoscat, sfc_a ! layer index = level index ! previous level is up (+1) !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol do ilev = nlay, 1, -1 @@ -997,7 +1139,9 @@ subroutine sw_source_2str(ncol, nlay, ngpt, top_at_1, Rdir, Tdir, Tnoscat, sfc_a end do end if !$acc exit data copyout(source_dn, source_up, source_sfc, flux_dn_dir) + !$omp target exit data map(from:source_dn, source_up, source_sfc, flux_dn_dir) !$acc exit data delete(Rdir, Tdir, Tnoscat, sfc_albedo) + !$omp target exit data map(release:Rdir, Tdir, Tnoscat, sfc_albedo) end subroutine sw_source_2str ! 
--------------------------------------------------------------- @@ -1012,6 +1156,7 @@ subroutine adding(ncol, nlay, ngpt, top_at_1, & rdif, tdif, & src_dn, src_up, src_sfc, & flux_up, flux_dn) bind(C, name="adding") + !dir$ optimize(-O0) integer, intent(in ) :: ncol, nlay, ngpt logical(wl), intent(in ) :: top_at_1 real(wp), dimension(ncol ,ngpt), intent(in ) :: albedo_sfc @@ -1042,10 +1187,13 @@ subroutine adding(ncol, nlay, ngpt, top_at_1, & ! We write the loops out explicitly so compilers will have no trouble optimizing them. ! !$acc enter data copyin(albedo_sfc, rdif, tdif, src_dn, src_up, src_sfc, flux_dn) + !$omp target enter data map(to:albedo_sfc, rdif, tdif, src_dn, src_up, src_sfc, flux_dn) !$acc enter data create(flux_up, albedo, src, denom) + !$omp target enter data map(alloc:flux_up, albedo, src, denom) if(top_at_1) then !$acc parallel loop gang vector collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol ilev = nlay + 1 @@ -1093,6 +1241,7 @@ subroutine adding(ncol, nlay, ngpt, top_at_1, & else !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol ilev = 1 @@ -1139,7 +1288,9 @@ subroutine adding(ncol, nlay, ngpt, top_at_1, & end do end if !$acc exit data delete(albedo_sfc, rdif, tdif, src_dn, src_up, src_sfc, albedo, src, denom) + !$omp target exit data map(release:albedo_sfc, rdif, tdif, src_dn, src_up, src_sfc, albedo, src, denom) !$acc exit data copyout(flux_up, flux_dn) + !$omp target exit data map(from:flux_up, flux_dn) end subroutine adding ! --------------------------------------------------------------- ! @@ -1158,6 +1309,7 @@ subroutine apply_BC_gpt(ncol, nlay, ngpt, top_at_1, inc_flux, flux_dn) bind (C, ! 
Upper boundary condition if(top_at_1) then !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol flux_dn(icol, 1, igpt) = inc_flux(icol,igpt) @@ -1165,6 +1317,7 @@ subroutine apply_BC_gpt(ncol, nlay, ngpt, top_at_1, inc_flux, flux_dn) bind (C, end do else !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol flux_dn(icol, nlay+1, igpt) = inc_flux(icol,igpt) @@ -1187,6 +1340,7 @@ subroutine apply_BC_factor(ncol, nlay, ngpt, top_at_1, inc_flux, factor, flux_dn ! Upper boundary condition if(top_at_1) then !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol flux_dn(icol, 1, igpt) = inc_flux(icol,igpt) * factor(icol) @@ -1194,6 +1348,7 @@ subroutine apply_BC_factor(ncol, nlay, ngpt, top_at_1, inc_flux, factor, flux_dn end do else !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol flux_dn(icol, nlay+1, igpt) = inc_flux(icol,igpt) * factor(icol) @@ -1214,6 +1369,7 @@ subroutine apply_BC_0(ncol, nlay, ngpt, top_at_1, flux_dn) bind (C, name="apply_ ! Upper boundary condition if(top_at_1) then !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol flux_dn(icol, 1, igpt) = 0._wp @@ -1221,6 +1377,7 @@ subroutine apply_BC_0(ncol, nlay, ngpt, top_at_1, flux_dn) bind (C, name="apply_ end do else !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol flux_dn(icol, nlay+1, igpt) = 0._wp @@ -1230,360 +1387,6 @@ subroutine apply_BC_0(ncol, nlay, ngpt, top_at_1, flux_dn) bind (C, name="apply_ end subroutine apply_BC_0 ! ------------------------------------------------------------------------------------------------- ! -! 
Similar to Longwave no-scattering (lw_solver_noscat) -! a) relies on rescaling of the optical parameters based on asymetry factor and single scattering albedo -! scaling can be computed by scaling_1rescl -! b) adds adustment term based on cloud properties (lw_transport_1rescl) -! adustment terms is computed based on solution of the Tang equations -! for "linear-in-tau" internal source (not in the paper) -! -! Attention: -! use must prceompute scaling before colling the function -! -! Implemented based on the paper -! Tang G, et al, 2018: https://doi.org/10.1175/JAS-D-18-0014.1 -! -! ------------------------------------------------------------------------------------------------- - subroutine lw_solver_1rescl(ncol, nlay, ngpt, top_at_1, D, & - tau, scaling, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & - radn_up, radn_dn, & - sfc_srcJac, rad_up_Jac, rad_dn_Jac) bind(C, name="lw_solver_1rescl") - integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points - logical(wl), intent(in ) :: top_at_1 - real(wp), dimension(ncol, ngpt), intent(in ) :: D ! secant of propagation angle [] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: tau ! Absorption optical thickness [] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: scaling ! single scattering albedo [] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: lay_source ! Planck source at layer average temperature [W/m2] - ! Planck source at layer edge for radiation in increasing/decreasing ilay direction - ! lev_source_dec applies the mapping in layer i to the Planck function at layer i - ! lev_source_inc applies the mapping in layer i to the Planck function at layer i+1 - real(wp), dimension(ncol,nlay, ngpt), target, & - intent(in ) :: lev_source_inc, lev_source_dec - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_emis ! Surface emissivity [] - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_src ! 
Surface source function [W/m2] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: radn_up ! Radiances [W/m2-str] - real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_dn ! Top level must contain incident flux boundary condition - - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_srcJac ! Surface Temperature Jacobian source function [W/m2/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: rad_up_Jac ! Surface Temperature Jacobians [W/m2-str/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: rad_dn_Jac ! Top level set to 0 - ! Local variables, WITH g-point dependency - real(wp), dimension(ncol,nlay,ngpt) :: tau_loc, & ! path length (tau/mu) - trans ! transmissivity = exp(-tau) - real(wp), dimension(ncol,nlay,ngpt) :: source_dn, source_up - real(wp), dimension(ncol, ngpt) :: source_sfc, sfc_albedo - real(wp), dimension(ncol, ngpt) :: source_sfcJac - - real(wp), dimension(:,:,:), pointer :: lev_source_up, lev_source_dn ! Mapping increasing/decreasing indicies to up/down - - real(wp), parameter :: pi = acos(-1._wp) - integer :: ilev, igpt, top_level - ! ------------------------------------ - real(wp) :: fact - real(wp), parameter :: tau_thresh = sqrt(epsilon(tau)) - integer :: icol - real(wp), dimension(ncol ) :: sfcSource - real(wp), dimension(ncol,nlay,ngpt) :: An, Cn - ! ------------------------------------ - - ! Which way is up? - ! Level Planck sources for upward and downward radiation - ! When top_at_1, lev_source_up => lev_source_dec - ! 
lev_source_dn => lev_source_inc, and vice-versa - if(top_at_1) then - top_level = 1 - lev_source_up => lev_source_dec - lev_source_dn => lev_source_inc - else - top_level = nlay+1 - lev_source_up => lev_source_inc - lev_source_dn => lev_source_dec - end if - - !$acc enter data copyin(d,tau,sfc_src,sfc_emis,lev_source_dec,lev_source_inc,lay_source,radn_dn) - !$acc enter data copyin(scaling) - !$acc enter data create(tau_loc,trans,source_dn,source_up,source_sfc,sfc_albedo,radn_up) - !$acc enter data create(sfcSource, An, Cn) - !$acc enter data attach(lev_source_up,lev_source_dn) - - !$acc enter data create(rad_up_Jac, rad_dn_Jac, source_sfcJac) - !$acc enter data copyin(sfc_srcJac) - - ! NOTE: This kernel produces small differences between GPU and CPU - ! implementations on Ascent with PGI, we assume due to floating point - ! differences in the exp() function. These differences are small in the - ! RFMIP test case (10^-6). - !$acc parallel loop collapse(3) - do igpt = 1, ngpt - do ilev = 1, nlay - do icol = 1, ncol - ! - ! Optical path and transmission, used in source function and transport calculations - ! - tau_loc(icol,ilev,igpt) = tau(icol,ilev,igpt)*D(icol,igpt) - trans (icol,ilev,igpt) = exp(-tau_loc(icol,ilev,igpt)) - ! here scaling is used to store parameter wb/[(]1-w(1-b)] of Eq.21 of the Tang's paper - ! explanation of factor 0.4 note A of Table - Cn(icol,ilev,igpt) = 0.4_wp*scaling(icol,ilev,igpt) - An(icol,ilev,igpt) = (1._wp-trans(icol,ilev,igpt)*trans(icol,ilev,igpt)) - - ! initialize radn_dn_Jac - rad_dn_Jac(icol,ilev,igpt) = 0._wp - end do - end do - end do - - !$acc parallel loop collapse(2) - do igpt = 1, ngpt - do icol = 1, ncol - ! - ! Surface albedo, surface source function - ! - sfc_albedo (icol,igpt) = 1._wp - sfc_emis(icol,igpt) - source_sfc (icol,igpt) = sfc_emis(icol,igpt) * sfc_src (icol,igpt) - source_sfcJac(icol,igpt) = sfc_emis(icol,igpt) * sfc_srcJac(icol,igpt) - end do - end do - - - ! - ! Source function for diffuse radiation - ! 
- call lw_source_noscat(ncol, nlay, ngpt, & - lay_source, lev_source_up, lev_source_dn, & - tau_loc, trans, source_dn, source_up) - - ! - ! Transport - ! - ! compute no-scattering fluxes - call lw_transport_noscat(ncol, nlay, ngpt, top_at_1, & - tau_loc, trans, sfc_albedo, source_dn, source_up, source_sfc, & - radn_up, radn_dn,& - source_sfcJac, rad_up_Jac) - ! make adjustment - call lw_transport_1rescl(ncol, nlay, ngpt, top_at_1, & - tau_loc, trans, & - sfc_albedo, source_dn, source_up, & - radn_up, radn_dn, An, Cn, rad_up_Jac, rad_dn_Jac) - - !$acc exit data copyout(rad_up_Jac, rad_dn_Jac) - !$acc exit data delete(source_sfcJac, sfc_srcJac) - - - !$acc exit data copyout(radn_dn,radn_up) - !$acc exit data delete(sfcSource, An, Cn) - !$acc exit data delete(scaling) - !$acc exit data delete(d,tau,sfc_src,sfc_emis,lev_source_dec,lev_source_inc,lay_source,tau_loc,trans,source_dn,source_up,source_sfc,sfc_albedo) - !$acc exit data detach(lev_source_up,lev_source_dn) - - end subroutine lw_solver_1rescl -! ------------------------------------------------------------------------------------------------- -! -! Similar to lw_solver_noscat_GaussQuad. -! It is main solver to use the Tang approximation for fluxes -! In addition to the no scattering input parameters the user must provide -! scattering related properties (ssa and g) that the solver uses to compute scaling -! -! --------------------------------------------------------------- - subroutine lw_solver_1rescl_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weights, & - tau, ssa, g, lay_source, lev_source_inc, lev_source_dec, & - sfc_emis, sfc_src,& - flux_up, flux_dn, & - sfc_src_Jac, flux_up_Jac, flux_dn_Jac) & - bind(C, name="lw_solver_1rescl_GaussQuad") - integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points - logical(wl), intent(in ) :: top_at_1 - integer, intent(in ) :: nmus ! number of quadrature angles - real(wp), dimension(nmus), intent(in ) :: Ds, weights ! 
quadrature secants, weights - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: tau ! Optical thickness, - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: ssa ! single-scattering albedo, - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: g ! asymmetry parameter [] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: lay_source ! Planck source at layer average temperature [W/m2] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: lev_source_inc - ! Planck source at layer edge for radiation in increasing ilay direction [W/m2] - ! Includes spectral weighting that accounts for state-dependent frequency to g-space mapping - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: lev_source_dec - ! Planck source at layer edge for radiation in decreasing ilay direction [W/m2] - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_emis ! Surface emissivity [] - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_src ! Surface source function [W/m2] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: flux_up ! Radiances [W/m2-str] - real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: flux_dn ! Top level must contain incident flux boundary condition - - real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_src_Jac ! surface temperature Jacobian of surface source function [W/m2/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent(out) :: flux_up_Jac ! surface temperature Jacobian of Radiances [W/m2-str / K] - real(wp), dimension(ncol,nlay+1,ngpt), intent(out) :: flux_dn_Jac ! surface temperature Jacobian of Radiances [W/m2-str / K] - - ! Local variables - real(wp), dimension(ncol,nlay+1,ngpt) :: radn_dn, radn_up ! Fluxes per quad angle - real(wp), dimension(ncol, ngpt) :: Ds_ncol - real(wp), dimension(ncol,nlay+1,ngpt) :: radn_dn_Jac, radn_up_Jac ! Surface temperature Jsacobians per quad angle - - integer :: imu, top_level,icol,ilev,igpt - real :: weight - - real(wp), dimension(ncol,ngpt) :: fluxTOA ! 
downward flux at TOA - real(wp), dimension(ncol,nlay, ngpt) :: tauLoc ! rescaled Tau - real(wp), dimension(ncol,nlay, ngpt) :: scaling ! scaling - real(wp), parameter :: tresh=1.0_wp - 1e-6_wp - - !$acc enter data copyin(Ds,weights,tau,ssa,g,lay_source,lev_source_inc,lev_source_dec,sfc_emis,sfc_src,flux_dn,sfc_src_Jac) - !$acc enter data create(flux_up,radn_dn,radn_up,Ds_ncol, scaling, tauLoc,flux_up_Jac,flux_dn_Jac,radn_dn_Jac, radn_up_Jac) - - - ! Tang rescaling - if (any(ssa*g >= tresh)) then - call scaling_1rescl_safe(ncol, nlay, ngpt, tauLoc, scaling, tau, ssa, g) - else - call scaling_1rescl(ncol, nlay, ngpt, tauLoc, scaling, tau, ssa, g) - endif - - ! ------------------------------------ - ! - ! For the first angle output arrays store total flux - ! - top_level = MERGE(1, nlay+1, top_at_1) - ! store TOA flux - fluxTOA = flux_dn(1:ncol, top_level, 1:ngpt) - - Ds_ncol(:,:) = Ds(1) - weight = 2._wp*pi*weights(1) - ! Transport is for intensity - ! convert flux at top of domain to intensity assuming azimuthal isotropy - ! 
- radn_dn(1:ncol, top_level, 1:ngpt) = fluxTOA(1:ncol, 1:ngpt) / weight - - call lw_solver_1rescl(ncol, nlay, ngpt, & - top_at_1, Ds_ncol, tauLoc, scaling, & - lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & - flux_up, flux_dn,sfc_src_Jac,flux_up_Jac,flux_dn_Jac) - !$acc parallel loop collapse(3) - do igpt = 1, ngpt - do ilev = 1, nlay+1 - do icol = 1, ncol - flux_up (icol,ilev,igpt) = weight*flux_up (icol,ilev,igpt) - flux_dn (icol,ilev,igpt) = weight*flux_dn (icol,ilev,igpt) - flux_up_Jac(icol,ilev,igpt) = weight*flux_up_Jac(icol,ilev,igpt) - flux_dn_Jac(icol,ilev,igpt) = weight*flux_dn_Jac(icol,ilev,igpt) - enddo - enddo - enddo - - do imu = 2, nmus - Ds_ncol(:,:) = Ds(imu) - weight = 2._wp*pi*weights(imu) - radn_dn(1:ncol, top_level, 1:ngpt) = fluxTOA(1:ncol, 1:ngpt) / weight - call lw_solver_1rescl(ncol, nlay, ngpt, & - top_at_1, Ds_ncol, tauLoc, scaling, & - lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & - radn_up, radn_dn,sfc_src_Jac,radn_up_Jac,radn_dn_Jac) - - !$acc parallel loop collapse(3) - do igpt = 1, ngpt - do ilev = 1, nlay+1 - do icol = 1, ncol - flux_up (icol,ilev,igpt) = flux_up (icol,ilev,igpt) + weight*radn_up (icol,ilev,igpt) - flux_dn (icol,ilev,igpt) = flux_dn (icol,ilev,igpt) + weight*radn_dn (icol,ilev,igpt) - flux_up_Jac(icol,ilev,igpt) = flux_up_Jac(icol,ilev,igpt) + weight*radn_up_Jac(icol,ilev,igpt) - flux_dn_Jac(icol,ilev,igpt) = flux_dn_Jac(icol,ilev,igpt) + weight*radn_dn_Jac(icol,ilev,igpt) - enddo - enddo - enddo - - end do - !$acc exit data delete(sfc_src_Jac,radn_dn_Jac, radn_up_Jac) - !$acc exit data copyout(flux_up_Jac,flux_dn_Jac) - !$acc exit data copyout(flux_up,flux_dn) - !$acc exit data delete(Ds,weights,tau,ssa,g,tauLoc,scaling,lay_source,lev_source_inc,lev_source_dec,sfc_emis,sfc_src,radn_dn,radn_up,Ds_ncol) - end subroutine lw_solver_1rescl_GaussQuad -! ------------------------------------------------------------------------------------------------- -! -! 
Computes Tang scaling of layer optical thickness and scaling parameter -! unsafe if ssa*g =1. -! -! --------------------------------------------------------------- - pure subroutine scaling_1rescl(ncol, nlay, ngpt, tauLoc, scaling, tau, ssa, g) - integer , intent(in) :: ncol - integer , intent(in) :: nlay - integer , intent(in) :: ngpt - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: tau - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: ssa - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: g - - real(wp), dimension(ncol, nlay, ngpt), intent(inout) :: tauLoc - real(wp), dimension(ncol, nlay, ngpt), intent(inout) :: scaling - - integer :: icol, ilay, igpt - real(wp) :: wb, ssal, scaleTau - !$acc enter data copyin(tau, ssa, g) - !$acc enter data create(tauLoc, scaling) - !$acc parallel loop collapse(3) - do igpt=1,ngpt - do ilay=1,nlay - do icol=1,ncol - ssal = ssa(icol, ilay, igpt) - wb = ssal*(1._wp - g(icol, ilay, igpt)) / 2._wp - scaleTau = (1._wp - ssal + wb ) - - tauLoc(icol, ilay, igpt) = scaleTau * tau(icol, ilay, igpt) ! Eq.15 of the paper - ! - ! here scaling is used to store parameter wb/[1-w(1-b)] of Eq.21 of the Tang's paper - ! actually it is in line of parameter rescaling defined in Eq.7 - ! potentialy if g=ssa=1 then wb/scaleTau = NaN - ! it should not happen - scaling(icol, ilay, igpt) = wb / scaleTau - enddo - enddo - enddo - !$acc exit data copyout(tauLoc, scaling) - !$acc exit data delete(tau, ssa, g) - end subroutine scaling_1rescl -! ------------------------------------------------------------------------------------------------- -! -! Computes Tang scaling of layer optical thickness and scaling parameter -! Safe implementation -! -! 
--------------------------------------------------------------- - pure subroutine scaling_1rescl_safe(ncol, nlay, ngpt, tauLoc, scaling, tau, ssa, g) - integer , intent(in) :: ncol - integer , intent(in) :: nlay - integer , intent(in) :: ngpt - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: tau - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: ssa - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: g - - real(wp), dimension(ncol, nlay, ngpt), intent(inout) :: tauLoc - real(wp), dimension(ncol, nlay, ngpt), intent(inout) :: scaling - - integer :: icol, ilay, igpt - real(wp) :: wb, ssal, scaleTau - !$acc enter data copyin(tau, ssa, g) - !$acc enter data create(tauLoc, scaling) - !$acc parallel loop collapse(3) - do igpt=1,ngpt - do ilay=1,nlay - do icol=1,ncol - ssal = ssa(icol, ilay, igpt) - wb = ssal*(1._wp - g(icol, ilay, igpt)) / 2._wp - scaleTau = (1._wp - ssal + wb ) - - tauLoc(icol, ilay, igpt) = scaleTau * tau(icol, ilay, igpt) ! Eq.15 of the paper - ! - ! here scaling is used to store parameter wb/[1-w(1-b)] of Eq.21 of the Tang's paper - ! actually it is in line of parameter rescaling defined in Eq.7 - if (scaleTau < 1e-6_wp) then - scaling(icol, ilay, igpt) = 1.0_wp - else - scaling(icol, ilay, igpt) = wb / scaleTau - endif - enddo - enddo - enddo - !$acc exit data copyout(tauLoc, scaling) - !$acc exit data delete(tau, ssa, g) - end subroutine scaling_1rescl_safe -! ------------------------------------------------------------------------------------------------- -! ! Similar to Longwave no-scattering tarnsport (lw_transport_noscat) ! a) adds adjustment factor based on cloud properties ! @@ -1591,86 +1394,96 @@ end subroutine scaling_1rescl_safe ! the adjustmentFactor computation can be skipped where Cn <= epsilon ! ! 
------------------------------------------------------------------------------------------------- - subroutine lw_transport_1rescl(ncol, nlay, ngpt, top_at_1, & - tau, trans, sfc_albedo, source_dn, source_up, & - radn_up, radn_dn, An, Cn,& - rad_up_Jac, rad_dn_Jac) bind(C, name="lw_transport_1rescl") +subroutine lw_transport_1rescl(ncol, nlay, ngpt, top_at_1, & + trans, source_dn, source_up, & + radn_up, radn_dn, An, Cn, & + do_Jacobians, radn_up_Jac) bind(C, name="lw_transport_1rescl") integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points logical(wl), intent(in ) :: top_at_1 ! - real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: tau, & ! Absorption optical thickness, pre-divided by mu [] - trans ! transmissivity = exp(-tau) - real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_albedo ! Surface albedo + real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: trans ! transmissivity = exp(-tau) real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: source_dn, & source_up ! Diffuse radiation emitted by the layer real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_up ! Radiances [W/m2-str] real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_dn !Top level must contain incident flux boundary condition real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: An, Cn - real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: rad_up_Jac ! Radiances [W/m2-str] - real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: rad_dn_Jac !Top level must contain incident flux boundary condition + logical(wl), intent(in ) :: do_Jacobians + real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_up_Jac ! Radiances [W/m2-str] + ! --------------------------------------------------- ! Local variables integer :: ilev, icol, igpt - ! --------------------------------------------------- real(wp) :: adjustmentFactor + ! --------------------------------------------------- if(top_at_1) then ! ! Top of domain is index 1 ! ! 
Downward propagation !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol - ! 1st Upward propagation + ! Upward propagation do ilev = nlay, 1, -1 - radn_up (icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_up (icol,ilev+1,igpt) + source_up(icol,ilev,igpt) - rad_up_Jac(icol,ilev,igpt) = trans(icol,ilev,igpt)*rad_up_Jac(icol,ilev+1,igpt) - - adjustmentFactor = Cn(icol,ilev,igpt)*& + adjustmentFactor = Cn(icol,ilev,igpt) * & ( An(icol,ilev,igpt)*radn_dn(icol,ilev,igpt) - & - source_dn(icol,ilev,igpt) *trans(icol,ilev,igpt ) - & + source_dn(icol,ilev,igpt)*trans(icol,ilev,igpt ) - & source_up(icol,ilev,igpt)) - radn_up(icol,ilev,igpt) = radn_up(icol,ilev,igpt) + adjustmentFactor + radn_up(icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_up (icol,ilev+1,igpt) + & + source_up(icol,ilev,igpt) + adjustmentFactor enddo + if(do_Jacobians) then + do ilev = nlay, 1, -1 + radn_up_Jac(icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_up_Jac(icol,ilev+1,igpt) + end do + end if + + ! radn_dn_Jac(icol,1,igpt) = 0._wp ! 2nd Downward propagation do ilev = 1, nlay - radn_dn (icol,ilev+1,igpt) = trans(icol,ilev,igpt)*radn_dn (icol,ilev,igpt) + source_dn(icol,ilev,igpt) - rad_dn_Jac(icol,ilev+1,igpt) = trans(icol,ilev,igpt)*rad_dn_Jac(icol,ilev,igpt) + ! radn_dn_Jac(icol,ilev+1,igpt) = trans(icol,ilev,igpt)*radn_dn_Jac(icol,ilev,igpt) adjustmentFactor = Cn(icol,ilev,igpt)*( & An(icol,ilev,igpt)*radn_up(icol,ilev,igpt) - & source_up(icol,ilev,igpt)*trans(icol,ilev,igpt) - & source_dn(icol,ilev,igpt) ) - radn_dn(icol,ilev+1,igpt) = radn_dn(icol,ilev+1,igpt) + adjustmentFactor - - adjustmentFactor = Cn(icol,ilev,igpt)*An(icol,ilev,igpt)*rad_up_Jac(icol,ilev,igpt) - rad_dn_Jac(icol,ilev+1,igpt) = rad_dn_Jac(icol,ilev+1,igpt) + adjustmentFactor + radn_dn(icol,ilev+1,igpt) = trans(icol,ilev,igpt)*radn_dn (icol,ilev, igpt) + & + source_dn(icol,ilev,igpt) + adjustmentFactor + ! 
adjustmentFactor = Cn(icol,ilev,igpt)*An(icol,ilev,igpt)*radn_up_Jac(icol,ilev,igpt) + ! radn_dn_Jac(icol,ilev+1,igpt) = radn_dn_Jac(icol,ilev+1,igpt) + adjustmentFactor enddo enddo enddo else !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do igpt = 1, ngpt do icol = 1, ncol ! Upward propagation do ilev = 1, nlay - radn_up (icol,ilev+1,igpt) = trans(icol,ilev,igpt)*radn_up (icol,ilev,igpt) + source_up(icol,ilev,igpt) - rad_up_Jac(icol,ilev+1,igpt) = trans(icol,ilev,igpt)*rad_up_Jac(icol,ilev,igpt) adjustmentFactor = Cn(icol,ilev,igpt)*& ( An(icol,ilev,igpt)*radn_dn(icol,ilev+1,igpt) - & source_dn(icol,ilev,igpt) *trans(icol,ilev ,igpt) - & source_up(icol,ilev,igpt)) - radn_up(icol,ilev+1,igpt) = radn_up(icol,ilev+1,igpt) + adjustmentFactor + radn_up(icol,ilev+1,igpt) = trans(icol,ilev,igpt)*radn_up (icol,ilev,igpt) + & + source_up(icol,ilev,igpt) + adjustmentFactor end do + if(do_Jacobians) then + do ilev = 1, nlay + radn_up_Jac(icol,ilev+1,igpt) = trans(icol,ilev,igpt)*radn_up_Jac(icol,ilev,igpt) + end do + end if + ! 2st Downward propagation + ! radn_dn_Jac(icol,nlay+1,igpt) = 0._wp do ilev = nlay, 1, -1 - radn_dn (icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_dn (icol,ilev+1,igpt) + source_dn(icol,ilev,igpt) - rad_dn_Jac(icol,ilev,igpt) = trans(icol,ilev,igpt)*rad_dn_Jac(icol,ilev+1,igpt) + ! radn_dn_Jac(icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_dn_Jac(icol,ilev+1,igpt) adjustmentFactor = Cn(icol,ilev,igpt)*( & An(icol,ilev,igpt)*radn_up(icol,ilev,igpt) - & source_up(icol,ilev,igpt)*trans(icol,ilev ,igpt ) - & source_dn(icol,ilev,igpt) ) - radn_dn(icol,ilev,igpt) = radn_dn(icol,ilev,igpt) + adjustmentFactor - - adjustmentFactor = Cn(icol,ilev,igpt)*An(icol,ilev,igpt)*rad_up_Jac(icol,ilev,igpt) - rad_dn_Jac(icol,ilev,igpt) = rad_dn_Jac(icol,ilev,igpt) + adjustmentFactor + radn_dn(icol,ilev,igpt) = trans(icol,ilev,igpt)*radn_dn (icol,ilev+1,igpt) + & + source_dn(icol,ilev,igpt) + adjustmentFactor + ! 
adjustmentFactor = Cn(icol,ilev,igpt)*An(icol,ilev,igpt)*radn_up_Jac(icol,ilev,igpt) + ! radn_dn_Jac(icol,ilev,igpt) = radn_dn_Jac(icol,ilev,igpt) + adjustmentFactor end do enddo enddo diff --git a/rte/kernels/mo_fluxes_broadband_kernels.F90 b/rte/kernels/mo_fluxes_broadband_kernels.F90 index b49c6db99..4dedac741 100644 --- a/rte/kernels/mo_fluxes_broadband_kernels.F90 +++ b/rte/kernels/mo_fluxes_broadband_kernels.F90 @@ -28,7 +28,7 @@ module mo_fluxes_broadband_kernels ! ! Spectral reduction over all points ! - pure subroutine sum_broadband(ncol, nlev, ngpt, spectral_flux, broadband_flux) bind(C, name="sum_broadband") + subroutine sum_broadband(ncol, nlev, ngpt, spectral_flux, broadband_flux) bind(C, name="sum_broadband") integer, intent(in ) :: ncol, nlev, ngpt real(wp), dimension(ncol, nlev, ngpt), intent(in ) :: spectral_flux real(wp), dimension(ncol, nlev), intent(out) :: broadband_flux @@ -37,7 +37,9 @@ pure subroutine sum_broadband(ncol, nlev, ngpt, spectral_flux, broadband_flux) b real(wp) :: bb_flux_s ! local scalar version !$acc enter data copyin(spectral_flux) create(broadband_flux) + !$omp target enter data map(to:spectral_flux) map(alloc:broadband_flux) !$acc parallel loop gang vector collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do ilev = 1, nlev do icol = 1, ncol @@ -51,12 +53,13 @@ pure subroutine sum_broadband(ncol, nlev, ngpt, spectral_flux, broadband_flux) b end do end do !$acc exit data delete(spectral_flux) copyout(broadband_flux) + !$omp target exit data map(release:spectral_flux) map(from:broadband_flux) end subroutine sum_broadband ! ---------------------------------------------------------------------------- ! ! Net flux: Spectral reduction over all points ! 
- pure subroutine net_broadband_full(ncol, nlev, ngpt, spectral_flux_dn, spectral_flux_up, broadband_flux_net) & + subroutine net_broadband_full(ncol, nlev, ngpt, spectral_flux_dn, spectral_flux_up, broadband_flux_net) & bind(C, name="net_broadband_full") integer, intent(in ) :: ncol, nlev, ngpt real(wp), dimension(ncol, nlev, ngpt), intent(in ) :: spectral_flux_dn, spectral_flux_up @@ -66,7 +69,9 @@ pure subroutine net_broadband_full(ncol, nlev, ngpt, spectral_flux_dn, spectral_ real(wp) :: diff !$acc enter data copyin(spectral_flux_dn, spectral_flux_up) create(broadband_flux_net) + !$omp target enter data map(to:spectral_flux_dn, spectral_flux_up) map(alloc:broadband_flux_net) !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do ilev = 1, nlev do icol = 1, ncol diff = spectral_flux_dn(icol, ilev, 1 ) - spectral_flux_up(icol, ilev, 1) @@ -74,22 +79,25 @@ pure subroutine net_broadband_full(ncol, nlev, ngpt, spectral_flux_dn, spectral_ end do end do !$acc parallel loop collapse(3) + !$omp target teams distribute parallel do simd collapse(3) do igpt = 2, ngpt do ilev = 1, nlev do icol = 1, ncol diff = spectral_flux_dn(icol, ilev, igpt) - spectral_flux_up(icol, ilev, igpt) !$acc atomic update + !$omp atomic update broadband_flux_net(icol, ilev) = broadband_flux_net(icol, ilev) + diff end do end do end do !$acc exit data delete(spectral_flux_dn, spectral_flux_up) copyout(broadband_flux_net) + !$omp target exit data map(release:spectral_flux_dn, spectral_flux_up) map(from:broadband_flux_net) end subroutine net_broadband_full ! ---------------------------------------------------------------------------- ! ! Net flux when bradband flux up and down are already available ! 
- pure subroutine net_broadband_precalc(ncol, nlev, flux_dn, flux_up, broadband_flux_net) & + subroutine net_broadband_precalc(ncol, nlev, flux_dn, flux_up, broadband_flux_net) & bind(C, name="net_broadband_precalc") integer, intent(in ) :: ncol, nlev real(wp), dimension(ncol, nlev), intent(in ) :: flux_dn, flux_up @@ -97,13 +105,16 @@ pure subroutine net_broadband_precalc(ncol, nlev, flux_dn, flux_up, broadband_fl integer :: icol, ilev !$acc enter data copyin(flux_dn, flux_up) create(broadband_flux_net) + !$omp target enter data map(to:flux_dn, flux_up) map(alloc:broadband_flux_net) !$acc parallel loop collapse(2) + !$omp target teams distribute parallel do simd collapse(2) do ilev = 1, nlev do icol = 1, ncol broadband_flux_net(icol,ilev) = flux_dn(icol,ilev) - flux_up(icol,ilev) end do end do !$acc exit data delete(flux_dn, flux_up) copyout(broadband_flux_net) + !$omp target exit data map(release:flux_dn, flux_up) map(from:broadband_flux_net) end subroutine net_broadband_precalc ! ---------------------------------------------------------------------------- end module mo_fluxes_broadband_kernels diff --git a/rte/kernels/mo_rte_solver_kernels.F90 b/rte/kernels/mo_rte_solver_kernels.F90 index a88391536..f20557cda 100644 --- a/rte/kernels/mo_rte_solver_kernels.F90 +++ b/rte/kernels/mo_rte_solver_kernels.F90 @@ -39,8 +39,6 @@ module mo_rte_solver_kernels lw_solver_noscat, lw_solver_noscat_GaussQuad, lw_solver_2stream, & sw_solver_noscat, sw_solver_2stream - public :: lw_solver_1rescl_GaussQuad, lw_solver_1rescl - ! These routines don't really need to be visible but making them so is useful for testing. 
public :: lw_source_noscat, lw_combine_sources, & lw_source_2str, sw_source_2str, & @@ -63,7 +61,8 @@ module mo_rte_solver_kernels subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, & tau, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & radn_up, radn_dn, & - sfc_srcJac, radn_upJac) bind(C, name="lw_solver_noscat") + do_Jacobians, sfc_srcJac, radn_upJac, & + do_rescaling, ssa, g) bind(C, name="lw_solver_noscat") integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points logical(wl), intent(in ) :: top_at_1 real(wp), dimension(ncol, ngpt), intent(in ) :: D ! secant of propagation angle [] @@ -80,19 +79,41 @@ subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: radn_up ! Radiances [W/m2-str] real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_dn ! Top level must contain incident flux boundary condition - real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_srcJac ! surface temperature Jacobian of surface source function [W/m2/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent(out) :: radn_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] - + ! + ! Optional variables - arrays aren't referenced if corresponding logical == False + ! + logical(wl), intent(in ) :: do_Jacobians + real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_srcJac ! surface temperature Jacobian of surface source function [W/m2/K] + real(wp), dimension(ncol,nlay+1 ), intent( out) :: radn_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] + logical(wl), intent(in ) :: do_rescaling + real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: ssa, g ! single-scattering albedo, asymmetry parameter + ! ------------------------------------ ! Local variables, no g-point dependency + ! real(wp), dimension(ncol,nlay) :: tau_loc, & ! path length (tau/mu) - trans ! transmissivity = exp(-tau) + trans ! 
transmissivity = exp(-tau) real(wp), dimension(ncol,nlay) :: source_dn, source_up - real(wp), dimension(ncol ) :: source_sfc, sfc_albedo, source_sfcJac + real(wp), dimension(ncol ) :: sfc_albedo real(wp), dimension(:,:,:), pointer :: lev_source_up, lev_source_dn ! Mapping increasing/decreasing indicies to up/down real(wp), parameter :: pi = acos(-1._wp) - integer :: ilev, igpt, top_level + integer :: icol, ilay, igpt, top_level, sfc_level + ! ------------------------------------------------------------------------------------------------- + ! Optionally, use an approximate treatment of scattering using rescaling + ! Implemented based on the paper + ! Tang G, et al, 2018: https://doi.org/10.1175/JAS-D-18-0014.1 + ! a) relies on rescaling of the optical parameters based on asymetry factor and single scattering albedo + ! scaling can be computed by scaling_1rescl + ! b) adds adustment term based on cloud properties (lw_transport_1rescl) + ! adustment terms is computed based on solution of the Tang equations + ! for "linear-in-tau" internal source (not in the paper) + ! + ! Used when approximating scattering + ! + real(wp) :: ssal, wb, scaleTau + real(wp), dimension(ncol,nlay ) :: An, Cn + real(wp), dimension(ncol,nlay+1) :: gpt_flux_Jac ! ------------------------------------ ! Which way is up? ! Level Planck sources for upward and downward radiation @@ -100,10 +121,12 @@ subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, ! lev_source_dn => lev_source_inc, and vice-versa if(top_at_1) then top_level = 1 + sfc_level = nlay+1 lev_source_up => lev_source_dec lev_source_dn => lev_source_inc else top_level = nlay+1 + sfc_level = 1 lev_source_up => lev_source_inc lev_source_dn => lev_source_dec end if @@ -118,10 +141,35 @@ subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, ! ! Optical path and transmission, used in source function and transport calculations ! 
- do ilev = 1, nlay - tau_loc(:,ilev) = tau(:,ilev,igpt)*D(:,igpt) - trans (:,ilev) = exp(-tau_loc(:,ilev)) - end do + if (do_rescaling) then + ! + ! The scaling and scaleTau terms are independent of propagation + ! angle D and could be pre-computed if several values of D are used + ! We re-compute them here to keep not have to localize memory use + ! + do ilay = 1, nlay + do icol = 1, ncol + ssal = ssa(icol, ilay, igpt) + wb = ssal*(1._wp - g(icol, ilay, igpt)) * 0.5_wp + scaleTau = (1._wp - ssal + wb) + ! here wb/scaleTau is parameter wb/(1-w(1-b)) of Eq.21 of the Tang paper + ! actually it is in line of parameter rescaling defined in Eq.7 + ! potentialy if g=ssa=1 then wb/scaleTau = NaN + ! it should not happen because g is never 1 in atmospheres + ! explanation of factor 0.4 note A of Table + Cn(icol,ilay) = 0.4_wp*wb/scaleTau + ! Eq.15 of the paper, multiplied by path length + tau_loc(icol,ilay) = tau(icol,ilay,igpt)*D(icol,igpt)*scaleTau + end do + trans (:,ilay) = exp(-tau_loc(:,ilay)) + An(:,ilay) = (1._wp-trans(:,ilay)**2) + end do + else + do ilay = 1, nlay + tau_loc(:,ilay) = tau(:,ilay,igpt)*D(:,igpt) + trans (:,ilay) = exp(-tau_loc(:,ilay)) + end do + end if ! ! Source function for diffuse radiation ! @@ -129,25 +177,47 @@ subroutine lw_solver_noscat(ncol, nlay, ngpt, top_at_1, D, weight, lay_source(:,:,igpt), lev_source_up(:,:,igpt), lev_source_dn(:,:,igpt), & tau_loc, trans, source_dn, source_up) ! - ! Surface albedo, surface source function + ! Transport down ! - sfc_albedo(:) = 1._wp - sfc_emis(:,igpt) - source_sfc(:) = sfc_emis(:,igpt) * sfc_src(:,igpt) - source_sfcJac(:) = sfc_emis(:,igpt) * sfc_srcJac(:,igpt) + call lw_transport_noscat_dn(ncol, nlay, top_at_1, trans, source_dn, radn_dn(:,:,igpt)) ! - ! Transport + ! Surface albedo, surface source function, reflection and emission + ! 
+ sfc_albedo(:) = 1._wp - sfc_emis(:,igpt) + radn_up (:,sfc_level,igpt) = radn_dn(:,sfc_level,igpt)*sfc_albedo(:) + & + sfc_emis(:,igpt) * sfc_src(:,igpt) + if(do_Jacobians) & + gpt_flux_Jac(:,sfc_level) = sfc_emis(:,igpt) * sfc_srcJac(:,igpt) ! - call lw_transport_noscat(ncol, nlay, top_at_1, & - tau_loc, trans, sfc_albedo, source_dn, source_up, source_sfc, & - radn_up(:,:,igpt), radn_dn(:,:,igpt), & - source_sfcJac, radn_upJac(:,:,igpt)) + ! Transport up, or up and down again if using rescaling + ! + if(do_rescaling) then + call lw_transport_1rescl(ncol, nlay, top_at_1, trans, & + source_dn, source_up, & + radn_up(:,:,igpt), radn_dn(:,:,igpt), An, Cn, & + do_Jacobians, gpt_flux_Jac) ! Standing in for Jacobian, i.e. rad_up_Jac(:,:,igpt), rad_dn_Jac(:,:,igpt)) + else + call lw_transport_noscat_up(ncol, nlay, top_at_1, trans, source_up, radn_up(:,:,igpt), & + do_Jacobians, gpt_flux_Jac) + end if + ! ! Convert intensity to flux assuming azimuthal isotropy and quadrature weight ! radn_dn (:,:,igpt) = 2._wp * pi * weight * radn_dn (:,:,igpt) radn_up (:,:,igpt) = 2._wp * pi * weight * radn_up (:,:,igpt) - radn_upJac(:,:,igpt) = 2._wp * pi * weight * radn_upJac(:,:,igpt) + ! + ! Only broadband-integrated Jacobians are provided + ! + if(do_Jacobians) then + if(igpt == 1) then + radn_upJac(:,:) = gpt_flux_Jac(:,:) + else + radn_upJac(:,:) = radn_upJac(:,:) + gpt_flux_Jac(:,:) + end if + end if end do ! g point loop + if(do_Jacobians) radn_upJac(:,:) = 2._wp * pi * weight * radn_upJac(:,:) end subroutine lw_solver_noscat ! ------------------------------------------------------------------------------------------------- @@ -159,7 +229,8 @@ end subroutine lw_solver_noscat ! 
--------------------------------------------------------------- subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weights, & tau, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, flux_up, flux_dn,& - sfc_srcJac, flux_upJac) & + do_Jacobians, sfc_srcJac, flux_upJac, & + do_rescaling, ssa, g) & bind(C, name="lw_solver_noscat_GaussQuad") integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points logical(wl), intent(in ) :: top_at_1 @@ -177,12 +248,21 @@ subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weig real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: flux_up ! Radiances [W/m2-str] real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: flux_dn ! Top level must contain incident flux boundary condition - real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_srcJac ! surface temperature Jacobian of surface source function [W/m2/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent(out) :: flux_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] + ! + ! Optional variables - arrays aren't referenced if corresponding logical == False + ! + logical(wl), intent(in ) :: do_Jacobians + real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_srcJac ! surface temperature Jacobian of surface source function [W/m2/K] + real(wp), dimension(ncol,nlay+1 ), intent(out ) :: flux_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] + logical(wl), intent(in ) :: do_rescaling + real(wp), dimension(ncol,nlay ,ngpt), intent(in ) :: ssa, g ! single-scattering albedo, asymmetry parameter + ! ------------------------------------ + ! ! Local variables + ! real(wp), dimension(ncol,nlay+1,ngpt) :: radn_dn, radn_up ! Fluxes per quad angle real(wp), dimension(ncol, ngpt) :: Ds_ncol - real(wp), dimension(ncol,nlay+1,ngpt) :: radn_upJac ! perturbed Fluxes per quad angle + real(wp), dimension(ncol,nlay+1 ) :: radn_upJac ! perturbed Fluxes per quad angle integer :: imu, top_level ! 
------------------------------------ @@ -194,23 +274,25 @@ subroutine lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weig top_at_1, Ds_ncol, weights(1), tau, & lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & flux_up, flux_dn, & - sfc_srcJac, flux_upJac) + do_Jacobians, sfc_srcJac, flux_upJac, & + do_rescaling, ssa, g) ! ! For more than one angle use local arrays ! top_level = MERGE(1, nlay+1, top_at_1) call apply_BC(ncol, nlay, ngpt, top_at_1, flux_dn(:,top_level,:), radn_dn) - do imu = 2, nmus Ds_ncol(:,:) = Ds(imu) call lw_solver_noscat(ncol, nlay, ngpt, & top_at_1, Ds_ncol, weights(imu), tau, & lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & radn_up, radn_dn, & - sfc_srcJac, radn_upJac) + do_Jacobians, sfc_srcJac, radn_upJac, & + do_rescaling, ssa, g) flux_up (:,:,:) = flux_up (:,:,:) + radn_up (:,:,:) flux_dn (:,:,:) = flux_dn (:,:,:) + radn_dn (:,:,:) - flux_upJac(:,:,:) = flux_upJac(:,:,:) + radn_upJac(:,:,:) + if (do_Jacobians) & + flux_upJac(:,:) = flux_upJac(:,: ) + radn_upJac(:,: ) end do end subroutine lw_solver_noscat_GaussQuad @@ -440,26 +522,18 @@ subroutine lw_source_noscat(ncol, nlay, lay_source, lev_source_up, lev_source_dn end subroutine lw_source_noscat ! ------------------------------------------------------------------------------------------------- ! - ! Longwave no-scattering transport + ! Longwave no-scattering transport - separate routines for up and down ! ! ------------------------------------------------------------------------------------------------- - subroutine lw_transport_noscat(ncol, nlay, top_at_1, & - tau, trans, sfc_albedo, source_dn, source_up, source_sfc, & - radn_up, radn_dn,& - source_sfcJac, radn_upJac) bind(C, name="lw_transport_noscat") + subroutine lw_transport_noscat_dn(ncol, nlay, top_at_1, & + trans, source_dn, radn_dn) bind(C, name="lw_transport_noscat_dn") integer, intent(in ) :: ncol, nlay ! 
Number of columns, layers, g-points logical(wl), intent(in ) :: top_at_1 ! - real(wp), dimension(ncol,nlay ), intent(in ) :: tau, & ! Absorption optical thickness, pre-divided by mu [] - trans ! transmissivity = exp(-tau) - real(wp), dimension(ncol ), intent(in ) :: sfc_albedo ! Surface albedo - real(wp), dimension(ncol,nlay ), intent(in ) :: source_dn, & - source_up ! Diffuse radiation emitted by the layer - real(wp), dimension(ncol ), intent(in ) :: source_sfc ! Surface source function [W/m2] - real(wp), dimension(ncol,nlay+1), intent( out) :: radn_up ! Radiances [W/m2-str] - real(wp), dimension(ncol,nlay+1), intent(inout) :: radn_dn !Top level must contain incident flux boundary condition + real(wp), dimension(ncol,nlay ), intent(in ) :: trans ! transmissivity = exp(-tau) + real(wp), dimension(ncol,nlay ), intent(in ) :: source_dn ! Diffuse radiation emitted by the layer + real(wp), dimension(ncol,nlay+1), intent(inout) :: radn_dn ! Radiances [W/m2-str] Top level must contain incident flux boundary condition - real(wp), dimension(ncol ), intent(in ) :: source_sfcJac ! surface temperature Jacobian of surface source function [W/m2/K] - real(wp), dimension(ncol,nlay+1), intent(out) :: radn_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] + ! --------------------------------------------------- ! Local variables integer :: ilev ! --------------------------------------------------- @@ -467,40 +541,55 @@ subroutine lw_transport_noscat(ncol, nlay, top_at_1, & ! ! Top of domain is index 1 ! - ! Downward propagation do ilev = 2, nlay+1 radn_dn(:,ilev) = trans(:,ilev-1)*radn_dn(:,ilev-1) + source_dn(:,ilev-1) end do - - ! Surface reflection and emission - radn_up (:,nlay+1) = radn_dn(:,nlay+1)*sfc_albedo(:) + source_sfc (:) - radn_upJac(:,nlay+1) = source_sfcJac(:) - - ! 
Upward propagation - do ilev = nlay, 1, -1 - radn_up (:,ilev) = trans(:,ilev )*radn_up (:,ilev+1) + source_up(:,ilev) - radn_upJac(:,ilev) = trans(:,ilev )*radn_upJac(:,ilev+1) - end do else ! ! Top of domain is index nlay+1 ! - ! Downward propagation do ilev = nlay, 1, -1 radn_dn(:,ilev) = trans(:,ilev )*radn_dn(:,ilev+1) + source_dn(:,ilev) end do + end if + end subroutine lw_transport_noscat_dn + ! ------------------------------------------------------------------------------------------------- + subroutine lw_transport_noscat_up(ncol, nlay, top_at_1, & + trans, source_up, radn_up, do_Jacobians, radn_upJac) bind(C, name="lw_transport_noscat_up") + integer, intent(in ) :: ncol, nlay ! Number of columns, layers, g-points + logical(wl), intent(in ) :: top_at_1 ! + real(wp), dimension(ncol,nlay ), intent(in ) :: trans ! transmissivity = exp(-tau) + real(wp), dimension(ncol,nlay ), intent(in ) :: source_up ! Diffuse radiation emitted by the layer + real(wp), dimension(ncol,nlay+1), intent(inout) :: radn_up ! Radiances [W/m2-str] Top level must contain incident flux boundary condition + logical(wl), intent(in ) :: do_Jacobians + real(wp), dimension(ncol,nlay+1), intent( out) :: radn_upJac ! surface temperature Jacobian of Radiances [W/m2-str / K] - ! Surface reflection and emission - radn_up (:, 1) = radn_dn(:,1)*sfc_albedo(:) + source_sfc (:) - radn_upJac(:, 1) = source_sfcJac(:) - + ! --------------------------------------------------- + ! Local variables + integer :: ilev + ! --------------------------------------------------- + if(top_at_1) then + ! + ! Top of domain is index 1 + ! + ! Upward propagation + do ilev = nlay, 1, -1 + radn_up (:,ilev) = trans(:,ilev )*radn_up (:,ilev+1) + source_up(:,ilev) + if(do_Jacobians) & + radn_upJac(:,ilev) = trans(:,ilev )*radn_upJac(:,ilev+1) + end do + else + ! + ! Top of domain is index nlay+1 + ! ! 
Upward propagation do ilev = 2, nlay+1 - radn_up (:,ilev) = trans(:,ilev-1) * radn_up (:,ilev-1) + source_up(:,ilev-1) - radn_upJac(:,ilev) = trans(:,ilev-1) * radn_upJac(:,ilev-1) + radn_up (:,ilev) = trans(:,ilev-1) * radn_up (:,ilev-1) + source_up(:,ilev-1) + if(do_Jacobians) & + radn_upJac(:,ilev) = trans(:,ilev-1) * radn_upJac(:,ilev-1) end do end if - end subroutine lw_transport_noscat + end subroutine lw_transport_noscat_up ! ------------------------------------------------------------------------------------------------- ! ! Longwave two-stream solutions to diffuse reflectance and transmittance for a layer @@ -537,16 +626,14 @@ pure subroutine lw_two_stream(ncol, nlay, tau, w0, g, & ! gamma1(i,j)= LW_diff_sec * (1._wp - 0.5_wp * w0(i,j) * (1._wp + g(i,j))) ! Fu et al. Eq 2.9 gamma2(i,j)= LW_diff_sec * 0.5_wp * w0(i,j) * (1._wp - g(i,j)) ! Fu et al. Eq 2.10 + ! Eq 18; k = SQRT(gamma1**2 - gamma2**2), limited below to avoid div by 0. + ! k = 0 for isotropic, conservative scattering; this lower limit on k + ! gives relative error with respect to conservative solution + ! of < 0.1% in Rdif down to tau = 10^-9 + k(i) = sqrt(max((gamma1(i,j) - gamma2(i,j)) * (gamma1(i,j) + gamma2(i,j)), 1.e-12_wp)) end do - ! Written to encourage vectorization of exponential, square root - ! Eq 18; k = SQRT(gamma1**2 - gamma2**2), limited below to avoid div by 0. - ! k = 0 for isotropic, conservative scattering; this lower limit on k - ! gives relative error with respect to conservative solution - ! of < 0.1% in Rdif down to tau = 10^-9 - k(1:ncol) = sqrt(max((gamma1(1:ncol,j) - gamma2(1:ncol,j)) * & - (gamma1(1:ncol,j) + gamma2(1:ncol,j)), & - 1.e-12_wp)) + ! Written to encourage vectorization of exponential exp_minusktau(1:ncol) = exp(-tau(1:ncol,j)*k(1:ncol)) ! @@ -705,16 +792,14 @@ pure subroutine sw_two_stream(ncol, nlay, mu0, tau, w0, g, & alpha1(i) = gamma1(i) * gamma4(i) + gamma2(i) * gamma3(i) ! Eq. 16 alpha2(i) = gamma1(i) * gamma3(i) + gamma2(i) * gamma4(i) ! Eq. 
17 + ! Eq 18; k = SQRT(gamma1**2 - gamma2**2), limited below to avoid div by 0. + ! k = 0 for isotropic, conservative scattering; this lower limit on k + ! gives relative error with respect to conservative solution + ! of < 0.1% in Rdif down to tau = 10^-9 + k(i) = sqrt(max((gamma1(i) - gamma2(i)) * (gamma1(i) + gamma2(i)), 1.e-12_wp)) end do - ! Written to encourage vectorization of exponential, square root - ! Eq 18; k = SQRT(gamma1**2 - gamma2**2), limited below to avoid div by 0. - ! k = 0 for isotropic, conservative scattering; this lower limit on k - ! gives relative error with respect to conservative solution - ! of < 0.1% in Rdif down to tau = 10^-9 - k(1:ncol) = sqrt(max((gamma1(1:ncol) - gamma2(1:ncol)) * & - (gamma1(1:ncol) + gamma2(1:ncol)), & - 1.e-12_wp)) + ! Written to encourage vectorization of exponential exp_minusktau(1:ncol) = exp(-tau(1:ncol,j)*k(1:ncol)) ! @@ -977,293 +1062,6 @@ pure subroutine apply_BC_0(ncol, nlay, ngpt, top_at_1, flux_dn) bind (C, name="a end subroutine apply_BC_0 ! ------------------------------------------------------------------------------------------------- ! -! Similar to Longwave no-scattering (lw_solver_noscat) -! a) relies on rescaling of the optical parameters based on asymetry factor and single scattering albedo -! scaling can be computed by scaling_1rescl -! b) adds adustment term based on cloud properties (lw_transport_1rescl) -! adustment terms is computed based on solution of the Tang equations -! for "linear-in-tau" internal source (not in the paper) -! -! Attention: -! use must prceompute scaling before colling the function -! -! Implemented based on the paper -! Tang G, et al, 2018: https://doi.org/10.1175/JAS-D-18-0014.1 -! -! 
------------------------------------------------------------------------------------------------- -subroutine lw_solver_1rescl(ncol, nlay, ngpt, top_at_1, D, & - tau, scaling, lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & - radn_up, radn_dn, & - sfc_srcJac, rad_up_Jac, rad_dn_Jac) bind(C, name="lw_solver_1rescl") - integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points - logical(wl), intent(in ) :: top_at_1 - real(wp), dimension(ncol, ngpt), intent(in ) :: D ! secant of propagation angle [] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: tau ! Absorption optical thickness [] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: scaling ! single scattering albedo [] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: lay_source ! Planck source at layer average temperature [W/m2] - ! Planck source at layer edge for radiation in increasing/decreasing ilay direction - ! lev_source_dec applies the mapping in layer i to the Planck function at layer i - ! lev_source_inc applies the mapping in layer i to the Planck function at layer i+1 - real(wp), dimension(ncol,nlay, ngpt), target, & - intent(in ) :: lev_source_inc, lev_source_dec - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_emis ! Surface emissivity [] - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_src ! Surface source function [W/m2] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: radn_up ! Radiances [W/m2-str] - real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: radn_dn ! Top level must contain incident flux boundary condition - - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_srcJac ! Surface Temperature Jacobian source function [W/m2/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: rad_up_Jac ! Surface Temperature Jacobians [W/m2-str/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: rad_dn_Jac ! Top level set to 0 - - ! Local variables, no g-point dependency - real(wp), dimension(ncol,nlay) :: tau_loc, & ! 
path length (tau/mu) - trans ! transmissivity = exp(-tau) - real(wp), dimension(ncol,nlay) :: source_dn, source_up - real(wp), dimension(ncol ) :: source_sfc, sfc_albedo - real(wp), dimension(ncol,nlay) :: An, Cn - real(wp), dimension(ncol,nlay+1) :: dummyRadn_upJac - - real(wp), dimension(ncol,nlay+1) :: Radn_upJac - real(wp), dimension(ncol,nlay+1) :: Radn_dnJac - real(wp), dimension(ncol) :: source_sfcJac - - real(wp), dimension(:,:,:), pointer :: lev_source_up, lev_source_dn ! Mapping increasing/decreasing indicies to up/down - - real(wp), parameter :: pi = acos(-1._wp) - integer :: ilev, igpt, top_level - ! ------------------------------------ - real(wp), parameter :: tau_thresh = sqrt(epsilon(tau)) - ! ------------------------------------ - - ! Which way is up? - ! Level Planck sources for upward and downward radiation - ! When top_at_1, lev_source_up => lev_source_dec - ! lev_source_dn => lev_source_inc, and vice-versa - if(top_at_1) then - top_level = 1 - lev_source_up => lev_source_dec - lev_source_dn => lev_source_inc - else - top_level = nlay+1 - lev_source_up => lev_source_inc - lev_source_dn => lev_source_dec - end if - - do igpt = 1, ngpt - ! - ! Optical path and transmission, used in source function and transport calculations - ! - do ilev = 1, nlay - tau_loc(:,ilev) = tau(:,ilev,igpt)*D(:,igpt) - trans (:,ilev) = exp(-tau_loc(:,ilev)) - ! - ! here scaling is used to store parameter wb/(1-w(1-b)) of Eq.21 of the Tang's paper - ! explanation of factor 0.4 note A of Table - ! - Cn(:,ilev) = 0.4_wp*scaling(:,ilev,igpt) - An(:,ilev) = (1._wp-trans(:,ilev)*trans(:,ilev)) - end do - - ! Source function for diffuse radiation - ! - call lw_source_noscat(ncol, nlay, & - lay_source(:,:,igpt), lev_source_up(:,:,igpt), lev_source_dn(:,:,igpt), & - tau_loc, trans, source_dn, source_up) - - ! - ! Surface albedo, surface source function - ! 
- sfc_albedo(:) = 1._wp - sfc_emis(:,igpt) - source_sfc(:) = sfc_emis(:,igpt) * sfc_src (:,igpt) - source_sfcJac(:) = sfc_emis(:,igpt) * sfc_srcJac(:,igpt) - ! - ! Transport - ! - ! compute no-scattering fluxes - call lw_transport_noscat(ncol, nlay, top_at_1, & - tau_loc, trans, sfc_albedo, source_dn, source_up, source_sfc, & - radn_up(:,:,igpt), radn_dn(:,:,igpt), & - source_sfcJac, rad_up_Jac(:,:,igpt)) - - rad_dn_Jac(:,:,igpt) = 0._wp - ! make adjustment - call lw_transport_1rescl(ncol, nlay, top_at_1, trans, & - source_dn, source_up, & - radn_up(:,:,igpt), radn_dn(:,:,igpt), An, Cn,& - rad_up_Jac(:,:,igpt), rad_dn_Jac(:,:,igpt)) - - end do ! g point loop -end subroutine lw_solver_1rescl -! ------------------------------------------------------------------------------------------------- -! -! Similar to lw_solver_noscat_GaussQuad. -! It is main solver to use the rescaled-for-scattering approximation for fluxes -! In addition to the no scattering input parameters the user must provide -! scattering related properties (ssa and g) that the solver uses to compute scaling -! -! --------------------------------------------------------------- -subroutine lw_solver_1rescl_GaussQuad(ncol, nlay, ngpt, top_at_1, nmus, Ds, weights, & - tau, ssa, g, lay_source, lev_source_inc, lev_source_dec, & - sfc_emis, sfc_src, & - flux_up, flux_dn, & - sfc_src_Jac, flux_up_Jac, flux_dn_Jac) & - bind(C, name="lw_solver_1rescl_GaussQuad") - integer, intent(in ) :: ncol, nlay, ngpt ! Number of columns, layers, g-points - logical(wl), intent(in ) :: top_at_1 - integer, intent(in ) :: nmus ! number of quadrature angles - real(wp), dimension(nmus), intent(in ) :: Ds, weights ! quadrature secants, weights - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: tau ! Optical thickness, - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: ssa ! single-scattering albedo, - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: g ! 
asymmetry parameter [] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: lay_source ! Planck source at layer average temperature [W/m2] - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: lev_source_inc - ! Planck source at layer edge for radiation in increasing ilay direction [W/m2] - ! Includes spectral weighting that accounts for state-dependent frequency to g-space mapping - real(wp), dimension(ncol,nlay, ngpt), intent(in ) :: lev_source_dec - ! Planck source at layer edge for radiation in decreasing ilay direction [W/m2] - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_emis ! Surface emissivity [] - real(wp), dimension(ncol, ngpt), intent(in ) :: sfc_src ! Surface source function [W/m2] - real(wp), dimension(ncol,nlay+1,ngpt), intent( out) :: flux_up ! Radiances [W/m2-str] - real(wp), dimension(ncol,nlay+1,ngpt), intent(inout) :: flux_dn ! Top level must contain incident flux boundary condition - - real(wp), dimension(ncol ,ngpt), intent(in ) :: sfc_src_Jac ! surface temperature Jacobian of surface source function [W/m2/K] - real(wp), dimension(ncol,nlay+1,ngpt), intent(out) :: flux_up_Jac ! surface temperature Jacobian of Radiances [W/m2-str / K] - real(wp), dimension(ncol,nlay+1,ngpt), intent(out) :: flux_dn_Jac ! surface temperature Jacobian of Radiances [W/m2-str / K] - ! Local variables - real(wp), dimension(ncol,nlay+1,ngpt) :: radn_dn, radn_up ! Fluxes per quad angle - real(wp), dimension(ncol, ngpt) :: Ds_ncol - real(wp), dimension(ncol,nlay+1,ngpt) :: radn_dn_Jac, radn_up_Jac ! Fluxes per quad angle - - real(wp), dimension(ncol,nlay, ngpt) :: tauLoc ! rescaled Tau - real(wp), dimension(ncol,nlay, ngpt) :: scaling ! scaling - real(wp), dimension(ncol,ngpt) :: fluxTOA ! downward flux at TOA - - integer :: imu, top_level - real :: weight - real(wp), parameter :: tresh=1.0_wp - 1e-6_wp - - ! 
Tang rescaling - if (any(ssa*g >= tresh)) then - call scaling_1rescl_safe(ncol, nlay, ngpt, tauLoc, scaling, tau, ssa, g) - else - call scaling_1rescl(ncol, nlay, ngpt, tauLoc, scaling, tau, ssa, g) - endif - ! ------------------------------------ - ! - ! For the first angle output arrays store total flux - ! - top_level = MERGE(1, nlay+1, top_at_1) - fluxTOA = flux_dn(1:ncol, top_level, 1:ngpt) - Ds_ncol(:,:) = Ds(1) - weight = 2._wp*pi*weights(1) - ! Transport is for intensity - ! convert flux at top of domain to intensity assuming azimuthal isotropy - ! - radn_dn(1:ncol, top_level, 1:ngpt) = fluxTOA(1:ncol, 1:ngpt) / weight - call lw_solver_1rescl(ncol, nlay, ngpt, & - top_at_1, Ds_ncol, tauLoc, scaling, & - lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & - flux_up, flux_dn, sfc_src_Jac, flux_up_Jac, flux_dn_Jac) - - flux_up = flux_up * weight - flux_dn = flux_dn * weight - flux_up_Jac = flux_up_Jac * weight - flux_dn_Jac = flux_dn_Jac * weight - do imu = 2, nmus - Ds_ncol(:,:) = Ds(imu) - weight = 2._wp*pi*weights(imu) - ! Transport is for intensity - ! convert flux at top of domain to intensity assuming azimuthal isotropy - ! - radn_dn(1:ncol, top_level, 1:ngpt) = fluxTOA(1:ncol, 1:ngpt) / weight - call lw_solver_1rescl(ncol, nlay, ngpt, & - top_at_1, Ds_ncol, tauLoc, scaling, & - lay_source, lev_source_inc, lev_source_dec, sfc_emis, sfc_src, & - radn_up, radn_dn,sfc_src_Jac,radn_up_Jac,radn_dn_Jac) - - flux_up (:,:,:) = flux_up (:,:,:) + weight*radn_up (:,:,:) - flux_dn (:,:,:) = flux_dn (:,:,:) + weight*radn_dn (:,:,:) - flux_up_Jac(:,:,:) = flux_up_Jac(:,:,:) + weight*radn_up_Jac(:,:,:) - flux_dn_Jac(:,:,:) = flux_dn_Jac(:,:,:) + weight*radn_dn_Jac(:,:,:) - end do - end subroutine lw_solver_1rescl_GaussQuad -! ------------------------------------------------------------------------------------------------- -! -! Computes re-scaled layer optical thickness and scaling parameter -! unsafe if ssa*g =1. -! -! 
--------------------------------------------------------------- - pure subroutine scaling_1rescl(ncol, nlay, ngpt, tauLoc, scaling, tau, ssa, g) - integer , intent(in) :: ncol - integer , intent(in) :: nlay - integer , intent(in) :: ngpt - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: tau - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: ssa - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: g - - real(wp), dimension(ncol, nlay, ngpt), intent(inout) :: tauLoc - real(wp), dimension(ncol, nlay, ngpt), intent(inout) :: scaling - - - integer :: icol, ilay, igpt - real(wp) :: wb, ssal, scaleTau - do igpt=1,ngpt - do ilay=1,nlay - do icol=1,ncol - ssal = ssa(icol, ilay, igpt) - wb = ssal*(1._wp - g(icol, ilay, igpt)) / 2._wp - scaleTau = (1._wp - ssal + wb ) - tauLoc(icol, ilay, igpt) = scaleTau * tau(icol, ilay, igpt) ! Eq.15 of the paper - ! - ! here scaling is used to store parameter wb/(1-w(1-b)) of Eq.21 of the Tang paper - ! actually it is in line of parameter rescaling defined in Eq.7 - ! potentialy if g=ssa=1 then wb/scaleTau = NaN - ! it should not happen - scaling(icol, ilay, igpt) = wb / scaleTau - enddo - enddo - enddo - end subroutine scaling_1rescl -! ------------------------------------------------------------------------------------------------- -! -! Computes re-scaled layer optical thickness and scaling parameter -! safe implementation -! -! 
--------------------------------------------------------------- - pure subroutine scaling_1rescl_safe(ncol, nlay, ngpt, tauLoc, scaling, tau, ssa, g) - integer , intent(in) :: ncol - integer , intent(in) :: nlay - integer , intent(in) :: ngpt - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: tau - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: ssa - real(wp), dimension(ncol, nlay, ngpt), intent(in) :: g - - real(wp), dimension(ncol, nlay, ngpt), intent(inout) :: tauLoc - real(wp), dimension(ncol, nlay, ngpt), intent(inout) :: scaling - - integer :: icol, ilay, igpt - real(wp) :: wb, ssal, scaleTau - do igpt=1,ngpt - do ilay=1,nlay - do icol=1,ncol - ssal = ssa(icol, ilay, igpt) - wb = ssal*(1._wp - g(icol, ilay, igpt)) / 2._wp - scaleTau = (1._wp - ssal + wb ) - tauLoc(icol, ilay, igpt) = scaleTau * tau(icol, ilay, igpt) ! Eq.15 of the paper - ! - ! here scaling is used to store parameter wb/(1-w(1-b)) of Eq.21 of the Tang paper - ! actually it is in line of parameter rescaling defined in Eq.7 - if (scaleTau < 1e-6_wp) then - scaling(icol, ilay, igpt) = 1.0_wp - else - scaling(icol, ilay, igpt) = wb / scaleTau - endif - enddo - enddo - enddo - end subroutine scaling_1rescl_safe -! ------------------------------------------------------------------------------------------------- -! ! Similar to Longwave no-scattering tarnsport (lw_transport_noscat) ! a) adds adjustment factor based on cloud properties ! @@ -1274,7 +1072,7 @@ end subroutine scaling_1rescl_safe subroutine lw_transport_1rescl(ncol, nlay, top_at_1, & trans, source_dn, source_up, & radn_up, radn_dn, An, Cn,& - radn_up_Jac, radn_dn_Jac) bind(C, name="lw_transport_1rescl") + do_Jacobians, radn_up_Jac) bind(C, name="lw_transport_1rescl") integer, intent(in ) :: ncol, nlay ! Number of columns, layers, g-points logical(wl), intent(in ) :: top_at_1 ! real(wp), dimension(ncol,nlay ), intent(in ) :: trans ! 
transmissivity = exp(-tau) @@ -1283,8 +1081,13 @@ subroutine lw_transport_1rescl(ncol, nlay, top_at_1, & real(wp), dimension(ncol,nlay+1), intent(inout) :: radn_up ! Radiances [W/m2-str] real(wp), dimension(ncol,nlay+1), intent(inout) :: radn_dn !Top level must contain incident flux boundary condition real(wp), dimension(ncol,nlay), intent(in ) :: An, Cn + logical(wl), intent(in ) :: do_Jacobians real(wp), dimension(ncol,nlay+1), intent(inout) :: radn_up_Jac ! Surface temperature Jacobians [W/m2-str/K] - real(wp), dimension(ncol,nlay+1), intent(inout) :: radn_dn_Jac !Top level must set to 0 + ! + ! We could in principle compute a downwelling Jacobian too, but it's small + ! (only a small proportion of LW is scattered) and it complicates code and the API, + ! so we will not + ! ! Local variables integer :: ilev, icol @@ -1296,26 +1099,26 @@ subroutine lw_transport_1rescl(ncol, nlay, top_at_1, & ! ! 1st Upward propagation do ilev = nlay, 1, -1 - radn_up (:,ilev) = trans(:,ilev)*radn_up (:,ilev+1) + source_up(:,ilev) - radn_up_Jac(:,ilev) = trans(:,ilev)*radn_up_Jac(:,ilev+1) do icol=1,ncol - adjustmentFactor = Cn(icol,ilev)*( An(icol,ilev)*radn_dn(icol,ilev) - & - trans(icol,ilev)*source_dn(icol,ilev) - source_up(icol,ilev) ) - radn_up (icol,ilev) = radn_up(icol,ilev) + adjustmentFactor - enddo + adjustmentFactor = Cn(icol,ilev)*( An(icol,ilev)*radn_dn(icol,ilev) - & + trans(icol,ilev)*source_dn(icol,ilev) - source_up(icol,ilev) ) + radn_up (icol,ilev) = trans(icol,ilev)*radn_up(icol,ilev+1) + source_up(icol,ilev) + & + adjustmentFactor + end do + if(do_Jacobians) & + radn_up_Jac(:,ilev) = trans(:,ilev)*radn_up_Jac(:,ilev+1) end do ! 2nd Downward propagation + ! radn_dn_Jac(:,1) = 0._wp do ilev = 1, nlay - radn_dn (:,ilev+1) = trans(:,ilev)*radn_dn (:,ilev) + source_dn(:,ilev) - radn_dn_Jac(:,ilev+1) = trans(:,ilev)*radn_dn_Jac(:,ilev) + ! 
radn_dn_Jac(:,ilev+1) = trans(:,ilev)*radn_dn_Jac(:,ilev) do icol=1,ncol adjustmentFactor = Cn(icol,ilev)*( An(icol,ilev)*radn_up(icol,ilev) - & trans(icol,ilev)*source_up(icol,ilev) - source_dn(icol,ilev) ) - - radn_dn (icol,ilev+1) = radn_dn(icol,ilev+1) + adjustmentFactor - - adjustmentFactor = Cn(icol,ilev)*An(icol,ilev)*radn_up_Jac(icol,ilev) - radn_dn_Jac(icol,ilev+1) = radn_dn_Jac(icol,ilev+1) + adjustmentFactor + radn_dn(icol,ilev+1) = trans(icol,ilev)*radn_dn(icol,ilev) + source_dn(icol,ilev) + & + adjustmentFactor + ! adjustmentFactor = Cn(icol,ilev)*An(icol,ilev)*radn_up_Jac(icol,ilev) + ! radn_dn_Jac(icol,ilev+1) = radn_dn_Jac(icol,ilev+1) + adjustmentFactor enddo end do else @@ -1324,26 +1127,28 @@ subroutine lw_transport_1rescl(ncol, nlay, top_at_1, & ! ! Upward propagation do ilev = 1, nlay - radn_up (:,ilev+1) = trans(:,ilev) * radn_up (:,ilev) + source_up(:,ilev) - radn_up_Jac(:,ilev+1) = trans(:,ilev) * radn_up_Jac(:,ilev) + radn_up (:,ilev+1) = trans(:,ilev) * radn_up (:,ilev) + source_up(:,ilev) do icol=1,ncol adjustmentFactor = Cn(icol,ilev)*( An(icol,ilev)*radn_dn(icol,ilev+1) - & trans(icol,ilev)*source_dn(icol,ilev) - source_up(icol,ilev) ) - radn_up(icol,ilev+1) = radn_up(icol,ilev+1) + adjustmentFactor + radn_up(icol,ilev+1) = trans(icol,ilev)*radn_up(icol,ilev) + source_up(icol,ilev) + & + adjustmentFactor enddo + if(do_Jacobians) & + radn_up_Jac(:,ilev+1) = trans(:,ilev) * radn_up_Jac(:,ilev) end do ! 2st Downward propagation + ! radn_dn_Jac(:,nlay+1) = 0._wp do ilev = nlay, 1, -1 - radn_dn (:,ilev) = trans(:,ilev)*radn_dn (:,ilev+1) + source_dn(:,ilev) - radn_dn_Jac(:,ilev) = trans(:,ilev)*radn_dn_Jac(:,ilev+1) + ! 
radn_dn_Jac(:,ilev) = trans(:,ilev)*radn_dn_Jac(:,ilev+1) do icol=1,ncol adjustmentFactor = Cn(icol,ilev)*( An(icol,ilev)*radn_up(icol,ilev) - & trans(icol,ilev)*source_up(icol,ilev) - source_dn(icol,ilev) ) - radn_dn(icol,ilev) = radn_dn(icol,ilev) + adjustmentFactor - - adjustmentFactor = Cn(icol,ilev)*An(icol,ilev)*radn_up_Jac(icol,ilev) - radn_dn_Jac(icol,ilev) = radn_dn_Jac(icol,ilev) + adjustmentFactor + radn_dn(icol,ilev) = trans(icol,ilev)*radn_dn(icol,ilev+1) + source_dn(icol,ilev) + & + adjustmentFactor + ! adjustmentFactor = Cn(icol,ilev)*An(icol,ilev)*radn_up_Jac(icol,ilev) + ! radn_dn_Jac(icol,ilev) = radn_dn_Jac(icol,ilev) + adjustmentFactor enddo end do end if diff --git a/rte/mo_optical_props.F90 b/rte/mo_optical_props.F90 index 3aa2bf548..d254b2f49 100644 --- a/rte/mo_optical_props.F90 +++ b/rte/mo_optical_props.F90 @@ -163,6 +163,7 @@ end function subset_range_abstract procedure, public :: validate => validate_1scalar procedure, public :: get_subset => subset_1scl_range procedure, public :: delta_scale => delta_scale_1scl + procedure, public :: finalize_1scl procedure, private :: alloc_only_1scl procedure, private :: init_and_alloc_1scl @@ -178,6 +179,7 @@ end function subset_range_abstract procedure, public :: validate => validate_2stream procedure, public :: get_subset => subset_2str_range procedure, public :: delta_scale => delta_scale_2str + procedure, public :: finalize_2str procedure, private :: alloc_only_2str procedure, private :: init_and_alloc_2str @@ -194,6 +196,7 @@ end function subset_range_abstract procedure, public :: get_subset => subset_nstr_range procedure, public :: delta_scale => delta_scale_nstr procedure, public :: get_nmom + procedure, public :: finalize_nstr procedure, private :: alloc_only_nstr procedure, private :: init_and_alloc_nstr @@ -343,16 +346,14 @@ function alloc_only_2str(this, ncol, nlay) result(err_message) character(len=128) :: err_message err_message = "" - if(.not. this%is_initialized()) then + if(.not. 
this%is_initialized()) & err_message = "optical_props%alloc: spectral discretization hasn't been provided" - return - end if - if(any([ncol, nlay] <= 0)) then + if(any([ncol, nlay] <= 0)) & err_message = "optical_props%alloc: must provide positive extents for ncol, nlay" - else - if(allocated(this%tau)) deallocate(this%tau) - allocate(this%tau(ncol,nlay,this%get_ngpt())) - end if + if(err_message /= "") return + + if(allocated(this%tau)) deallocate(this%tau) + allocate(this%tau(ncol,nlay,this%get_ngpt())) if(allocated(this%ssa)) deallocate(this%ssa) if(allocated(this%g )) deallocate(this%g ) allocate(this%ssa(ncol,nlay,this%get_ngpt()), this%g(ncol,nlay,this%get_ngpt())) @@ -366,16 +367,14 @@ function alloc_only_nstr(this, nmom, ncol, nlay) result(err_message) character(len=128) :: err_message err_message = "" - if(.not. this%is_initialized()) then + if(.not. this%is_initialized()) & err_message = "optical_props%alloc: spectral discretization hasn't been provided" - return - end if - if(any([ncol, nlay] <= 0)) then + if(any([ncol, nlay] <= 0)) & err_message = "optical_props%alloc: must provide positive extents for ncol, nlay" - else - if(allocated(this%tau)) deallocate(this%tau) - allocate(this%tau(ncol,nlay,this%get_ngpt())) - end if + if(err_message /= "") return + + if(allocated(this%tau)) deallocate(this%tau) + allocate(this%tau(ncol,nlay,this%get_ngpt())) if(allocated(this%ssa)) deallocate(this%ssa) if(allocated(this%p )) deallocate(this%p ) allocate(this%ssa(ncol,nlay,this%get_ngpt()), this%p(nmom,ncol,nlay,this%get_ngpt())) @@ -484,6 +483,38 @@ function copy_and_alloc_nstr(this, nmom, ncol, nlay, spectral_desc, name) result end function copy_and_alloc_nstr ! ------------------------------------------------------------------------------------------ ! + ! Finalize routines + ! + ! 
------------------------------------------------------------------------------------------ + function finalize_1scl(this) result(err_message) + class(ty_optical_props_1scl) :: this + character(len=128) :: err_message + + if(allocated(this%tau)) deallocate(this%tau) + err_message = "" + end function finalize_1scl + ! --------------------------------------------------------------------------- + function finalize_2str(this) result(err_message) + class(ty_optical_props_2str) :: this + character(len=128) :: err_message + + if(allocated(this%tau)) deallocate(this%tau) + if(allocated(this%ssa)) deallocate(this%ssa) + if(allocated(this%g )) deallocate(this%g ) + err_message = "" + end function finalize_2str + ! --------------------------------------------------------------------------- + function finalize_nstr(this) result(err_message) + class(ty_optical_props_nstr) :: this + character(len=128) :: err_message + + if(allocated(this%tau)) deallocate(this%tau) + if(allocated(this%ssa)) deallocate(this%ssa) + if(allocated(this%p )) deallocate(this%p ) + err_message = "" + end function finalize_nstr + ! ------------------------------------------------------------------------------------------ + ! ! Routines for array classes: delta-scaling, validation (ensuring all values can be used ) ! ! ------------------------------------------------------------------------------------------ @@ -809,6 +840,12 @@ function increment(op_in, op_io) result(err_message) integer :: ncol, nlay, ngpt, nmom ! ----- err_message = "" + if(.not. op_in%is_initialized()) & + err_message = "ty_optical_props%increment: Incrementing optical properties aren't initialized" + if(.not. 
op_in%is_initialized()) & + err_message = "ty_optical_props%increment: optical properties to be incremented aren't initialized" + if(err_message /= "") return + ncol = op_io%get_ncol() nlay = op_io%get_nlay() ngpt = op_io%get_ngpt() diff --git a/rte/mo_rte_lw.F90 b/rte/mo_rte_lw.F90 index 488f5ec34..940387bdd 100644 --- a/rte/mo_rte_lw.F90 +++ b/rte/mo_rte_lw.F90 @@ -43,8 +43,7 @@ module mo_rte_lw only: ty_source_func_lw use mo_fluxes, only: ty_fluxes, ty_fluxes_broadband use mo_rte_solver_kernels, & - only: apply_BC, lw_solver_noscat, lw_solver_noscat_GaussQuad, lw_solver_2stream, & - lw_solver_1rescl_GaussQuad + only: apply_BC, lw_solver_noscat, lw_solver_noscat_GaussQuad, lw_solver_2stream implicit none private @@ -59,28 +58,25 @@ function rte_lw(optical_props, top_at_1, & sources, sfc_emis, & fluxes, & inc_flux, n_gauss_angles, use_2stream, & - lw_Ds, flux_up_Jac, flux_dn_Jac) result(error_msg) - class(ty_optical_props_arry), intent(in ) :: optical_props ! Array of ty_optical_props. This type is abstract - ! and needs to be made concrete, either as an array - ! (class ty_optical_props_arry) or in some user-defined way + lw_Ds, flux_up_Jac) result(error_msg) + class(ty_optical_props_arry), intent(in ) :: optical_props ! Set of optical properties as one or more arrays logical, intent(in ) :: top_at_1 ! Is the top of the domain at index 1? ! (if not, ordering is bottom-to-top) - type(ty_source_func_lw), intent(in ) :: sources + type(ty_source_func_lw), intent(in ) :: sources ! Derived type with Planck source functions real(wp), dimension(:,:), intent(in ) :: sfc_emis ! emissivity at surface [] (nband, ncol) - class(ty_fluxes), intent(inout) :: fluxes ! Array of ty_fluxes. Default computes broadband fluxes at all levels + class(ty_fluxes), intent(inout) :: fluxes ! Dervied type for computing spectral integrals from g-point fluxes. + ! Default computes broadband fluxes at all levels ! if output arrays are defined. Can be extended per user desires. 
real(wp), dimension(:,:), & target, optional, intent(in ) :: inc_flux ! incident flux at domain top [W/m2] (ncol, ngpts) - integer, optional, intent(in ) :: n_gauss_angles ! Number of angles used in Gaussian quadrature + integer, optional, intent(in ) :: n_gauss_angles ! Number of angles used in Gaussian quadrature (max 3) ! (no-scattering solution) logical, optional, intent(in ) :: use_2stream ! When 2-stream parameters (tau/ssa/g) are provided, use 2-stream methods ! Default is to use re-scaled longwave transport real(wp), dimension(:,:), & - optional, intent(in ) :: lw_Ds ! linear fit to column transmissivity (ncol,ngpt) - real(wp), dimension(:,:), & - target, optional, intent(inout) :: flux_up_Jac ! surface temperature flux Jacobian [W/m2/K] (ncol, nlay+1) - real(wp), dimension(:,:), & - target, optional, intent(inout) :: flux_dn_Jac ! surface temperature flux Jacobian [W/m2/K] (ncol, nlay+1) + optional, intent(in ) :: lw_Ds ! User-specifed 1/cos of transport angle per col, g-point + real(wp), dimension(:,:), target, & + optional, intent(inout) :: flux_up_Jac ! surface temperature flux Jacobian [W/m2/K] (ncol, nlay+1) character(len=128) :: error_msg ! If empty, calculation was successful ! -------------------------------- ! @@ -90,11 +86,11 @@ function rte_lw(optical_props, top_at_1, & integer :: n_quad_angs integer :: icol, iband, igpt real(wp) :: lw_Ds_wt - logical :: using_2stream + logical :: using_2stream, do_Jacobians real(wp), dimension(:,:,:), allocatable :: gpt_flux_up, gpt_flux_dn real(wp), dimension(:,:), allocatable :: sfc_emis_gpt - real(wp), dimension(:,:,:), allocatable :: gpt_flux_upJac, gpt_flux_dnJac - type(ty_fluxes_broadband) :: Jac_fluxes + real(wp), dimension(1,1), target :: decoy + real(wp), dimension(:,:), pointer :: jacobian ! -------------------------------------------------- ! ! Weights and angle secants for first order (k=1) Gaussian quadrature. 
@@ -124,8 +120,13 @@ function rte_lw(optical_props, top_at_1, & nlay = optical_props%get_nlay() ngpt = optical_props%get_ngpt() nband = optical_props%get_nband() + do_Jacobians = present(flux_up_Jac) + if(do_Jacobians) then + jacobian => flux_up_Jac + else + jacobian => decoy + end if error_msg = "" - ! ------------------------------------------------------------------------------------ ! ! Error checking -- input consistency of sizes and validity of values @@ -135,7 +136,7 @@ function rte_lw(optical_props, top_at_1, & if(.not. fluxes%are_desired()) & error_msg = "rte_lw: no space allocated for fluxes" - if (present(flux_up_Jac) .and. check_extents) then + if (do_Jacobians .and. check_extents) then if( .not. extents_are(flux_up_Jac, ncol, nlay+1)) & error_msg = "rte_lw: flux Jacobian inconsistently sized" endif @@ -208,10 +209,10 @@ function rte_lw(optical_props, top_at_1, & end if class is (ty_optical_props_2str) if (present(lw_Ds)) & - error_msg = "rte_lw: lw_Ds not valid input for _2str class" + error_msg = "rte_lw: lw_Ds not valid when providing scattering optical properties" if (using_2stream .and. n_quad_angs /= 1) & error_msg = "rte_lw: using_2stream=true incompatible with specifying n_gauss_angles" - if (using_2stream .and. (present(flux_up_Jac) .or. present(flux_up_Jac))) & + if (using_2stream .and. do_Jacobians) & error_msg = "rte_lw: can't provide Jacobian of fluxes w.r.t surface temperature with 2-stream" class default error_msg = "rte_lw: lw_solver(...ty_optical_props_nstr...) not yet implemented" @@ -228,28 +229,31 @@ function rte_lw(optical_props, top_at_1, & error_msg = trim(optical_props%get_name()) // ': ' // trim(error_msg) return end if - ! ------------------------------------------------------------------------------------ ! ! Lower boundary condition -- expand surface emissivity by band to gpoints ! 
allocate(gpt_flux_up (ncol, nlay+1, ngpt), gpt_flux_dn(ncol, nlay+1, ngpt)) - allocate(gpt_flux_upJac(ncol, nlay+1, ngpt)) allocate(sfc_emis_gpt(ncol, ngpt)) !!$acc enter data copyin(sources, sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source) !$acc enter data copyin(optical_props) - !$acc enter data create(gpt_flux_dn, gpt_flux_up) - !$acc enter data create(gpt_flux_upJac) - !$acc enter data create(sfc_emis_gpt) + !$acc enter data create( gpt_flux_dn, gpt_flux_up) + !$omp target enter data map(alloc:gpt_flux_dn, gpt_flux_up) + !$acc enter data create( sfc_emis_gpt) + !$omp target enter data map(alloc:sfc_emis_gpt) + !$omp enter data create( flux_up_Jac) if(do_Jacobians) + !$omp target enter data map(alloc:flux_up_Jac) if(do_Jacobians) call expand_and_transpose(optical_props, sfc_emis, sfc_emis_gpt) ! ! Upper boundary condition ! if(present(inc_flux)) then - !$acc enter data copyin(inc_flux) + !$acc enter data copyin(inc_flux) + !$omp target enter data map(to:inc_flux) call apply_BC(ncol, nlay, ngpt, logical(top_at_1, wl), inc_flux, gpt_flux_dn) - !$acc exit data delete(inc_flux) + !$acc exit data delete( inc_flux) + !$omp target exit data map(release:inc_flux) else ! ! Default is zero incident diffuse flux @@ -266,7 +270,8 @@ function rte_lw(optical_props, top_at_1, & ! ! No scattering two-stream calculation ! 
- !$acc enter data copyin(optical_props%tau) + !$acc enter data copyin(optical_props%tau) + !$omp target enter data map(to:optical_props%tau) error_msg = optical_props%validate() if(len_trim(error_msg) > 0) return @@ -277,7 +282,9 @@ function rte_lw(optical_props, top_at_1, & optical_props%tau, & sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, & sfc_emis_gpt, sources%sfc_source, & - gpt_flux_up, gpt_flux_dn, sources%sfc_source_Jac, gpt_flux_upJac) + gpt_flux_up, gpt_flux_dn, & + logical(do_Jacobians, wl), sources%sfc_source_Jac, jacobian, & + logical(.false., wl), optical_props%tau, optical_props%tau) else call lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, & logical(top_at_1, wl), & @@ -288,22 +295,19 @@ function rte_lw(optical_props, top_at_1, & sources%lay_source, sources%lev_source_inc, & sources%lev_source_dec, & sfc_emis_gpt, sources%sfc_source, & - gpt_flux_up, gpt_flux_dn, sources%sfc_source_Jac, gpt_flux_upJac) + gpt_flux_up, gpt_flux_dn, & + logical(do_Jacobians, wl), sources%sfc_source_Jac, jacobian, & + logical(.false., wl), optical_props%tau, optical_props%tau) end if - !$acc exit data delete(optical_props%tau) + !$acc exit data delete( optical_props%tau) + !$omp target exit data map(release:optical_props%tau) class is (ty_optical_props_2str) - - if (present(flux_dn_Jac) .and. check_extents) then - if( .not. extents_are(flux_dn_Jac, ncol, nlay+1)) then - error_msg = "rte_lw: flux_dn_Jac inconsistently sized" - return - end if - endif if (using_2stream) then ! ! two-stream calculation with scattering ! 
- !$acc enter data copyin(optical_props%tau, optical_props%ssa, optical_props%g) + !$acc enter data copyin(optical_props%tau, optical_props%ssa, optical_props%g) + !$omp target enter data map(to:optical_props%tau, optical_props%ssa, optical_props%g) error_msg = optical_props%validate() if(len_trim(error_msg) > 0) return call lw_solver_2stream(ncol, nlay, ngpt, logical(top_at_1, wl), & @@ -311,24 +315,28 @@ function rte_lw(optical_props, top_at_1, & sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, & sfc_emis_gpt, sources%sfc_source, & gpt_flux_up, gpt_flux_dn) - !$acc exit data delete(optical_props%tau, optical_props%ssa, optical_props%g) + !$acc exit data delete( optical_props%tau, optical_props%ssa, optical_props%g) + !$omp target exit data map(release:optical_props%tau, optical_props%ssa, optical_props%g) else ! ! Re-scaled solution to account for scattering ! - allocate(gpt_flux_dnJac (ncol, nlay+1, ngpt)) - !$acc enter data create(gpt_flux_dnJac) - !$acc enter data copyin(optical_props%tau, optical_props%ssa, optical_props%g) - call lw_solver_1rescl_GaussQuad(ncol, nlay, ngpt, logical(top_at_1, wl), & - n_quad_angs, gauss_Ds(1:n_quad_angs,n_quad_angs), & - gauss_wts(1:n_quad_angs,n_quad_angs), & - optical_props%tau, optical_props%ssa, optical_props%g, & - sources%lay_source, sources%lev_source_inc, & - sources%lev_source_dec, & - sfc_emis_gpt, sources%sfc_source,& - gpt_flux_up, gpt_flux_dn, & - sources%sfc_source_Jac, gpt_flux_upJac, gpt_flux_dnJac) - !$acc exit data delete(optical_props%tau, optical_props%ssa, optical_props%g) + !$acc enter data copyin(optical_props%tau, optical_props%ssa, optical_props%g) + !$omp target enter data map(to:optical_props%tau, optical_props%ssa, optical_props%g) + call lw_solver_noscat_GaussQuad(ncol, nlay, ngpt, & + logical(top_at_1, wl), & + n_quad_angs, & + gauss_Ds(1:n_quad_angs,n_quad_angs), & + gauss_wts(1:n_quad_angs,n_quad_angs), & + optical_props%tau, & + sources%lay_source, sources%lev_source_inc, & 
+ sources%lev_source_dec, & + sfc_emis_gpt, sources%sfc_source, & + gpt_flux_up, gpt_flux_dn, & + logical(do_Jacobians, wl), sources%sfc_source_Jac, jacobian, & + logical(.true., wl), optical_props%ssa, optical_props%g) + !$acc exit data delete( optical_props%tau, optical_props%ssa, optical_props%g) + !$omp target exit data map(release:optical_props%tau, optical_props%ssa, optical_props%g) endif class is (ty_optical_props_nstr) ! @@ -342,34 +350,14 @@ function rte_lw(optical_props, top_at_1, & ! ...and reduce spectral fluxes to desired output quantities ! error_msg = fluxes%reduce(gpt_flux_up, gpt_flux_dn, optical_props, top_at_1) - if (error_msg /= '') return - - if (present(flux_up_Jac)) Jac_fluxes%flux_up => flux_up_Jac - select type (optical_props) - class is (ty_optical_props_1scl) - ! - ! gpoint Jacobian fluxes aren't defined for _1scl - ! - error_msg = Jac_fluxes%reduce(gpt_flux_upJac, gpt_flux_upJac, optical_props, top_at_1) - class is (ty_optical_props_2str) - ! - ! Compute Jacobians when using rescaling approach for scattering - ! - if(.not. 
using_2stream) then - if (present(flux_dn_Jac)) Jac_fluxes%flux_dn => flux_dn_Jac - error_msg = Jac_fluxes%reduce(gpt_flux_upJac, gpt_flux_dnJac, optical_props, top_at_1) - !$acc exit data delete(gpt_flux_dnJac) - deallocate(gpt_flux_dnJac) - end if - end select - - !$acc exit data delete(gpt_flux_upJac) - deallocate(gpt_flux_upJac) - !$acc exit data delete(sfc_emis_gpt) - !$acc exit data delete(gpt_flux_up,gpt_flux_dn) - !$acc exit data delete(optical_props) + !$acc exit data delete( gpt_flux_up, gpt_flux_dn, sfc_emis_gpt) + !$omp target exit data map(release:gpt_flux_up, gpt_flux_dn, sfc_emis_gpt) + !$acc exit data delete(optical_props) !!$acc exit data delete(sources%lay_source, sources%lev_source_inc, sources%lev_source_dec, sources%sfc_source,sources) + !$omp exit data copyout( flux_up_Jac) if(do_Jacobians) + !$omp target exit data map(from:flux_up_Jac) if(do_Jacobians) + end function rte_lw !-------------------------------------------------------------------------------------------------------------------- ! @@ -389,6 +377,7 @@ subroutine expand_and_transpose(ops,arr_in,arr_out) ngpt = ops%get_ngpt() limits = ops%get_band_lims_gpoint() !$acc parallel loop collapse(2) copyin(arr_in, limits) + !$omp target teams distribute parallel do simd collapse(2) map(to:arr_in, limits) do iband = 1, nband do icol = 1, ncol do igpt = limits(1, iband), limits(2, iband) diff --git a/rte/mo_rte_sw.F90 b/rte/mo_rte_sw.F90 index 18169b3e3..c698fd538 100644 --- a/rte/mo_rte_sw.F90 +++ b/rte/mo_rte_sw.F90 @@ -135,6 +135,7 @@ function rte_sw(atmos, top_at_1, & ! and switch dimension ordering !$acc enter data create(sfc_alb_dir_gpt, sfc_alb_dif_gpt) + !$omp target enter data map(alloc:sfc_alb_dir_gpt, sfc_alb_dif_gpt) call expand_and_transpose(atmos, sfc_alb_dir, sfc_alb_dir_gpt) call expand_and_transpose(atmos, sfc_alb_dif, sfc_alb_dif_gpt) ! 
------------------------------------------------------------------------------------ @@ -147,15 +148,21 @@ function rte_sw(atmos, top_at_1, & ! direct and diffuse to represent the total, consistent with the LW ! !$acc enter data copyin(mu0) + !$omp target enter data map(to:mu0) !$acc enter data create(gpt_flux_up, gpt_flux_dn, gpt_flux_dir) + !$omp target enter data map(alloc:gpt_flux_up, gpt_flux_dn, gpt_flux_dir) !$acc enter data copyin(inc_flux) + !$omp target enter data map(to:inc_flux) call apply_BC(ncol, nlay, ngpt, logical(top_at_1, wl), inc_flux, mu0, gpt_flux_dir) !$acc exit data delete(inc_flux) + !$omp target exit data map(release:inc_flux) if(present(inc_flux_dif)) then !$acc enter data copyin(inc_flux_dif) + !$omp target enter data map(to:inc_flux_dif) call apply_BC(ncol, nlay, ngpt, logical(top_at_1, wl), inc_flux_dif, gpt_flux_dn ) !$acc exit data delete(inc_flux_dif) + !$omp target exit data map(release:inc_flux_dif) else call apply_BC(ncol, nlay, ngpt, logical(top_at_1, wl), gpt_flux_dn ) end if @@ -166,6 +173,7 @@ function rte_sw(atmos, top_at_1, & ! Direct beam only ! !$acc enter data copyin(atmos, atmos%tau) + !$omp target enter data map(to:atmos%tau) error_msg = atmos%validate() if(len_trim(error_msg) > 0) return call sw_solver_noscat(ncol, nlay, ngpt, logical(top_at_1, wl), & @@ -177,11 +185,13 @@ function rte_sw(atmos, top_at_1, & !gpt_flux_up = 0._wp !gpt_flux_dn = 0._wp !$acc exit data delete(atmos%tau, atmos) + !$omp target exit data map(release:atmos%tau) class is (ty_optical_props_2str) ! ! two-stream calculation with scattering ! 
!$acc enter data copyin(atmos, atmos%tau, atmos%ssa, atmos%g) + !$omp target enter data map(to:atmos%tau, atmos%ssa, atmos%g) error_msg = atmos%validate() if(len_trim(error_msg) > 0) return call sw_solver_2stream(ncol, nlay, ngpt, logical(top_at_1, wl), & @@ -189,7 +199,9 @@ function rte_sw(atmos, top_at_1, & sfc_alb_dir_gpt, sfc_alb_dif_gpt, & gpt_flux_up, gpt_flux_dn, gpt_flux_dir) !$acc exit data delete(atmos%tau, atmos%ssa, atmos%g, atmos) + !$omp target exit data map(release:atmos%tau, atmos%ssa, atmos%g) !$acc exit data delete(sfc_alb_dir_gpt, sfc_alb_dif_gpt) + !$omp target exit data map(release:sfc_alb_dir_gpt, sfc_alb_dif_gpt) class is (ty_optical_props_nstr) ! ! n-stream calculation @@ -208,7 +220,9 @@ function rte_sw(atmos, top_at_1, & ! error_msg = fluxes%reduce(gpt_flux_up, gpt_flux_dn, atmos, top_at_1, gpt_flux_dir) !$acc exit data delete(mu0) + !$omp target exit data map(release:mu0) !$acc exit data delete(gpt_flux_up, gpt_flux_dn, gpt_flux_dir) + !$omp target exit data map(release:gpt_flux_up, gpt_flux_dn, gpt_flux_dir) end function rte_sw !-------------------------------------------------------------------------------------------------------------------- ! 
@@ -228,6 +242,7 @@ subroutine expand_and_transpose(ops,arr_in,arr_out) ngpt = ops%get_ngpt() limits = ops%get_band_lims_gpoint() !$acc parallel loop collapse(2) copyin(arr_in, limits) + !$omp target teams distribute parallel do simd collapse(2) map(to:arr_in, limits) do iband = 1, nband do icol = 1, ncol do igpt = limits(1, iband), limits(2, iband) diff --git a/rte/mo_rte_util_array.F90 b/rte/mo_rte_util_array.F90 index c946cdb78..9ef28380e 100644 --- a/rte/mo_rte_util_array.F90 +++ b/rte/mo_rte_util_array.F90 @@ -48,8 +48,10 @@ logical function any_vals_less_than_1D(array, check_value) real(wp) :: minValue !$acc kernels copyin(array) + !$omp target map(to:array) map(from:minValue) minValue = minval(array) !$acc end kernels + !$omp end target any_vals_less_than_1D = (minValue < check_value) @@ -62,8 +64,10 @@ logical function any_vals_less_than_2D(array, check_value) real(wp) :: minValue !$acc kernels copyin(array) + !$omp target map(to:array) map(from:minValue) minValue = minval(array) !$acc end kernels + !$omp end target any_vals_less_than_2D = (minValue < check_value) @@ -75,9 +79,28 @@ logical function any_vals_less_than_3D(array, check_value) real(wp) :: minValue +#ifdef _OPENMP + integer :: dim1, dim2, dim3, i, j, k + dim1 = size(array,1) + dim2 = size(array,2) + dim3 = size(array,3) + minValue = array(1,1,1) ! 
initialize to some value + !$omp target teams map(to:array) & + !$omp defaultmap(tofrom:scalar) reduction(min:minValue) + !$omp distribute parallel do simd reduction(min:minValue) + do i = 1, dim1 + do j = 1, dim2 + do k = 1, dim3 + minValue = min(minValue,array(i,j,k)) + enddo + enddo + enddo + !$omp end target teams +#else !$acc kernels copyin(array) minValue = minval(array) !$acc end kernels +#endif any_vals_less_than_3D = (minValue < check_value) @@ -93,8 +116,10 @@ logical function any_vals_less_than_1D_masked(array, mask, check_value) real(wp) :: minValue !$acc kernels copyin(array) + !$omp target map(to:array, mask) map(from:minValue) minValue = minval(array, mask=mask) !$acc end kernels + !$omp end target any_vals_less_than_1D_masked = (minValue < check_value) @@ -108,8 +133,10 @@ logical function any_vals_less_than_2D_masked(array, mask, check_value) real(wp) :: minValue !$acc kernels copyin(array) + !$omp target map(to:array, mask) map(from:minValue) minValue = minval(array, mask=mask) !$acc end kernels + !$omp end target any_vals_less_than_2D_masked = (minValue < check_value) @@ -123,8 +150,10 @@ logical function any_vals_less_than_3D_masked(array, mask, check_value) real(wp) :: minValue !$acc kernels copyin(array) + !$omp target map(to:array, mask) map(from:minValue) minValue = minval(array, mask=mask) !$acc end kernels + !$omp end target any_vals_less_than_3D_masked = (minValue < check_value) @@ -139,9 +168,11 @@ logical function any_vals_outside_1D(array, checkMin, checkMax) real(wp) :: minValue, maxValue !$acc kernels copyin(array) + !$omp target map(to:array) map(from:minValue, maxValue) minValue = minval(array) maxValue = maxval(array) !$acc end kernels + !$omp end target any_vals_outside_1D = minValue < checkMin .or. 
maxValue > checkMax end function any_vals_outside_1D @@ -153,9 +184,11 @@ logical function any_vals_outside_2D(array, checkMin, checkMax) real(wp) :: minValue, maxValue !$acc kernels copyin(array) + !$omp target map(to:array) map(from:minValue, maxValue) minValue = minval(array) maxValue = maxval(array) !$acc end kernels + !$omp end target any_vals_outside_2D = minValue < checkMin .or. maxValue > checkMax end function any_vals_outside_2D @@ -168,10 +201,33 @@ logical function any_vals_outside_3D(array, checkMin, checkMax) ! but an explicit loop is the only current solution on GPUs real(wp) :: minValue, maxValue + +#ifdef _OPENMP + integer :: dim1, dim2, dim3, i, j, k + dim1 = size(array,1) + dim2 = size(array,2) + dim3 = size(array,3) + minValue = array(1,1,1) ! initialize to some value + maxValue = array(1,1,1) ! initialize to some value + !$omp target teams map(to:array) & + !$omp defaultmap(tofrom:scalar) reduction(min:minValue) reduction(max:maxValue) + !$omp distribute parallel do simd reduction(min:minValue) reduction(max:maxValue) + do i= 1, dim1 + do j = 1, dim2 + do k = 1, dim3 + minValue = min(minValue,array(i,j,k)) + maxValue = max(maxValue,array(i,j,k)) + enddo + enddo + enddo + !$omp end target teams +#else !$acc kernels copyin(array) minValue = minval(array) maxValue = maxval(array) !$acc end kernels +#endif + any_vals_outside_3D = minValue < checkMin .or. maxValue > checkMax end function any_vals_outside_3D @@ -186,9 +242,11 @@ logical function any_vals_outside_1D_masked(array, mask, checkMin, checkMax) real(wp) :: minValue, maxValue !$acc kernels copyin(array) + !$omp target map(to:array, mask) map(from:minValue, maxValue) minValue = minval(array, mask=mask) maxValue = maxval(array, mask=mask) !$acc end kernels + !$omp end target any_vals_outside_1D_masked = minValue < checkMin .or. 
maxValue > checkMax end function any_vals_outside_1D_masked @@ -201,9 +259,11 @@ logical function any_vals_outside_2D_masked(array, mask, checkMin, checkMax) real(wp) :: minValue, maxValue !$acc kernels copyin(array) + !$omp target map(to:array, mask) map(from:minValue, maxValue) minValue = minval(array, mask=mask) maxValue = maxval(array, mask=mask) !$acc end kernels + !$omp end target any_vals_outside_2D_masked = minValue < checkMin .or. maxValue > checkMax end function any_vals_outside_2D_masked @@ -216,9 +276,11 @@ logical function any_vals_outside_3D_masked(array, mask, checkMin, checkMax) real(wp) :: minValue, maxValue !$acc kernels copyin(array) + !$omp target map(to:array, mask) map(from:minValue, maxValue) minValue = minval(array, mask=mask) maxValue = maxval(array, mask=mask) !$acc end kernels + !$omp end target any_vals_outside_3D_masked = minValue < checkMin .or. maxValue > checkMax end function any_vals_outside_3D_masked @@ -308,6 +370,7 @@ subroutine zero_array_1D(ni, array) bind(C, name="zero_array_1D") integer :: i ! ----------------------- !$acc parallel loop copyout(array) + !$omp target teams distribute parallel do simd map(from:array) do i = 1, ni array(i) = 0.0_wp end do @@ -320,6 +383,7 @@ subroutine zero_array_3D(ni, nj, nk, array) bind(C, name="zero_array_3D") integer :: i,j,k ! ----------------------- !$acc parallel loop collapse(3) copyout(array) + !$omp target teams distribute parallel do simd collapse(3) map(from:array) do k = 1, nk do j = 1, nj do i = 1, ni @@ -337,6 +401,7 @@ subroutine zero_array_4D(ni, nj, nk, nl, array) bind(C, name="zero_array_4D") integer :: i,j,k,l ! 
----------------------- !$acc parallel loop collapse(4) copyout(array) + !$omp target teams distribute parallel do simd collapse(4) map(from:array) do l = 1, nl do k = 1, nk do j = 1, nj diff --git a/tests/Makefile b/tests/Makefile index dbc0906df..ed64c65b0 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -1,26 +1,22 @@ +# # Location of RTE+RRTMGP libraries, module files. +# RRTMGP_BUILD = $(RRTMGP_ROOT)/build -# Sets macros FC, FCFLAGS consistent with RTE+RRTMGP --include $(RRTMGP_BUILD)/Makefile.conf - -# Location of netcdf C and Fortran libraries. Could specify with environment variables if file doesn't exist --include $(RRTMGP_ROOT)/examples/rfmip-clear-sky/Makefile.libs # # RRTMGP library, module files # -LDFLAGS += -L$(RRTMGP_BUILD) -LIBS += -lrrtmgp -lrte +# LDFLAGS += -L$(RRTMGP_BUILD) +# LIBS += -lrrtmgp -lrte FCINCLUDE += -I$(RRTMGP_BUILD) - # # netcdf library, module files -# C and Fortran interfaces respectively +# Environment variables NCHOME and NFHOME point to root of C and Fortran interfaces respectively - # FCINCLUDE += -I$(NFHOME)/include LDFLAGS += -L$(NFHOME)/lib -L$(NCHOME)/lib LIBS += -lnetcdff -lnetcdf -VPATH = .:$(RRTMGP_ROOT)/examples:$(RRTMGP_ROOT)/examples/rfmip-clear-sky +VPATH = .:$(RRTMGP_ROOT)/examples:$(RRTMGP_ROOT)/examples/rfmip-clear-sky:$(RRTMGP_ROOT)/examples/all-sky VPATH += $(RRTMGP_ROOT)/extensions:$(RRTMGP_ROOT)/extensions/cloud_optics:$(RRTMGP_ROOT)/extensions/solar_variability # Compilation rules @@ -38,7 +34,7 @@ ADDITIONS = mo_heating_rates.o mo_compute_bc.o mo_rrtmgp_clr_all_sky.o ADDITIONS += mo_load_coefficients.o mo_simple_netcdf.o mo_rfmip_io.o ADDITIONS += mo_testing_io.o # Cloud optics -CLOUDS += mo_cloud_sampling.o mo_cloud_optics.o mo_load_cloud_coefficients.o +CLOUDS += mo_cloud_sampling.o mo_cloud_optics.o mo_load_cloud_coefficients.o mo_garand_atmos_io.o # Solar variability ADDITIONS += mo_solar_variability.o @@ -62,5 +58,13 @@ mo_load_coefficients.o: $(LIB_DEPS) mo_simple_netcdf.o 
mo_load_coefficients.F90 mo_rfmip_io.o.o: $(LIB_DEPS) mo_simple_netcdf.o mo_rfmip_io.F90 mo_simple_netcdf.o: $(LIB_DEPS) mo_simple_netcdf.F90 +tests: + cp ${RRTMGP_ROOT}/examples/rfmip-clear-sky/multiple_input4MIPs_radiation_RFMIP_UColorado-RFMIP-1-2_none.nc ./test_atmospheres.nc + $(RUN_CMD) ./clear_sky_regression test_atmospheres.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-lw-g256-2018-12-04.nc + $(RUN_CMD) ./clear_sky_regression test_atmospheres.nc ${RRTMGP_ROOT}/rrtmgp/data/rrtmgp-data-sw-g224-2018-12-04.nc + +check: + python verification.py + clean: -rm clear_sky_regression *.o *.optrpt diff --git a/tests/validation-plots.py b/tests/validation-plots.py index 0b223cb0b..9f77dcd63 100644 --- a/tests/validation-plots.py +++ b/tests/validation-plots.py @@ -36,11 +36,12 @@ def make_comparison_plot(variants, labels, reference, vscale, col_dim="site", la # Reverse vertical ordering plt.ylim(vscale.max(), vscale.min()) -def construct_lbl_esgf_name(var): +def construct_lbl_esgf_name(var, esgf_node="llnl"): # # For a given variable name, provide the OpenDAP URL for the LBLRM line-by-line results # prefix = "http://esgf3.dkrz.de/thredds/dodsC/cmip6/RFMIP/AER/LBLRTM-12-8/rad-irf/r1i1p1f1/Efx/" + if(esgf_node == "llnl"): prefix = "http://esgf-data1.llnl.gov/thredds/dodsC/css03_data/CMIP6/RFMIP/AER/LBLRTM-12-8/rad-irf/r1i1p1f1/Efx/" return(prefix + var + "/gn/v20190514/" + var + "_Efx_LBLRTM-12-8_rad-irf_r1i1p1f1_gn.nc") ######################################################################## @@ -68,6 +69,9 @@ def construct_lbl_esgf_name(var): reset_coords().swap_dims({"level":"plev"}).interp(plev=plev) for i in np.arange(0, gp.site.size)], dim = 'site') cols = cc.glasbey_dark + plev.load() + gpi.load() + lbli.load() with PdfPages('validation-figures.pdf') as pdf: ######################################################################## # Longwave