diff --git a/.github/backup/test_libclouphxx.yml b/.github/backup/test_libclouphxx.yml
new file mode 100644
index 000000000..f533ef025
--- /dev/null
+++ b/.github/backup/test_libclouphxx.yml
@@ -0,0 +1,101 @@
+#    - name: Install Nvidia driver
+#      if: ${{!matrix.disable_cuda}}
+#      run: sudo apt install --no-install-recommends nvidia-driver-470
+#    
+#    - name: Install Singularity
+#      #when installed from this action, .SIF is always converted to sandbox (could be related to: https://githubmemory.com/repo/hpcng/singularity/issues/6065)
+#      uses: eWaterCycle/setup-singularity@v6
+#      with:
+#        singularity-version: 3.7.1
+#      #apt installation following https://sylabs.io/guides/3.0/user-guide/installation.html, but this is a too old version and uninstalls python-is-python3
+##      run: |
+##        wget -O- http://neuro.debian.net/lists/focal.de-fzj.libre | sudo tee /etc/apt/sources.list.d/neurodebian.sources.list
+##        sudo apt-key adv --recv-keys --keyserver hkps://keyserver.ubuntu.com 0xA5D32F012649A5A9
+##        sudo apt-get update
+##        sudo apt-get install -y singularity-container
+#            
+#    - name: Cache UWLCM Singularity image
+#      id: cache_singularity
+#      uses: actions/cache@v2
+#      with:
+#        path: '${{ github.workspace }}/singularity_images/uwlcm_ubuntu_20_04_cuda_11_4.sif'
+#        key: 'sng_ubuntu_20_04_cuda_11_4'
+#        
+#    - name: Download UWLCM Singularity image
+#      if: steps.cache_singularity.outputs.cache-hit != 'true'
+#      run: |
+#        mkdir '${{ github.workspace }}/singularity_images'
+#        singularity pull --disable_cache --dir '${{ github.workspace }}/singularity_images' library://pdziekan/default/uwlcm:ubuntu_20_04_cuda_11_4
+#      # disable Singularity cache, we cache manually
+#
+#    - name: Set friendly Singularity image name
+#      uses: allenevans/set-env@v2.0.0
+#      with:
+#        SI: '${{ github.workspace }}/singularity_images/uwlcm_ubuntu_20_04_cuda_11_4.sif'
+#      
+#    - name: Configure CMake
+#      # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
+#      # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
+#      run: singularity exec $SI cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DLIBCLOUDPHXX_FORCE_MULTI_CUDA=True -DLIBCLOUDPHXX_DISABLE_CUDA=${{matrix.disable_cuda}}
+##
+#    - name: Build libcloudph++
+#      # Build your program with the given configuration
+#      run: VERBOSE=1 singularity exec $SI cmake --build ${{github.workspace}}/build --config ${{matrix.build_type}} -j${{matrix.threads}}
+      
+      
+ #   - name: Run Singularity shell
+ #     run: singularity shell '${{ github.workspace }}/singularity_images/uwlcm_ubuntu_20_04_cuda_11_1_1.0.sif'
+        
+#    - name: Download UWLCM Singularity container
+#      run: wget https://github.com/igfuw/UWLCM/blob/0fc77ec68053936e36eea4b49f11b3dd2cb1a827/singularity/sng_ubuntu_20_04_cuda_11_1
+        
+#    - name: Build UWLCM Singularity container
+#      run: singularity build sng_ubuntu_20_04_cuda_11_1.sif sng_ubuntu_20_04_cuda_11_1
+    
+    # TODO: cache cuda-toolki
+    
+    #- name: Install cuda-toolkit
+    #  id: cuda-toolkit
+    #  uses: Jimver/cuda-toolkit@v0.2.4
+    #  with:
+    #    linux-local-args: '["--toolkit"]'
+
+  #  - name: Install boost
+  #    uses: MarkusJx/install-boost@v2.0.0
+  #    id: install-boost
+  #    with:
+  #      # REQUIRED: Specify the required boost version
+  #      # A list of supported versions can be found here: 
+  #      # https://github.com/actions/boost-versions/blob/main/versions-manifest.json
+  #      boost_version: 1.73.0
+  #      # OPTIONAL: Specify a platform version
+  #      #platform_version: 18.04
+  #      # OPTIONAL: Specify a custom install location
+  #      #boost_install_dir: /home/runner/some_directory
+  
+#    - name: Install Thrust
+#      #run: sudo apt-get install libthrust-dev
+#      run: |
+#        git clone --depth=1 git://github.com/thrust/thrust.git --branch 1.9.10-1
+#        sudo ln -s `pwd`/thrust/thrust /usr/local/include/thrust
+        
+#    - name: Cache Boost
+#      uses: actions/cache@v2
+#      with:
+#        path: '${{ runner.workspace }}/boost_*.tar.gz'
+#        key: 'boost-1.72.0'
+
+ #   - name: Build Boost
+ #     id: boost
+ #     # This won't re-download the archive unnecessarily:
+ #     uses: egor-tensin/build-boost@v1
+ #     with:
+ #       version: 1.72.0
+ #       libraries: date_time # in fact we don't need any compiled libs, only header libs, but when nothing is passed all libs are built
+ #       platform: x64
+ #       configuration: RelWithDebInfo
+      
+    #- name: Install hdf5 (Linux)
+    #  run: sudo apt-get install libhdf5-dev
+    #  #if: matrix.os == 'ubuntu-latest'
+      
diff --git a/.github/workflows/test_libclouphxx.yml b/.github/workflows/test_libclouphxx.yml
new file mode 100644
index 000000000..8779ce86d
--- /dev/null
+++ b/.github/workflows/test_libclouphxx.yml
@@ -0,0 +1,292 @@
+name: Test libcloudph++
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+
+jobs:
+  build_CUDA:
+    runs-on: ubuntu-20.04
+    strategy:
+      matrix:
+        name: ["CUDA"]
+        build_type: ["Debug", "RelWithDebInfoPortable"]
+        include:
+        - name: "CUDA"
+          disable_cuda: false
+          # not enough RAM to compile Debug CUDA on 4 threads
+          threads: 1
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: build libcloudph++
+      uses: igfuw/libcloudphxx_build@v0.2-beta
+      with:
+        disable_cuda: ${{matrix.disable_cuda}}
+        build_type: ${{matrix.build_type}}
+        threads: ${{matrix.threads}}
+        path: ${{ github.workspace }}
+
+  build:
+    runs-on: ubuntu-20.04
+    strategy:
+      matrix:
+        name: ["no_CUDA"]
+        build_type: ["Debug", "RelWithDebInfoPortable"]
+        mpi: ["none", "mvapich2"]
+        #mpi: ["none"]
+        include:
+        - name: "no_CUDA"
+          disable_cuda: true
+          threads: 4
+        - mpi: "none"
+          tag: "ubuntu_20_04_cuda_11_4"
+          cxx: "g++"
+        - mpi: "mvapich2"
+          tag: "ubuntu_20_04_cuda_11_4_mvapich2"
+          cxx: "mpic++"
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: build libcloudph++
+      uses: igfuw/libcloudphxx_build@v0.2-beta
+      with:
+        disable_cuda: ${{matrix.disable_cuda}}
+        build_type: ${{matrix.build_type}}
+        threads: ${{matrix.threads}}
+        path: ${{ github.workspace }}
+        install_prefix: ${{ github.workspace }}/installed 
+        tag: ${{ matrix.tag }}
+        cxx: ${{ matrix.cxx }}
+
+    # tar build dir before upload as artifact to retain permission and case-sensitive names
+    - name: Compress libcloudph++ build
+      run: tar -cvf build.tar build
+
+    - name: Upload libcloudph++ build
+      uses: actions/upload-artifact@v2
+      with:
+        name: libcloud_build_${{matrix.build_type}}_mpi_${{matrix.mpi}}_tar
+        path: build.tar
+
+  # test jobs
+  unit_test:
+    needs: build
+    runs-on: ubuntu-20.04
+
+    strategy:
+      matrix:
+        build_type: ["RelWithDebInfoPortable", "Debug"]
+        mpi: ["none", "mvapich2"]
+        include:
+        - mpi: "none"
+          tag: "ubuntu_20_04_cuda_11_4"
+        - mpi: "mvapich2"
+          tag: "ubuntu_20_04_cuda_11_4_mvapich2"
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Download libcloudph++ build
+      uses: actions/download-artifact@v2
+      with:
+        name: libcloud_build_${{matrix.build_type}}_mpi_${{matrix.mpi}}_tar
+
+    - name: Decompress libcloudph++ build
+      run: tar -xvf build.tar
+
+    - name: load UWLCM Singularity image
+      uses: igfuw/load_UWLCM_singularity_image@v0.1
+      with:
+        path: ${{ github.workspace }}/singularity_images
+        tag: ${{ matrix.tag }}
+
+    - name: Run unit tests
+      working-directory: ${{github.workspace}}/build
+      # See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail
+      run: OMP_NUM_THREADS=4 singularity exec $SI ctest -C ${{matrix.build_type}} || cat Testing/Temporary/LastTest.log / # "/" intentional! (just to make cat exit with an error code)
+
+
+  kinematic_2D_test:
+    needs: build
+    runs-on: ubuntu-20.04
+
+    strategy:
+      matrix:
+        build_type: ["RelWithDebInfoPortable"]
+        mpi: ["none"]
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Download libcloudph++ build
+      uses: actions/download-artifact@v2
+      with:
+        name: libcloud_build_${{matrix.build_type}}_mpi_${{matrix.mpi}}_tar
+
+    - name: Decompress libcloudph++ build
+      run: tar -xvf build.tar
+
+    # Debugging with a ssh session
+#    - name: Setup tmate session
+#      uses: mxschmitt/action-tmate@v3
+
+    - name: Install libcloudph++
+      run: sudo cmake --install build
+
+    - name: checkout libmpdata++ repo
+      uses: actions/checkout@v2
+      with:
+        repository: igfuw/libmpdataxx
+        path: libmpdataxx
+
+    - name: Install libmpdata++
+      uses: igfuw/libmpdataxx_install@v0.4
+      with:
+        build_type: ${{matrix.build_type}}
+        threads: 4
+        path: ${{ github.workspace }}/libmpdataxx/libmpdata++
+        install_prefix: ${{ github.workspace }}/installed
+
+    - name: Configure kinematic_2D CMake
+      working-directory: ${{github.workspace}}/models/kinematic_2D
+      run: singularity exec -B ${{ github.workspace }}/installed/ $SI cmake -B build -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -Dlibcloudph++_DIR=${{ github.workspace }}/installed/share/libcloudph++ -Dlibmpdata++_DIR=${{ github.workspace }}/installed/share/libmpdata++
+
+  
+    - name: Build kinematic_2D 
+      working-directory: ${{github.workspace}}/models/kinematic_2D
+      run: VERBOSE=1 singularity exec -B ${{ github.workspace }}/installed/ $SI cmake --build build --config ${{matrix.build_type}} 
+
+    - name: Run kinematic_2D tests
+      working-directory: ${{github.workspace}}/models/kinematic_2D/build
+      run: |
+        singularity exec -B ${{ github.workspace }}/installed/ $SI ctest -VV -R travis # compare icicle results against reference data (done for full simulation for bulk schemes and a couple of steps for lagrangian)
+        cat Testing/Temporary/LastTest.log
+
+    # Debugging with a ssh session on failure
+#    - name: Setup tmate session
+#      if: ${{ failure() }}
+#      uses: mxschmitt/action-tmate@v3
+
+
+  parcel_test:
+    needs: build
+# do after kinematic_2D_test, because otherwise we occasionally get "illegal instruction" errors, probably because libcloudphxx is installed simultaneously on the same runner at the same location by two different jobs (?)
+#    needs: kinematic_2D_test
+    runs-on: ubuntu-20.04
+
+    strategy:
+      matrix:
+        build_type: ["RelWithDebInfoPortable", "Debug"]
+        mpi: ["none"]
+        include:
+        - build_type: "RelWithDebInfoPortable"
+          long_tests: true
+          debug_tests: false
+        - build_type: "Debug"
+          long_tests: false
+          debug_tests: true
+
+
+    steps:
+    - uses: actions/checkout@v2
+
+    - name: Download libcloudph++ build
+      uses: actions/download-artifact@v2
+      with:
+        name: libcloud_build_${{matrix.build_type}}_mpi_${{matrix.mpi}}_tar
+
+    - name: Decompress libcloudph++ build
+      run: tar -xvf build.tar
+
+    - name: load UWLCM Singularity image
+      uses: igfuw/load_UWLCM_singularity_image@v0.1
+      with:
+        path: ${{ github.workspace }}/singularity_images
+
+    - name: Install libcloudph++
+      run: sudo cmake --install build
+
+    - name: checkout parcel repo
+      uses: actions/checkout@v2
+      with:
+        repository: igfuw/parcel
+        path: parcel
+
+    - run: mkdir parcel/plots/outputs
+
+    - name: run parcel unit_test
+      working-directory: ${{github.workspace}}/parcel
+      if: ${{matrix.long_tests}}
+      run: PYTHONPATH=${{ github.workspace }}/installed/usr/lib/python3/dist-packages singularity exec -B${{ github.workspace }}/installed $SI python3 -m pytest -s -v unit_test
+      #run: PYTHONPATH=${{ github.workspace }}/installed/usr/lib/python3/dist-packages singularity exec -B${{ github.workspace }}/installed,/usr/lib/python3/dist-packages/Gnuplot $SI python3 -m pytest -v unit_test
+
+    - name: run parcel long_test
+      working-directory: ${{github.workspace}}/parcel
+      if: ${{matrix.long_tests}}
+      run: PYTHONPATH=${{ github.workspace }}/installed/usr/lib/python3/dist-packages singularity exec -B${{ github.workspace }}/installed $SI python3 -m pytest -s -v long_test
+
+    - name: run parcel unit_test_debug
+      working-directory: ${{github.workspace}}/parcel
+      if: ${{matrix.debug_tests}}
+      run: PYTHONPATH=${{ github.workspace }}/installed/usr/lib/python3/dist-packages singularity exec -B${{ github.workspace }}/installed $SI python3 -m pytest -s -v unit_test_debug
+
+  build_and_test_KiD-A:
+    runs-on: ubuntu-20.04
+    strategy:
+      matrix:
+        name: ["no_CUDA"]
+        build_type: ["RelWithDebInfoPortable"]
+        mpi: ["none"]
+        include:
+        - name: "no_CUDA"
+          disable_cuda: true
+          threads: 4
+
+    steps:
+    - name: Checkout libcloudphxx PR
+      uses: actions/checkout@v2
+      with:
+        fetch-depth: 0 
+
+    - name: merge with the KiD-A branch
+      run: |
+        git config --global user.email "pdziekan@fuw.edu.pl"
+        git config --global user.name "Piotr Dziekan"
+        git fetch origin
+        git merge origin/kida-1d
+
+    - run: grep diag_accr include/libcloudph++/lgrngn/particles.hpp
+
+    - name: build libcloudph++
+      uses: igfuw/libcloudphxx_build@v0.2-beta
+      with:
+        disable_cuda: ${{matrix.disable_cuda}}
+        build_type: ${{matrix.build_type}}
+        threads: ${{matrix.threads}}
+        path: ${{ github.workspace }}
+        install_prefix: ${{ github.workspace }}/installed 
+
+    - name: Install libcloudph++
+      run: sudo cmake --install build
+
+    - name: checkout KiD-libcloud repo
+      uses: actions/checkout@v2
+      with:
+        repository: igfuw/kid-libcloud
+        path: kid-libcloud
+
+    - name: run KiD LWP test
+      working-directory: ${{github.workspace}}/kid-libcloud
+      run: PYTHONPATH=${{ github.workspace }}/installed/usr/lib/python3/dist-packages singularity exec -B${{ github.workspace }}/installed  $SI bash ./.travis_scripts/lwp_test.sh
+
+  call_test_uwlcm_hlpr:
+    uses: igfuw/UWLCM/.github/workflows/test_uwlcm_hlpr.yml@master
+    with:
+      UWLCM_sha: "master"
+      libcloudphxx_sha: ${{ github.sha }} # merge PR SHA
+      libmpdataxx_sha: "master"
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 03bd41a7a..00d9fa58a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,6 +36,7 @@ ENDIF(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
 set(CMAKE_CXX_FLAGS_DEBUG "")
 set(CMAKE_CXX_FLAGS_RELEASE "")
 set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "")
+set(CMAKE_CXX_FLAGS_RELWITHDEBINFOPORTABLE "") # without march=native
 
 ############################################################################################
 # see if CUDA is available
@@ -61,6 +62,7 @@ else()
   set(CMAKE_CUDA_FLAGS_DEBUG "")
   set(CMAKE_CUDA_FLAGS_RELEASE "")
   set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "")
+  set(CMAKE_CUDA_FLAGS_RELWITHDEBINFOPORTABLE "")
 endif()
 
 #############################################################################################
@@ -95,8 +97,8 @@ endif()
 set(CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER}) # mpic++ will not work
 
 ############################################################################################
-# create the library target
-add_library(cloudphxx_lgrngn SHARED ${files})
+# create the lagrangian microphysics library target
+add_library(cloudphxx_lgrngn SHARED ${files}) 
 
 ############################################################################################
 
@@ -114,9 +116,10 @@ target_include_directories(cloudphxx_lgrngn
 # enabling c++14
 target_compile_features(cloudphxx_lgrngn PUBLIC cxx_std_14)
 # config-specific flags
-target_compile_options(cloudphxx_lgrngn PRIVATE $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:DEBUG>>:         -g -Og -DTHRUST_DEBUG>)
-target_compile_options(cloudphxx_lgrngn PRIVATE $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:RELWITHDEBINFO>>: -Ofast -march=native>)
-target_compile_options(cloudphxx_lgrngn PRIVATE $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:RELEASE>>:       -DNDEBUG -Ofast -march=native -Winline>)
+target_compile_options(cloudphxx_lgrngn PRIVATE $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:DEBUG>>:                  -g -Og -DTHRUST_DEBUG>)
+target_compile_options(cloudphxx_lgrngn PRIVATE $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:RELEASE>>:                -Ofast -march=native -Winline -DNDEBUG >)
+target_compile_options(cloudphxx_lgrngn PRIVATE $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:RELWITHDEBINFO>>:         -Ofast -march=native>)
+target_compile_options(cloudphxx_lgrngn PRIVATE $<$<AND:$<COMPILE_LANGUAGE:CXX>,$<CONFIG:RELWITHDEBINFOPORTABLE>>: -Ofast>)
 
 # add dependencies
 if (CMAKE_CUDA_COMPILER)
@@ -205,13 +208,16 @@ if (CMAKE_CUDA_COMPILER)
     # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#gpu-compilation
     list(SORT msg)
     list(GET msg 0 VIRT)
+    if (VIRT STREQUAL "?")
+      set (VIRT "35")
+    endif()
     target_compile_options(cloudphxx_lgrngn PRIVATE  $<$<COMPILE_LANGUAGE:CUDA>: --gpu-architecture=compute_${VIRT}>)
 
     set(GPU_CODE_OPTIONS  --gpu-code=)
     foreach(code ${msg})
       message(STATUS "CUDA capability: ${code}")
       if (code STREQUAL "?")
-        set (code "20")
+        set (code "35")
         message(STATUS "CUDA capability check failed, assuming a default of ${code}")
       endif()
       set(arch ${code})
@@ -302,9 +308,7 @@ file(WRITE "${tmpdir}/test.cpp" "
   #include <thrust/system/cpp/vector.h>
 
   #include <boost/numeric/odeint.hpp>
-  #include <boost/numeric/odeint/external/thrust/thrust_algebra.hpp>
-  #include <boost/numeric/odeint/external/thrust/thrust_operations.hpp>
-  #include <boost/numeric/odeint/external/thrust/thrust_resize.hpp>
+  #include <boost/numeric/odeint/external/thrust/thrust.hpp>
   
   struct rhs 
   {
@@ -314,6 +318,7 @@ file(WRITE "${tmpdir}/test.cpp" "
       const float /* t */
     )  
     {
+      assert(psi.size() == dot_psi.size());
     }  
   };
   
@@ -325,7 +330,7 @@ file(WRITE "${tmpdir}/test.cpp" "
       thrust::cpp::vector<float>, // deriv_type
       float,                      // time_type
       boost::numeric::odeint::thrust_algebra,
-      boost::numeric::odeint::thrust_operations 
+      boost::numeric::odeint::thrust_operations
     > chem_stepper;
   
     thrust::cpp::vector<float> v(2);
@@ -359,22 +364,42 @@ add_subdirectory(tests)
 add_subdirectory(include) 
 add_subdirectory(bindings) 
 
+############################################################################################
+
+# create a header-only library target (enough for blk_1m blk_2m and common)
+add_library(cloudphxx_headers INTERFACE)
+
+target_include_directories(cloudphxx_headers 
+  INTERFACE
+    $<INSTALL_INTERFACE:include>
+)
+
+target_compile_features(cloudphxx_headers INTERFACE cxx_std_14)
+
+target_link_libraries(cloudphxx_headers
+  INTERFACE
+    Boost::boost
+)
+
 ############################################################################################
 # installation
-set_target_properties(cloudphxx_lgrngn PROPERTIES DEBUG_POSTFIX _dbg RELWITHDEBINFO_POSTFIX _relwithdbg)
+set_target_properties(cloudphxx_lgrngn PROPERTIES DEBUG_POSTFIX _dbg RELWITHDEBINFO_POSTFIX _relwithdbg RELWITHDEBINFOPORTABLE_POSTFIX _relwithdbg)
 
 # install files and export targets
-install(TARGETS cloudphxx_lgrngn EXPORT cloudphxx_lgrngn-targets 
+install(TARGETS cloudphxx_lgrngn cloudphxx_headers EXPORT cloudphxx-targets 
   LIBRARY DESTINATION lib
   INCLUDES DESTINATION include)
 
-install(EXPORT cloudphxx_lgrngn-targets NAMESPACE clphxx:: DESTINATION share/libcloudph++)
+install(EXPORT cloudphxx-targets NAMESPACE clphxx:: DESTINATION share/libcloudph++)
+
+#install(TARGETS cloudphxx_headers EXPORT cloudphxx_headers-targets)
+#  INCLUDES DESTINATION include) # these files were already installed by lgrngn
+
+#install(EXPORT cloudphxx_headers-targets NAMESPACE clphxx:: DESTINATION share/libcloudph++)
 
 # generate and install a config file
 include(CMakePackageConfigHelpers)
 
-#export(TARGETS cloudphxx_lgrngn FILE MyLibraryConfig.cmake)
-
 configure_package_config_file(
   libcloudph++-config.cmake.in
   "${CMAKE_CURRENT_BINARY_DIR}/libcloudph++-config.cmake"
@@ -387,4 +412,3 @@ install(
   DESTINATION
     share/libcloudph++
 )
-############################################################################################
diff --git a/bindings/python/CMakeLists.txt b/bindings/python/CMakeLists.txt
index 9ec6f7fed..039533ae1 100644
--- a/bindings/python/CMakeLists.txt
+++ b/bindings/python/CMakeLists.txt
@@ -2,16 +2,10 @@ add_library(cloudphxx SHARED lib.cpp)
 add_dependencies(cloudphxx git_revision.h)
 set_target_properties(cloudphxx PROPERTIES SUFFIX ".so") # e.g. Mac defaults to .dylib which is not looked for by Python
 
-# informing the Python bindings where to find Python
-find_package(PythonLibs)
-if (NOT PYTHON_LIBRARIES)
-  message(FATAL_ERROR "
-    Python libraries not found. 
-    Please install them (e.g. sudo apt-get install python-dev).
-  ")
-endif()
-target_include_directories(cloudphxx PRIVATE ${PYTHON_INCLUDE_DIRS})
-target_link_libraries(cloudphxx ${PYTHON_LIBRARIES})
+find_package(Python3 REQUIRED COMPONENTS Interpreter Development NumPy)
+
+target_include_directories(cloudphxx PRIVATE ${Python3_INCLUDE_DIRS})
+target_link_libraries(cloudphxx ${Python3_LIBRARIES})
 
 # informing the Python bindings where to find Boost.Python 
 #if(${Boost_MINOR_VERSION} LESS 67)
@@ -48,8 +42,10 @@ if(${Boost_MINOR_VERSION} LESS 65)
   find_package(Boost COMPONENTS python3 REQUIRED)
   target_link_libraries(cloudphxx ${Boost_LIBRARIES})
 else()
-  message(STATUS "boost numpy as numpy3")
-  find_package(Boost COMPONENTS numpy3 REQUIRED) # also finds python3
+#  message(STATUS "boost numpy as numpy3")
+#  find_package(Boost COMPONENTS numpy3 REQUIRED) # also finds python3
+  message(STATUS "boost numpy as numpy")
+  find_package(Boost COMPONENTS numpy REQUIRED) # also finds python3
   target_link_libraries(cloudphxx ${Boost_LIBRARIES})
   target_compile_options(cloudphxx PRIVATE  -DBPNUMPY)
 #  if(${Boost_MINOR_VERSION} LESS 67)
@@ -85,20 +81,18 @@ endif()
 
 target_link_libraries(cloudphxx cloudphxx_lgrngn)
 
-#to retain rpath to libcloudphxx_lgrngn.so linked by libcloudphxx.so after installation
+# build rpath to libcloudphxx_lgrngn.so in the build directory
+set_property(TARGET cloudphxx PROPERTY BUILD_RPATH_USE_ORIGIN TRUE)
+set_property(TARGET cloudphxx PROPERTY BUILD_RPATH "${ORIGIN}/")
+target_link_options(cloudphxx PRIVATE "-Wl,--disable-new-dtags") # otherwise BUILD_RPATH sets RUNPATH instead of RPATH, and LD_LIBRARY_PATH has precedence over RUNPATH. TODO: when installing, it is probably better to set RUNPATH...
+
+#install rpath to libcloudphxx_lgrngn.so linked by libcloudphxx.so after installation
 set_property(TARGET cloudphxx PROPERTY INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
 set_property(TARGET cloudphxx PROPERTY INSTALL_RPATH_USE_LINK_PATH TRUE)
 
-# where to install python modules (see http://stackoverflow.com/questions/1242904/finding-python-site-packages-directory-with-cmake)
-execute_process(
-  COMMAND "${PYTHON_EXECUTABLE}" -c "if True:
-    from distutils import sysconfig as sc
-    print(sc.get_python_lib(prefix='', plat_specific=True))"
-  OUTPUT_VARIABLE PYTHON_SITE
-  OUTPUT_STRIP_TRAILING_WHITESPACE)
-
+# Python_SITELIB is an absolute path, we manually prepend install prefix
 install ( TARGETS cloudphxx
    LIBRARY
-     DESTINATION ${PYTHON_SITE}
+     DESTINATION "${CMAKE_INSTALL_PREFIX}${Python3_SITELIB}"
      COMPONENT library
 )
diff --git a/bindings/python/common.hpp b/bindings/python/common.hpp
index fb11862be..58c174c18 100644
--- a/bindings/python/common.hpp
+++ b/bindings/python/common.hpp
@@ -88,6 +88,12 @@ namespace libcloudphxx
 	return cmn::theta_dry::p(rhod  * si::kilograms / si::cubic_metres, r * si::kilograms / si::kilograms, T * si::kelvins) / si::pascals;
       }
 
+      template <typename real_t>
+      real_t visc(const real_t &T)
+      {
+	return cmn::vterm::visc(T * si::kelvins) / si::pascals / si::seconds;
+      }
+
       template <typename real_t>
       real_t rw3_cr(const real_t &rd3, const real_t &kappa, const real_t &T)
       {
diff --git a/bindings/python/lgrngn.hpp b/bindings/python/lgrngn.hpp
index a1b871ccf..fca0a95c5 100644
--- a/bindings/python/lgrngn.hpp
+++ b/bindings/python/lgrngn.hpp
@@ -319,6 +319,31 @@ namespace libcloudphxx
         }
       }
 
+      template <typename real_t>
+      void set_rdd( // rlx_dry_distros
+        lgr::opts_init_t<real_t> *arg,
+        const bp::dict &kappa_func
+      )
+      {
+        arg->rlx_dry_distros.clear();
+        if(len(kappa_func.keys()) == 0)
+          return;
+
+        // loop over kappas
+        for (int i = 0; i < len(kappa_func.keys()); ++i)
+        {
+          const real_t kappa = bp::extract<real_t>(kappa_func.keys()[i]);
+          const bp::list nlnrd_kparange_zrange_list = bp::extract<bp::list>(kappa_func.values()[i]);
+          assert(len(nlnrd_kparange_zrange_list) == 3);
+          auto nlnrd = std::make_shared<detail::pyunary<real_t>>(nlnrd_kparange_zrange_list[0]);
+          const bp::list kparange_list = bp::extract<bp::list>(nlnrd_kparange_zrange_list[1]);
+          std::pair<real_t, real_t> kparange{bp::extract<real_t>(kparange_list[0]), bp::extract<real_t>(kparange_list[1])};
+          const bp::list zrange_list = bp::extract<bp::list>(nlnrd_kparange_zrange_list[2]);
+          std::pair<real_t, real_t> zrange{bp::extract<real_t>(zrange_list[0]), bp::extract<real_t>(zrange_list[1])};
+          arg->rlx_dry_distros[kappa] = std::make_tuple(nlnrd, kparange, zrange);
+        }
+      }
+
       template <typename real_t>
       void get_ds(
         lgr::opts_init_t<real_t> *arg
@@ -351,6 +376,14 @@ namespace libcloudphxx
         throw std::runtime_error("source_dry_distros does not feature a getter yet - TODO");
       }
 
+      template <typename real_t>
+      void get_rdd(
+        lgr::opts_init_t<real_t> *arg
+      )
+      {
+        throw std::runtime_error("relax_dry_distros does not feature a getter yet - TODO");
+      }
+
       template <typename real_t>
       void set_kp(
         lgr::opts_init_t<real_t> *arg,
diff --git a/bindings/python/lib.cpp b/bindings/python/lib.cpp
index e285596c4..8d86cd566 100644
--- a/bindings/python/lib.cpp
+++ b/bindings/python/lib.cpp
@@ -135,6 +135,7 @@ BOOST_PYTHON_MODULE(libcloudphxx)
     bp::def("l_v", &common::l_v<real_t>);
     bp::def("T", &common::T<real_t>);
     bp::def("p", &common::p<real_t>);
+    bp::def("visc", &common::visc<real_t>);
     bp::def("rw3_cr", &common::rw3_cr<real_t>);
     bp::def("S_cr", &common::S_cr<real_t>);
     bp::def("p_hydro", &common::p_hydro<real_t>);
@@ -197,7 +198,7 @@ BOOST_PYTHON_MODULE(libcloudphxx)
       .value("OpenMP", lgr::OpenMP)
       .value("CUDA",   lgr::CUDA)
       .value("multi_CUDA",   lgr::multi_CUDA);
-    bp::enum_<lgr::kernel_t::kernel_t>("kernel_t") 
+    bp::enum_<lgr::kernel_t>("kernel_t") 
       .value("geometric", lgr::kernel_t::geometric)
       .value("golovin", lgr::kernel_t::golovin)
       .value("hall", lgr::kernel_t::hall)
@@ -209,21 +210,25 @@ BOOST_PYTHON_MODULE(libcloudphxx)
       .value("vohl_davis_no_waals", lgr::kernel_t::vohl_davis_no_waals)
       .value("hall_pinsky_stratocumulus", lgr::kernel_t::hall_pinsky_stratocumulus)
       .value("hall_pinsky_cumulonimbus", lgr::kernel_t::hall_pinsky_cumulonimbus);
-    bp::enum_<lgr::vt_t::vt_t>("vt_t") 
+    bp::enum_<lgr::vt_t>("vt_t") 
       .value("beard76", lgr::vt_t::beard76)
       .value("beard77", lgr::vt_t::beard77)
       .value("beard77fast", lgr::vt_t::beard77fast)
       .value("khvorostyanov_spherical", lgr::vt_t::khvorostyanov_spherical)
       .value("khvorostyanov_nonspherical", lgr::vt_t::khvorostyanov_nonspherical);
-    bp::enum_<lgr::RH_formula_t::RH_formula_t>("RH_formula_t") 
+    bp::enum_<lgr::RH_formula_t>("RH_formula_t") 
       .value("pv_cc", lgr::RH_formula_t::pv_cc)
       .value("rv_cc", lgr::RH_formula_t::rv_cc)
       .value("pv_tet", lgr::RH_formula_t::pv_tet)
       .value("rv_tet", lgr::RH_formula_t::rv_tet);
-    bp::enum_<lgr::as_t::as_t>("as_t") 
+    bp::enum_<lgr::as_t>("as_t") 
       .value("implicit", lgr::as_t::implicit)
       .value("euler", lgr::as_t::euler)
       .value("pred_corr", lgr::as_t::pred_corr);
+    bp::enum_<lgr::src_t>("src_t") 
+      .value("off", lgr::src_t::off)
+      .value("simple", lgr::src_t::simple)
+      .value("matching", lgr::src_t::matching);
 
     bp::enum_<cmn::chem::chem_species_t>("chem_species_t")
       .value("H",    cmn::chem::H)
@@ -243,6 +248,7 @@ BOOST_PYTHON_MODULE(libcloudphxx)
       .def_readwrite("coal", &lgr::opts_t<real_t>::coal)
       .def_readwrite("src", &lgr::opts_t<real_t>::src)
       .def_readwrite("rcyc", &lgr::opts_t<real_t>::rcyc)
+      .def_readwrite("rlx", &lgr::opts_t<real_t>::rlx)
       .def_readwrite("chem_dsl", &lgr::opts_t<real_t>::chem_dsl)
       .def_readwrite("chem_dsc", &lgr::opts_t<real_t>::chem_dsc)
       .def_readwrite("chem_rct", &lgr::opts_t<real_t>::chem_rct)
@@ -257,6 +263,7 @@ BOOST_PYTHON_MODULE(libcloudphxx)
       .add_property("dry_sizes", &lgrngn::get_ds<real_t>, &lgrngn::set_ds<real_t>)
       .add_property("src_dry_distros", &lgrngn::get_sdd<real_t>, &lgrngn::set_sdd<real_t>)
       .add_property("src_dry_sizes", &lgrngn::get_ds<real_t>, &lgrngn::set_sds<real_t>)
+      .add_property("rlx_dry_distros", &lgrngn::get_rdd<real_t>, &lgrngn::set_rdd<real_t>)
       .def_readwrite("nx", &lgr::opts_init_t<real_t>::nx)
       .def_readwrite("ny", &lgr::opts_init_t<real_t>::ny)
       .def_readwrite("nz", &lgr::opts_init_t<real_t>::nz)
@@ -284,7 +291,7 @@ BOOST_PYTHON_MODULE(libcloudphxx)
       .def_readwrite("coal_switch", &lgr::opts_init_t<real_t>::coal_switch)
       .def_readwrite("sedi_switch", &lgr::opts_init_t<real_t>::sedi_switch)
       .def_readwrite("subs_switch", &lgr::opts_init_t<real_t>::subs_switch)
-      .def_readwrite("src_switch", &lgr::opts_init_t<real_t>::src_switch)
+      .def_readwrite("rlx_switch", &lgr::opts_init_t<real_t>::rlx_switch)
       .def_readwrite("turb_adve_switch", &lgr::opts_init_t<real_t>::turb_adve_switch)
       .def_readwrite("turb_cond_switch", &lgr::opts_init_t<real_t>::turb_cond_switch)
       .def_readwrite("turb_coal_switch", &lgr::opts_init_t<real_t>::turb_coal_switch)
@@ -293,6 +300,7 @@ BOOST_PYTHON_MODULE(libcloudphxx)
       .def_readwrite("sstp_coal", &lgr::opts_init_t<real_t>::sstp_coal)
       .def_readwrite("sstp_chem", &lgr::opts_init_t<real_t>::sstp_chem)
       .def_readwrite("supstp_src", &lgr::opts_init_t<real_t>::supstp_src)
+      .def_readwrite("supstp_rlx", &lgr::opts_init_t<real_t>::supstp_rlx)
       .def_readwrite("kernel", &lgr::opts_init_t<real_t>::kernel)
       .def_readwrite("adve_scheme", &lgr::opts_init_t<real_t>::adve_scheme)
       .def_readwrite("sd_conc", &lgr::opts_init_t<real_t>::sd_conc)
@@ -300,13 +308,18 @@ BOOST_PYTHON_MODULE(libcloudphxx)
       .def_readwrite("aerosol_independent_of_rhod", &lgr::opts_init_t<real_t>::aerosol_independent_of_rhod)
       .def_readwrite("sd_const_multi", &lgr::opts_init_t<real_t>::sd_const_multi)
       .def_readwrite("src_sd_conc", &lgr::opts_init_t<real_t>::src_sd_conc)
+      .def_readwrite("rlx_bins", &lgr::opts_init_t<real_t>::rlx_bins)
+      .def_readwrite("rlx_sd_per_bin", &lgr::opts_init_t<real_t>::rlx_sd_per_bin)
+      .def_readwrite("rlx_timescale", &lgr::opts_init_t<real_t>::rlx_timescale)
       .def_readwrite("n_sd_max", &lgr::opts_init_t<real_t>::n_sd_max)
       .def_readwrite("terminal_velocity", &lgr::opts_init_t<real_t>::terminal_velocity)
+      .def_readwrite("src_type", &lgr::opts_init_t<real_t>::src_type)
       .def_readwrite("RH_formula", &lgr::opts_init_t<real_t>::RH_formula)
       .def_readwrite("chem_rho", &lgr::opts_init_t<real_t>::chem_rho)
       .def_readwrite("RH_max", &lgr::opts_init_t<real_t>::RH_max)
       .def_readwrite("rng_seed", &lgr::opts_init_t<real_t>::rng_seed)
       .def_readwrite("rng_seed_init", &lgr::opts_init_t<real_t>::rng_seed_init)
+      .def_readwrite("rng_seed_init_switch", &lgr::opts_init_t<real_t>::rng_seed_init_switch)
       .def_readwrite("diag_incloud_time", &lgr::opts_init_t<real_t>::diag_incloud_time)
       .add_property("w_LS", &lgrngn::get_w_LS<real_t>, &lgrngn::set_w_LS<real_t>)
       .add_property("SGS_mix_len", &lgrngn::get_SGS_mix_len<real_t>, &lgrngn::set_SGS_mix_len<real_t>)
diff --git a/include/libcloudph++/blk_1m/extincl.hpp b/include/libcloudph++/blk_1m/extincl.hpp
index 955557c77..11e8338d7 100644
--- a/include/libcloudph++/blk_1m/extincl.hpp
+++ b/include/libcloudph++/blk_1m/extincl.hpp
@@ -3,6 +3,10 @@
 #include <algorithm>
 
 #include <boost/numeric/odeint.hpp>
+//#include <boost/numeric/odeint/stepper/runge_kutta4.hpp>
+//#include <boost/numeric/odeint/algebra/vector_space_algebra.hpp>
+//#include <boost/numeric/odeint/algebra/default_operations.hpp>
+//#include <boost/numeric/odeint/util/resizer.hpp>
 
 #include "../common/const_cp.hpp"
 #include "../common/theta_dry.hpp"
diff --git a/include/libcloudph++/common/detail/toms748.hpp b/include/libcloudph++/common/detail/toms748.hpp
index a571445dd..8e475c62d 100644
--- a/include/libcloudph++/common/detail/toms748.hpp
+++ b/include/libcloudph++/common/detail/toms748.hpp
@@ -12,6 +12,7 @@
 #include <cfloat>
 #include <cassert>
 #include <stdio.h>
+#include <stdint.h>
 
 namespace libcloudphxx {
 namespace common {
diff --git a/include/libcloudph++/lgrngn/RH_formula.hpp b/include/libcloudph++/lgrngn/RH_formula.hpp
index 79305d05e..2d41835c7 100644
--- a/include/libcloudph++/lgrngn/RH_formula.hpp
+++ b/include/libcloudph++/lgrngn/RH_formula.hpp
@@ -4,15 +4,18 @@ namespace libcloudphxx
 {
   namespace lgrngn
   {
-    namespace RH_formula_t //separate namespace to avoid member name conflicts with kernel enumerator, TODO: in c++11 change it to an enum class
-    {   
 //<listing>
-      enum RH_formula_t { pv_cc,      // RH = pv / pvs with pvs from the Clausius-Clapeyron equation
-                          rv_cc,      // RH = rv / rvs with rvs from the Clausius-Clapeyron equation
-                          pv_tet,     // RH = pv / pvs with pvs from the Tetens equation
-                          rv_tet      // RH = rv / rvs with rvs from the Tetens equation
-                        }; 
+    enum class RH_formula_t { pv_cc,      // RH = pv / pvs with pvs from the Clausius-Clapeyron equation
+                              rv_cc,      // RH = rv / rvs with rvs from the Clausius-Clapeyron equation
+                              pv_tet,     // RH = pv / pvs with pvs from the Tetens equation
+                              rv_tet      // RH = rv / rvs with rvs from the Tetens equation
+                            }; 
 //</listing>
-    }; 
+    const std::unordered_map<RH_formula_t, std::string> RH_formula_name = {
+      {RH_formula_t::pv_cc, "pv_cc"},
+      {RH_formula_t::rv_cc, "rv_cc"},
+      {RH_formula_t::pv_tet, "pv_tet"},
+      {RH_formula_t::rv_tet, "rv_tet"}
+    };
   };
 };
diff --git a/include/libcloudph++/lgrngn/advection_scheme.hpp b/include/libcloudph++/lgrngn/advection_scheme.hpp
index 21af39313..b3f124711 100644
--- a/include/libcloudph++/lgrngn/advection_scheme.hpp
+++ b/include/libcloudph++/lgrngn/advection_scheme.hpp
@@ -4,11 +4,14 @@ namespace libcloudphxx
 {
   namespace lgrngn
   {
-    namespace as_t //separate namespace to avoid member name conflicts with kernel enumerator, TODO: in c++11 change it to an enum class
-    {   
 //<listing>
-      enum as_t { undefined, implicit, euler, pred_corr }; 
+    enum class as_t { undefined, implicit, euler, pred_corr }; 
 //</listing>
-    }; 
+    const std::unordered_map<as_t, std::string> as_name = {
+      {as_t::undefined, "undefined"},
+      {as_t::implicit, "implicit"},
+      {as_t::euler, "euler"},
+      {as_t::pred_corr, "pred_corr"}
+    };
   };
 };
diff --git a/include/libcloudph++/lgrngn/backend.hpp b/include/libcloudph++/lgrngn/backend.hpp
index 756099b4e..94b438f4f 100644
--- a/include/libcloudph++/lgrngn/backend.hpp
+++ b/include/libcloudph++/lgrngn/backend.hpp
@@ -4,9 +4,15 @@ namespace libcloudphxx
 {
   namespace lgrngn
   {
-    // to make inclusion of Thrust not neccesarry here
 //<listing>
-    enum backend_t { serial, OpenMP, CUDA, multi_CUDA }; 
+    enum backend_t { undefined, serial, OpenMP, CUDA, multi_CUDA }; 
 //</listing>
+    const std::unordered_map<backend_t, std::string> backend_name = {
+      {undefined, "undefined"},
+      {serial, "serial"},
+      {OpenMP, "OpenMP"},
+      {CUDA, "CUDA"},
+      {multi_CUDA, "multi_CUDA"}
+    };
   };
 };
diff --git a/include/libcloudph++/lgrngn/ccn_source.hpp b/include/libcloudph++/lgrngn/ccn_source.hpp
new file mode 100644
index 000000000..02f4a1c9a
--- /dev/null
+++ b/include/libcloudph++/lgrngn/ccn_source.hpp
@@ -0,0 +1,20 @@
+#pragma once 
+
+namespace libcloudphxx
+{
+  namespace lgrngn
+  {
+//<listing>
+    enum class src_t { off, simple, matching };
+//</listing>
+    // off - no source
+    // simple -   src_dry_distros: new SD are added;                                                               src_dry_sizes: new SD are added
+    // matching - src_dry_distros: find similar SD and increase their multiplicity. Add new SD if match not found; src_dry_sizes: new SD are added
+
+    const std::unordered_map<src_t, std::string> src_name = {
+      {src_t::off, "off"},
+      {src_t::simple, "simple"},
+      {src_t::matching, "matching"}
+    };
+  };
+};
diff --git a/include/libcloudph++/lgrngn/kernel.hpp b/include/libcloudph++/lgrngn/kernel.hpp
index 65aa01dd5..59d8899ea 100644
--- a/include/libcloudph++/lgrngn/kernel.hpp
+++ b/include/libcloudph++/lgrngn/kernel.hpp
@@ -4,11 +4,23 @@ namespace libcloudphxx
 {
   namespace lgrngn
   {
-    namespace kernel_t //separate namespace to avoid member name conflicts with terminal_velocity enumerator, TODO: in c++11 change it to an enum class
-    {
 //<listing>
-      enum kernel_t { undefined, geometric, golovin, hall, hall_davis_no_waals, Long, onishi_hall, onishi_hall_davis_no_waals, hall_pinsky_1000mb_grav, hall_pinsky_cumulonimbus, hall_pinsky_stratocumulus, vohl_davis_no_waals}; 
+    enum class kernel_t { undefined, geometric, golovin, hall, hall_davis_no_waals, Long, onishi_hall, onishi_hall_davis_no_waals, hall_pinsky_1000mb_grav, hall_pinsky_cumulonimbus, hall_pinsky_stratocumulus, vohl_davis_no_waals}; 
 //</listing>
+
+    const std::unordered_map<kernel_t, std::string> kernel_name = {
+      {kernel_t::undefined, "undefined"},
+      {kernel_t::geometric, "geometric"},
+      {kernel_t::golovin, "golovin"},
+      {kernel_t::hall, "hall"},
+      {kernel_t::hall_davis_no_waals, "hall_davis_no_waals"},
+      {kernel_t::Long, "Long"},
+      {kernel_t::onishi_hall, "onishi_hall"},
+      {kernel_t::onishi_hall_davis_no_waals, "onishi_hall_davis_no_waals"},
+      {kernel_t::hall_pinsky_1000mb_grav, "hall_pinsky_1000mb_grav"},
+      {kernel_t::hall_pinsky_cumulonimbus, "hall_pinsky_cumulonimbus"},
+      {kernel_t::hall_pinsky_stratocumulus, "hall_pinsky_stratocumulus"},
+      {kernel_t::vohl_davis_no_waals, "vohl_davis_no_waals"}
     };
   };
 };
diff --git a/include/libcloudph++/lgrngn/opts.hpp b/include/libcloudph++/lgrngn/opts.hpp
index 3f54557d4..8a32a7e25 100644
--- a/include/libcloudph++/lgrngn/opts.hpp
+++ b/include/libcloudph++/lgrngn/opts.hpp
@@ -19,7 +19,7 @@ namespace libcloudphxx
     struct opts_t 
     {
       // process toggling
-      bool adve, sedi, subs, cond, coal, src, rcyc, turb_adve, turb_cond, turb_coal;
+      bool adve, sedi, subs, cond, coal, src, rlx, rcyc, turb_adve, turb_cond, turb_coal;
 
       // RH limit for drop growth
       real_t RH_max;       
@@ -33,7 +33,7 @@ namespace libcloudphxx
 
       // ctor with defaults (C++03 compliant) ...
       opts_t() : 
-        adve(true), sedi(true), subs(false), cond(true), coal(true), src(false), rcyc(false),
+        adve(true), sedi(true), subs(false), cond(true), coal(true), src(false), rlx(false), rcyc(false),
         chem_dsl(false), chem_dsc(false), chem_rct(false),
         turb_adve(false), turb_cond(false), turb_coal(false),
         RH_max(44), // :) (anything greater than 1.1 would be enough
diff --git a/include/libcloudph++/lgrngn/opts_init.hpp b/include/libcloudph++/lgrngn/opts_init.hpp
index 6249414d0..504e5f8bc 100644
--- a/include/libcloudph++/lgrngn/opts_init.hpp
+++ b/include/libcloudph++/lgrngn/opts_init.hpp
@@ -12,6 +12,7 @@
 #include "terminal_velocity.hpp"
 #include "advection_scheme.hpp"
 #include "RH_formula.hpp"
+#include "ccn_source.hpp"
 #include "../common/chem.hpp"
 
 namespace libcloudphxx
@@ -48,9 +49,6 @@ namespace libcloudphxx
 
       // no. of substeps 
       int sstp_cond, sstp_coal; 
-  
-      // timestep interval at which source will be applied
-      int supstp_src;
 
       // Lagrangian domain extents
       real_t x0, y0, z0, x1, y1, z1;
@@ -74,30 +72,17 @@ namespace libcloudphxx
       // should be enough to store particles from sources
       unsigned long long n_sd_max; 
 
-      // source distro per unit time
-      dry_distros_t src_dry_distros;
-
-      // number of SDs created from src_dry_distros per cell per source iteration
-      unsigned long long src_sd_conc;
-
-      // dry sizes of droplets added from the source, STP_concentration created per unit time instead of the STP_concentration 
-      dry_sizes_t src_dry_sizes;
-
-      // box in which aerosol from source will be created
-      // will be rounded to cell number - cells are supposed to be uniform
-      real_t src_x0, src_y0, src_z0, src_x1, src_y1, src_z1;
-
       // coalescence Kernel type
-      kernel_t::kernel_t kernel;
+      kernel_t kernel;
 
       // terminal velocity formula
-      vt_t::vt_t terminal_velocity;
+      vt_t terminal_velocity;
 
       // super-droplet advection scheme
-      as_t::as_t adve_scheme;
+      as_t adve_scheme;
 
       // RH formula
-      RH_formula_t::RH_formula_t RH_formula;
+      RH_formula_t RH_formula;
 //</listing>
  
       // coalescence kernel parameters
@@ -107,7 +92,7 @@ namespace libcloudphxx
            coal_switch,  // if false no coalescence throughout the whole simulation
            sedi_switch,  // if false no sedimentation throughout the whole simulation
            subs_switch,  // if false no subsidence throughout the whole simulation
-           src_switch,   // if false no source throughout the whole simulation
+           rlx_switch,   // if false no relaxation throughout the whole simulation
            turb_adve_switch,   // if true, turbulent motion of SDs is modeled
            turb_cond_switch,   // if true, turbulent condensation of SDs is modeled
            turb_coal_switch,   // if true, turbulent coalescence kernels can be used
@@ -126,6 +111,9 @@ namespace libcloudphxx
       int rng_seed,
           rng_seed_init; // seed used to init SD (positions and dry sizes)
 
+      // should the separate rng seed for initialization be used?
+      bool rng_seed_init_switch;
+
       // no of GPUs per MPI node to use, 0 for all available
       int dev_count; 
 
@@ -138,7 +126,7 @@ namespace libcloudphxx
       // SGS mixing length profile [m]
       std::vector<real_t> SGS_mix_len;
 
-      real_t rd_min; // minimal dry radius of droplets (works only for init from spectrum)
+      real_t rd_min, rd_max; // min/max dry radius of droplets [m] 
 
       bool no_ccn_at_init; // if true, no ccn / SD are put at the start of the simulation
 
@@ -146,6 +134,57 @@ namespace libcloudphxx
            periodic_topbot_walls; // if true, top and bot walls are periodic. Open otherwise
 
 
+      // --- aerosol source stuff ---
+
+      // type of CCN source
+      src_t src_type;
+
+      // source distro per unit time
+      dry_distros_t src_dry_distros;
+
+      // number of SDs created from src_dry_distros per cell per source iteration
+      unsigned long long src_sd_conc;
+
+      // dry sizes of droplets added from the source, STP_concentration created per unit time instead of the STP_concentration 
+      dry_sizes_t src_dry_sizes;
+
+      // box in which aerosol from source will be created
+      // will be rounded to cell number - cells are supposed to be uniform
+      real_t src_x0, src_y0, src_z0, src_x1, src_y1, src_z1;
+  
+      // timestep interval at which source will be applied
+      int supstp_src;
+
+
+      // --- aerosol relaxation stuff ---
+      // initial dry sizes of aerosol
+      // defined with a distribution
+      // uses shared_ptr to make opts_init copyable
+      typedef std::unordered_map<
+        real_t,                // kappa
+        std::tuple<
+          std::shared_ptr<unary_function<real_t>>, // n(ln(rd)) @ STP; alternatively it's n(ln(rd)) independent of rhod if aerosol_independent_of_rhod=true
+          std::pair<real_t, real_t>, // kappa range of CCN considered to belong to this distribution, ranges of different members of the map need to be exclusive (TODO: add a check of this)
+          std::pair<real_t, real_t>  // range of altitudes at which this relaxation acts
+        >
+      > rlx_dry_distros_t;
+
+      rlx_dry_distros_t rlx_dry_distros;
+
+      // number of bins into which the relaxation distro is divided
+      unsigned long long rlx_bins;
+
+      // number of SD created per bin
+      real_t rlx_sd_per_bin; // floating, because it gets divided by the number of GPUs per node * number of nodes
+  
+      // timestep interval at which relaxation will be applied
+      int supstp_rlx;
+
+      // relaxation time scale [s]
+      real_t rlx_timescale;
+
+      // -- ctors ---
+
       // ctor with defaults (C++03 compliant) ...
       opts_init_t() : 
         nx(0), ny(0), nz(0),
@@ -158,12 +197,12 @@ namespace libcloudphxx
         sd_const_multi(0),
         dt(0),   
         sstp_cond(1), sstp_coal(1), sstp_chem(1),         
-        supstp_src(1),
         chem_switch(false),  // chemical reactions turned off by default
         sedi_switch(true),  // sedimentation turned on by default
         subs_switch(false),  // subsidence turned off by default
         coal_switch(true),  // coalescence turned on by default
-        src_switch(false),  // source turned off by default
+        src_type(src_t::off),  // source turned off by default
+        rlx_switch(false), 
         exact_sstp_cond(false),
         turb_cond_switch(false),
         turb_adve_switch(false),
@@ -171,7 +210,7 @@ namespace libcloudphxx
         RH_max(.95), // value seggested in Lebo and Seinfeld 2011
         chem_rho(0), // dry particle density  //TODO add checking if the user gave a different value (np w init)  (was 1.8e-3)
         rng_seed(44),
-        rng_seed_init(rng_seed),
+        rng_seed_init(44),
         terminal_velocity(vt_t::undefined),
         kernel(kernel_t::undefined),
         adve_scheme(as_t::implicit),
@@ -186,12 +225,19 @@ namespace libcloudphxx
         src_y1(0),
         src_z0(0),
         src_z1(0),
-        rd_min(0.),
+        supstp_src(1),
+        rlx_bins(0),
+        rlx_timescale(1),
+        rlx_sd_per_bin(0),
+        supstp_rlx(1),
+        rd_min(-1), // negative means that rd_min will be automatically detected
+        rd_max(-1),
         diag_incloud_time(false),
         no_ccn_at_init(false),
         open_side_walls(false),
         periodic_topbot_walls(false),
-        variable_dt_switch(false)
+        variable_dt_switch(false),
+        rng_seed_init_switch(false)
       {}
 
       // dtor (just to silence -Winline warnings)
diff --git a/include/libcloudph++/lgrngn/terminal_velocity.hpp b/include/libcloudph++/lgrngn/terminal_velocity.hpp
index 8ecc59b88..3a21417d9 100644
--- a/include/libcloudph++/lgrngn/terminal_velocity.hpp
+++ b/include/libcloudph++/lgrngn/terminal_velocity.hpp
@@ -4,11 +4,16 @@ namespace libcloudphxx
 {
   namespace lgrngn
   {
-    namespace vt_t //separate namespace to avoid member name conflicts with kernel enumerator, TODO: in c++11 change it to an enum class
-    {   
 //<listing>
-      enum vt_t { undefined, beard76, beard77, beard77fast, khvorostyanov_spherical, khvorostyanov_nonspherical }; 
+    enum class vt_t { undefined, beard76, beard77, beard77fast, khvorostyanov_spherical, khvorostyanov_nonspherical }; 
 //</listing>
-    }; 
+    const std::unordered_map<vt_t, std::string> vt_name = {
+      {vt_t::undefined, "undefined"},
+      {vt_t::beard76, "beard76"},
+      {vt_t::beard77, "beard77"},
+      {vt_t::beard77fast, "beard77fast"},
+      {vt_t::khvorostyanov_spherical, "khvorostyanov_spherical"},
+      {vt_t::khvorostyanov_nonspherical, "khvorostyanov_nonspherical"}
+    };
   };
 };
diff --git a/libcloudph++-config.cmake.in b/libcloudph++-config.cmake.in
index 1bf38e806..3011c7529 100644
--- a/libcloudph++-config.cmake.in
+++ b/libcloudph++-config.cmake.in
@@ -5,6 +5,6 @@ include(CMakeFindDependencyMacro)
 
 find_dependency(Boost REQUIRED)
 
-if(NOT TARGET clphxx::cloudphxx_lgrngn)
-    include("${cloudphxx_CMAKE_DIR}/cloudphxx_lgrngn-targets.cmake")
+if(NOT TARGET clphxx::cloudphxx_lgrngn OR NOT TARGET clphxx::cloudphxx_headers)
+    include("${cloudphxx_CMAKE_DIR}/cloudphxx-targets.cmake")
 endif()
diff --git a/src/detail/config.hpp b/src/detail/config.hpp
index 25c36950f..44c01d5bd 100644
--- a/src/detail/config.hpp
+++ b/src/detail/config.hpp
@@ -28,7 +28,9 @@ namespace libcloudphxx
         // range of beard77fast bins:
         const real_t vt0_ln_r_min, vt0_ln_r_max;
 
-        const real_t bcond_tolerance = 2e-4; // [m]; error tolerance for position near bcond after distmem copy  
+        const real_t bcond_tolerance = 5e-4; // [m]; error tolerance for position near bcond after distmem copy  
+
+        const real_t rlx_conc_tolerance = 0.1; // tolerance of the relaxation scheme; new SD will be created if missing_conc/expected_conc > tolerance
 
         // ctor
         config():
diff --git a/src/detail/distmem_opts.hpp b/src/detail/distmem_opts.hpp
index dd9601b2d..79f400a5e 100644
--- a/src/detail/distmem_opts.hpp
+++ b/src/detail/distmem_opts.hpp
@@ -46,6 +46,8 @@ namespace libcloudphxx
 
         opts_init.n_sd_max = opts_init.n_sd_max / size + 1;
 
+        opts_init.rlx_sd_per_bin /= size;
+
         return n_x_bfr;
       }
     }
diff --git a/src/detail/urand.hpp b/src/detail/urand.hpp
index 6e8213be5..b927585e5 100644
--- a/src/detail/urand.hpp
+++ b/src/detail/urand.hpp
@@ -21,7 +21,7 @@ namespace libcloudphxx
       {
 #if !defined(__NVCC__)
         // serial version using C++11's <random>
-        using engine_t = std::mt19937;
+        using engine_t = std::mt19937; // TODO: if real_t = double, use std::mt19937_64
         using dist_u01_t = std::uniform_real_distribution<real_t>;
         using dist_normal01_t = std::normal_distribution<real_t>;
         using dist_un_t = std::uniform_int_distribution<unsigned int>;
@@ -66,7 +66,7 @@ namespace libcloudphxx
           const thrust_size_t n
         ) {
           // note: generate_n copies the third argument!!!
-          std::generate_n(u01.begin(), n, fnctr_u01({engine, dist_u01})); 
+          std::generate_n(u01.begin(), n, fnctr_u01({engine, dist_u01})); // [0,1) range 
         }
 
         void generate_normal_n(
@@ -131,10 +131,12 @@ namespace libcloudphxx
           const thrust_size_t n
         )
         {
-          int status = curandGenerateUniform(gen, thrust::raw_pointer_cast(v.data()), n);
+          int status = curandGenerateUniform(gen, thrust::raw_pointer_cast(v.data()), n); // (0,1] range
           assert(status == CURAND_STATUS_SUCCESS /* && "curandGenerateUniform failed"*/);
           _unused(status);
-
+          // shift into the expected [0,1) range
+          namespace arg = thrust::placeholders;
+          thrust::transform(v.begin(), v.begin() + n, v.begin(), float(1) - arg::_1);
         }
 
         void generate_n(
@@ -142,9 +144,12 @@ namespace libcloudphxx
           const thrust_size_t n
         )
         {
-          int status = curandGenerateUniformDouble(gen, thrust::raw_pointer_cast(v.data()), n);
+          int status = curandGenerateUniformDouble(gen, thrust::raw_pointer_cast(v.data()), n); // (0,1] range
           assert(status == CURAND_STATUS_SUCCESS /* && "curandGenerateUniform failed"*/);
           _unused(status);
+          // shift into the expected [0,1) range
+          namespace arg = thrust::placeholders;
+          thrust::transform(v.begin(), v.begin() + n, v.begin(), double(1) - arg::_1);
         }
 
         void generate_normal_n(
diff --git a/src/impl/particles_impl.ipp b/src/impl/particles_impl.ipp
index 409fb074f..b07d3332c 100644
--- a/src/impl/particles_impl.ipp
+++ b/src/impl/particles_impl.ipp
@@ -11,9 +11,9 @@
 
 #include <boost/array.hpp>
 #include <boost/numeric/odeint.hpp>
-#include <boost/numeric/odeint/external/thrust/thrust_algebra.hpp>
-#include <boost/numeric/odeint/external/thrust/thrust_operations.hpp>
-#include <boost/numeric/odeint/external/thrust/thrust_resize.hpp>
+//#include <boost/numeric/odeint/stepper/runge_kutta4.hpp>
+//#include <boost/numeric/odeint/util/resizer.hpp>
+#include <boost/numeric/odeint/external/thrust/thrust.hpp>
 
 #include <map>
 #include <set>
@@ -51,7 +51,7 @@ namespace libcloudphxx
                     n_part_to_init;    // number of SDs to be initialized by source
       detail::rng<real_t, device> rng;
       detail::config<real_t> config;
-      as_t::as_t adve_scheme;         // actual advection scheme used, might be different from opts_init.adve_scheme if courant>halo
+      as_t adve_scheme;         // actual advection scheme used, might be different from opts_init.adve_scheme if courant>halo
 
       // pointer to collision kernel
       kernel_base<real_t, n_t> *p_kernel;
@@ -172,7 +172,7 @@ namespace libcloudphxx
            allow_sstp_chem;
 
       // timestep counter
-      n_t stp_ctr;
+      n_t src_stp_ctr, rlx_stp_ctr;
 
       // maps linear Lagrangian component indices into Eulerian component linear indices
       // the map key is the address of the Thrust vector
@@ -276,10 +276,10 @@ namespace libcloudphxx
 
       // methods
 
-      // fills u01[0:n] with random numbers
+      // fills u01 with n random real numbers uniformly distributed in range [0,1)
       void rand_u01(thrust_size_t n) { rng.generate_n(u01, n); }
 
-      // fills un[0:n] with random numbers
+      // fills un with n random integers uniformly distributed on the whole integer range
       void rand_un(thrust_size_t n) { rng.generate_n(un, n); }
 
       // max(1, n)
@@ -310,7 +310,8 @@ namespace libcloudphxx
         n_user_params(_opts_init.kernel_parameters.size()),
         un(tmp_device_n_part),
         rng(_opts_init.rng_seed),
-        stp_ctr(0),
+        src_stp_ctr(0),
+        rlx_stp_ctr(0),
 	bcond(bcond),
         n_x_bfr(0),
         n_cell_bfr(0),
@@ -424,7 +425,7 @@ namespace libcloudphxx
       void init_SD_with_distros_sd_conc(const common::unary_function<real_t> &, const real_t &);
       void init_SD_with_distros_tail(const common::unary_function<real_t> &, const real_t);
       void init_SD_with_distros_const_multi(const common::unary_function<real_t> &);
-      void init_SD_with_distros_finalize(const real_t &);
+      void init_SD_with_distros_finalize(const real_t &, const bool unravel_ijk = true);
       void init_SD_with_sizes();
       void init_sanity_check(
         const arrinfo_t<real_t>, const arrinfo_t<real_t>, const arrinfo_t<real_t>,
@@ -486,6 +487,8 @@ namespace libcloudphxx
       void hskpng_sort();
       void hskpng_shuffle_and_sort();
       void hskpng_count();
+      void ravel_ijk(const thrust_size_t begin_shift = 0);
+      void unravel_ijk(const thrust_size_t begin_shift = 0);
       void hskpng_ijk();
       void hskpng_Tpr();
       void hskpng_mfp();
@@ -510,8 +513,20 @@ namespace libcloudphxx
       void moms_rng(
         const real_t &min, const real_t &max, 
         const typename thrust_device::vector<real_t>::iterator &vec_bgn,
+        const thrust_size_t npart,
         const bool cons
       ); 
+      void moms_rng(
+        const real_t &min, const real_t &max, 
+        const typename thrust_device::vector<real_t>::iterator &vec_bgn,
+        const bool cons
+      ); 
+      void moms_calc(
+        const typename thrust_device::vector<real_t>::iterator &vec_bgn,
+        const thrust_size_t npart,
+        const real_t power,
+        const bool specific = true
+      );
       void moms_calc(
         const typename thrust_device::vector<real_t>::iterator &vec_bgn,
         const real_t power,
@@ -563,9 +578,14 @@ namespace libcloudphxx
       void bcnd();
 
       void src(const real_t &dt);
+      void src_dry_distros_simple(const real_t &dt);
+      void src_dry_distros_matching(const real_t &dt);
       void src_dry_distros(const real_t &dt);
       void src_dry_sizes(const real_t &dt);
 
+      void rlx(const real_t);
+      void rlx_dry_distros(const real_t);
+
       void sstp_step(const int &step);
       void sstp_step_exact(const int &step);
       void sstp_step_ssp(const real_t &dt);
@@ -575,6 +595,11 @@ namespace libcloudphxx
 
       void post_copy(const opts_t<real_t>&);
 
+      // two functions for calculating changes in rv and th due to condensation on SDs initialized during simulation, e.g. via source or relaxation
+      // NOTE: curently not used, because of small sizes of these droplets
+      void ante_adding_SD();
+      void post_adding_SD();
+
       // distmem stuff
       void xchng_domains();
       void xchng_courants();
diff --git a/src/impl/particles_impl_ante_adding_SD.ipp b/src/impl/particles_impl_ante_adding_SD.ipp
new file mode 100644
index 000000000..b77502a11
--- /dev/null
+++ b/src/impl/particles_impl_ante_adding_SD.ipp
@@ -0,0 +1,42 @@
+// vim:filetype=cpp
+/** @file
+  * @copyright University of Warsaw
+  * @section LICENSE
+  * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
+  */
+
+namespace libcloudphxx
+{
+  namespace lgrngn
+  {
+    template <typename real_t, backend_t device>
+    void particles_t<real_t, device>::impl::ante_adding_SD()
+    {   
+      // --- calc liquid water content before src ---
+      hskpng_sort(); 
+      thrust_device::vector<real_t> &drv(tmp_device_real_cell1); // NOTE: this can't be changed by any function called before a call to after_adding_SD...
+      thrust::fill(drv.begin(), drv.end(), real_t(0.));
+
+      moms_all();
+      moms_calc(rw2.begin(), real_t(3./2.));
+
+      // drv = - tot_vol_bfr
+      thrust::transform(
+        count_mom.begin(), count_mom.begin() + count_n,                    // input - 1st arg
+        thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // output
+        thrust::negate<real_t>()
+      );
+
+      // drv = -tot_vol_bfr + dry_vol_bfr
+/*
+      moms_calc(rd3.begin(), 1);
+      thrust::transform(
+        count_mom.begin(), count_mom.begin() + count_n,                    // input - 1st arg
+        thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // 2nd arg
+        thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // output
+        thrust::plus<real_t>()
+      );
+*/
+    }
+  };  
+};
diff --git a/src/impl/particles_impl_bcnd.ipp b/src/impl/particles_impl_bcnd.ipp
index 8fcf255d5..6eebda9ee 100644
--- a/src/impl/particles_impl_bcnd.ipp
+++ b/src/impl/particles_impl_bcnd.ipp
@@ -124,8 +124,8 @@ namespace libcloudphxx
               arg::_1 >= opts_init.x1
             ) - rgt_id.begin();
 
-            const auto no_of_n_vctrs_copied(int(1));
-            const auto no_of_real_vctrs_copied(distmem_real_vctrs.size());
+            const int no_of_n_vctrs_copied(int(1));
+            const int no_of_real_vctrs_copied(distmem_real_vctrs.size());
 
             if(lft_count*no_of_n_vctrs_copied > in_n_bfr.size() || rgt_count*no_of_n_vctrs_copied  > in_n_bfr.size())
             {
diff --git a/src/impl/particles_impl_cond_sstp.ipp b/src/impl/particles_impl_cond_sstp.ipp
index a322458ca..01c145724 100644
--- a/src/impl/particles_impl_cond_sstp.ipp
+++ b/src/impl/particles_impl_cond_sstp.ipp
@@ -16,7 +16,7 @@ namespace libcloudphxx
       {
         RH<real_t> resolved_RH;
 
-        RH_sgs(RH_formula_t::RH_formula_t RH_formula):
+        RH_sgs(RH_formula_t RH_formula):
           resolved_RH(RH_formula)
         {}
 
diff --git a/src/impl/particles_impl_dist_analysis.ipp b/src/impl/particles_impl_dist_analysis.ipp
index 74cafaafd..ac63c9740 100644
--- a/src/impl/particles_impl_dist_analysis.ipp
+++ b/src/impl/particles_impl_dist_analysis.ipp
@@ -20,17 +20,10 @@ namespace libcloudphxx
       const real_t dt
     )
     {
-      // probing the spectrum to find rd_min-rd_max range     
-      // when analysing distro for source, multiplier takes into account that
-      // the distribution is assumed to represent number of particles created per unit of time! 
-      // TODO: document that
-
-      // values to start the search 
-      real_t rd_min = config.rd_min_init, rd_max = config.rd_max_init;
-
-      bool found_optimal_range = false;
-      while (!found_optimal_range)
+      if(opts_init.rd_min >= 0 && opts_init.rd_max >= 0) // user-defined bin edges
       {
+        real_t rd_min = opts_init.rd_min, rd_max = opts_init.rd_max;
+
         multiplier = log(rd_max / rd_min)
           / sd_conc
           * dt
@@ -41,24 +34,46 @@ namespace libcloudphxx
 
         log_rd_min = log(rd_min);
         log_rd_max = log(rd_max);
+      }
+      else if (opts_init.rd_min < 0 && opts_init.rd_max < 0) // automatic detection of bin edges
+      {
+        // probing the spectrum to find rd_min-rd_max range     
+        // when analysing distro for source, multiplier takes into account that
+        // the distribution is assumed to represent number of particles created per unit of time! 
+        // TODO: document that
+
+        // values to start the search 
+        real_t rd_min = config.rd_min_init, rd_max = config.rd_max_init;
+
+        bool found_optimal_range = false;
+        while (!found_optimal_range)
+        {
+          multiplier = log(rd_max / rd_min)
+            / sd_conc
+            * dt
+            * (n_dims == 0
+              ? dv[0]
+              : (opts_init.dx * opts_init.dy * opts_init.dz)
+            );
 
-        impl::n_t
-          n_min = n_of_lnrd_stp(log_rd_min) * multiplier,
-          n_max = n_of_lnrd_stp(log_rd_max) * multiplier;
+          log_rd_min = log(rd_min);
+          log_rd_max = log(rd_max);
 
-        if (rd_min == config.rd_min_init && n_min != 0)
-          throw std::runtime_error(detail::formatter() << "Initial dry radii distribution is non-zero (" << n_min << ") for rd_min_init (" << config.rd_min_init <<")");
-        if (rd_max == config.rd_max_init && n_max != 0)
-          throw std::runtime_error(detail::formatter() << "Initial dry radii distribution is non-zero (" << n_max << ") for rd_max_init (" << config.rd_max_init <<")");
+          impl::n_t
+            n_min = n_of_lnrd_stp(log_rd_min) * multiplier,
+            n_max = n_of_lnrd_stp(log_rd_max) * multiplier;
 
-        if      (n_min == 0) rd_min *= 1.01;
-        else if (n_max == 0) rd_max /= 1.01;
-        else found_optimal_range = true;
+          if (rd_min == config.rd_min_init && n_min != 0)
+            throw std::runtime_error(detail::formatter() << "Initial dry radii distribution is non-zero (" << n_min << ") for rd_min_init (" << config.rd_min_init <<")");
+          if (rd_max == config.rd_max_init && n_max != 0)
+            throw std::runtime_error(detail::formatter() << "Initial dry radii distribution is non-zero (" << n_max << ") for rd_max_init (" << config.rd_max_init <<")");
+
+          if      (n_min == 0) rd_min *= 1.01;
+          else if (n_max == 0) rd_max /= 1.01;
+          else found_optimal_range = true;
+        }
       }
-#if !defined(__NVCC__)
-      using std::max;
-#endif
-      log_rd_min = max(log_rd_min, real_t(log(opts_init.rd_min))); // user-defined lower limit for rd
+      else assert(false && "opts_init.rd_min * opts_init.rd_max < 0");
     };
 
     template <typename real_t, backend_t device>
@@ -66,37 +81,42 @@ namespace libcloudphxx
       const common::unary_function<real_t> &n_of_lnrd_stp
     )
     {
-      // TODO: add same sanity check as above
-      //       how does brent algorithm work for functions with multiple minima??
-      std::pair<real_t, real_t> init_distr_max; // [ln(position of distribution's maximum), -function value at maximum]
-      boost::uintmax_t n_iter = config.n_iter;
-      init_distr_max = boost::math::tools::brent_find_minima(detail::eval_and_mul<real_t>(n_of_lnrd_stp, -1), log(config.rd_min_init), log(config.rd_max_init), 200, n_iter); // bits = 200 to make algorithm choose max precision available
+      if(opts_init.rd_min >= 0 && opts_init.rd_max >= 0) // user-defined bin edges
+      {
+        log_rd_min = log(opts_init.rd_min);
+        log_rd_max = log(opts_init.rd_max);
+      }
+      else if (opts_init.rd_min < 0 && opts_init.rd_max < 0) // automatic detection of bin edges
+      {
+        // TODO: add same sanity check as above
+        //       how does brent algorithm work for functions with multiple minima??
+        std::pair<real_t, real_t> init_distr_max; // [ln(position of distribution's maximum), -function value at maximum]
+        boost::uintmax_t n_iter = config.n_iter;
+        init_distr_max = boost::math::tools::brent_find_minima(detail::eval_and_mul<real_t>(n_of_lnrd_stp, -1), log(config.rd_min_init), log(config.rd_max_init), 200, n_iter); // bits = 200 to make algorithm choose max precision available
 
-      real_t init_dist_bound_value = -init_distr_max.second / config.threshold; // value of the distribution at which we bind it
-      n_iter = config.n_iter;
-      // TODO: it could be written more clearly by creating an object detail::eval_and_oper<real_t>(*n_of_lnrd_stp, -init_dist_bound_value, 1), but for some reason it doesnt give the correct values
-      log_rd_min = 
-        common::detail::toms748_solve(
-          detail::eval_and_add<real_t>(n_of_lnrd_stp, -init_dist_bound_value),
-          real_t(log(config.rd_min_init)), init_distr_max.first,
-          detail::eval_and_add<real_t>(n_of_lnrd_stp, -init_dist_bound_value)(real_t(log(config.rd_min_init))),
-          detail::eval_and_add<real_t>(n_of_lnrd_stp, -init_dist_bound_value)(init_distr_max.first),
-          config.eps_tolerance, n_iter
-        );
+        real_t init_dist_bound_value = -init_distr_max.second / config.threshold; // value of the distribution at which we bind it
+        n_iter = config.n_iter;
+        // TODO: it could be written more clearly by creating an object detail::eval_and_oper<real_t>(*n_of_lnrd_stp, -init_dist_bound_value, 1), but for some reason it doesnt give the correct values
+        log_rd_min = 
+          common::detail::toms748_solve(
+            detail::eval_and_add<real_t>(n_of_lnrd_stp, -init_dist_bound_value),
+            real_t(log(config.rd_min_init)), init_distr_max.first,
+            detail::eval_and_add<real_t>(n_of_lnrd_stp, -init_dist_bound_value)(real_t(log(config.rd_min_init))),
+            detail::eval_and_add<real_t>(n_of_lnrd_stp, -init_dist_bound_value)(init_distr_max.first),
+            config.eps_tolerance, n_iter
+          );
 
-      n_iter = config.n_iter;
-      log_rd_max = 
-        common::detail::toms748_solve(
-          detail::eval_and_add<real_t>(n_of_lnrd_stp, -init_dist_bound_value),
-          init_distr_max.first, real_t(log(config.rd_max_init)),
-          detail::eval_and_add<real_t>(n_of_lnrd_stp, -init_dist_bound_value)(init_distr_max.first),
-          detail::eval_and_add<real_t>(n_of_lnrd_stp, -init_dist_bound_value)(real_t(log(config.rd_max_init))),
-          config.eps_tolerance, n_iter
-        );
-#if !defined(__NVCC__)
-      using std::max;
-#endif
-      log_rd_min = max(log_rd_min, real_t(log(opts_init.rd_min))); // user-defined lower limit for rd
+        n_iter = config.n_iter;
+        log_rd_max = 
+          common::detail::toms748_solve(
+            detail::eval_and_add<real_t>(n_of_lnrd_stp, -init_dist_bound_value),
+            init_distr_max.first, real_t(log(config.rd_max_init)),
+            detail::eval_and_add<real_t>(n_of_lnrd_stp, -init_dist_bound_value)(init_distr_max.first),
+            detail::eval_and_add<real_t>(n_of_lnrd_stp, -init_dist_bound_value)(real_t(log(config.rd_max_init))),
+            config.eps_tolerance, n_iter
+          );
+      }
+      else assert(false && "opts_init.rd_min * opts_init.rd_max < 0");
     }
   };
 };
diff --git a/src/impl/particles_impl_hskpng_Tpr.ipp b/src/impl/particles_impl_hskpng_Tpr.ipp
index f5a161ef7..38433bcad 100644
--- a/src/impl/particles_impl_hskpng_Tpr.ipp
+++ b/src/impl/particles_impl_hskpng_Tpr.ipp
@@ -133,9 +133,9 @@ namespace libcloudphxx
 */
 
         // an alternative implementation with formula choice at functor call
-        const RH_formula_t::RH_formula_t RH_formula;
+        const RH_formula_t RH_formula;
         // the type of formula to be used for RH
-        RH(RH_formula_t::RH_formula_t RH_formula):
+        RH(RH_formula_t RH_formula):
           RH_formula(RH_formula)
         {}
 
diff --git a/src/impl/particles_impl_hskpng_count.ipp b/src/impl/particles_impl_hskpng_count.ipp
index 2619094c1..f2e4398b6 100644
--- a/src/impl/particles_impl_hskpng_count.ipp
+++ b/src/impl/particles_impl_hskpng_count.ipp
@@ -17,6 +17,7 @@ namespace libcloudphxx
     {   
       hskpng_sort();
 
+
       // computing count_* - number of particles per grid cell
       thrust::pair<
         thrust_device::vector<thrust_size_t>::iterator,
@@ -28,6 +29,21 @@ namespace libcloudphxx
         count_num.begin()                       // output - values
       );
       count_n = it_pair.first - count_ijk.begin();
+#if !defined(NDEBUG)
+      if(count_n > n_cell)
+      {
+        std::cerr << "count_n: " << count_n << std::endl;
+        std::cerr << "n_cell: " << n_cell << std::endl;
+        /*
+        std::cerr << "ijk:" << std::endl;
+        debug::print(ijk);
+        */
+        std::cerr << "sorted_ijk:" << std::endl;
+        debug::print(sorted_ijk);
+        std::cerr << "count_ijk:" << std::endl;
+        debug::print(count_ijk);
+      }
+#endif
       assert(count_n <= n_cell);
     }   
   };  
diff --git a/src/impl/particles_impl_hskpng_ijk.ipp b/src/impl/particles_impl_hskpng_ijk.ipp
index a06b67a3c..2320462f2 100644
--- a/src/impl/particles_impl_hskpng_ijk.ipp
+++ b/src/impl/particles_impl_hskpng_ijk.ipp
@@ -28,58 +28,39 @@ namespace libcloudphxx
       };
     };
 
+    // calc ijk from i, j and k
     template <typename real_t, backend_t device>
-    void particles_t<real_t, device>::impl::hskpng_ijk()
-    {   
-      // helper functor
-      struct {
-        void operator()(
-          thrust_device::vector<real_t> &vx,
-          thrust_device::vector<thrust_size_t> &vi,
-          const real_t &vd
-        ) {
-          thrust::transform(
-            vx.begin(), vx.end(),                                // input
-            vi.begin(),                                          // output
-            detail::divide_by_constant_and_cast<double, thrust_size_t>(vd) // has to be done on doubles to avoid i==nx due to low precision of nvcc math
-          );
-        }
-      } helper;
-
-      if (opts_init.nx != 0) helper(x, i, opts_init.dx);
-      if (opts_init.ny != 0) helper(y, j, opts_init.dy);
-      if (opts_init.nz != 0) helper(z, k, opts_init.dz);
-
-      // raveling i, j & k into ijk
+    void particles_t<real_t, device>::impl::ravel_ijk(const thrust_size_t begin_shift) // default = 0
+    {
       switch (n_dims)
       {
         case 0: 
           break;
         case 1:
-          thrust::copy(i.begin(), i.end(), ijk.begin());
+          thrust::copy(i.begin()+begin_shift, i.end(), ijk.begin()+begin_shift);
           break;
         case 2:
           namespace arg = thrust::placeholders;
           thrust::transform(
-            i.begin(), i.end(), // input - first arg
-            k.begin(),          // input - second arg
-            ijk.begin(),        // output
+            i.begin()+begin_shift, i.end(), // input - first arg
+            k.begin()+begin_shift,          // input - second arg
+            ijk.begin()+begin_shift,        // output
             arg::_1 * opts_init.nz + arg::_2   // assuming z varies first
           );
           break;
         case 3:
           namespace arg = thrust::placeholders;
           thrust::transform(
-            i.begin(), i.end(), // input - first arg
-            j.begin(),          // input - second arg
-            ijk.begin(),        // output
+            i.begin()+begin_shift, i.end(), // input - first arg
+            j.begin()+begin_shift,          // input - second arg
+            ijk.begin()+begin_shift,        // output
             arg::_1 * (opts_init.nz * opts_init.ny) + 
             arg::_2 * opts_init.nz
           );
           thrust::transform(
-            ijk.begin(), ijk.end(),
-            k.begin(),
-            ijk.begin(), // in-place!
+            ijk.begin()+begin_shift, ijk.end(),
+            k.begin()+begin_shift,
+            ijk.begin()+begin_shift, // in-place!
             arg::_1 + arg::_2
           );
           // TODO: replace these two transforms with single one
@@ -87,6 +68,84 @@ namespace libcloudphxx
         default:
           assert(false);
       }
+    }
+
+
+    // calc i, j and k from ijk
+    template <typename real_t, backend_t device>
+    void particles_t<real_t, device>::impl::unravel_ijk(const thrust_size_t begin_shift) // default = 0
+    {
+      switch(n_dims)
+      {
+        case 3:
+          namespace arg = thrust::placeholders;
+          // y
+          thrust::transform(
+            ijk.begin() + begin_shift, ijk.end(), // input - first arg
+            j.begin() + begin_shift,        // output
+            (arg::_1 / opts_init.nz) % (opts_init.ny) // z varies first
+          );
+          // z
+          thrust::transform(
+            ijk.begin() + begin_shift, ijk.end(), // input - first arg
+            k.begin() + begin_shift,        // output
+            arg::_1 % (opts_init.nz)   // z varies first
+          );
+          // x
+          thrust::transform(
+            ijk.begin() + begin_shift, ijk.end(), // input - first arg
+            i.begin() + begin_shift,        // output
+            arg::_1 / (opts_init.nz * opts_init.ny)    // z and y vary first
+          );
+          break;
+        case 2:
+          // z
+          thrust::transform(
+            ijk.begin() + begin_shift, ijk.end(), // input - first arg
+            k.begin() + begin_shift,        // output
+            arg::_1 % (opts_init.nz)   // z varies first
+          );
+          // x
+          thrust::transform(
+            ijk.begin() + begin_shift, ijk.end(), // input - first arg
+            i.begin() + begin_shift,        // output
+            arg::_1 / (opts_init.nz)
+          );
+          break;
+        case 1:
+          thrust::copy(ijk.begin() + begin_shift, ijk.end(), i.begin() + begin_shift); // only x
+        case 0:
+          break;
+        default:
+          assert(false);
+          break;
+      }
+    }
+
+    template <typename real_t, backend_t device>
+    void particles_t<real_t, device>::impl::hskpng_ijk()
+    {   
+      // helper functor
+      struct {
+        void operator()(
+          thrust_device::vector<real_t> &vx,
+          thrust_device::vector<thrust_size_t> &vi,
+          const real_t &vd
+        ) {
+          thrust::transform(
+            vx.begin(), vx.end(),                                // input
+            vi.begin(),                                          // output
+            detail::divide_by_constant_and_cast<double, thrust_size_t>(vd) // has to be done on doubles to avoid i==nx due to low precision of nvcc math; TODO: now that rand uniform has range [0,1), float might be good here?
+          );
+        }
+      } helper;
+
+      if (opts_init.nx != 0) helper(x, i, opts_init.dx);
+      if (opts_init.ny != 0) helper(y, j, opts_init.dy);
+      if (opts_init.nz != 0) helper(z, k, opts_init.dz);
+
+      // raveling i, j & k into ijk
+      ravel_ijk();
       
       // flagging that particles are no longer sorted 
       sorted = false;
diff --git a/src/impl/particles_impl_hskpng_vterm.ipp b/src/impl/particles_impl_hskpng_vterm.ipp
index 7103d1d6e..765d7768c 100644
--- a/src/impl/particles_impl_hskpng_vterm.ipp
+++ b/src/impl/particles_impl_hskpng_vterm.ipp
@@ -37,10 +37,10 @@ namespace libcloudphxx
       template <typename real_t>
       struct common__vterm__vt
       {
-        vt_t::vt_t vt_eq; //type of terminal velocity formula to use 
+        vt_t vt_eq; //type of terminal velocity formula to use 
 
         //ctor
-        common__vterm__vt(const vt_t::vt_t &vt_eq): vt_eq(vt_eq) {}
+        common__vterm__vt(const vt_t &vt_eq): vt_eq(vt_eq) {}
 
         BOOST_GPU_ENABLED 
         real_t operator()(
@@ -98,10 +98,10 @@ namespace libcloudphxx
       template <typename real_t>
       struct common__vterm__vt__cached
       {
-        vt_t::vt_t vt_eq; //type of terminal velocity formula to use 
+        vt_t vt_eq; //type of terminal velocity formula to use 
 
         //ctor
-        common__vterm__vt__cached(const vt_t::vt_t &vt_eq): vt_eq(vt_eq) {}
+        common__vterm__vt__cached(const vt_t &vt_eq): vt_eq(vt_eq) {}
 
         BOOST_GPU_ENABLED 
         real_t operator()(
diff --git a/src/impl/particles_impl_init_SD_with_distros.ipp b/src/impl/particles_impl_init_SD_with_distros.ipp
index 48f813196..0a20ba6c1 100644
--- a/src/impl/particles_impl_init_SD_with_distros.ipp
+++ b/src/impl/particles_impl_init_SD_with_distros.ipp
@@ -50,7 +50,7 @@ namespace libcloudphxx
 
     // final inits common for tail/sd_conc/const_multi
     template <typename real_t, backend_t device>
-    void particles_t<real_t, device>::impl::init_SD_with_distros_finalize(const real_t &kappa)
+    void particles_t<real_t, device>::impl::init_SD_with_distros_finalize(const real_t &kappa, const bool unravel_ijk_switch)
     {
       // init kappa
       init_kappa(kappa);
@@ -78,8 +78,15 @@ namespace libcloudphxx
         chem_vol_ante();
       }
       
+      // ijk -> i, j, k
+      if(unravel_ijk_switch)
+        unravel_ijk(n_part_old);
+
       // initialising particle positions
       init_xyz();
+
+      if(opts_init.diag_incloud_time)
+        init_incloud_time();
     }
   };
 };
diff --git a/src/impl/particles_impl_init_SD_with_sizes.ipp b/src/impl/particles_impl_init_SD_with_sizes.ipp
index dfad97914..d09dee3cd 100644
--- a/src/impl/particles_impl_init_SD_with_sizes.ipp
+++ b/src/impl/particles_impl_init_SD_with_sizes.ipp
@@ -73,6 +73,9 @@ namespace libcloudphxx
           if (opts_init.chem_switch){
             chem_vol_ante();
           }
+      
+          // ijk -> i, j, k
+          unravel_ijk(n_part_old);
   
           // initialising particle positions
           init_xyz();
diff --git a/src/impl/particles_impl_init_chem.ipp b/src/impl/particles_impl_init_chem.ipp
index b5cabf27d..ec2b8e1ee 100644
--- a/src/impl/particles_impl_init_chem.ipp
+++ b/src/impl/particles_impl_init_chem.ipp
@@ -41,8 +41,8 @@ namespace libcloudphxx
           chem_rho(chem_rho) 
         {}
 
-	BOOST_GPU_ENABLED
-	real_t operator()(const real_t &rd3) const 
+        BOOST_GPU_ENABLED
+        real_t operator()(const real_t &rd3) const 
         { 
           using namespace common::molar_mass;
 
@@ -55,7 +55,7 @@ namespace libcloudphxx
 #endif
             * chem_rho * rd3
             * (M_NH3_H2O<real_t>() / (M_NH4<real_t>() + M_HSO4<real_t>()));
-	}
+        }
       };
 /*
       template <typename real_t>
@@ -65,8 +65,8 @@ namespace libcloudphxx
 
         chem_init_SO4(const real_t &chem_rho) : chem_rho(chem_rho) {}
 
-	BOOST_GPU_ENABLED
-	real_t operator()(const real_t &rd3) const 
+        BOOST_GPU_ENABLED
+        real_t operator()(const real_t &rd3) const 
         { 
           using namespace common::molar_mass;
  
@@ -79,7 +79,7 @@ namespace libcloudphxx
 #endif
             * chem_rho * rd3
             * (M_SO4<real_t>() / (M_NH4<real_t>() + M_SO4<real_t>()));
-	}
+        }
       };
 */
       template <typename real_t>
@@ -90,8 +90,8 @@ namespace libcloudphxx
 
         chem_init_S6(const real_t &chem_rho) : chem_rho(chem_rho) {}
 
-	BOOST_GPU_ENABLED
-	real_t operator()(const real_t &rd3) const 
+        BOOST_GPU_ENABLED
+        real_t operator()(const real_t &rd3) const 
         { 
           using namespace common::molar_mass;
 
@@ -104,7 +104,7 @@ namespace libcloudphxx
 #endif
             * chem_rho * rd3
             * (M_H2SO4<real_t>() / (M_NH4<real_t>() + M_HSO4<real_t>()));
-	}
+        }
       };
 
       template <typename real_t>
@@ -114,8 +114,8 @@ namespace libcloudphxx
  
         chem_init_H(const real_t &chem_rho) : chem_rho(chem_rho) {}
  
-	BOOST_GPU_ENABLED
-	real_t operator()(const real_t &rd3) const 
+        BOOST_GPU_ENABLED
+        real_t operator()(const real_t &rd3) const 
         { 
           using namespace common::molar_mass;
 
@@ -128,10 +128,12 @@ namespace libcloudphxx
 #endif
             * chem_rho * rd3
             * (M_H<real_t>() / (M_NH4<real_t>() + M_HSO4<real_t>()));
-	}
+        }
       };
     };
 
+    // NOTE: init_chem does not work for SD created during the simulation, hence chemistry is not compatible with distributed memory (multi_CUDA or MPI), src nor ccn relaxation
+    //       however, it works with removing SD in remove_n0 (?)
     template <typename real_t, backend_t device>
     void particles_t<real_t, device>::impl::init_chem()
     {
@@ -153,8 +155,8 @@ namespace libcloudphxx
           i < chem_rhs_beg 
             ? chem_ante_rhs
             : i < chem_rhs_fin
-	      ? chem_rhs
-	      : chem_post_rhs
+              ? chem_rhs
+              : chem_post_rhs
         );
         const int offset = 
           i < chem_rhs_beg
@@ -170,6 +172,7 @@ namespace libcloudphxx
       assert(chem_end[chem_all-1] == chem_post_rhs.end());
     }
 
+    // NOTE: valid only at init, not afterwards when SD are created e.g. from src, ccn_relax or distmem copies
     template <typename real_t, backend_t device>
     void particles_t<real_t, device>::impl::init_chem_aq()
     {
@@ -183,34 +186,33 @@ namespace libcloudphxx
         using namespace common::molar_mass;
         switch (i)
         {
-
           case NH3:
           {
-	    thrust::transform(
+            thrust::transform(
               rd3.begin(), rd3.end(),                                                    // input
               chem_bgn[i],                                                               // output
               detail::chem_init_NH4<real_t>(opts_init.chem_rho)
-	    );
+            );
           }
           break;
 
           case H:
           {
-	    thrust::transform(
+            thrust::transform(
               rd3.begin(), rd3.end(),                                                    // input
               chem_bgn[i],                                                               // output
               detail::chem_init_H<real_t>(opts_init.chem_rho)
-	    );
+            );
           }
           break;
 
           case S_VI:
           {
-	    thrust::transform(
+            thrust::transform(
               rd3.begin(), rd3.end(),                                                    // input
               chem_bgn[i],                                                               // output
               detail::chem_init_S6<real_t>(opts_init.chem_rho)
-	    );
+            );
           }
           break;
 
diff --git a/src/impl/particles_impl_init_dry_const_multi.ipp b/src/impl/particles_impl_init_dry_const_multi.ipp
index 7528051a0..28f86cf44 100644
--- a/src/impl/particles_impl_init_dry_const_multi.ipp
+++ b/src/impl/particles_impl_init_dry_const_multi.ipp
@@ -54,7 +54,7 @@ namespace libcloudphxx
 
       detail::calc_CDF(n_of_lnrd_stp, log_rd_min, log_rd_max, config.bin_precision, cdf);
 
-      // tossing random numbers [0,1] for dry radii
+      // tossing random numbers
       rand_u01(n_part_to_init);
 
       // rd3 temporarily means logarithm of radius!
diff --git a/src/impl/particles_impl_init_dry_sd_conc.ipp b/src/impl/particles_impl_init_dry_sd_conc.ipp
index ee2c461ca..fa4ebb2fe 100644
--- a/src/impl/particles_impl_init_dry_sd_conc.ipp
+++ b/src/impl/particles_impl_init_dry_sd_conc.ipp
@@ -42,7 +42,7 @@ namespace libcloudphxx
     template <typename real_t, backend_t device>
     void particles_t<real_t, device>::impl::init_dry_sd_conc()
     {
-      // tossing random numbers [0,1] for dry radii
+      // tossing random numbers
       rand_u01(n_part_to_init);
 
       // rd3 temporarily means logarithm of radius!
diff --git a/src/impl/particles_impl_init_ijk.ipp b/src/impl/particles_impl_init_ijk.ipp
index 709335f60..ee2282a33 100644
--- a/src/impl/particles_impl_init_ijk.ipp
+++ b/src/impl/particles_impl_init_ijk.ipp
@@ -12,10 +12,11 @@ namespace libcloudphxx
   {
     namespace detail
     {
+      template<class res_t>
       struct arbitrary_sequence //fill container with n 0s, m 1s, l 2s, etc...
       {
-        thrust_device::pointer<thrust_size_t> res;
-        arbitrary_sequence(thrust_device::pointer<thrust_size_t> res): res(res) {}
+        thrust_device::pointer<res_t> res;
+        arbitrary_sequence(thrust_device::pointer<res_t> res): res(res) {}
       
         template<typename Tuple>
         BOOST_GPU_ENABLED
@@ -45,7 +46,7 @@ namespace libcloudphxx
         thrust::make_zip_iterator(thrust::make_tuple(
           count_num.end(), ptr.end(), zero + n_cell
         )), 
-        detail::arbitrary_sequence(&(ijk[n_part_old]))
+        detail::arbitrary_sequence<thrust_size_t>(&(ijk[n_part_old]))
       );
     }
   };
diff --git a/src/impl/particles_impl_init_sanity_check.ipp b/src/impl/particles_impl_init_sanity_check.ipp
index 40a68b371..5d309dafa 100644
--- a/src/impl/particles_impl_init_sanity_check.ipp
+++ b/src/impl/particles_impl_init_sanity_check.ipp
@@ -54,36 +54,34 @@ namespace libcloudphxx
       if (!opts_init.chem_switch && ambient_chem.size() != 0)
         throw std::runtime_error("chemistry was switched off and ambient_chem is not empty");
 
-      // TODO: in source match chemical composition
-      if (opts_init.chem_switch && opts_init.src_switch)
+      if (opts_init.chem_switch && opts_init.src_type!=src_t::off)
         throw std::runtime_error("chemistry and aerosol source are not compatible");
 
-      if (opts_init.src_switch && opts_init.src_dry_distros.empty() && opts_init.src_dry_sizes.empty())
-        throw std::runtime_error("src_switch==True, but src_dry_distros and src_dry_sizes are empty");
+      if (opts_init.src_type!=src_t::off && opts_init.src_dry_distros.empty() && opts_init.src_dry_sizes.empty())
+        throw std::runtime_error("CCN source enabled, but src_dry_distros and src_dry_sizes are empty");
 
-      if (opts_init.src_switch && opts_init.src_dry_distros.size() > 1)
+      if (opts_init.src_type!=src_t::off && opts_init.src_dry_distros.size() > 1)
         throw std::runtime_error("src_dry_distros can only have a single kappa value.");
 
-      if (opts_init.src_switch && n_dims<2)
-        throw std::runtime_error("src_switch==True and n_dims<2. Source only works in 2D and 3D.");
+      if (opts_init.src_type==src_t::matching && opts_init.dry_distros.size() > 1)
+        throw std::runtime_error("For 'matching' CCN source, the initial aerosol distribution can only have one kappa value (na kappa matching done).");
 
-      if (opts_init.src_switch && !opts_init.src_dry_distros.empty() &&
-          opts_init.src_dry_distros.begin()->first != opts_init.dry_distros.begin()->first) throw std::runtime_error("Kappa of the source has to be the same as that of the initial profile");
+      if (opts_init.src_type!=src_t::off && n_dims<2)
+        throw std::runtime_error("CCN source works in 2D and 3D only.");
+
+      if (opts_init.src_type==src_t::matching && !opts_init.src_dry_distros.empty() &&
+          opts_init.src_dry_distros.begin()->first != opts_init.dry_distros.begin()->first) throw std::runtime_error("For 'matching' CCN source, kappa of the source has to be the same as that of the initial profile (no kappa matching done)");
 
       if(opts_init.dry_distros.size() > 1 && opts_init.chem_switch)
         throw std::runtime_error("chemistry and multiple kappa distributions are not compatible");
 
-      // TODO: in source match kappas 
-      if(opts_init.dry_distros.size() > 1 && opts_init.src_switch)
-        throw std::runtime_error("aerosol source and multiple kappa distributions are not compatible");
-
       if(opts_init.dry_distros.size() == 0 && opts_init.dry_sizes.size() == 0)
         throw std::runtime_error("Both dry_distros and dry_sizes are undefined");
 
       if(opts_init.sd_conc_large_tail && opts_init.sd_conc == 0)
         throw std::runtime_error("Sd_conc_large_tail make sense only with sd_conc init (i.e. sd_conc>0)");
 
-      if(opts_init.sd_const_multi > 0 && opts_init.src_switch)
+      if(opts_init.sd_const_multi > 0 && opts_init.src_type!=src_t::off)
         throw std::runtime_error("aerosol source and constant multiplicity option are not compatible"); // NOTE: why not?
 
       if (n_dims > 0)
@@ -127,6 +125,22 @@ namespace libcloudphxx
         throw std::runtime_error("at least one of opts_init.turb_adve_switch, opts_init.turb_cond_switch is true, but SGS mixing length profile size != nz");
       if(opts_init.SGS_mix_len.size() > 0 && *std::min(opts_init.SGS_mix_len.begin(), opts_init.SGS_mix_len.end()) <= 0)
         throw std::runtime_error("SGS_mix_len <= 0");
+      #if defined(USE_MPI)
+        if(opts_init.rlx_switch)
+          std::cerr << "libcloudph++ WARNING: relaxation is not fully supported in MPI runs. Mean calculation and addition of SD will be done locally on each node." << std::endl;
+        if(opts_init.chem_switch)
+          throw std::runtime_error("chemistry is not compatible with MPI");
+      #endif
+      if(n_dims < 2 && opts_init.rlx_switch)
+        throw std::runtime_error("CCN relaxation works only in 2D and 3D, set rlx_switch to false");
+      if(opts_init.rlx_switch && opts_init.rlx_bins <= 0)
+        throw std::runtime_error("rlx_bins <= 0");
+      if(opts_init.rlx_switch && opts_init.rlx_sd_per_bin <= 0)
+        throw std::runtime_error("rlx_sd_per_bin <= 0");
+      if(opts_init.rlx_switch && opts_init.rlx_timescale <= 0)
+        throw std::runtime_error("rlx_timescale <= 0");        
+      if(opts_init.rlx_switch && opts_init.chem_switch)
+        throw std::runtime_error("CCN relaxation does not work with chemistry");
     }
   };
 };
diff --git a/src/impl/particles_impl_init_xyz.ipp b/src/impl/particles_impl_init_xyz.ipp
index 36399b634..23978d31e 100644
--- a/src/impl/particles_impl_init_xyz.ipp
+++ b/src/impl/particles_impl_init_xyz.ipp
@@ -39,53 +39,6 @@ namespace libcloudphxx
     template <typename real_t, backend_t device>
     void particles_t<real_t, device>::impl::init_xyz()
     {
-      // get i, j, k from ijk 
-      switch(n_dims)
-      {
-        case 3:
-          namespace arg = thrust::placeholders;
-          // y
-          thrust::transform(
-            ijk.begin() + n_part_old, ijk.end(), // input - first arg
-            j.begin() + n_part_old,        // output
-            (arg::_1 / opts_init.nz) % (opts_init.ny) // z varies first
-          );
-          // z
-          thrust::transform(
-            ijk.begin() + n_part_old, ijk.end(), // input - first arg
-            k.begin() + n_part_old,        // output
-            arg::_1 % (opts_init.nz)   // z varies first
-          );
-          // x
-          thrust::transform(
-            ijk.begin() + n_part_old, ijk.end(), // input - first arg
-            i.begin() + n_part_old,        // output
-            arg::_1 / (opts_init.nz * opts_init.ny)    // z and y vary first
-          );
-          break;
-        case 2:
-          // z
-          thrust::transform(
-            ijk.begin() + n_part_old, ijk.end(), // input - first arg
-            k.begin() + n_part_old,        // output
-            arg::_1 % (opts_init.nz)   // z varies first
-          );
-          // x
-          thrust::transform(
-            ijk.begin() + n_part_old, ijk.end(), // input - first arg
-            i.begin() + n_part_old,        // output
-            arg::_1 / (opts_init.nz)
-          );
-          break;
-        case 1:
-          thrust::copy(ijk.begin() + n_part_old, ijk.end(), i.begin() + n_part_old); // only x
-        case 0:
-          break;
-        default:
-          assert(false);
-          break;
-      }
-
       thrust_device::vector<real_t> 
                   *v[3] = { &x,           &y,           &z           };
       const int    n[3] = { opts_init.nx, opts_init.ny, opts_init.nz };
@@ -99,10 +52,11 @@ namespace libcloudphxx
       {
         if (n[ix] == 0) continue;
 
-        // tossing random numbers [0,1] 
+        // tossing random numbers
         rand_u01(n_part_to_init);
 
 	// shifting from [0,1] to random position within respective cell 
+  // TODO: now the rand range is [0,1), include this here
         {
           namespace arg = thrust::placeholders;
 	  thrust::transform(
diff --git a/src/impl/particles_impl_moms.ipp b/src/impl/particles_impl_moms.ipp
index 5f52c5d6b..4baac9cd8 100644
--- a/src/impl/particles_impl_moms.ipp
+++ b/src/impl/particles_impl_moms.ipp
@@ -65,6 +65,7 @@ namespace libcloudphxx
     void particles_t<real_t, device>::impl::moms_rng(
       const real_t &min, const real_t &max, 
       const typename thrust_device::vector<real_t>::iterator &vec_bgn,
+      const thrust_size_t npart,
       const bool cons // is it a consecutive selection after previous one
     )
     {
@@ -75,21 +76,31 @@ namespace libcloudphxx
 
       if(!cons)
         thrust::transform(
-          n.begin(), n.end(),   // input - 1st arg
-          vec_bgn,              // input - 2nd arg
-          n_filtered.begin(),   // output
+          n.begin(), n.begin() + npart, // input - 1st arg
+          vec_bgn,                      // input - 2nd arg
+          n_filtered.begin(),           // output
           detail::range_filter<real_t>(min, max) 
         );
       else
         thrust::transform(
-          n_filtered.begin(), n_filtered.end(),   // input - 1st arg
-          vec_bgn,                                // input - 2nd arg
-          n_filtered.begin(),                     // output
+          n_filtered.begin(), n_filtered.begin() + npart,  // input - 1st arg
+          vec_bgn,                                         // input - 2nd arg
+          n_filtered.begin(),                              // output
           detail::range_filter<real_t>(min, max) 
         );
 
       selected_before_counting = true;
     }
+
+    template <typename real_t, backend_t device>
+    void particles_t<real_t, device>::impl::moms_rng(
+      const real_t &min, const real_t &max, 
+      const typename thrust_device::vector<real_t>::iterator &vec_bgn,
+      const bool cons // is it a consecutive selection after previous one
+    )
+    {
+      moms_rng(min, max, vec_bgn, n_part, cons);
+    }
  
     // selects particles for which vec1[i] >= vec2[i]
     template <typename real_t, backend_t device>
@@ -179,6 +190,7 @@ namespace libcloudphxx
     template <typename real_t, backend_t device>
     void particles_t<real_t, device>::impl::moms_calc(
       const typename thrust_device::vector<real_t>::iterator &vec_bgn,
+      const thrust_size_t npart,
       const real_t power,
       const bool specific
     )
@@ -199,7 +211,7 @@ namespace libcloudphxx
         typename thrust_device::vector<real_t>::iterator
       > it_pair = thrust::reduce_by_key(
         // input - keys
-        sorted_ijk.begin(), sorted_ijk.end(),  
+        sorted_ijk.begin(), sorted_ijk.begin()+npart,  
         // input - values
         thrust::make_transform_iterator(
           zip_it_t(thrust::make_tuple(
@@ -285,7 +297,7 @@ namespace libcloudphxx
           std::cout << "sorted_id:" << std::endl;
           debug::print(sorted_id);
           std::cout << "vec:" << std::endl;
-          debug::print(vec_bgn, vec_bgn + n_part);
+          debug::print(vec_bgn, vec_bgn + npart);
           std::cout << "dv:" << std::endl;
           debug::print(dv);
           std::cout << "rhod:" << std::endl;
@@ -294,5 +306,15 @@ namespace libcloudphxx
       }
 #endif
     }
+
+    template <typename real_t, backend_t device>
+    void particles_t<real_t, device>::impl::moms_calc(
+      const typename thrust_device::vector<real_t>::iterator &vec_bgn,
+      const real_t power,
+      const bool specific
+    )
+    {
+      moms_calc(vec_bgn, n_part, power, specific);
+    }
   };  
 };
diff --git a/src/impl/particles_impl_post_adding_SD.ipp b/src/impl/particles_impl_post_adding_SD.ipp
new file mode 100644
index 000000000..9bebe5352
--- /dev/null
+++ b/src/impl/particles_impl_post_adding_SD.ipp
@@ -0,0 +1,57 @@
+// vim:filetype=cpp
+/** @file
+  * @copyright University of Warsaw
+  * @section LICENSE
+  * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
+  */
+
+namespace libcloudphxx
+{
+  namespace lgrngn
+  {
+    template <typename real_t, backend_t device>
+    void particles_t<real_t, device>::impl::post_adding_SD()
+    {   
+      thrust_device::vector<real_t> &drv(tmp_device_real_cell1); // NOTE: this can't be changed by any function called since ante_adding_SD() was called.....
+
+      // --- after source particles are no longer sorted ---
+      sorted = false;
+
+      // --- calc liquid water content after src ---
+      hskpng_sort(); 
+      moms_all();
+      moms_calc(rw2.begin(), real_t(3./2.));
+
+      // drv = tot_vol_after -tot_vol_bfr + dry_vol_bfr
+      thrust::transform(
+        count_mom.begin(), count_mom.begin() + count_n,                    // input - 1st arg
+        thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // 2nd arg
+        thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // output
+        thrust::plus<real_t>()
+      );
+
+      // drv = tot_vol_after - dry_vol_after - tot_vol_bfr + dry_vol_bfr
+/*
+      moms_calc(rd3.begin(), 1);
+      thrust::transform(
+        count_mom.begin(), count_mom.begin() + count_n,                    // input - 1st arg
+        thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // 2nd arg
+        thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // output
+        thrust::minus<real_t>()
+      );
+*/
+
+      // update th and rv
+      update_th_rv(drv);
+
+      // update count_ijk and count_num
+      hskpng_count();
+
+      // update vt of new SD
+      hskpng_vterm_invalid();
+
+      // init _old values in per-particle substepping
+      init_sstp();
+    }
+  };  
+};
diff --git a/src/impl/particles_impl_reserve_hskpng_npart.ipp b/src/impl/particles_impl_reserve_hskpng_npart.ipp
index ebf311072..063969e7f 100644
--- a/src/impl/particles_impl_reserve_hskpng_npart.ipp
+++ b/src/impl/particles_impl_reserve_hskpng_npart.ipp
@@ -78,8 +78,8 @@ namespace libcloudphxx
       // done using resize, because _bfr.end() is never used and we want to assert that buffer is large enough using the .size() function
       if(distmem())
       {
-        const auto no_of_n_vctrs_copied(int(1));
-        const auto no_of_real_vctrs_copied(distmem_real_vctrs.size());
+        const int no_of_n_vctrs_copied(int(1));
+        const int no_of_real_vctrs_copied(distmem_real_vctrs.size());
 
         in_n_bfr.resize(no_of_n_vctrs_copied * opts_init.n_sd_max / opts_init.nx / config.bfr_fraction);     // for n
         out_n_bfr.resize(no_of_n_vctrs_copied * opts_init.n_sd_max / opts_init.nx / config.bfr_fraction);
diff --git a/src/impl/particles_impl_rlx.ipp b/src/impl/particles_impl_rlx.ipp
new file mode 100644
index 000000000..3d83a4c2e
--- /dev/null
+++ b/src/impl/particles_impl_rlx.ipp
@@ -0,0 +1,23 @@
+// vim:filetype=cpp
+/** @file
+  * @copyright University of Warsaw
+  * @section LICENSE
+  * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
+  */
+
+namespace libcloudphxx
+{
+  namespace lgrngn
+  {
+    template <typename real_t, backend_t device>
+    void particles_t<real_t, device>::impl::rlx(const real_t dt)
+    {   
+ //     ante_adding_SD();
+
+      if(!opts_init.rlx_dry_distros.empty())
+        rlx_dry_distros(dt);
+ 
+//      post_adding_SD();
+    }
+  };  
+};
diff --git a/src/impl/particles_impl_rlx_dry_distros.ipp b/src/impl/particles_impl_rlx_dry_distros.ipp
new file mode 100644
index 000000000..528b9eca7
--- /dev/null
+++ b/src/impl/particles_impl_rlx_dry_distros.ipp
@@ -0,0 +1,302 @@
+// vim:filetype=cpp
+/** @file
+  * @copyright University of Warsaw
+  * @section LICENSE
+  * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
+  */
+// #include <limits>
+#include <thrust/unique.h>
+#include <thrust/binary_search.h>
+#include <numeric>
+
+
+namespace libcloudphxx
+{
+  namespace lgrngn
+  {
+    namespace detail
+    {
+      /// @brief returns ret_t(x*c)
+      template <typename arg_t, typename ret_t>
+      struct multiply_by_constant_and_cast
+      {
+        arg_t c;
+        multiply_by_constant_and_cast(arg_t c) : c(c) {}
+
+        BOOST_GPU_ENABLED
+        ret_t operator()(arg_t x)
+        {
+          return ret_t(x*c);
+        }
+      };
+
+      template<class real_t>
+      struct calc_n_sd_to_create
+      {
+        const real_t tolerance;
+        const int n_sd_per_bin;
+
+        calc_n_sd_to_create(const real_t tol, const real_t n_sd_per_bin_real):
+          tolerance(tol),
+          n_sd_per_bin(std::max<int>(1, int(n_sd_per_bin_real + 0.5)))
+          {}
+
+        BOOST_GPU_ENABLED
+        int operator()(const real_t &a1, const real_t &a2)
+        {
+          return a2 > real_t(0) ?
+            a1 / a2 > tolerance ? n_sd_per_bin : 0
+          : 0;
+        }
+      };
+
+      // domain volume at this height level
+      template <typename real_t>
+      struct hor_dv_eval
+      {
+        // note: having a copy of opts_init here causes CUDA crashes (alignment problems?)
+        const real_t
+          dz,
+          x0, y0, z0,
+          x1, y1, z1;
+
+        hor_dv_eval(const opts_init_t<real_t> &o) :
+          dz(o.dz),
+          x0(o.x0), y0(o.y0), z0(o.z0),
+          x1(o.x1), y1(o.y1), z1(o.z1)
+        {}
+
+        BOOST_GPU_ENABLED
+        real_t operator()(const int &k)
+        {
+#if !defined(__NVCC__)
+          using std::min;
+          using std::max;
+#endif
+          return
+            max(real_t(0),
+              (x1 - x0) *
+              (y1 - y0) * // NOTE: size in y is taken into account even in 2D!
+              (min((k + 1) * dz, z1) - max(k * dz, z0))
+            );
+        }
+      };
+    };
+
+    // create new aerosol particles to relax towards a size distribution
+    template <typename real_t, backend_t device>
+    void particles_t<real_t, device>::impl::rlx_dry_distros(const real_t dt)
+    {   
+      namespace arg = thrust::placeholders;
+
+      // vectors of size nz used in calculation of horizontal averages, TODO: allocate them at init
+      thrust_device::vector<real_t> hor_sum(opts_init.nz);
+      thrust_device::vector<real_t> hor_sum_count(opts_init.nz);
+      thrust_device::vector<real_t> hor_missing(opts_init.nz);
+      thrust_device::vector<thrust_size_t> hor_sum_k(opts_init.nz);
+      thrust_device::vector<real_t> expected_hor_sum(opts_init.nz);
+      thrust_device::vector<thrust_size_t> n_SD_to_create(opts_init.nz); // could be bool, but then thrust::reduce does not add bools as expected
+
+      // calc sum of ln(rd) ranges of all relax distributions
+      real_t tot_lnrd_rng = 0.;
+      for (typename opts_init_t<real_t>::rlx_dry_distros_t::const_iterator ddi = opts_init.rlx_dry_distros.begin(); ddi != opts_init.rlx_dry_distros.end(); ++ddi)
+      {
+        dist_analysis_sd_conc(
+          *(std::get<0>(ddi->second)),
+          opts_init.rlx_bins
+        );
+        tot_lnrd_rng += log_rd_max - log_rd_min;
+      }
+
+      const auto n_part_pre_relax = n_part;
+
+      // initialize SDs of each kappa-type
+      for (typename opts_init_t<real_t>::rlx_dry_distros_t::const_iterator ddi = opts_init.rlx_dry_distros.begin(); ddi != opts_init.rlx_dry_distros.end(); ++ddi)
+      {
+        const auto &kappa(ddi->first);
+        assert(kappa >= 0);
+        const auto &n_of_lnrd_stp(*(std::get<0>(ddi->second)));
+
+        // analyze distribution to get rd_min and max needed for bin sizes
+        // TODO: this was done a moment ago!
+        // TODO2: this could be done once at the start of the simulation!
+        dist_analysis_sd_conc(
+          n_of_lnrd_stp,
+          opts_init.rlx_bins
+        );
+        const real_t lnrd_rng = log_rd_max - log_rd_min;
+        assert(lnrd_rng > 0);
+
+        // calculate bin edges (in rd3)
+        // tmp vector with bin edges, probably could be allocated once in init
+        const int n_bins = opts_init.rlx_bins * lnrd_rng / tot_lnrd_rng;
+        assert(n_bins>0);
+        const real_t lnrd_bin_size = lnrd_rng / n_bins;
+        assert(lnrd_bin_size > 0);
+        std::vector<real_t> bin_rd3_left_edges(n_bins+1); // on CPU because of small number of edges
+        std::iota(bin_rd3_left_edges.begin(), bin_rd3_left_edges.end(), 0); // fill with a 0,1,2,... sequence
+        std::transform(bin_rd3_left_edges.begin(), bin_rd3_left_edges.end(), bin_rd3_left_edges.begin(), [log_rd_min_val=log_rd_min, lnrd_bin_size] (real_t bin_number) { return std::exp( 3 * (log_rd_min_val + bin_number * lnrd_bin_size)) ; }); // calculate left edges
+
+        // minimum and maximum cell indices
+        const int z_min_index = (std::get<2>(ddi->second)).first  / opts_init.dz,
+                  z_max_index = (std::get<2>(ddi->second)).second / opts_init.dz;
+
+        assert(z_max_index >= z_min_index);
+        assert(z_min_index >= 0);
+        assert(z_max_index < opts_init.nz);
+
+        const auto n_part_pre_bins_loop = n_part;
+
+        real_t expected_STP_concentration_tot = 0;
+
+        // loop over the bins
+        for(int bin_number=0; bin_number<bin_rd3_left_edges.size()-1; ++bin_number)
+        {
+          const real_t rd3_min = bin_rd3_left_edges.at(bin_number),
+                       rd3_max = bin_rd3_left_edges.at(bin_number+1);
+          assert(rd3_min < rd3_max);
+          // TODO: these selections could be optimised
+          // select droplets within the desired kappa range; only from the droplets existing before relaxation began, because relaxed ones are not sorted
+          moms_rng((std::get<1>(ddi->second)).first, (std::get<1>(ddi->second)).second, kpa.begin(), n_part_pre_relax, false);
+          // out of those, select droplets within the desired rd3 range
+          moms_rng(rd3_min, rd3_max, rd3.begin(), n_part_pre_relax, true);
+          // calculate 0-th non-specific moment of rd3 (number of droplets in a cell) of droplets in this rd3 and kappa range
+          moms_calc(rd3.begin(), n_part_pre_relax, 0, false);
+
+          // horizontal sum of this moment
+          thrust::fill(hor_sum.begin(), hor_sum.end(), 0);
+          thrust_device::vector<thrust_size_t> &count_k(tmp_device_size_cell);  // NOTE: tmp_device_size_cell is also used in some other inits, careful not to overwrite it!
+          thrust::transform(count_ijk.begin(), count_ijk.begin() + count_n, count_k.begin(), arg::_1 % opts_init.nz);
+          thrust::sort_by_key(count_k.begin(), count_k.begin() + count_n, count_mom.begin());
+
+          auto new_end = thrust::reduce_by_key(count_k.begin(), count_k.begin() + count_n, count_mom.begin(), hor_sum_k.begin(), hor_sum_count.begin()); 
+
+          int number_of_levels_with_droplets = new_end.first - hor_sum_k.begin(); // number of levels with any SD, not with SD in this size and kappa range
+          
+          assert(number_of_levels_with_droplets <= opts_init.nz);
+          thrust::copy(hor_sum_count.begin(), hor_sum_count.begin() + number_of_levels_with_droplets, thrust::make_permutation_iterator(hor_sum.begin(), hor_sum_k.begin()));
+          // divide sum by the number of cells at this level
+//          thrust::transform(hor_sum.begin(), hor_sum.end(), hor_sum.begin(), arg::_1 / (opts_init.nx * m1(opts_init.ny)));
+          
+          // calculate expected CCN number
+          const real_t bin_lnrd_center = log_rd_min + (bin_number + 0.5) * lnrd_bin_size;
+          const real_t expected_STP_concentration = n_of_lnrd_stp(bin_lnrd_center) * lnrd_bin_size;
+          assert(expected_STP_concentration >= 0);
+          expected_STP_concentration_tot += expected_STP_concentration;
+          thrust::transform(zero, zero + opts_init.nz, expected_hor_sum.begin(), detail::hor_dv_eval<real_t>(opts_init)); // fill with volume of the domain at this level
+          thrust::transform(expected_hor_sum.begin(), expected_hor_sum.end(), expected_hor_sum.begin(), expected_STP_concentration * arg::_1); // multiply by the expected concentration
+
+          // TODO: check for overflows?
+ 
+          // correcting STP -> actual ambient conditions
+          if(!opts_init.aerosol_independent_of_rhod)
+          {
+            using common::earth::rho_stp;
+            thrust::transform(
+              expected_hor_sum.begin(), 
+              expected_hor_sum.begin() + opts_init.nz, 
+              rhod.begin(),                 // rhod has size ncell, but vertical cooridnate varies first, so rhod.begin() to rhod.begin()+nz should be the vertical profile?
+              expected_hor_sum.begin(), 
+              arg::_1 * arg::_2 / real_t(rho_stp<real_t>() / si::kilograms * si::cubic_metres)
+            );
+          }
+
+          // set to zero outside of the defined range of altitudes
+          thrust::replace_if(expected_hor_sum.begin(), expected_hor_sum.begin()+opts_init.nz, zero, arg::_1 < z_min_index || arg::_1 >= z_max_index, real_t(0));
+
+          // calculate how many CCN are missing
+          thrust::transform(expected_hor_sum.begin(), expected_hor_sum.end(), hor_sum.begin(), hor_missing.begin(), arg::_1 - arg::_2);
+          thrust::replace_if(hor_missing.begin(), hor_missing.end(), arg::_1 < 0, 0);
+         
+          // set number of SDs to init; create only if concentration is lower than expected with a tolerance
+          thrust::transform(hor_missing.begin(), hor_missing.end(), expected_hor_sum.begin(), n_SD_to_create.begin(), detail::calc_n_sd_to_create<real_t>(config.rlx_conc_tolerance, opts_init.rlx_sd_per_bin));
+
+          n_part_old = n_part;
+          n_part_to_init = thrust::reduce(n_SD_to_create.begin(), n_SD_to_create.end());
+          n_part = n_part_old + n_part_to_init;
+
+          // resize arrays set in the bins loop: cell indices and rd3, resize should be cheap, because we allocate a large chunk of memory at the start
+          ijk.resize(n_part);
+          i.resize(n_part);
+          k.resize(n_part);
+          if(n_dims==3) j.resize(n_part); // we dont check in i and k because relax works only in 2D and 3D
+          rd3.resize(n_part);
+          n.resize(n_part);
+
+          // --- init k ---
+          thrust_device::vector<thrust_size_t> &ptr(tmp_device_size_cell);
+          thrust::exclusive_scan(n_SD_to_create.begin(), n_SD_to_create.end(), ptr.begin()); // number of SDs in cells to init up to (i-1)
+
+          thrust::for_each(
+            thrust::make_zip_iterator(thrust::make_tuple(
+              n_SD_to_create.begin(), ptr.begin(), zero
+            )),
+            thrust::make_zip_iterator(thrust::make_tuple(
+              n_SD_to_create.end(), ptr.end(), zero + opts_init.nz
+            )),
+            detail::arbitrary_sequence<thrust_size_t>(&(k[n_part_old]))
+          );
+
+
+          // --- init multiplicities (includes casting from real to n) ---
+#if !defined(__NVCC__)
+          using std::min;
+#endif
+
+          thrust::for_each(
+            thrust::make_zip_iterator(thrust::make_tuple(
+              n_SD_to_create.begin(), ptr.begin(), 
+              thrust::make_transform_iterator(hor_missing.begin(), arg::_1 / real_t(opts_init.rlx_sd_per_bin) * min(dt / opts_init.rlx_timescale, real_t(1)) + real_t(0.5))
+            )),
+            thrust::make_zip_iterator(thrust::make_tuple(
+              n_SD_to_create.end(), ptr.end(),
+              thrust::make_transform_iterator(hor_missing.end(), arg::_1 / real_t(opts_init.rlx_sd_per_bin) * min(dt / opts_init.rlx_timescale, real_t(1)) + real_t(0.5))
+            )),
+            detail::arbitrary_sequence<n_t>(&(n[n_part_old]))
+          );
+
+          // detecting possible overflows of n type
+          {
+            thrust_size_t ix = thrust::max_element(n.begin() + n_part_old, n.end()) - (n.begin() + n_part_old);
+            assert(n[ix] < (typename impl::n_t)(-1) / 10000);
+          }
+
+          // --- init of i and j ---
+          // tossing random numbers [0,1)  TODO: do it once for all bins
+          rand_u01(n_part_to_init * (n_dims)); // random numbers for: i, rd, j (j only in 3D)
+
+          thrust::transform(u01.begin(), u01.begin() + n_part_to_init, i.begin() + n_part_old, detail::multiply_by_constant_and_cast<real_t, thrust_size_t>(opts_init.nx));
+          if(n_dims==3) thrust::transform(u01.begin() + 2*n_part_to_init, u01.begin() + 3*n_part_to_init, j.begin() + n_part_old, detail::multiply_by_constant_and_cast<real_t, thrust_size_t>(opts_init.ny));
+
+          // raveling i, j & k into ijk; only of the new SD
+          ravel_ijk(n_part_old);
+
+          // init dry radius
+          // set rd3 randomized within the bin, uniformly distributed on the log(rd) axis
+          const real_t lnrd_min = std::log(std::pow(rd3_min, 1./3.));
+          thrust::transform(u01.begin() + 1*n_part_to_init, u01.begin() + 2*n_part_to_init, rd3.begin()+n_part_old, lnrd_min + arg::_1 * lnrd_bin_size);
+
+          // converting from lnrd to rd3
+          thrust::transform(
+            rd3.begin() + n_part_old,
+            rd3.end(),
+            rd3.begin() + n_part_old,
+            detail::exp3x<real_t>()
+          );
+
+          // NOTE: watch out not to mess up sorting while adding SDs to the bins, because moms_X functions require sorted data...
+        } // end of the bins loop
+
+        // init other SD characteristics that don't have to be initialized in the bins loop
+        n_part_old = n_part_pre_bins_loop;
+        n_part_to_init = n_part - n_part_old;
+        hskpng_resize_npart();
+
+        init_SD_with_distros_finalize(kappa, false); // no need to unravel ijk there, becaues i j k are already initialized
+
+        // TODO: asserts of newly added SD parameters? e.g. how many SD, how big is multiplicity etc.
+      } // end of the distros loop
+      sorted = false;
+    }
+  };  
+};
diff --git a/src/impl/particles_impl_src.ipp b/src/impl/particles_impl_src.ipp
index 71d53add0..4044b5cab 100644
--- a/src/impl/particles_impl_src.ipp
+++ b/src/impl/particles_impl_src.ipp
@@ -12,76 +12,15 @@ namespace libcloudphxx
     template <typename real_t, backend_t device>
     void particles_t<real_t, device>::impl::src(const real_t &dt)
     {   
-      // --- calc liquid water content before src ---
-      hskpng_sort(); 
-      thrust_device::vector<real_t> &drv(tmp_device_real_cell1); // NOTE: this can't be changed by any function called in src.....
-      thrust::fill(drv.begin(), drv.end(), real_t(0.));
-
-      moms_all();
-      moms_calc(rw2.begin(), real_t(3./2.));
-
-      // drv = - tot_vol_bfr
-      thrust::transform(
-        count_mom.begin(), count_mom.begin() + count_n,                    // input - 1st arg
-        thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // output
-        thrust::negate<real_t>()
-      );
-
-      // drv = -tot_vol_bfr + dry_vol_bfr
-/*
-      moms_calc(rd3.begin(), 1);
-      thrust::transform(
-        count_mom.begin(), count_mom.begin() + count_n,                    // input - 1st arg
-        thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // 2nd arg
-        thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // output
-        thrust::plus<real_t>()
-      );
-*/
+  //    ante_adding_SD();
 
       if(!opts_init.src_dry_distros.empty())
         src_dry_distros(dt);
 
       if(!opts_init.src_dry_sizes.empty())
         src_dry_sizes(dt);
- 
-      // --- after source particles are no longer sorted ---
-      sorted = false;
-
-      // --- calc liquid water content after src ---
-      hskpng_sort(); 
-      moms_all();
-      moms_calc(rw2.begin(), real_t(3./2.));
-
-      // drv = tot_vol_after -tot_vol_bfr + dry_vol_bfr
-      thrust::transform(
-        count_mom.begin(), count_mom.begin() + count_n,                    // input - 1st arg
-        thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // 2nd arg
-        thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // output
-        thrust::plus<real_t>()
-      );
-
-      // drv = tot_vol_after - dry_vol_after - tot_vol_bfr + dry_vol_bfr
-/*
-      moms_calc(rd3.begin(), 1);
-      thrust::transform(
-        count_mom.begin(), count_mom.begin() + count_n,                    // input - 1st arg
-        thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // 2nd arg
-        thrust::make_permutation_iterator(drv.begin(), count_ijk.begin()), // output
-        thrust::minus<real_t>()
-      );
-*/
-
-      // update th and rv
-      update_th_rv(drv);
-
-      // update count_ijk and count_num
-      hskpng_count();
-
-      // update vt of new SD
-      hskpng_vterm_invalid();
 
-      // init _old values in per-particle substepping
-      init_sstp();
+//      post_adding_SD();
     }
   };  
 };
diff --git a/src/impl/particles_impl_src_dry_distros.ipp b/src/impl/particles_impl_src_dry_distros.ipp
index c6a89e551..ffd4ea560 100644
--- a/src/impl/particles_impl_src_dry_distros.ipp
+++ b/src/impl/particles_impl_src_dry_distros.ipp
@@ -12,382 +12,14 @@ namespace libcloudphxx
 {
   namespace lgrngn
   {
-    namespace detail
-    {
-      template<typename t_a, typename t_b>
-      struct two_keys_sort
-      {
-        BOOST_GPU_ENABLED
-        bool operator()(const thrust::tuple<t_a, t_b> &a, const thrust::tuple<t_a, t_b> &b)
-        {
-          if(a.head < b.head) return true;
-          if(a.head == b.head) return a.tail < b.tail;
-          return false;
-        }
-      };
-
-      template<typename real_t, typename n_t, typename res_t, n_t out_of_bins>
-      struct get_bin_no
-      {
-        real_t log_rd_min, log_rd_max, log_diff;
-        get_bin_no(const real_t &log_rd_min, const real_t &log_rd_max): log_rd_min(log_rd_min), log_rd_max(log_rd_max), log_diff(log_rd_max - log_rd_min) {}
-        
-        BOOST_GPU_ENABLED
-        res_t operator()(real_t rd3, n_t count_num)
-        {
-          real_t log_rd = log(rd3) / 3.;
-          if(log_rd < log_rd_min || log_rd >= log_rd_max || count_num == 0) return out_of_bins;
-          else  return ( (log_rd - log_rd_min) / log_diff * count_num); //count num is the number of bins in the cell
-        }
-      };
-    };
-
     // create new aerosol particles based on a size distribution
-    // if any SDs with dry radius similar to the one to be added are present,
-    // we increase their multiplicity instead of adding new SDs
     template <typename real_t, backend_t device>
     void particles_t<real_t, device>::impl::src_dry_distros(const real_t &dt)
-    {   
-      // set number of SDs to init; use count_num as storage
-      init_count_num_src(opts_init.src_sd_conc);
-
-      // -------- TODO: match not only sizes of old particles, but also kappas and chemical composition... --------
-
-      // --- sort already existing SDs; primary key ijk, secondary rd ---
-      // TODO: do all of this only on SDs in cells below src_z1?
-
-      // filling-in sorted_id with a sequence
-      thrust::sequence(sorted_id.begin(), sorted_id.end());
-     
-      // tmp vector with sorted rd3
-      thrust_device::vector<real_t> &sorted_rd3(tmp_device_real_part);
-
-      // use sorted_rd3 as tmp copy of rd3
-      thrust::copy(
-        rd3.begin(), rd3.end(), // from
-        sorted_rd3.begin()      // to
-      );
-
-      // copy ijk to sorted ijk
-      thrust::copy(
-        ijk.begin(), ijk.end(), // from
-        sorted_ijk.begin()      // to
-      );
-
-      // sorting by ijk and rd3
-      thrust::sort_by_key(
-        thrust::make_zip_iterator(thrust::make_tuple(
-          sorted_ijk.begin(),
-          sorted_rd3.begin()
-        )),
-        thrust::make_zip_iterator(thrust::make_tuple(
-          sorted_ijk.begin(),
-          sorted_rd3.begin()
-        )) + n_part,                           // keys
-        sorted_id.begin(),                     // values
-        detail::two_keys_sort<thrust_size_t, real_t>()
-      ); 
-
-      // analyze distribution to get rd_min and max needed for bin sizes
-      // TODO: this could be done once at the beginning of the simulation
-      dist_analysis_sd_conc(
-        *(opts_init.src_dry_distros.begin()->second),
-        opts_init.src_sd_conc,
-        dt
-      ); 
-
-      // --- see how many already existing SDs match size bins ---
-      {
-        namespace arg = thrust::placeholders;
-
-        // set no of particles to init
-        n_part_old = n_part;
-        n_part_to_init = thrust::reduce(count_num.begin(), count_num.end());
-        n_part = n_part_old + n_part_to_init;
-        hskpng_resize_npart();
-
-        thrust_size_t n_part_bfr_src = n_part_old,
-                      n_part_tot_in_src = n_part_to_init;
-
-        // tmp vector with bin number of existing SDs
-        thrust_device::vector<thrust_size_t> bin_no(n_part);
-
-        const thrust_size_t out_of_bins = 4444444444; // would cause an error for src_sd_conc > out_of_bins
-        // calc bin no
-        thrust::transform(
-          sorted_rd3.begin(),
-          sorted_rd3.end(),
-          thrust::make_permutation_iterator(count_num.begin(), sorted_ijk.begin()),
-          bin_no.begin(),
-          detail::get_bin_no<real_t, n_t, thrust_size_t, out_of_bins>(log_rd_min, log_rd_max)
-        );
-                    
-        // init ijk and rd3 of new particles
-        init_ijk();
-        init_dry_sd_conc(); 
-
-        // translate these new rd3 into bin no; bin_no just got resized
-        thrust::transform(
-          rd3.begin() + n_part_old,
-          rd3.end(),
-          thrust::make_permutation_iterator(count_num.begin(), ijk.begin() + n_part_old),
-          bin_no.begin() + n_part_old,
-          detail::get_bin_no<real_t, n_t, thrust_size_t, out_of_bins>(log_rd_min, log_rd_max)
-        );
-
-        // -- init new SDs that didnt have a match -- 
-        {
-          thrust_device::vector<thrust_size_t> tmp_bin_no(n_part_old);
-          thrust::copy(bin_no.begin(), bin_no.begin() + n_part_old, tmp_bin_no.begin());
-
-          thrust_size_t n_out_of_bins = thrust::count(tmp_bin_no.begin(), tmp_bin_no.end(), out_of_bins);
-        
-          // remove reference to those outside of bins from tmp_bin_no and sorted_ijk
-          thrust::remove_if(
-            sorted_ijk.begin(),
-            sorted_ijk.begin() + n_part_old,
-            tmp_bin_no.begin(),
-            arg::_1 == out_of_bins
-          );
-
-          thrust::remove(
-            tmp_bin_no.begin(),
-            tmp_bin_no.begin() + n_part_old,
-            out_of_bins
-          ); // if these two removes are done in a single step with a tuple, it fails on CUDA; TODO: report this?
-
-          thrust_size_t count_bins;
-          {
-          // remove duplicates from tmp_bin_no
-            thrust::pair<
-              thrust_device::vector<thrust_size_t>::iterator,
-              typename thrust_device::vector<thrust_size_t>::iterator
-            > np = thrust::unique_by_key(tmp_bin_no.begin(), tmp_bin_no.begin() + n_part_old - n_out_of_bins, sorted_ijk.begin());
-            count_bins = np.first - tmp_bin_no.begin(); // total no of bins with a match
-          }
-
-          // --- remove rd3 and ijk of newly added SDs that have counterparts ---
-          thrust_device::vector<bool> have_match(n_part_to_init);
-          // find those with a match
-          thrust::binary_search(
-            thrust::make_zip_iterator(thrust::make_tuple(
-              sorted_ijk.begin(),
-              tmp_bin_no.begin()
-            )),
-            thrust::make_zip_iterator(thrust::make_tuple(
-              sorted_ijk.begin(),
-              tmp_bin_no.begin()
-            )) + count_bins,
-            thrust::make_zip_iterator(thrust::make_tuple(
-              ijk.begin() + n_part_old,
-              bin_no.begin() + n_part_old
-            )),
-            thrust::make_zip_iterator(thrust::make_tuple(
-              ijk.begin() + n_part_old,
-              bin_no.begin() + n_part_old
-            )) + n_part_to_init,
-            have_match.begin(),
-            detail::two_keys_sort<thrust_size_t, thrust_size_t>()
-          );
-          // remove those with a match
-          thrust::remove_if(
-            thrust::make_zip_iterator(thrust::make_tuple(
-              rd3.begin() + n_part_old,
-              ijk.begin() + n_part_old
-            )),
-            thrust::make_zip_iterator(thrust::make_tuple(
-              rd3.begin() + n_part_old,
-              ijk.begin() + n_part_old
-            )) + n_part_to_init,
-            have_match.begin(),
-            thrust::identity<bool>()
-          );
-
-          n_part_to_init -= count_bins;
-          n_part -= count_bins;
-          hskpng_resize_npart();
-
-          // init other peoperties of SDs that didnt have a match
-          init_kappa(
-            opts_init.src_dry_distros.begin()->first
-          ); 
-
-          if(opts_init.diag_incloud_time)
-            init_incloud_time();
-
-          init_n_sd_conc(
-            *(opts_init.src_dry_distros.begin()->second)
-          ); // TODO: document that n_of_lnrd_stp is expected!
-
-          // init rw
-          init_wet();
-
-          // init x, y, z, i, j, k
-          init_xyz();
-
-          // TODO: init chem
-            
-          {
-            // count number of matched bins per cell
-            thrust::pair<
-              thrust_device::vector<thrust_size_t>::iterator,
-              typename thrust_device::vector<thrust_size_t>::iterator
-            > it_pair =  thrust::reduce_by_key(
-              sorted_ijk.begin(), sorted_ijk.begin() + count_bins, 
-              thrust::make_constant_iterator<thrust_size_t>(1), 
-              sorted_ijk.begin(), 
-              tmp_bin_no.begin()
-            );
-            count_bins = it_pair.first - sorted_ijk.begin(); // now it counts no of cells that have any bins matched
-          }
-
-          // set count_num to the number of SDs matched per cell
-          // they still need to be initialized
-          thrust::copy(
-            tmp_bin_no.begin(),
-            tmp_bin_no.begin() + count_bins,
-            thrust::make_permutation_iterator(count_num.begin(), sorted_ijk.begin())
-          );
-          // sorted_ijk no longer valid
-        }
-
-        // tmp vector to hold number of particles in a given size bin in a given cell
-        thrust_device::vector<thrust_size_t> bin_cell_count(n_part_tot_in_src +  n_cell + 1); // needs space for out_of_bins
-        // tmp vector for number of particles in bins up to this one
-        thrust_device::vector<thrust_size_t> bin_cell_count_ptr(n_part_tot_in_src +  n_cell + 1);
-
-        thrust_size_t count_bins;
-        {
-          thrust_device::vector<thrust_size_t> &out(bin_cell_count_ptr); // use it temporarily
-          // calc no of SDs in bins/cells
-          thrust::pair<
-            thrust_device::vector<thrust_size_t>::iterator,
-            typename thrust_device::vector<thrust_size_t>::iterator
-          > np = thrust::reduce_by_key(
-              bin_no.begin(),
-              bin_no.begin() + n_part_bfr_src,
-              thrust::make_constant_iterator<thrust_size_t>(1),
-              out.begin(),// output bin no - in place didn't work well, why?
-              bin_cell_count.begin()// output number of SDs
-            );
-          count_bins = np.second - bin_cell_count.begin(); // number of bins with SDs inside, includes the out_of_bins
-          thrust::copy(out.begin(), out.begin() + count_bins, bin_no.begin());
-        }
-
-        // number of SDs (incl. out_of_bins) in bins up to (i-1)
-        thrust::exclusive_scan(
-          bin_cell_count.begin(), 
-          bin_cell_count.begin() + count_bins, 
-          bin_cell_count_ptr.begin()
-        );
-
-        // remove out of bins from bin_cell_count, bins_no and bin cell_count_ptr
-        count_bins = 
-          thrust::remove_if(
-             thrust::make_zip_iterator(thrust::make_tuple(
-               bin_no.begin(), 
-               bin_cell_count.begin(),
-               bin_cell_count_ptr.begin()
-             )),
-             thrust::make_zip_iterator(thrust::make_tuple(
-               bin_no.begin(), 
-               bin_cell_count.begin(),
-               bin_cell_count_ptr.begin()
-             )) + count_bins,
-             bin_no.begin(),
-             arg::_1 == out_of_bins
-          ) - 
-           thrust::make_zip_iterator(thrust::make_tuple(
-             bin_no.begin(), 
-             bin_cell_count.begin(),
-             bin_cell_count_ptr.begin()
-           )); // count_bins now does not count out_of_bins
-
-        // randomly select which old SD will be increased
-        // overwrites sorted_rd3
-        rand_u01(count_bins);
-
-        // TODO: merge the following transforms into one
-        
-        // randomly choose one SD per bin
-        thrust::transform(
-          u01.begin(),
-          u01.begin() + count_bins,
-          bin_cell_count.begin(),
-          bin_cell_count.begin(),
-          thrust::multiplies<real_t>()
-        );
-        // translate no in bin to the total id
-        thrust::transform(
-          bin_cell_count_ptr.begin(),
-          bin_cell_count_ptr.begin() + count_bins,
-          bin_cell_count.begin(),
-          bin_cell_count.begin(),
-          thrust::plus<real_t>()
-        );
-
-        // --- increase multiplicity of existing SDs ---
-
-        n_part_old = n_part; // number of those before src + no of those w/o match
-        n_part_to_init = count_bins; // number of matched SDs
-        n_part = n_part_old + n_part_to_init;
-        hskpng_resize_npart();
-
-        // copy rd3 and ijk of the selected SDs to the end of the respective vectors
-        thrust::copy(
-          thrust::make_permutation_iterator(
-            rd3.begin(), thrust::make_permutation_iterator(
-              sorted_id.begin(), bin_cell_count.begin()      // translates no of sorted SD into id
-            )
-          ),
-          thrust::make_permutation_iterator(
-            rd3.begin(), thrust::make_permutation_iterator(
-              sorted_id.begin(), bin_cell_count.begin()
-            )
-          ) + count_bins,
-          rd3.begin() + n_part_old     // output
-        );
-        thrust::copy(
-          thrust::make_permutation_iterator(
-            ijk.begin(), thrust::make_permutation_iterator(
-              sorted_id.begin(), bin_cell_count.begin()      // translates no of sorted SD into id
-            )
-          ),
-          thrust::make_permutation_iterator(
-            ijk.begin(), thrust::make_permutation_iterator(
-              sorted_id.begin(), bin_cell_count.begin()
-            )
-          ) + count_bins,
-          ijk.begin() + n_part_old     // output
-        );
-
-        // init n of the copied SDs, but using the src distribution
-        init_n_sd_conc(
-          *(opts_init.src_dry_distros.begin()->second)
-        ); // TODO: document that n_of_lnrd_stp is expected!
-
-        // add the just-initialized multiplicities to the old ones
-        thrust::transform(
-          n.begin() + n_part_old,
-          n.end(),
-          thrust::make_permutation_iterator(
-            n.begin(), thrust::make_permutation_iterator(
-              sorted_id.begin(), bin_cell_count.begin()      // translates no of sorted SD into id
-            )
-          ),
-          thrust::make_permutation_iterator(
-            n.begin(), thrust::make_permutation_iterator(
-              sorted_id.begin(), bin_cell_count.begin()      // translates no of sorted SD into id
-            )
-          ), //in-place
-          thrust::plus<n_t>()
-        );
-        // TODO: check for overflows of na after addition
-
-        // --- properly reduce size of the vectors back to no before src + no w/o match ---
-        n_part = n_part_old;
-        hskpng_resize_npart();   
-      }
+    { 
+      if(opts_init.src_type == src_t::matching)
+        src_dry_distros_matching(dt);
+      if(opts_init.src_type == src_t::simple)
+        src_dry_distros_simple(dt);
     }
   };  
 };
diff --git a/src/impl/particles_impl_src_dry_distros_matching.ipp b/src/impl/particles_impl_src_dry_distros_matching.ipp
new file mode 100644
index 000000000..61faa20bb
--- /dev/null
+++ b/src/impl/particles_impl_src_dry_distros_matching.ipp
@@ -0,0 +1,396 @@
+// vim:filetype=cpp
+/** @file
+  * @copyright University of Warsaw
+  * @section LICENSE
+  * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
+  */
+// #include <limits>
+#include <thrust/unique.h>
+#include <thrust/binary_search.h>
+
+namespace libcloudphxx
+{
+  namespace lgrngn
+  {
+    namespace detail
+    {
+      template<typename t_a, typename t_b>
+      struct two_keys_sort
+      {
+        BOOST_GPU_ENABLED
+        bool operator()(const thrust::tuple<t_a, t_b> &a, const thrust::tuple<t_a, t_b> &b)
+        {
+          if(a.head < b.head) return true;
+          if(a.head == b.head) return a.tail < b.tail;
+          return false;
+        }
+      };
+
+      template<typename real_t, typename n_t, typename res_t, n_t out_of_bins>
+      struct get_bin_no
+      {
+        real_t log_rd_min, log_rd_max, log_diff;
+        get_bin_no(const real_t &log_rd_min, const real_t &log_rd_max): log_rd_min(log_rd_min), log_rd_max(log_rd_max), log_diff(log_rd_max - log_rd_min) {}
+        
+        BOOST_GPU_ENABLED
+        res_t operator()(real_t rd3, n_t count_num)
+        {
+          real_t log_rd = log(rd3) / 3.;
+          if(log_rd < log_rd_min || log_rd >= log_rd_max || count_num == 0) return out_of_bins;
+          else  return ( (log_rd - log_rd_min) / log_diff * count_num); //count num is the number of bins in the cell
+        }
+      };
+    };
+
+    // create new aerosol particles based on a size distribution
+    // if any SDs with dry radius similar to the one to be added are present,
+    // we increase their multiplicity instead of adding new SDs
+    template <typename real_t, backend_t device>
+    void particles_t<real_t, device>::impl::src_dry_distros_matching(const real_t &dt)
+    {   
+      // set number of SDs to init; use count_num as storage
+      init_count_num_src(opts_init.src_sd_conc);
+
+      // -------- TODO: match not only sizes of old particles, but also kappas and chemical composition... --------
+
+      // --- sort already existing SDs; primary key ijk, secondary rd ---
+      // TODO: do all of this only on SDs in cells below src_z1?
+
+      // filling-in sorted_id with a sequence
+      thrust::sequence(sorted_id.begin(), sorted_id.end());
+     
+      // tmp vector with sorted rd3
+      thrust_device::vector<real_t> &sorted_rd3(tmp_device_real_part);
+
+      // use sorted_rd3 as tmp copy of rd3
+      thrust::copy(
+        rd3.begin(), rd3.end(), // from
+        sorted_rd3.begin()      // to
+      );
+
+      // copy ijk to sorted ijk
+      thrust::copy(
+        ijk.begin(), ijk.end(), // from
+        sorted_ijk.begin()      // to
+      );
+
+      // sorting by ijk and rd3
+      thrust::sort_by_key(
+        thrust::make_zip_iterator(thrust::make_tuple(
+          sorted_ijk.begin(),
+          sorted_rd3.begin()
+        )),
+        thrust::make_zip_iterator(thrust::make_tuple(
+          sorted_ijk.begin(),
+          sorted_rd3.begin()
+        )) + n_part,                           // keys
+        sorted_id.begin(),                     // values
+        detail::two_keys_sort<thrust_size_t, real_t>()
+      ); 
+
+      // analyze distribution to get rd_min and max needed for bin sizes
+      // TODO: this could be done once at the beginning of the simulation
+      dist_analysis_sd_conc(
+        *(opts_init.src_dry_distros.begin()->second),
+        opts_init.src_sd_conc,
+        dt
+      ); 
+
+      // --- see how many already existing SDs match size bins ---
+      {
+        namespace arg = thrust::placeholders;
+
+        // set no of particles to init
+        n_part_old = n_part;
+        n_part_to_init = thrust::reduce(count_num.begin(), count_num.end());
+        n_part = n_part_old + n_part_to_init;
+        hskpng_resize_npart();
+
+        thrust_size_t n_part_bfr_src = n_part_old,
+                      n_part_tot_in_src = n_part_to_init;
+
+        // tmp vector with bin number of existing SDs
+        thrust_device::vector<thrust_size_t> bin_no(n_part);
+
+        const thrust_size_t out_of_bins = 4444444444; // would cause an error for src_sd_conc > out_of_bins
+        // calc bin no
+        thrust::transform(
+          sorted_rd3.begin(),
+          sorted_rd3.end(),
+          thrust::make_permutation_iterator(count_num.begin(), sorted_ijk.begin()),
+          bin_no.begin(),
+          detail::get_bin_no<real_t, n_t, thrust_size_t, out_of_bins>(log_rd_min, log_rd_max)
+        );
+                    
+        // init ijk and rd3 of new particles
+        init_ijk();
+        init_dry_sd_conc(); 
+
+        // translate these new rd3 into bin no; bin_no just got resized
+        thrust::transform(
+          rd3.begin() + n_part_old,
+          rd3.end(),
+          thrust::make_permutation_iterator(count_num.begin(), ijk.begin() + n_part_old),
+          bin_no.begin() + n_part_old,
+          detail::get_bin_no<real_t, n_t, thrust_size_t, out_of_bins>(log_rd_min, log_rd_max)
+        );
+
+        // -- init new SDs that didnt have a match -- 
+        {
+          thrust_device::vector<thrust_size_t> tmp_bin_no(n_part_old);
+          thrust::copy(bin_no.begin(), bin_no.begin() + n_part_old, tmp_bin_no.begin());
+
+          thrust_size_t n_out_of_bins = thrust::count(tmp_bin_no.begin(), tmp_bin_no.end(), out_of_bins);
+        
+          // remove reference to those outside of bins from tmp_bin_no and sorted_ijk
+          thrust::remove_if(
+            sorted_ijk.begin(),
+            sorted_ijk.begin() + n_part_old,
+            tmp_bin_no.begin(),
+            arg::_1 == out_of_bins
+          );
+
+          thrust::remove(
+            tmp_bin_no.begin(),
+            tmp_bin_no.begin() + n_part_old,
+            out_of_bins
+          ); // if these two removes are done in a single step with a tuple, it fails on CUDA; TODO: report this?
+
+          thrust_size_t count_bins;
+          {
+          // remove duplicates from tmp_bin_no
+            thrust::pair<
+              thrust_device::vector<thrust_size_t>::iterator,
+              typename thrust_device::vector<thrust_size_t>::iterator
+            > np = thrust::unique_by_key(tmp_bin_no.begin(), tmp_bin_no.begin() + n_part_old - n_out_of_bins, sorted_ijk.begin());
+            count_bins = np.first - tmp_bin_no.begin(); // total no of bins with a match
+          }
+
+          // --- remove rd3 and ijk of newly added SDs that have counterparts ---
+          thrust_device::vector<bool> have_match(n_part_to_init);
+          // find those with a match
+          thrust::binary_search(
+            thrust::make_zip_iterator(thrust::make_tuple(
+              sorted_ijk.begin(),
+              tmp_bin_no.begin()
+            )),
+            thrust::make_zip_iterator(thrust::make_tuple(
+              sorted_ijk.begin(),
+              tmp_bin_no.begin()
+            )) + count_bins,
+            thrust::make_zip_iterator(thrust::make_tuple(
+              ijk.begin() + n_part_old,
+              bin_no.begin() + n_part_old
+            )),
+            thrust::make_zip_iterator(thrust::make_tuple(
+              ijk.begin() + n_part_old,
+              bin_no.begin() + n_part_old
+            )) + n_part_to_init,
+            have_match.begin(),
+            detail::two_keys_sort<thrust_size_t, thrust_size_t>()
+          );
+          // remove those with a match
+          thrust::remove_if(
+            thrust::make_zip_iterator(thrust::make_tuple(
+              rd3.begin() + n_part_old,
+              ijk.begin() + n_part_old
+            )),
+            thrust::make_zip_iterator(thrust::make_tuple(
+              rd3.begin() + n_part_old,
+              ijk.begin() + n_part_old
+            )) + n_part_to_init,
+            have_match.begin(),
+            thrust::identity<bool>()
+          );
+
+          n_part_to_init -= count_bins;
+          n_part -= count_bins;
+          hskpng_resize_npart();
+
+          // init other peoperties of SDs that didnt have a match
+          init_kappa(
+            opts_init.src_dry_distros.begin()->first
+          ); 
+
+          if(opts_init.diag_incloud_time)
+            init_incloud_time();
+
+          init_n_sd_conc(
+            *(opts_init.src_dry_distros.begin()->second)
+          ); // TODO: document that n_of_lnrd_stp is expected!
+
+          // init rw
+          init_wet();
+      
+          // ijk -> i, j, k
+          unravel_ijk(n_part_old);
+
+          // init x, y, z, i, j, k
+          init_xyz();
+
+          // TODO: init chem
+            
+          {
+            // count number of matched bins per cell
+            thrust::pair<
+              thrust_device::vector<thrust_size_t>::iterator,
+              typename thrust_device::vector<thrust_size_t>::iterator
+            > it_pair =  thrust::reduce_by_key(
+              sorted_ijk.begin(), sorted_ijk.begin() + count_bins, 
+              thrust::make_constant_iterator<thrust_size_t>(1), 
+              sorted_ijk.begin(), 
+              tmp_bin_no.begin()
+            );
+            count_bins = it_pair.first - sorted_ijk.begin(); // now it counts no of cells that have any bins matched
+          }
+
+          // set count_num to the number of SDs matched per cell
+          // they still need to be initialized
+          thrust::copy(
+            tmp_bin_no.begin(),
+            tmp_bin_no.begin() + count_bins,
+            thrust::make_permutation_iterator(count_num.begin(), sorted_ijk.begin())
+          );
+          // sorted_ijk no longer valid
+        }
+
+        // tmp vector to hold number of particles in a given size bin in a given cell
+        thrust_device::vector<thrust_size_t> bin_cell_count(n_part_tot_in_src +  n_cell + 1); // needs space for out_of_bins
+        // tmp vector for number of particles in bins up to this one
+        thrust_device::vector<thrust_size_t> bin_cell_count_ptr(n_part_tot_in_src +  n_cell + 1);
+
+        thrust_size_t count_bins;
+        {
+          thrust_device::vector<thrust_size_t> &out(bin_cell_count_ptr); // use it temporarily
+          // calc no of SDs in bins/cells
+          thrust::pair<
+            thrust_device::vector<thrust_size_t>::iterator,
+            typename thrust_device::vector<thrust_size_t>::iterator
+          > np = thrust::reduce_by_key(
+              bin_no.begin(),
+              bin_no.begin() + n_part_bfr_src,
+              thrust::make_constant_iterator<thrust_size_t>(1),
+              out.begin(),// output bin no - in place didn't work well, why?
+              bin_cell_count.begin()// output number of SDs
+            );
+          count_bins = np.second - bin_cell_count.begin(); // number of bins with SDs inside, includes the out_of_bins
+          thrust::copy(out.begin(), out.begin() + count_bins, bin_no.begin());
+        }
+
+        // number of SDs (incl. out_of_bins) in bins up to (i-1)
+        thrust::exclusive_scan(
+          bin_cell_count.begin(), 
+          bin_cell_count.begin() + count_bins, 
+          bin_cell_count_ptr.begin()
+        );
+
+        // remove out of bins from bin_cell_count, bins_no and bin cell_count_ptr
+        count_bins = 
+          thrust::remove_if(
+             thrust::make_zip_iterator(thrust::make_tuple(
+               bin_no.begin(), 
+               bin_cell_count.begin(),
+               bin_cell_count_ptr.begin()
+             )),
+             thrust::make_zip_iterator(thrust::make_tuple(
+               bin_no.begin(), 
+               bin_cell_count.begin(),
+               bin_cell_count_ptr.begin()
+             )) + count_bins,
+             bin_no.begin(),
+             arg::_1 == out_of_bins
+          ) - 
+           thrust::make_zip_iterator(thrust::make_tuple(
+             bin_no.begin(), 
+             bin_cell_count.begin(),
+             bin_cell_count_ptr.begin()
+           )); // count_bins now does not count out_of_bins
+
+        // randomly select which old SD will be increased
+        // overwrites sorted_rd3
+        rand_u01(count_bins);
+
+        // TODO: merge the following transforms into one
+        
+        // randomly choose one SD per bin
+        thrust::transform(
+          u01.begin(),
+          u01.begin() + count_bins,
+          bin_cell_count.begin(),
+          bin_cell_count.begin(),
+          thrust::multiplies<real_t>()
+        );
+        // translate no in bin to the total id
+        thrust::transform(
+          bin_cell_count_ptr.begin(),
+          bin_cell_count_ptr.begin() + count_bins,
+          bin_cell_count.begin(),
+          bin_cell_count.begin(),
+          thrust::plus<real_t>()
+        );
+
+        // --- increase multiplicity of existing SDs ---
+
+        n_part_old = n_part; // number of those before src + no of those w/o match
+        n_part_to_init = count_bins; // number of matched SDs
+        n_part = n_part_old + n_part_to_init;
+        hskpng_resize_npart();
+
+        // copy rd3 and ijk of the selected SDs to the end of the respective vectors
+        thrust::copy(
+          thrust::make_permutation_iterator(
+            rd3.begin(), thrust::make_permutation_iterator(
+              sorted_id.begin(), bin_cell_count.begin()      // translates no of sorted SD into id
+            )
+          ),
+          thrust::make_permutation_iterator(
+            rd3.begin(), thrust::make_permutation_iterator(
+              sorted_id.begin(), bin_cell_count.begin()
+            )
+          ) + count_bins,
+          rd3.begin() + n_part_old     // output
+        );
+        thrust::copy(
+          thrust::make_permutation_iterator(
+            ijk.begin(), thrust::make_permutation_iterator(
+              sorted_id.begin(), bin_cell_count.begin()      // translates no of sorted SD into id
+            )
+          ),
+          thrust::make_permutation_iterator(
+            ijk.begin(), thrust::make_permutation_iterator(
+              sorted_id.begin(), bin_cell_count.begin()
+            )
+          ) + count_bins,
+          ijk.begin() + n_part_old     // output
+        );
+
+        // init n of the copied SDs, but using the src distribution
+        init_n_sd_conc(
+          *(opts_init.src_dry_distros.begin()->second)
+        ); // TODO: document that n_of_lnrd_stp is expected!
+
+        // add the just-initialized multiplicities to the old ones
+        thrust::transform(
+          n.begin() + n_part_old,
+          n.end(),
+          thrust::make_permutation_iterator(
+            n.begin(), thrust::make_permutation_iterator(
+              sorted_id.begin(), bin_cell_count.begin()      // translates no of sorted SD into id
+            )
+          ),
+          thrust::make_permutation_iterator(
+            n.begin(), thrust::make_permutation_iterator(
+              sorted_id.begin(), bin_cell_count.begin()      // translates no of sorted SD into id
+            )
+          ), //in-place
+          thrust::plus<n_t>()
+        );
+        // TODO: check for overflows of na after addition
+
+        // --- properly reduce size of the vectors back to no before src + no w/o match ---
+        n_part = n_part_old;
+        hskpng_resize_npart();   
+      }
+    }
+  };  
+};
diff --git a/src/impl/particles_impl_src_dry_distros_simple.ipp b/src/impl/particles_impl_src_dry_distros_simple.ipp
new file mode 100644
index 000000000..8b7d733b8
--- /dev/null
+++ b/src/impl/particles_impl_src_dry_distros_simple.ipp
@@ -0,0 +1,69 @@
+// vim:filetype=cpp
+/** @file
+  * @copyright University of Warsaw
+  * @section LICENSE
+  * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
+  */
+// #include <limits>
+#include <thrust/unique.h>
+#include <thrust/binary_search.h>
+
+namespace libcloudphxx
+{
+  namespace lgrngn
+  {
+    // create new aerosol particles based on a size distribution
+    template <typename real_t, backend_t device>
+    void particles_t<real_t, device>::impl::src_dry_distros_simple(const real_t &dt)
+    {   
+      // set number of SDs to init; use count_num as storage
+      init_count_num_src(opts_init.src_sd_conc);
+
+      // analyze distribution to get rd_min and max needed for bin sizes
+      // TODO: this could be done once at the beginning of the simulation
+      dist_analysis_sd_conc(
+        *(opts_init.src_dry_distros.begin()->second),
+        opts_init.src_sd_conc,
+        dt
+      ); 
+
+      namespace arg = thrust::placeholders;
+
+      // set no of particles to init
+      n_part_old = n_part;
+      n_part_to_init = thrust::reduce(count_num.begin(), count_num.end());
+      n_part = n_part_old + n_part_to_init;
+      hskpng_resize_npart();
+
+      thrust_size_t n_part_bfr_src = n_part_old,
+                    n_part_tot_in_src = n_part_to_init;
+
+      // init ijk and rd3 of new particles
+      init_ijk();
+      init_dry_sd_conc(); 
+
+      // init other peoperties of SDs that didnt have a match
+      init_kappa(
+        opts_init.src_dry_distros.begin()->first
+      ); 
+
+      if(opts_init.diag_incloud_time)
+        init_incloud_time();
+
+      init_n_sd_conc(
+        *(opts_init.src_dry_distros.begin()->second)
+      ); // TODO: document that n_of_lnrd_stp is expected!
+
+      // init rw
+      init_wet();
+    
+      // ijk -> i, j, k
+      unravel_ijk(n_part_old);
+
+      // init x, y, z, i, j, k
+      init_xyz();
+
+      // TODO: init chem
+    }
+  };  
+};
diff --git a/src/impl/particles_impl_src_dry_sizes.ipp b/src/impl/particles_impl_src_dry_sizes.ipp
index 21c975300..8b44eee62 100644
--- a/src/impl/particles_impl_src_dry_sizes.ipp
+++ b/src/impl/particles_impl_src_dry_sizes.ipp
@@ -73,9 +73,15 @@ namespace libcloudphxx
           if (opts_init.chem_switch){
             chem_vol_ante();
           }
+      
+          // ijk -> i, j, k
+          unravel_ijk(n_part_old);
   
           // initialising particle positions
           init_xyz();
+
+          if(opts_init.diag_incloud_time)
+            init_incloud_time();
         }
       }
     }
diff --git a/src/particles.tpp b/src/particles.tpp
index 4483c8829..477f7a295 100644
--- a/src/particles.tpp
+++ b/src/particles.tpp
@@ -7,6 +7,9 @@
   */
 
 #include <iostream>
+#if !defined(NDEBUG)
+  #include <iomanip>
+#endif
 
 #include <libcloudph++/lgrngn/particles.hpp>
 
@@ -111,9 +114,15 @@
 #include "impl/particles_impl_rcyc.ipp"
 #include "impl/particles_impl_sstp.ipp"
 #include "impl/particles_impl_sstp_chem.ipp"
+#include "impl/particles_impl_ante_adding_SD.ipp"
+#include "impl/particles_impl_post_adding_SD.ipp"
 #include "impl/particles_impl_src.ipp"
+#include "impl/particles_impl_src_dry_distros_simple.ipp"
+#include "impl/particles_impl_src_dry_distros_matching.ipp"
 #include "impl/particles_impl_src_dry_distros.ipp"
 #include "impl/particles_impl_src_dry_sizes.ipp"
+#include "impl/particles_impl_rlx.ipp"
+#include "impl/particles_impl_rlx_dry_distros.ipp"
 #include "impl/particles_impl_update_incloud_time.ipp"
 #include "impl/particles_impl_adjust_timesteps.ipp"
 
diff --git a/src/particles_ctor.ipp b/src/particles_ctor.ipp
index 2e02b299f..d99580831 100644
--- a/src/particles_ctor.ipp
+++ b/src/particles_ctor.ipp
@@ -9,11 +9,6 @@
 #include <thrust/host_vector.h>
 #include <thrust/iterator/constant_iterator.h>
 
-#include <boost/numeric/odeint.hpp>
-#include <boost/numeric/odeint/external/thrust/thrust_algebra.hpp>
-#include <boost/numeric/odeint/external/thrust/thrust_operations.hpp>
-#include <boost/numeric/odeint/external/thrust/thrust_resize.hpp>
-
 #include <map>
 
 namespace libcloudphxx
diff --git a/src/particles_init.ipp b/src/particles_init.ipp
index 672682453..5fcf9554c 100644
--- a/src/particles_init.ipp
+++ b/src/particles_init.ipp
@@ -28,7 +28,8 @@ namespace libcloudphxx
       pimpl->init_sanity_check(th, rv, rhod, p, courant_x, courant_y, courant_z, ambient_chem);
 
       // set rng seed to be used during init
-      pimpl->rng.reseed(pimpl->opts_init.rng_seed_init);
+      if(pimpl->opts_init.rng_seed_init_switch)
+        pimpl->rng.reseed(pimpl->opts_init.rng_seed_init);
 
       // is a constant pressure profile used?
       pimpl->const_p = !p.is_null();
diff --git a/src/particles_multi_gpu_ctor.ipp b/src/particles_multi_gpu_ctor.ipp
index e51acbf57..f5d1905e9 100644
--- a/src/particles_multi_gpu_ctor.ipp
+++ b/src/particles_multi_gpu_ctor.ipp
@@ -37,6 +37,9 @@ namespace libcloudphxx
       const std::map<enum chem_species_t, const arrinfo_t<real_t> > ambient_chem
     )
     {
+      if(pimpl->glob_opts_init.rlx_switch)
+        std::cerr << "libcloudph++ WARNING: relaxation is not fully supported in the multi_CUDA backend. Mean calculation and addition of SD will be done locally on each GPU." << std::endl;
+
       pimpl->mcuda_run(
         &particles_t<real_t, CUDA>::init,
         th, rv, rhod, p, courant_1, courant_2, courant_3, ambient_chem
diff --git a/src/particles_step.ipp b/src/particles_step.ipp
index 0fa2f925e..0ffee2b0e 100644
--- a/src/particles_step.ipp
+++ b/src/particles_step.ipp
@@ -261,30 +261,13 @@ namespace libcloudphxx
         }
       }
 
-      // aerosol source, in sync since it changes th/rv
-      if (opts.src && !(pimpl->opts_init.src_x0 == 0 && pimpl->opts_init.src_x1 == 0)) // src_x0=0 and src_x1=0 is a way of disabling source in some domains in distmem simulations
-      {
-        // sanity check
-        if (pimpl->opts_init.src_switch == false) throw std::runtime_error("aerosol source was switched off in opts_init");
-
-        // introduce new particles with the given time interval
-        if(pimpl->stp_ctr % pimpl->opts_init.supstp_src == 0) 
-        {
-          pimpl->src(pimpl->opts_init.supstp_src * pimpl->dt);
-        }
-      }
-
-      if(opts.cond || (opts.src && pimpl->stp_ctr % pimpl->opts_init.supstp_src == 0))
+      if(opts.cond) // || (opts.src && pimpl->src_stp_ctr % pimpl->opts_init.supstp_src == 0) || (opts.rlx && pimpl->rlx_stp_ctr % pimpl->opts_init.supstp_rlx == 0))
       {
         // syncing out // TODO: this is not necesarry in off-line mode (see coupling with DALES)
         pimpl->sync(pimpl->th, th);
         pimpl->sync(pimpl->rv, rv);
       }
 
-      // update the step counter since src was turned on
-      if (opts.src) ++pimpl->stp_ctr;
-      else pimpl->stp_ctr = 0; //reset the counter if source was turned off
-
       if (opts.chem_dsl == true)
       {
         // syncing out trace gases // TODO: this is not necesarry in off-line mode (see coupling with DALES)
@@ -409,6 +392,42 @@ namespace libcloudphxx
         pimpl->subs(pimpl->dt);
       }
 
+      // NOTE: source and relax should affect th and rv (because we add humidifed aerosols), but these changes are minimal and we neglect them.
+      //       otherwise src and rlx should be don in step_sync
+
+      // aerosol source, in sync since it changes th/rv
+      if (opts.src && !(pimpl->opts_init.src_x0 == 0 && pimpl->opts_init.src_x1 == 0)) // src_x0=0 and src_x1=0 is a way of disabling source in some domains in distmem simulations
+      {
+        // sanity check
+        if (pimpl->opts_init.src_type == src_t::off) throw std::runtime_error("aerosol source was switched off in opts_init");
+
+        // introduce new particles with the given time interval
+        if(pimpl->src_stp_ctr % pimpl->opts_init.supstp_src == 0) 
+        {
+          pimpl->src(pimpl->opts_init.supstp_src * pimpl->dt);
+        }
+      }
+
+      // aerosol relaxation, in sync since it changes th/rv
+      // TODO: more sanity checks for rlx! 3D only, values of rlx_bins etc. check that appa ranges are exclusive, zmin<zmax, kpamin<kpamax,...
+      if (opts.rlx)
+      {
+        // sanity check
+        if (pimpl->opts_init.rlx_switch == false) throw std::runtime_error("aerosol relaxation was switched off in opts_init");
+
+        // introduce new particles with the given time interval
+        if(pimpl->rlx_stp_ctr % pimpl->opts_init.supstp_rlx == 0) 
+        {
+          pimpl->rlx(pimpl->opts_init.supstp_rlx * pimpl->dt);
+        }
+      }
+
+      // update the step counter since src/rlx was turned on
+      if (opts.src) ++pimpl->src_stp_ctr;
+      else pimpl->src_stp_ctr = 0; //reset the counter if source was turned off
+      if (opts.rlx) ++pimpl->rlx_stp_ctr;
+      else pimpl->rlx_stp_ctr = 0; //reset the counter if source was turned off
+
       // boundary condition + accumulated rainfall to be returned
       // distmem version overwrites i and tmp_device_size_part
       // and they both need to be unchanged untill distmem copies
diff --git a/tests/python/unit/CMakeLists.txt b/tests/python/unit/CMakeLists.txt
index c3d337546..872f6b52c 100644
--- a/tests/python/unit/CMakeLists.txt
+++ b/tests/python/unit/CMakeLists.txt
@@ -1,5 +1,6 @@
 # non-pytest tests
-foreach(test api_blk_1m api_blk_2m api_lgrngn api_common segfault_20150216 col_kernels terminal_velocities SD_removal uniform_init source chem_coal sstp_cond multiple_kappas adve_scheme lgrngn_subsidence sat_adj_blk_1m diag_incloud_time)
+foreach(test api_blk_1m api_blk_2m api_lgrngn api_common segfault_20150216 col_kernels terminal_velocities uniform_init source sstp_cond multiple_kappas adve_scheme lgrngn_subsidence sat_adj_blk_1m diag_incloud_time relax)
+
   #TODO: indicate that tests depend on the lib
   add_test(
     NAME ${test}
@@ -8,6 +9,18 @@ foreach(test api_blk_1m api_blk_2m api_lgrngn api_common segfault_20150216 col_k
   )
 endforeach()
 
+# tests with chemistry, chemistry does not work with MPI
+if(NOT USE_MPI)
+  foreach(test SD_removal chem_coal)
+    #TODO: indicate that tests depend on the lib
+    add_test(
+      NAME ${test}
+      WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/bindings/python" 
+      COMMAND ${PYTHON_EXECUTABLE} "${CMAKE_SOURCE_DIR}/tests/python/unit/${test}.py"
+    )
+  endforeach()
+endif()
+
 ## pytest tests run with "python -m pytest"
 foreach(test lgrngn_adve)
   #TODO: indicate that tests depend on the lib
diff --git a/tests/python/unit/api_lgrngn.py b/tests/python/unit/api_lgrngn.py
index 7f233d7cc..ce5c912a8 100644
--- a/tests/python/unit/api_lgrngn.py
+++ b/tests/python/unit/api_lgrngn.py
@@ -34,7 +34,9 @@ def lognormal(lnr):
 opts_init.n_sd_max = int(1e6) # some space for tail SDs
 opts_init.rng_seed = 396
 opts_init.rng_seed_init = 456
+opts_init.rng_seed_init_switch = True
 opts_init.src_dry_distros = {kappa1:lognormal}
+opts_init.rlx_dry_distros = {kappa1:[lognormal, [0,2], [0,opts_init.dz]]}
 opts_init.src_sd_conc = 64
 opts_init.src_z1 = opts_init.dz
 opts_init.diag_incloud_time = True
@@ -67,7 +69,7 @@ def lognormal(lnr):
 print("coal_switch = ", opts_init.coal_switch)
 print("sedi_switch = ", opts_init.sedi_switch)
 print("subs_switch = ", opts_init.subs_switch)
-print("src_switch = ", opts_init.src_switch)
+print("src_type = ", opts_init.src_type)
 
 print("exact_sstp_cond = ", opts_init.exact_sstp_cond)
 print("sstp_cond =", opts_init.sstp_cond)
diff --git a/tests/python/unit/relax.py b/tests/python/unit/relax.py
new file mode 100644
index 000000000..ee0289794
--- /dev/null
+++ b/tests/python/unit/relax.py
@@ -0,0 +1,172 @@
+import sys
+#try:
+#  import boost.mpi
+#except:
+#  pass
+
+sys.path.insert(0, "../../bindings/python/")
+sys.path.insert(0, "../../../build/bindings/python/")
+
+from libcloudphxx import lgrngn
+
+from numpy import array as arr_t, frombuffer
+
+from math import exp, log, sqrt, pi
+from time import time
+
+def lognormal(lnr):
+  mean_r = .04e-6 / 2
+  stdev  = 1.4
+  n_tot  = 60e6 
+  return n_tot * exp(
+    -pow((lnr - log(mean_r)), 2) / 2 / pow(log(stdev),2)
+  ) / log(stdev) / sqrt(2*pi);
+
+def lognormal_rlx(lnr):
+  mean_r = .04e-6 / 2
+  stdev  = 1.4
+  n_tot  = 120e6 
+  return n_tot * exp(
+    -pow((lnr - log(mean_r)), 2) / 2 / pow(log(stdev),2)
+  ) / log(stdev) / sqrt(2*pi);
+
+#def lognormal_rlx(lnr):
+#  mean_r = .10e-6 / 2
+#  stdev  = 1.4
+#  n_tot  = 60e4
+#  return n_tot * exp(
+#    -pow((lnr - log(mean_r)), 2) / 2 / pow(log(stdev),2)
+#  ) / log(stdev) / sqrt(2*pi);
+
+def test(opts_init):
+  opts_init.supstp_rlx = 2
+  opts_init.rng_seed = int(time())
+  opts_init.dt = 1
+  opts_init.nx = 2;
+  opts_init.nz = 2;
+  opts_init.dx=1.;
+  opts_init.dz=1.;
+  opts_init.x0=0.;
+  opts_init.z0=0.;
+  opts_init.x1=opts_init.nx * opts_init.dx;
+  opts_init.z1=opts_init.nz * opts_init.dz;
+  opts_init.aerosol_independent_of_rhod=1;
+
+  opts_init.y0=0.;
+  opts_init.y1=1.;
+  
+  opts_init.chem_switch = 0;
+  opts_init.coal_switch = 0;
+  opts_init.adve_switch = 0;
+  opts_init.cond_switch = 0;
+  opts_init.sedi_switch = 0;
+  opts_init.rlx_switch = 1;
+  
+  opts = lgrngn.opts_t()
+  
+  opts.adve = 0;
+  opts.chem = 0;
+  opts.sedi = 0;
+  opts.coal = 0;
+  opts.cond = 0;
+  
+  rhod = arr_t([[  1.,    1.  ],[   1.,     1.  ]])
+  th   = arr_t([[300.,  300.  ],[ 300.,   300.  ]])
+  rv   = arr_t([[   .01,   .01],[    .01,    .01]])
+  
+  try:
+    prtcls = lgrngn.factory(lgrngn.backend_t.OpenMP, opts_init)
+  except:
+    prtcls = lgrngn.factory(lgrngn.backend_t.serial, opts_init)
+  
+  prtcls.init(th, rv, rhod)
+  
+  # 2 steps during which relaxation should be done once
+  opts.rlx = 1
+  for i in range(2):
+    prtcls.step_sync(opts,th,rv,rhod)
+    prtcls.step_async(opts)
+  
+  prtcls.diag_all()
+  prtcls.diag_sd_conc()
+  sd_conc = frombuffer(prtcls.outbuf()).copy()
+  
+  prtcls.diag_all()
+  prtcls.diag_wet_mom(0)
+  wet_mom0 = frombuffer(prtcls.outbuf()).copy()
+  
+  prtcls.diag_all()
+  prtcls.diag_wet_mom(1)
+  wet_mom1 = frombuffer(prtcls.outbuf()).copy()
+
+  return sd_conc, wet_mom0, wet_mom1
+
+# test source with dry_distros
+kappa = .61
+opts_init = lgrngn.opts_init_t()
+opts_init.dry_distros = {kappa:lognormal}
+opts_init.rlx_dry_distros = {kappa: [lognormal_rlx, [0,2],[0,opts_init.dz]]}
+opts_init.sd_conc = 1024
+opts_init.rlx_bins = 1024
+opts_init.rlx_timescale = 4 # whole simulation time is 2, so this means we should get half of the droplets added
+
+
+
+
+print(' --- dry_distros rlx sd_per_bin = 1 ---')
+
+opts_init.rlx_sd_per_bin = 1
+opts_init.n_sd_max = int((opts_init.sd_conc * 2 + opts_init.rlx_bins * opts_init.rlx_sd_per_bin * 2) * 2) # assuming nx=nz=2
+
+sd_conc, wet_mom0, wet_mom1 = test(opts_init)
+
+print('diag_sd_conc', sd_conc)
+# relaxation should add SD only in the lower cells
+# there is some randomness in the number of SD that should be added
+exp_sd_min = 1024 + opts_init.rlx_sd_per_bin * 400
+exp_sd_max = 1024 + opts_init.rlx_sd_per_bin * 600
+if sd_conc[0] < exp_sd_min or sd_conc[0] > exp_sd_max or sd_conc[2] < exp_sd_min or sd_conc[2] > exp_sd_max:
+  raise Exception("wrong amount of SDs were added")
+if not(sd_conc[1] == 1024 and sd_conc[3] == 1024):
+  raise Exception("SDs were added in wrong cells")
+
+print(('wet mom0', wet_mom0))
+print((wet_mom0[0] + wet_mom0[2]) / (wet_mom0[1] + wet_mom0[3])) # relax n_stp is two times bigger
+if abs((wet_mom0[0] + wet_mom0[2]) / (wet_mom0[1] + wet_mom0[3]) - 1.5)  > 0.01:  # we expect 1.5, because n_rlx is two tmes bigger, but relaxation time scale is twice the simulation time
+  raise Exception("incorrect multiplicity after source")
+
+print(('wet mom1', wet_mom1))
+print((wet_mom1[0] + wet_mom1[2]) / (wet_mom1[1] + wet_mom1[3])) # relax n_stp is two times bigger
+if abs((wet_mom1[0] + wet_mom1[2]) / (wet_mom1[1] + wet_mom1[3]) - 1.5)  > 0.01:
+  raise Exception("incorrect radius after source")
+
+
+
+
+print(' --- dry_distros rlx sd_per_bin = 10 ---')
+
+opts_init.rlx_sd_per_bin = 10
+opts_init.n_sd_max = int((opts_init.sd_conc * 2 + opts_init.rlx_bins * opts_init.rlx_sd_per_bin * 2) * 2) # assuming nx=nz=2
+
+sd_conc, wet_mom0, wet_mom1 = test(opts_init)
+
+print('diag_sd_conc', sd_conc)
+# relaxation should add SD only in the lower cells
+# there is some randomness in the number of SD that should be added
+exp_sd_min = 1024 + opts_init.rlx_sd_per_bin * 400
+exp_sd_max = 1024 + opts_init.rlx_sd_per_bin * 600
+if sd_conc[0] < exp_sd_min or sd_conc[0] > exp_sd_max or sd_conc[2] < exp_sd_min or sd_conc[2] > exp_sd_max:
+  raise Exception("wrong amount of SDs were added")
+if not(sd_conc[1] == 1024 and sd_conc[3] == 1024):
+  raise Exception("SDs were added in wrong cells")
+
+print(('wet mom0', wet_mom0))
+print((wet_mom0[0] + wet_mom0[2]) / (wet_mom0[1] + wet_mom0[3])) # relax n_stp is two times bigger
+if abs((wet_mom0[0] + wet_mom0[2]) / (wet_mom0[1] + wet_mom0[3]) - 1.5)  > 0.01:
+  raise Exception("incorrect multiplicity after source")
+
+print(('wet mom1', wet_mom1))
+print((wet_mom1[0] + wet_mom1[2]) / (wet_mom1[1] + wet_mom1[3])) # relax n_stp is two times bigger
+if abs((wet_mom1[0] + wet_mom1[2]) / (wet_mom1[1] + wet_mom1[3]) - 1.5)  > 0.01:
+  raise Exception("incorrect radius after source")
+
diff --git a/tests/python/unit/source.py b/tests/python/unit/source.py
index 8e6b79911..2b12e5ff5 100644
--- a/tests/python/unit/source.py
+++ b/tests/python/unit/source.py
@@ -51,7 +51,6 @@ def test(opts_init):
   opts_init.adve_switch = 0;
   opts_init.cond_switch = 0;
   opts_init.sedi_switch = 0;
-  opts_init.src_switch = 1;
   
   opts = lgrngn.opts_t()
   
@@ -92,16 +91,43 @@ def test(opts_init):
 
   return sd_conc, wet_mom0, wet_mom1
 
-# test source with dry_distros
 kappa = .61
+
+# ----------- test source with dry_distros simple ------------------
+print(' --- dry_distros simple src ---')
 opts_init = lgrngn.opts_init_t()
 opts_init.dry_distros = {kappa:lognormal}
 opts_init.src_dry_distros = {kappa:lognormal_src}
 opts_init.sd_conc = 1024
 opts_init.src_sd_conc = 512
 opts_init.n_sd_max = int((opts_init.sd_conc * 2 + opts_init.src_sd_conc * 2) * 2) # assuming nx=nz=2
+opts_init.src_type = lgrngn.src_t.simple;
+
+sd_conc, wet_mom0, wet_mom1 = test(opts_init)
 
-print(' --- dry_distros src ---')
+print('diag_sd_conc', sd_conc)
+if not(sd_conc[0] == 2048 and sd_conc[2] == 2048):
+  raise Exception("wrong amount of SDs were added")
+if not(sd_conc[1] == 1024 and sd_conc[3] == 1024):
+  raise Exception("SDs were added in wrong cells")
+
+print(('wet mom0', wet_mom0))
+if (abs( 2 - (wet_mom0[0] + wet_mom0[2]) / (wet_mom0[1] + wet_mom0[3]) ) > 0.015):
+  raise Exception("incorrect multiplicity after source")
+
+print(('wet mom1', wet_mom1))
+if (abs( (7.84 / 2.12) - (wet_mom1[0] + wet_mom1[2]) / (wet_mom1[1] + wet_mom1[3]) ) > 0.015):
+  raise Exception("incorrect radius after source")
+
+# --------------- test source with dry_distros matching ------------------
+print(' --- dry_distros matching src ---')
+opts_init = lgrngn.opts_init_t()
+opts_init.dry_distros = {kappa:lognormal}
+opts_init.src_dry_distros = {kappa:lognormal_src}
+opts_init.sd_conc = 1024
+opts_init.src_sd_conc = 512
+opts_init.n_sd_max = int((opts_init.sd_conc * 2 + opts_init.src_sd_conc * 2) * 2) # assuming nx=nz=2
+opts_init.src_type = lgrngn.src_t.matching;
 
 sd_conc, wet_mom0, wet_mom1 = test(opts_init)
 
@@ -119,14 +145,14 @@ def test(opts_init):
 if (abs( (7.84 / 2.12) - (wet_mom1[0] + wet_mom1[2]) / (wet_mom1[1] + wet_mom1[3]) ) > 0.015):
   raise Exception("incorrect radius after source")
 
-# test source with dry_sizes
-kappa = .61
+# --------- test source with dry_sizes ------------
+print(' --- dry_sizes src ---')
 opts_init = lgrngn.opts_init_t()
 opts_init.dry_sizes = {kappa : {1.e-6  : [30., 20], 15.e-6 : [10., 10]}}
 opts_init.src_dry_sizes = {kappa : {1.e-6  : [0.3, 10], 15.e-6 : [0.1, 5]}}
 opts_init.n_sd_max=240
+opts_init.src_type = lgrngn.src_t.simple; # dry sizes works the same for simple and matching (no matching done)
 
-print(' --- dry_sizes src ---')
 
 sd_conc, wet_mom0, wet_mom1 = test(opts_init)